Candidate release of source code.

This commit is contained in:
Dan 2019-03-26 13:45:32 -04:00
parent db81e6b3b0
commit 79d8f164f8
12449 changed files with 2800756 additions and 16 deletions

View file

@ -0,0 +1,351 @@
lexer grammar BaseLexer;
options {
superClass = AbstractSleighLexer;
tokenVocab = SleighLexer;
}
// Token types declared without a lexer rule of their own. The grammar's description
// (the doc comment following this block) identifies them as the tree tokens that the
// parsers place in the AST.
tokens {
OP_ADD;
OP_ADDRESS_OF;
OP_ALIGNMENT;
OP_AND;
OP_APPLY;
OP_ARGUMENTS;
OP_ASSIGN;
OP_BIG;
OP_BIN_CONSTANT;
OP_BITRANGE;
OP_BITRANGE2;
OP_BITRANGES;
OP_BIT_PATTERN;
OP_BOOL_AND;
OP_BOOL_OR;
OP_BOOL_XOR;
OP_BUILD;
OP_CALL;
OP_CONCATENATE;
OP_CONSTRUCTOR;
OP_CONTEXT;
OP_CONTEXT_BLOCK;
OP_CROSSBUILD;
OP_CTLIST;
OP_DEC;
OP_DECLARATIVE_SIZE;
OP_DEC_CONSTANT;
OP_DEFAULT;
OP_DEREFERENCE;
OP_DISPLAY;
OP_DIV;
OP_ELLIPSIS;
OP_ELLIPSIS_RIGHT;
OP_EMPTY_LIST;
OP_ENDIAN;
OP_EQUAL;
OP_EXPORT;
OP_FADD;
OP_FDIV;
OP_FEQUAL;
OP_FGREAT;
OP_FGREATEQUAL;
OP_FIELDDEF;
OP_FIELDDEFS;
OP_FIELD_MODS;
OP_FLESS;
OP_FLESSEQUAL;
OP_FMULT;
OP_FNEGATE;
OP_FNOTEQUAL;
OP_FSUB;
OP_GOTO;
OP_GREAT;
OP_GREATEQUAL;
OP_HEX;
OP_HEX_CONSTANT;
OP_IDENTIFIER;
OP_IDENTIFIER_LIST;
OP_IF;
OP_INTBLIST;
OP_INVERT;
OP_JUMPDEST_ABSOLUTE;
OP_JUMPDEST_DYNAMIC;
OP_JUMPDEST_LABEL;
OP_JUMPDEST_RELATIVE;
OP_JUMPDEST_SYMBOL;
OP_LABEL;
OP_LEFT;
OP_LESS;
OP_LESSEQUAL;
OP_LITTLE;
OP_LOCAL;
OP_MACRO;
OP_MULT;
OP_NAMES;
OP_NEGATE;
OP_NIL;
OP_NOFLOW;
OP_NOP;
OP_NOT;
OP_NOTEQUAL;
OP_NOT_DEFAULT;
OP_NO_CONTEXT_BLOCK;
OP_NO_FIELD_MOD;
OP_OR;
OP_PARENTHESIZED;
OP_PCODE;
OP_PCODEOP;
OP_QSTRING;
OP_REM;
OP_RETURN;
OP_RIGHT;
OP_SDIV;
OP_SECTION_LABEL;
OP_SEMANTIC;
OP_SEQUENCE;
OP_SGREAT;
OP_SGREATEQUAL;
OP_SIGNED;
OP_SIZING_SIZE;
OP_SIZE;
OP_SLESS;
OP_SLESSEQUAL;
OP_SPACE;
OP_SPACEMODS;
OP_SREM;
OP_SRIGHT;
OP_STRING;
OP_STRING_OR_IDENT_LIST;
OP_SUB;
OP_SUBTABLE;
OP_TABLE;
OP_TOKEN;
OP_TRUNCATION_SIZE;
OP_TYPE;
OP_UNIMPL;
OP_VALUES;
OP_VARIABLES;
OP_VARNODE;
OP_WHITESPACE;
OP_WILDCARD;
OP_WITH;
OP_WORDSIZE;
OP_XOR;
}
/**
* This lexer represents the rules that are common to all sleigh lexers. It is also the "default"
* lexer used at the start of the .slaspec file being compiled. It specifies the tree tokens output
* within the AST by the parsers as well as all the keywords used throughout the language. Note
* that 'is' and 'if' are treated as reserved words in the display and semantics parsers,
* respectively.
*/
// Preprocessor-generated directives
// PP_ESCAPE is the backspace character; it is used only as the delimiter of PP_POSITION.
fragment
PP_ESCAPE
: '\b'
;
// A run of characters (anything except newline and the delimiter) enclosed between two
// PP_ESCAPE characters. The action strips both delimiters from the token text, passes the
// remaining text to preprocess(), and routes the token to the PREPROC channel.
PP_POSITION
: PP_ESCAPE ~('\n'|PP_ESCAPE)* PP_ESCAPE { setText(getText().substring(1, getText().length()-1)); preprocess(getText()); $channel = PREPROC; }
;
// Reserved words and keywords
// Each rule below matches exactly one literal word. RES_WITH is the only word with the
// RES_ prefix in this lexer; the grammar description above notes that 'is' and 'if' are
// reserved by the display and semantics lexers instead.
RES_WITH : 'with';
KEY_ALIGNMENT : 'alignment';
KEY_ATTACH : 'attach';
KEY_BIG : 'big';
KEY_BITRANGE : 'bitrange';
KEY_BUILD : 'build';
KEY_CALL : 'call';
KEY_CONTEXT : 'context';
KEY_CROSSBUILD : 'crossbuild';
KEY_DEC : 'dec';
KEY_DEFAULT : 'default';
KEY_DEFINE : 'define';
KEY_ENDIAN : 'endian';
KEY_EXPORT : 'export';
KEY_GOTO : 'goto';
KEY_HEX : 'hex';
KEY_LITTLE : 'little';
KEY_LOCAL : 'local';
KEY_MACRO : 'macro';
KEY_NAMES : 'names';
KEY_NOFLOW : 'noflow';
KEY_OFFSET : 'offset';
KEY_PCODEOP : 'pcodeop';
KEY_RETURN : 'return';
KEY_SIGNED : 'signed';
KEY_SIZE : 'size';
KEY_SPACE : 'space';
KEY_TOKEN : 'token';
KEY_TYPE : 'type';
KEY_UNIMPL : 'unimpl';
KEY_VALUES : 'values';
KEY_VARIABLES : 'variables';
KEY_WORDSIZE : 'wordsize';
// Grouping, block, and sectioning symbols
// Punctuation and operator tokens. Every rule in this group is a fixed literal; several
// share a prefix with a longer literal (for example '<', '<=' and '<<', or '=' and '==').
LBRACE : '{';
RBRACE : '}';
LBRACKET : '[';
RBRACKET : ']';
LPAREN : '(';
RPAREN : ')';
// Miscellaneous
ELLIPSIS : '...';
UNDERSCORE : '_';
COLON : ':';
COMMA : ',';
EXCLAIM : '!';
TILDE : '~';
SEMI : ';';
// ----------
// Operators:
// ----------
ASSIGN : '=';
// Comparisons
EQUAL : '==';
NOTEQUAL : '!=';
LESS : '<';
GREAT : '>';
LESSEQUAL : '<=';
GREATEQUAL : '>=';
// Boolean and bitwise logic operations
BOOL_OR : '||';
BOOL_XOR : '^^';
BOOL_AND : '&&';
PIPE : '|';
CARET : '^';
AMPERSAND : '&';
// Shifting operations
LEFT : '<<';
RIGHT : '>>';
// Arithmetic operations
PLUS : '+';
MINUS : '-';
ASTERISK : '*';
SLASH : '/';
PERCENT : '%';
// Explicitly named boolean operations
// (word-style spellings introduced by a leading '$')
SPEC_OR : '$or';
SPEC_AND : '$and';
SPEC_XOR : '$xor';
// IDs, Literals
// An identifier starts with a letter, '_' or '.', and continues with any number of
// letters, '_', '.' or decimal digits. It may therefore not begin with a digit.
IDENTIFIER
: ALPHAUP (ALPHAUP | DIGIT)*
;
// ASCII letters, either case.
fragment
ALPHA
: 'A'..'Z'
| 'a'..'z'
;
// Despite the name, this is not "uppercase": it is ALPHA extended with '_' and '.'.
fragment
ALPHAUP
: ALPHA
| '_'
| '.'
;
// A single decimal digit.
fragment
DIGIT
: '0'..'9'
;
// A double-quoted string. The body is any mix of ESCAPE sequences and characters other
// than backslash and double quote. The action removes the surrounding quotes from the
// token text; it does not decode the escape sequences.
QSTRING
: '"' (ESCAPE | ~('\\' | '"'))* '"' { setText(getText().substring(1, getText().length()-1)); }
;
// A backslash followed by one of the listed single characters, or a unicode or octal escape.
fragment
ESCAPE
: '\\' ('b' | 't' | 'n' | 'f' | 'r' | '\"' | '\'' | '\\')
| UNICODE_ESCAPE
| OCTAL_ESCAPE
;
// One to three octal digits after a backslash; the three-digit form limits the first
// digit to 0..3.
fragment
OCTAL_ESCAPE
: '\\' ('0'..'3') ('0'..'7') ('0'..'7')
| '\\' ('0'..'7') ('0'..'7')
| '\\' ('0'..'7')
;
// Backslash, 'u', then exactly four hexadecimal digits.
fragment
UNICODE_ESCAPE
: '\\' 'u' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
;
// A hexadecimal digit in either letter case.
fragment
HEXDIGIT
: DIGIT
| 'a'..'f'
| 'A'..'F'
;
// Integer literals. None of these rules accepts a sign, and the token text keeps any
// '0x' / '0b' prefix (no action rewrites it).
// One or more decimal digits.
DEC_INT
: DIGIT+
;
// Lower-case '0x' followed by one or more hexadecimal digits.
HEX_INT
: '0x' (HEXDIGIT)+
;
// Lower-case '0b' followed by one or more binary digits.
BIN_INT
: '0b' (BINDIGIT)+
;
fragment
BINDIGIT
: '0'..'1'
;
// Ignored things, Errors
// A comment runs from '#' up to and including the end-of-line sequence, and is routed to
// the COMMENT channel.
// NOTE(review): the rule requires EOL, so a comment on a final line that has no trailing
// newline would presumably not match -- confirm whether the input is guaranteed to end
// with a newline.
LINECOMMENT
: '#' ~('\n' | '\r')* EOL { $channel = COMMENT; }
;
// End of line: an optional carriage return followed by a newline (selected by the
// syntactic predicate), otherwise a lone carriage return.
fragment
EOL
: ('\r'? '\n')=> '\r'? '\n'
| '\r'
;
// Matches only the two characters '//' and reports them as an unwanted token. The rule
// does not consume the rest of the line, and it does not change the token's channel.
CPPCOMMENT
: '//'
{
SleighToken st = new SleighToken(_type, getText());
UnwantedTokenException ute = new UnwantedTokenException(0, input);
ute.token = st;
reportError(ute);
}
;
// Runs of spaces, tabs, carriage returns and newlines go to the HIDDEN channel.
WS
: (' ' | '\t' | '\r' | '\n')+ { $channel = HIDDEN; }
;
// Catch-all: matches any single character and reports it as an unwanted token, using the
// same error-reporting sequence as CPPCOMMENT.
UNKNOWN
: .
{
SleighToken st = new SleighToken(_type, getText());
UnwantedTokenException ute = new UnwantedTokenException(0, input);
ute.token = st;
reportError(ute);
}
;

View file

@ -0,0 +1,122 @@
grammar BooleanExpression;
// Literal tokens of the boolean-expression language: the logical operators, the two
// equality operators, and the 'defined' keyword.
tokens {
OP_OR = '||';
OP_XOR = '^^';
OP_AND = '&&';
OP_NOT = '!';
OP_EQ = '==';
OP_NEQ = '!=';
KEY_DEFINED = 'defined';
}
@members {
	// Environment used by the rule actions to look up macro values, compare them, and
	// report errors. It must be assigned before parsing input that references it.
	public ExpressionEnvironment env;

	// Debugging entry point: parses the file named by the single command-line argument as
	// a boolean expression and prints the result.
	// NOTE(review): env is never assigned here, so input containing identifiers or the
	// 'defined' form fails with a NullPointerException (caught and printed below) --
	// confirm whether a default environment should be installed.
	public static void main(String[] args) {
		// Without this guard a missing argument surfaced only as an
		// ArrayIndexOutOfBoundsException stack trace.
		if (args.length != 1) {
			System.err.println("usage: BooleanExpressionParser <expression-file>");
			return;
		}
		try {
			CharStream input = new ANTLRFileStream(args[0]);
			BooleanExpressionLexer lex = new BooleanExpressionLexer(input);
			CommonTokenStream tokens = new CommonTokenStream(lex);
			BooleanExpressionParser parser = new BooleanExpressionParser(tokens);
			boolean result = parser.expression();
			System.out.println(result);
		} catch(Throwable t) {
			t.printStackTrace();
		}
	}
}
// Entry rule: a single expression that must be followed by end of input.
expression returns [boolean b]
: e=expr EOF { $b = $e.b; }
;
// Alias for the lowest-precedence level (expr_or).
expr returns [boolean b]
: e=expr_or { $b = $e.b; }
;
// The three rules below form the binary-operator chain, each evaluated left to right as
// operands are parsed. Nesting gives '||' the lowest precedence, then '^^', then '&&'.
expr_or returns [boolean b]
: lhs=expr_xor { $b = $lhs.b; } (OP_OR rhs=expr_xor { $b = $b || $rhs.b; })*
;
expr_xor returns [boolean b]
: lhs=expr_and { $b = $lhs.b; } (OP_XOR rhs=expr_and { $b = $b ^ $rhs.b; })*
;
expr_and returns [boolean b]
: lhs=expr_not { $b = $lhs.b; } (OP_AND rhs=expr_not { $b = $b && $rhs.b; })*
;
// Operand of the '&&' level. Note that '!' is accepted only in front of a parenthesized
// expression; the comparison and 'defined' alternatives have no negated form here, so
// they must be wrapped in parentheses to be negated.
// The 'defined' alternative is true when env.lookup returns non-null for the identifier.
expr_not returns [boolean b]
: OP_NOT e=expr_paren { $b = ! $e.b; }
| e=expr_paren { $b = $e.b; }
| e=expr_eq { $b = $e.b; }
| KEY_DEFINED '(' id=IDENTIFIER ')' { $b = env.lookup($id.text) != null; }
;
// A full expression enclosed in parentheses.
expr_paren returns [boolean b]
: '(' e=expr ')' { $b = $e.b; }
;
// Compares two terms with '==' or '!='; the comparison itself is delegated to env.equals.
// The operands are read through the labels' declared return attribute, as every other
// rule in this grammar does, rather than through the bare generated locals. The bare
// form only works while expr_term has exactly one return value.
expr_eq returns [boolean b]
: lhs=expr_term OP_EQ rhs=expr_term { $b = env.equals($lhs.s, $rhs.s); }
| lhs=expr_term OP_NEQ rhs=expr_term { $b = !env.equals($lhs.s, $rhs.s); }
;
// A comparison operand. An identifier is resolved through env.lookup; if the lookup
// yields null, an error is reported through env.reportError and null is still returned
// to the caller. A quoted string yields its text with the surrounding quotes removed
// (escape sequences are not decoded).
expr_term returns [String s]
: id=IDENTIFIER { $s = env.lookup($id.text);
if ($s == null)
env.reportError("Macro: "+ $id.text + " is undefined");
}
| qs=QSTRING {$s = $qs.text.substring(1, $qs.text.length() - 1); }
;
// One or more letters, underscores or digits. Unlike a typical identifier rule, the first
// character may be a digit, so a bare number also lexes as IDENTIFIER.
IDENTIFIER
: (ALPHA | '_' | DIGIT)+
;
// A double-quoted string; the token text keeps its quotes (expr_term strips them).
QSTRING
: '"' (ESCAPE | ~('\\' | '"'))* '"'
;
// A backslash followed by one of the listed single characters, or a unicode or octal escape.
fragment
ESCAPE
: '\\' ('b' | 't' | 'n' | 'f' | 'r' | '\"' | '\'' | '\\')
| UNICODE_ESCAPE
| OCTAL_ESCAPE
;
// One to three octal digits after a backslash; the three-digit form limits the first
// digit to 0..3.
fragment
OCTAL_ESCAPE
: '\\' ('0'..'3') ('0'..'7') ('0'..'7')
| '\\' ('0'..'7') ('0'..'7')
| '\\' ('0'..'7')
;
// Backslash, 'u', then exactly four hexadecimal digits.
fragment
UNICODE_ESCAPE
: '\\' 'u' HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
;
fragment
HEXDIGIT
: '0'..'9'
| 'a'..'f'
| 'A'..'F'
;
fragment
DIGIT
: '0'..'9'
;
fragment
ALPHA
: 'A'..'Z'
| 'a'..'z'
;
// Whitespace is routed to the HIDDEN channel.
WS
: (' ' | '\t' | '\r' | '\n')+ { $channel = HIDDEN; }
;

View file

@ -0,0 +1,42 @@
lexer grammar DisplayLexer;
options {
superClass = AbstractSleighLexer;
tokenVocab = SleighLexer;
}
import BaseLexer;
@members {
// Sets the parsing environment on this lexer and forwards the same environment to
// gBaseLexer (presumably the delegate for the imported BaseLexer grammar -- confirm
// against the generated source).
@Override
public void setEnv(ParsingEnvironment env) {
super.setEnv(env);
gBaseLexer.setEnv(env);
}
}
/**
* This is the lexer used for the display portion of the sleigh grammar. It reserves the word 'is'
* so that it can clearly tell where the display portion ends. It also adds three special symbols
* that would not otherwise be recognized by the core lexer, so that language modelers can use them
* in assembly print pieces. Furthermore, it moves whitespace into the default channel so that
* language modelers can control whether or not whitespace is printed.
*/
// Characters without meaning except for the display portion
// A single '@', '$' or '?' character.
DISPCHAR
: '@' | '$' | '?'
;
// Override this, and parse # as a print piece
// (only the '#' itself is matched, and no channel is assigned)
LINECOMMENT
: '#'
;
// Whitespace must be processed
// (no channel assignment here, so the token stays on the default channel)
WS
: (' ' | '\t' | '\r' | '\n')+
;
// Reserved words
RES_IS : 'is';

View file

@ -0,0 +1,84 @@
parser grammar DisplayParser;
options {
superClass = AbstractSleighParser;
}
/**
* This is the parser used in the display portion. Its root rule is display, which will swap in
* the corresponding DisplayLexer. Instead of overriding the lexer rules for operators and other
* special symbols, the parser simply recognizes them and just uses them for their characters.
*/
// See the README.txt regarding some restrictions on this rule.
// The display section: COLON, any number of print pieces, then RES_IS. The action that
// pushes the DISPLAY lexer mode is placed before COLON, and the mode is popped after
// RES_IS has been matched. Only the pieces are kept in the tree, under an OP_DISPLAY root.
display
: { lexer.pushMode(DISPLAY); } COLON pieces RES_IS { lexer.popMode(); } -> ^(OP_DISPLAY pieces)
;
// Zero or more print pieces; the rule may match nothing.
pieces
: printpiece*
;
// One element of the display section.
printpiece
: identifier
| whitespace
| concatenate
| qstring
| special
;
// A WS token wrapped in an OP_WHITESPACE node whose text is "WS"; the original token is
// kept as the child.
whitespace
: lc=WS -> ^(OP_WHITESPACE[$lc, "WS"] WS)
;
// Adjacent print pieces are already catenated, but two adjacent identifiers must be separated by
// something in the source. I suppose an empty string "" would also do, but this is clearer.
concatenate
: lc=CARET -> ^(OP_CONCATENATE[$lc])
;
// A QSTRING token wrapped in an OP_QSTRING node, with the original token as the child.
qstring
: lc=QSTRING -> ^(OP_QSTRING[$lc, "QSTRING"] QSTRING)
;
// Accepts punctuation, operator and integer tokens as literal print pieces. Every
// alternative has the same shape: the token is wrapped in an OP_STRING node whose text is
// the token type's name, and the original token is kept as the child. CARET is not listed
// here because the concatenate rule handles it.
special
: lc=DISPCHAR -> ^(OP_STRING[$lc, "DISPCHAR"] DISPCHAR)
| lc=LINECOMMENT -> ^(OP_STRING[$lc, "LINECOMMENT"] LINECOMMENT) // really, just the #
| lc=LBRACE -> ^(OP_STRING[$lc, "LBRACE"] LBRACE)
| lc=RBRACE -> ^(OP_STRING[$lc, "RBRACE"] RBRACE)
| lc=LBRACKET -> ^(OP_STRING[$lc, "LBRACKET"] LBRACKET)
| lc=RBRACKET -> ^(OP_STRING[$lc, "RBRACKET"] RBRACKET)
| lc=LPAREN -> ^(OP_STRING[$lc, "LPAREN"] LPAREN)
| lc=RPAREN -> ^(OP_STRING[$lc, "RPAREN"] RPAREN)
| lc=ELLIPSIS -> ^(OP_STRING[$lc, "ELLIPSIS"] ELLIPSIS)
| lc=EQUAL -> ^(OP_STRING[$lc, "EQUAL"] EQUAL)
| lc=NOTEQUAL -> ^(OP_STRING[$lc, "NOTEQUAL"] NOTEQUAL)
| lc=LESS -> ^(OP_STRING[$lc, "LESS"] LESS)
| lc=GREAT -> ^(OP_STRING[$lc, "GREAT"] GREAT)
| lc=LESSEQUAL -> ^(OP_STRING[$lc, "LESSEQUAL"] LESSEQUAL)
| lc=GREATEQUAL -> ^(OP_STRING[$lc, "GREATEQUAL"] GREATEQUAL)
| lc=ASSIGN -> ^(OP_STRING[$lc, "ASSIGN"] ASSIGN)
| lc=COLON -> ^(OP_STRING[$lc, "COLON"] COLON)
| lc=COMMA -> ^(OP_STRING[$lc, "COMMA"] COMMA)
| lc=ASTERISK -> ^(OP_STRING[$lc, "ASTERISK"] ASTERISK)
| lc=BOOL_OR -> ^(OP_STRING[$lc, "BOOL_OR"] BOOL_OR)
| lc=BOOL_XOR -> ^(OP_STRING[$lc, "BOOL_XOR"] BOOL_XOR)
| lc=BOOL_AND -> ^(OP_STRING[$lc, "BOOL_AND"] BOOL_AND)
| lc=PIPE -> ^(OP_STRING[$lc, "PIPE"] PIPE)
| lc=AMPERSAND -> ^(OP_STRING[$lc, "AMPERSAND"] AMPERSAND)
| lc=LEFT -> ^(OP_STRING[$lc, "LEFT"] LEFT)
| lc=RIGHT -> ^(OP_STRING[$lc, "RIGHT"] RIGHT)
| lc=PLUS -> ^(OP_STRING[$lc, "PLUS"] PLUS)
| lc=MINUS -> ^(OP_STRING[$lc, "MINUS"] MINUS)
| lc=SLASH -> ^(OP_STRING[$lc, "SLASH"] SLASH)
| lc=PERCENT -> ^(OP_STRING[$lc, "PERCENT"] PERCENT)
| lc=EXCLAIM -> ^(OP_STRING[$lc, "EXCLAIM"] EXCLAIM)
| lc=TILDE -> ^(OP_STRING[$lc, "TILDE"] TILDE)
| lc=SEMI -> ^(OP_STRING[$lc, "SEMI"] SEMI)
| lc=SPEC_OR -> ^(OP_STRING[$lc, "SPEC_OR"] SPEC_OR)
| lc=SPEC_AND -> ^(OP_STRING[$lc, "SPEC_AND"] SPEC_AND)
| lc=SPEC_XOR -> ^(OP_STRING[$lc, "SPEC_XOR"] SPEC_XOR)
| lc=DEC_INT -> ^(OP_STRING[$lc, "DEC_INT"] DEC_INT)
| lc=HEX_INT -> ^(OP_STRING[$lc, "HEX_INT"] HEX_INT)
| lc=BIN_INT -> ^(OP_STRING[$lc, "BIN_INT"] BIN_INT)
;

View file

@ -0,0 +1,146 @@
As of May 2017, the Java port of the SLEIGH compiler has been refactored some. This is in
preparation for the addition of the new "with" block. Perhaps some background as to why:
+----------------+
| The problem(s) |
+----------------+
Before refactoring, there was quite a bit of hacking to house the semantics and display parsers in
separate files from the overall parser. There were a number of reasons for doing this that I can
recall:
1) It's good software engineering practice to separate large components into smaller related
components (Encapsulation).
2) There exist use cases (e.g., PcodeCompiler) where the semantics parser is invoked apart from
the rest of the compiler.
3) Lexical analysis is sensitive to the elements being parsed, and ANTLRv3 does not support
modal lexing natively.
The solution before refactoring applied a technique which simply doesn't settle well in the world
of computational theory, languages, and automata. The original programmer wrote a lexing rule in
the main SLEIGH grammar that read something like this:
SEMANTICS: '{' { /* usurp the input with a new lexer and parser for semantics, parse the input
until "EOF", and then restore the input to the main lexer/parser */ } ;
and then, in the sub-grammar:
RBRACE: '}' { /* actually, emit EOF */ } ;
While this is clever and all, and theoretical complaints usually take a back seat to things that
work in practice, this introduced a few problems:
1) The injected code for the SEMANTICS rule is actually pretty complicated.
2) A lot of extra bookkeeping was necessary to print proper locations for errors.
3) The '{' and '}', though related, were not parsed by the same grammar.
4) [And this is the biggie:] SEMANTICS should really be a parser rule, not a lexer rule.
Because a lexer rule starting with '{' has been hacked to do context-free parsing things, adding a
with block that also uses braces is impossible. Anywhere '{' appears, the lexer immediately usurps
the input and tries parsing SEMANTICS according to a whole new parser/lexer. Sure, I could have
just used a different symbol, but that's just hacking around a hack. This needed to be fixed, and I
decided to pay the debt.
+---------+
| The fix |
+---------+
ANTLRv3 has a built-in mechanism for separating grammars into logical units, via "import". Granted,
there are restrictions on how it can be used, and there are some bookkeeping errors on the ANTLR
developers' part, but those can be worked around in ways that still preserve the soundness of the
grammars themselves. This will satisfy reasons 1 and 2 at the top. Reason 3 is a little more
difficult. Unfortunately, ANTLR doesn't support modal lexing until ANTLRv4; nonetheless, it's
relatively straightforward to add mode swapping mechanisms in ANTLRv3. Furthermore, native modal
lexing only permits the lexer to switch its own modes. For our use case, we'd need the parser to
control the mode of lexing. Again, with some custom classes, this is not too difficult, except that
we must be careful about the parser looking ahead. Here's what I've done, not necessarily in
chronological order:
First, I've factored out many of the @member actions, instead opting to implement them in an
abstract class. ANTLR provides an option to specify a class other than Lexer or Parser to extend
when generating Java source. There are two benefits: 1) I can keep most Java source in actual Java
source files instead of the grammar source, 2) The code is no longer duplicated among many parsers
and lexers.
Second, I've split each sub-grammar into separate lexer and parser. This is required because ANTLR
does not allow a "combined lexer/parser" to be imported by another lexer, parser, or combination.
Aside from avoiding situations that are difficult to define, ANTLR's restriction also enforces the
practice of separating components out. Note that BooleanExpression.g is unaffected by all of this,
since it's basically a stand-alone grammar.
There are now 3 lexers each with its own source:
BaseLexer.g: The "normal" lexer for .slaspec files
DisplayLexer.g imports BaseLexer.g: The lexer for the display portion, e.g.:
':ADD op1,op2 is'
SemanticLexer.g imports BaseLexer.g: The lexer for the semantics portion, e.g.:
'{ export *[register]:4 reg; }'
See their source for additional documentation. Each of these lexers stands in for a "mode" of the
actual root lexer, SleighLexer. Despite its name, it is not generated by ANTLR. It is a POJO that
tracks a stack of modes. When the next token is requested, the mode at the top of the stack
determines which child lexer will be invoked. So long as the mode is changed in places that aren't
sensitive to lookahead, the parser can effectively inform the lexer mode. It is also a smart idea
to use an unbuffered token stream, because the buffered one may be tempted to look ahead where not
necessary. Thus, I've switched uses of CommonTokenStream to UnbufferedTokenStream. Proper channel
filtering is provided by LexerMultiplexer, which SleighLexer extends.
Similarly, there are now 3 parsers each with its own source:
DisplayParser.g: The parser for the display portion
SemanticParser.g: The parser for the semantics portion
SleighParser imports DisplayParser, SemanticParser: The root parser for .slaspec files
See their source for additional documentation. The ANTLR-provided import mechanism is sufficient to
glue these parser grammars together. Each is given a pointer to the SleighLexer that feeds the root
parser so that parser rules can control the lexing mode. Rules with such changes MUST NOT change
the mode at a position past which the parser must look ahead when choosing an alternative, e.g.:
display
: ':' { lexer.pushMode(DISPLAY); } pieces 'is' { lexer.popMode(); }
| ':' 'is'
;
pieces
: printpiece+
;
This would cause a problem because both alternatives start with the prefix ':'. Thus, the parser
will look ahead one token. For the first alternative, it's potentially looking into pieces, which
lies beyond the mode switch. Note, however, that the injected code is not executed until the
alternative is actually chosen, so that look-ahead token may be lexed in the wrong mode. If it
sees 'is', it chooses the second alternative without a problem. But, if it sees something else,
it will choose the first alternative. Unfortunately, it does not know to drop its lookahead token,
nor would it be sound to, since the decision at this point could be flawed anyway. My
recommendation is to keep the mode transitions in rules that have a single alternative. This alone
is not sufficient since lookahead can be a complicated thing, but this usually helps. For the above
example, the solution is to allow pieces to produce epsilon, and remove the second alternative
altogether:
display
: ':' { lexer.pushMode(DISPLAY); } pieces 'is' {lexer.popMode(); }
;
pieces
: printpiece*
;
This way, we can ensure the parser need only see the ':' to decide that it will parse according to
the display rule. It will properly change the lexing mode before asking the lexer for any token
after the colon. While it did require rethinking the grammar just slightly, the overall result is
actually pretty elegant compared to the previous. Additionally, I was able to remove all of the
extra location bookkeeping for the error reporting.
There's still a small issue with the build process, though. Because these lexers and grammars must
all cooperate during sleigh compilation, they must share a common token vocabulary. In other words,
the DisplayLexer must know about SemanticLexer's tokens even though it never refers to them itself,
simply so that they don't have overlapping numbers. To resolve this, a dummy lexer, called
SleighLexer.g is written to import the other lexers. Because of an ANTLR restriction, the dummy
lexer must specify at least one lexing rule. Building this lexer causes ANTLR to output a token
vocabulary containing all possible tokens from all the lexers. Those lexers then take this
vocabulary as their own. To avoid any confusion, the Gradle script deletes the Java source for the
dummy lexer, leaving just its .tokens file.
Additionally, there's a bit of a glitch with the way ANTLR handles "@header" actions with imported
grammars. A root grammar specifying its own "@header" action cannot import a grammar with another
"@header" action. Furthermore, the "@header" action is not actually inherited by the importing
grammar. I imagine this is an oversight on the ANTLR developers' parts. The "@header" is usually
needed to specify the package of the output source. ANTLRv4 mitigates this problem by providing a
'-package' command-line option, so that most "@header"s are not necessary, but ANTLRv3 doesn't have
this. Thus, I programmed the build script to insert the package header after calling ANTLR.

View file

@ -0,0 +1,56 @@
lexer grammar SemanticLexer;
options {
superClass = AbstractSleighLexer;
tokenVocab = SleighLexer;
}
import BaseLexer;
@members {
// Sets the parsing environment on this lexer and forwards the same environment to
// gBaseLexer (presumably the delegate for the imported BaseLexer grammar -- confirm
// against the generated source).
@Override
public void setEnv(ParsingEnvironment env) {
super.setEnv(env);
gBaseLexer.setEnv(env);
}
}
/**
* This is the lexer used for the semantic portion of the sleigh grammar. It reserves the word 'if'
* so that, e.g., 'if(a == b) ...', does not appear to be a call to a function named 'if'. All of
* the operators that start with letters are lexed here only. If they were in the core lexer, then
* ANTLR would not be able to lex 'f=0' properly. It would anticipate the 'f==' token, but detect a
* 'mismatch' when it finds a '0' instead of another '='. ANTLRs lexers do not backtrack, so it
* will never even try IDENTIFIER as it should. Worse yet, switching the lexer into "filter" mode
* causes it to mis-lex any id starting with a keyword, e.g., contextreg is lexed:
* KEY_CONTEXT, ID:reg instead of ID:contextreg. Pity. Thus, we only enable these tokens when we
* know we're parsing the semantic portion.
*/
// Floating-point comparisons
// Every operator below is a base operator spelling prefixed with the letter 'f' or 's'.
// The grammar description above explains why these letter-led operators are defined only
// in this lexer.
FEQUAL : 'f==';
FNOTEQUAL : 'f!=';
FLESS : 'f<';
FGREAT : 'f>';
FLESSEQUAL : 'f<=';
FGREATEQUAL : 'f>=';
// Floating-point operations
FPLUS : 'f+';
FMINUS : 'f-';
FMULT : 'f*';
FDIV : 'f/';
// Signed comparisons
SLESS : 's<';
SGREAT : 's>';
SLESSEQUAL : 's<=';
SGREATEQUAL : 's>=';
// Signed operations
SRIGHT : 's>>';
SDIV : 's/';
SREM : 's%';
// Reserved words
RES_IF : 'if';

View file

@ -0,0 +1,343 @@
parser grammar SemanticParser;
options {
superClass = AbstractSleighParser;
}
/**
* This is the parser used in the semantics portion. Its root rule is semanticbody, which will
* swap in the corresponding SemanticLexer.
*/
// See the README.txt regarding some restrictions on this rule.
// A brace-delimited semantic section. The SEMANTIC lexer mode is pushed after LBRACE has
// been matched and popped after RBRACE has been matched. The braces are dropped; the
// result is the tree produced by the semantic rule.
semanticbody
: LBRACE { lexer.pushMode(SEMANTIC); } semantic RBRACE { lexer.popMode(); } -> semantic
;
// Wraps the code block in an OP_SEMANTIC root node.
semantic
: code_block -> ^(OP_SEMANTIC code_block)
;
// Either a list of statements or nothing; the empty case produces a lone OP_NOP node.
code_block
: statements
| -> ^(OP_NOP)
;
// with this change, export can ONLY be followed by a section label,
// and it MUST occur only in the first, default, unnamed section
statements
: statement+
;
// An identifier between LESS and GREAT, rooted at an OP_LABEL node created from the
// opening token.
label
: lc=LESS identifier GREAT -> ^(OP_LABEL[$lc] identifier)
;
// An identifier between LEFT and RIGHT, rooted at an OP_SECTION_LABEL node created from
// the opening token.
section_def
: lc=LEFT identifier RIGHT -> ^(OP_SECTION_LABEL[$lc] identifier)
;
// One statement. The first alternative is any of the listed statement kinds followed by
// SEMI, which is excluded from the tree. Its last sub-alternative is empty: it matches
// nothing and only sets the 'empty' flag, so that a bare SEMI is still parsed and is then
// rejected through bail() with the token's location. Labels, section definitions and
// outererror take no terminating SEMI.
statement
@init {
boolean empty = false;
}
: ( assignment
| declaration
| funcall
| build_stmt
| crossbuild_stmt
| goto_stmt
| cond_stmt
| call_stmt
| export
| return_stmt
| {
empty = true;
}
) lc=SEMI! {
if(empty)
bail("Empty statement at " + ((SleighToken) $lc).getLocation());
}
| label
| section_def
| outererror
;
// Matches a single operator or punctuation token found where a statement should start
// and reports it as an UnwantedTokenException carrying that token. Tokens that begin
// other statement alternatives are not listed (for example LESS for label, LEFT for
// section_def, ASTERISK for sizedstar).
outererror
: (lc=EQUAL
| lc=NOTEQUAL
| lc=FEQUAL
| lc=FNOTEQUAL
| lc=LESSEQUAL
| lc=GREATEQUAL
| lc=SLESS
| lc=SGREAT
| lc=SLESSEQUAL
| lc=SGREATEQUAL
| lc=FLESS
| lc=FGREAT
| lc=FLESSEQUAL
| lc=FGREATEQUAL
| lc=ASSIGN
| lc=COLON
| lc=COMMA
| lc=RBRACKET
| lc=BOOL_OR
| lc=BOOL_XOR
| lc=BOOL_AND
| lc=PIPE
| lc=CARET
| lc=AMPERSAND
| lc=SRIGHT
| lc=PLUS
| lc=MINUS
| lc=FPLUS
| lc=FMINUS
| lc=SLASH
| lc=PERCENT
| lc=SDIV
| lc=SREM
| lc=FMULT
| lc=FDIV
| lc=TILDE
| lc=LPAREN
| lc=RPAREN) {
UnwantedTokenException ute = new UnwantedTokenException(0, input);
ute.token = lc;
reportError(ute);
}
;
// lvalue = expr, optionally preceded by KEY_LOCAL. The local form is rooted at OP_LOCAL
// with an OP_ASSIGN node as its first child; the plain form is rooted at OP_ASSIGN.
assignment
: lb=KEY_LOCAL lvalue lc=ASSIGN expr -> ^(OP_LOCAL[$lb] OP_ASSIGN[$lc] lvalue expr)
| lvalue lc=ASSIGN expr -> ^(OP_ASSIGN[$lc] lvalue expr)
;
// KEY_LOCAL identifier, optionally followed by COLON and a constant; no assignment.
declaration
: lb=KEY_LOCAL identifier lc=COLON constant -> ^(OP_LOCAL[$lb] identifier constant)
| lb=KEY_LOCAL identifier -> ^(OP_LOCAL[$lb] identifier)
;
// Assignment target: a bit range, an identifier with a COLON constant suffix, a plain
// identifier, or a sizedstar applied to an expression (the sizedstar subtree is the root).
lvalue
: sembitrange
| identifier lc=COLON constant -> ^(OP_DECLARATIVE_SIZE[$lc] identifier constant)
| identifier
| sizedstar^ expr
;
// identifier [ a , b ] with two constants, rooted at OP_BITRANGE.
sembitrange
: identifier lc=LBRACKET a=constant COMMA b=constant RBRACKET -> ^(OP_BITRANGE[$lc] identifier $a $b)
;
// ASTERISK with an optional bracketed identifier and an optional COLON constant. All
// four combinations produce an OP_DEREFERENCE node holding whichever parts were present.
sizedstar
: lc=ASTERISK LBRACKET identifier RBRACKET COLON constant -> ^(OP_DEREFERENCE[$lc] identifier constant)
| lc=ASTERISK LBRACKET identifier RBRACKET -> ^(OP_DEREFERENCE[$lc] identifier)
| lc=ASTERISK COLON constant -> ^(OP_DEREFERENCE[$lc] constant)
| lc=ASTERISK -> ^(OP_DEREFERENCE[$lc])
;
// A function application used as a statement.
funcall
: expr_apply
;
// KEY_BUILD identifier.
build_stmt
: lc=KEY_BUILD identifier -> ^(OP_BUILD[$lc] identifier)
;
// KEY_CROSSBUILD varnode , identifier; the comma is dropped from the tree.
crossbuild_stmt
: lc=KEY_CROSSBUILD varnode COMMA identifier-> ^(OP_CROSSBUILD[$lc] varnode identifier)
;
// KEY_GOTO followed by a jump destination.
goto_stmt
: lc=KEY_GOTO jumpdest -> ^(OP_GOTO[$lc] jumpdest)
;
// Jump destination forms, each tagged with its own OP_JUMPDEST_* root: an identifier, a
// bracketed expression, an integer, a constant followed by a bracketed identifier, or a
// label.
jumpdest
: identifier -> ^(OP_JUMPDEST_SYMBOL identifier)
| LBRACKET expr RBRACKET -> ^(OP_JUMPDEST_DYNAMIC expr)
| integer -> ^(OP_JUMPDEST_ABSOLUTE integer)
| constant LBRACKET identifier RBRACKET -> ^(OP_JUMPDEST_RELATIVE constant identifier)
| label -> ^(OP_JUMPDEST_LABEL label)
;
// RES_IF expr followed by a goto statement; goto is the only conditional body accepted.
cond_stmt
: lc=RES_IF expr goto_stmt -> ^(OP_IF[$lc] expr goto_stmt)
;
// KEY_CALL followed by a jump destination.
call_stmt
: lc=KEY_CALL jumpdest -> ^(OP_CALL[$lc] jumpdest)
;
// KEY_RETURN followed by a bracketed expression; the brackets are dropped from the tree.
return_stmt
: lc=KEY_RETURN LBRACKET expr RBRACKET -> ^(OP_RETURN[$lc] expr)
;
// A sizedstar applied to an identifier; the sizedstar subtree is the root.
sizedexport
: sizedstar^ identifier
;
// KEY_EXPORT followed by either a sizedexport or a varnode.
export
: lc=KEY_EXPORT sizedexport -> ^(OP_EXPORT[$lc] sizedexport)
| lc=KEY_EXPORT varnode -> ^(OP_EXPORT[$lc] varnode)
;
// Binary expression chain. Each level parses one or more operands of the next level
// separated by that level's operators, and the '^' suffix makes the operator node the
// root of the subtree built so far. The levels are listed from the outermost (parsed
// first, binds loosest) to the innermost: boolor, booland, or, xor, and, eq, comp,
// shift, add, mult. Every *_op rule replaces the matched token with an OP_* node
// created from it.
expr
: expr_boolor
;
expr_boolor
: expr_booland ( expr_boolor_op^ expr_booland )*
;
expr_boolor_op
: lc=BOOL_OR -> ^(OP_BOOL_OR[$lc])
;
expr_booland
: expr_or ( booland_op^ expr_or )*
;
// Note: BOOL_AND and BOOL_XOR share this level.
booland_op
: lc=BOOL_AND -> ^(OP_BOOL_AND[$lc])
| lc=BOOL_XOR -> ^(OP_BOOL_XOR[$lc])
;
expr_or
: expr_xor ( expr_or_op^ expr_xor )*
;
expr_or_op
: lc=PIPE -> ^(OP_OR[$lc])
;
expr_xor
: expr_and ( expr_xor_op^ expr_and )*
;
expr_xor_op
: lc=CARET -> ^(OP_XOR[$lc])
;
expr_and
: expr_eq ( expr_and_op^ expr_eq )*
;
expr_and_op
: lc=AMPERSAND -> ^(OP_AND[$lc])
;
expr_eq
: expr_comp ( eq_op^ expr_comp )*
;
eq_op
: lc=EQUAL -> ^(OP_EQUAL[$lc])
| lc=NOTEQUAL -> ^(OP_NOTEQUAL[$lc])
| lc=FEQUAL -> ^(OP_FEQUAL[$lc])
| lc=FNOTEQUAL -> ^(OP_FNOTEQUAL[$lc])
;
expr_comp
: expr_shift ( compare_op^ expr_shift )*
;
// Plain, s-prefixed and f-prefixed comparisons.
compare_op
: lc=LESS -> ^(OP_LESS[$lc])
| lc=GREATEQUAL -> ^(OP_GREATEQUAL[$lc])
| lc=LESSEQUAL -> ^(OP_LESSEQUAL[$lc])
| lc=GREAT -> ^(OP_GREAT[$lc])
| lc=SLESS -> ^(OP_SLESS[$lc])
| lc=SGREATEQUAL -> ^(OP_SGREATEQUAL[$lc])
| lc=SLESSEQUAL -> ^(OP_SLESSEQUAL[$lc])
| lc=SGREAT -> ^(OP_SGREAT[$lc])
| lc=FLESS -> ^(OP_FLESS[$lc])
| lc=FGREATEQUAL -> ^(OP_FGREATEQUAL[$lc])
| lc=FLESSEQUAL -> ^(OP_FLESSEQUAL[$lc])
| lc=FGREAT -> ^(OP_FGREAT[$lc])
;
expr_shift
: expr_add ( shift_op^ expr_add )*
;
shift_op
: lc=LEFT -> ^(OP_LEFT[$lc])
| lc=RIGHT -> ^(OP_RIGHT[$lc])
| lc=SRIGHT -> ^(OP_SRIGHT[$lc])
;
expr_add
: expr_mult ( add_op^ expr_mult )*
;
add_op
: lc=PLUS -> ^(OP_ADD[$lc])
| lc=MINUS -> ^(OP_SUB[$lc])
| lc=FPLUS -> ^(OP_FADD[$lc])
| lc=FMINUS -> ^(OP_FSUB[$lc])
;
expr_mult
: expr_unary ( mult_op^ expr_unary )*
;
mult_op
: lc=ASTERISK -> ^(OP_MULT[$lc])
| lc=SLASH -> ^(OP_DIV[$lc])
| lc=PERCENT -> ^(OP_REM[$lc])
| lc=SDIV -> ^(OP_SDIV[$lc])
| lc=SREM -> ^(OP_SREM[$lc])
| lc=FMULT -> ^(OP_FMULT[$lc])
| lc=FDIV -> ^(OP_FDIV[$lc])
;
// At most one unary operator in front of an expr_func; when present, the operator node
// is the root.
expr_unary
: unary_op^ ? expr_func
;
// Logical not, bitwise invert, the two negations, or a sizedstar dereference.
unary_op
: lc=EXCLAIM -> ^(OP_NOT[$lc])
| lc=TILDE -> ^(OP_INVERT[$lc])
| lc=MINUS -> ^(OP_NEGATE[$lc])
| lc=FMINUS -> ^(OP_FNEGATE[$lc])
| sizedstar
;
expr_func
: expr_apply
| expr_term
;
// identifier followed by a parenthesized operand list, rooted at OP_APPLY.
expr_apply
: identifier expr_operands -> ^(OP_APPLY identifier expr_operands?)
;
// Zero or more comma-separated expressions in parentheses; the parentheses and commas
// are excluded from the tree.
expr_operands
: LPAREN! (expr (COMMA! expr)* )? RPAREN!
;
// A varnode, a bit range, or a parenthesized expression. The parenthesized form is kept
// in the tree as an OP_PARENTHESIZED node with the text "(...)".
expr_term
: varnode
| sembitrange
| lc=LPAREN expr RPAREN -> ^(OP_PARENTHESIZED[$lc, "(...)"] expr)
;
// An integer or identifier, each optionally followed by COLON and a constant, or an
// AMPERSAND-prefixed varnode with an optional COLON constant between the two.
varnode
: integer
| identifier
| integer lc=COLON constant -> ^(OP_TRUNCATION_SIZE[$lc] integer constant)
| identifier lc=COLON constant -> ^(OP_BITRANGE2[$lc] identifier constant)
| lc=AMPERSAND fp=COLON constant varnode -> ^(OP_ADDRESS_OF[$lc] ^(OP_SIZING_SIZE[$fp] constant) varnode)
| lc=AMPERSAND varnode -> ^(OP_ADDRESS_OF[$lc] varnode)
;
// Alias for integer.
constant
: integer
;
// A hexadecimal, decimal or binary literal, wrapped in an OP_*_CONSTANT node whose text
// is the token type's name; the original token is kept as the child.
integer
: lc=HEX_INT -> ^(OP_HEX_CONSTANT[$lc, "HEX_INT"] HEX_INT)
| lc=DEC_INT -> ^(OP_DEC_CONSTANT[$lc, "DEC_INT"] DEC_INT)
| lc=BIN_INT -> ^(OP_BIN_CONSTANT[$lc, "BIN_INT"] BIN_INT)
;

View file

@ -0,0 +1,497 @@
tree grammar SleighEcho;
options {
ASTLabelType=CommonTree;
tokenVocab=SleighLexer;
}
@header {
import java.io.PrintStream;
import org.antlr.runtime.*;
import org.antlr.runtime.tree.*;
}
@members {
// Destination of the echoed text; defaults to standard output and may be reassigned.
public PrintStream out = System.out;
// Writes s without a line terminator.
void ot(String s) {
out.print(s);
}
// Writes s followed by a line terminator.
void out(String s) {
out.println(s);
}
}
// Top of the tree: the endian definition, then any mix of definitions and
// constructor-like items.
root
: endiandef
( definition
| constructorlike
)*
;
// Echoes "define endian=<text>;" using the text matched by the endian rule.
endiandef
: ^(OP_ENDIAN s=endian) { out("define endian=" + $s.text + ";"); }
;
endian
: OP_BIG
| OP_LITTLE
;
// Dispatches to one of the definition kinds; each sub-rule does its own echoing.
definition
: (aligndef
| tokendef
| contextdef
| spacedef
| varnodedef
| bitrangedef
| pcodeopdef
| valueattach
| nameattach
| varattach
)
;
// Echoes "define alignment=<i>;".
aligndef
: ^(OP_ALIGNMENT i=integer) { out("define alignment=" + $i.value + ";"); }
;
// Echoes the "define token <name>(<size>)" header before the field definitions are
// walked; the fielddefs rule then echoes each field and the closing ";".
tokendef
: ^(OP_TOKEN n=identifier i=integer { out("define token " + $n.value + "(" + $i.value + ")"); } fielddefs)
;
// Walks every field definition, then echoes ";" on a line of its own.
fielddefs
: ^(OP_FIELDDEFS fielddef*) { out(";"); }
;
// Echoes one field as " <name> = (<s>,<e>)<mods>".
fielddef
: ^(OP_FIELDDEF n=identifier s=integer e=integer f=fieldmods) { out(" " + $n.value + " = (" + $s.value + "," + $e.value + ")" + $f.value); }
;
// Builds the modifier suffix: each modifier preceded by a space, or the empty string
// when the node is OP_NO_FIELD_MOD.
fieldmods returns [String value]
: ^(OP_FIELD_MODS { $value = ""; } (n=fieldmod { $value += " " + $n.value; } )+ )
| OP_NO_FIELD_MOD { $value = ""; }
;
// Maps a modifier node to its keyword.
fieldmod returns [String value]
: OP_SIGNED { $value = "signed"; }
| OP_NOFLOW { $value = "noflow"; }
| OP_HEX { $value = "hex"; }
| OP_DEC { $value = "dec"; }
;
// Echoes the "define context <name>" header before the field definitions are walked.
contextdef
: ^(OP_CONTEXT n=identifier { out("define context " + $n.value); } fielddefs)
;
// Echoes "define space <name><mods>;".
spacedef
: ^(OP_SPACE n=identifier s=spacemods) { out("define space " + $n.value + $s.value + ";"); }
;
// Concatenates the modifiers, each preceded by a space; empty when there are none.
spacemods returns [String value]
@init { $value = ""; }
: ^(OP_SPACEMODS (s=spacemod { $value += " " + $s.value; })*)
;
// One space modifier: type=, size=, wordsize=, or the keyword "default".
spacemod returns [String value]
: t=typemod { $value = $t.value; }
| s=sizemod { $value = $s.value; }
| w=wordsizemod { $value = $w.value; }
| OP_DEFAULT { $value = "default"; }
;
typemod returns [String value]
: ^(OP_TYPE n=type) { $value = "type=" + $n.value; }
;
type returns [String value]
: n=identifier { $value = $n.value; }
;
sizemod returns [String value]
: ^(OP_SIZE i=integer) { $value = "size=" + $i.value; }
;
wordsizemod returns [String value]
: ^(OP_WORDSIZE i=integer) { $value = "wordsize=" + $i.value; }
;
// Echoes "define <name> offset=<offset> size=<size> <list>;".
varnodedef
: ^(OP_VARNODE n=identifier offset=integer size=integer l=identifierlist) { out("define " + $n.value + " offset=" + $offset.value + " size=" + $size.value + " " + $l.value + ";"); }
;
// Renders one or more identifiers as "[ a b c ]".
identifierlist returns [String value]
: ^(OP_IDENTIFIER_LIST { $value = "["; } (n=identifier { $value += " " + $n.value; } )+) { $value += " ]"; }
;
// Same rendering as identifierlist, for entries that may be identifiers or quoted strings.
stringoridentlist returns [String value]
: ^(OP_STRING_OR_IDENT_LIST { $value = "["; } (n=stringorident { $value += " " + $n.value; } )+) { $value += " ]"; }
;
stringorident returns [String value]
: n=identifier { $value = $n.value; }
| s=qstring { $value = $s.value; }
;
// Echoes "define bitrange " (without a line break) before the individual ranges are
// walked, then terminates the statement with ";" on a line of its own, the same way
// fielddefs terminates token and context definitions. Previously no terminator was
// echoed for this definition.
bitrangedef
: ^(OP_BITRANGES { ot("define bitrange "); } bitranges) { out(";"); }
;
// Echoes each bit range on its own line. The first one follows the text already written
// on the current line; every later one is preceded by a single space. The value is read
// through the label's declared return attribute, as the other rules in this grammar do,
// rather than through the bare generated local.
bitranges
@init { String sp = ""; }
: (s=sbitrange { out(sp + $s.value); sp = " "; })+
;
// Renders one bit range as "<a> = <b> [<i>,<j>]".
sbitrange returns [String value]
: ^(OP_BITRANGE a=identifier b=identifier i=integer j=integer) { $value = $a.value + " = " + $b.value + " [" + $i.value + "," + $j.value + "]"; }
;
// Echoes "define pcodeop <list>;".
pcodeopdef
: ^(OP_PCODEOP l=identifierlist) { out("define pcodeop " + $l.value + ";"); }
;
// Echoes "attach values <identifiers> <values>;".
valueattach
: ^(OP_VALUES a=identifierlist b=intblist) { out("attach values " + $a.value + " " + $b.value + ";"); }
;
// Renders one or more value entries as "[ v1 v2 ]".
intblist returns [String value]
: ^(OP_INTBLIST { $value = "["; } (n=intbpart { $value += " " + $n.value; } )+) { $value += " ]"; }
;
// One value entry: "_" for a wildcard, "-<i>" for a negated integer, or the integer itself.
intbpart returns [String value]
: OP_WILDCARD { $value = "_"; }
| ^(OP_NEGATE i=integer) { $value = "-" + $i.value; }
| i=integer { $value = $i.value; }
;
// Echoes "attach names <identifiers> <names>;".
nameattach
: ^(OP_NAMES a=identifierlist b=stringoridentlist) { out("attach names " + $a.value + " " + $b.value + ";"); }
;
// Echoes "attach variables <identifiers> <variables>;".
varattach
: ^(OP_VARIABLES a=identifierlist b=identifierlist) { out("attach variables " + $a.value + " " + $b.value + ";"); }
;
constructorlike
: macrodef
| constructor
;
// Echoes the "macro <name>(<args>)" header before the semantic body is walked.
macrodef
: ^(OP_MACRO n=identifier a=arguments { out("macro " + $n.value + "(" + $a.value + ")" ); } semantic)
;
// The comma-separated argument names, or the empty string for OP_EMPTY_LIST.
arguments returns [String value]
: ^(OP_ARGUMENTS l=oplist) { $value = $l.value; }
| OP_EMPTY_LIST { $value = ""; }
;
// Joins one or more identifiers with "," and no surrounding spaces.
oplist returns [String value]
@init { String comma = ""; $value = ""; }
: (n=identifier { $value += comma + $n.value; comma = ","; })+
;
// Echoes a constructor: "<start>is <bit pattern> " is written via ot(), then the context
// block and the semantic section are echoed by their own rules.
constructor
: ^(OP_CONSTRUCTOR c=ctorstart b=bitpattern { ot($c.value + "is " + $b.value + " "); } contextblock ctorsemantic)
;
// The constructor's p-code section: either a semantic body or the marker " unimpl".
ctorsemantic
: ^(OP_PCODE semantic)
| ^(OP_PCODE OP_UNIMPL) { out(" unimpl"); }
;
// Text of the constructor's bit pattern (the rendered pattern equation).
bitpattern returns [String value]
: ^(OP_BIT_PATTERN p=pequation) { $value = $p.value; }
;
// Constructor start: "<table>:<display>" for a subtable, ":<display>" otherwise.
ctorstart returns [String value]
: ^(OP_SUBTABLE i=identifier d=display) { $value = $i.value + ":" + $d.value; }
| ^(OP_TABLE d=display) { $value = ":" + $d.value; }
;
// Text of the display section (concatenation of its print pieces).
display returns [String value]
: ^(OP_DISPLAY p=pieces) { $value = $p.value; }
;
// Concatenates zero or more print pieces with no separator; yields "" when there are none.
pieces returns [String value]
@init { $value = ""; }
: (p=printpiece { $value += $p.value; })*
;
// One display piece: identifier text, whitespace text, "^" for a concatenation marker, or string text.
printpiece returns [String value]
: i=identifier { $value = $i.value; }
| w=whitespace { $value = $w.value; }
| OP_CONCATENATE { $value = "^"; }
| s=string { $value = $s.value; }
;
// Text of the single child node of an OP_WHITESPACE tree.
whitespace returns [String value]
: ^(OP_WHITESPACE s=.) { $value = $s.getText(); }
;
// Text of the child of OP_STRING as-is; the child of OP_QSTRING wrapped in double quotes.
string returns [String value]
: ^(OP_STRING s=.) { $value = $s.getText(); }
| ^(OP_QSTRING s=.) { $value = "\"" + $s.getText() + "\""; }
;
// Renders a pattern equation. Binary nodes become "<l> <op> <r>" with op one of | ; &,
// ellipsis nodes add "... " before or " ..." after the operand, constraint nodes become
// "<identifier> <relop> <pexpression2>", a bare identifier is returned as-is, and
// OP_PARENTHESIZED re-adds the parentheses.
pequation returns [String value]
: ^(OP_BOOL_OR l=pequation r=pequation) { $value = $l.value + " | " + $r.value; }
| ^(OP_SEQUENCE l=pequation r=pequation) { $value = $l.value + " ; " + $r.value; }
| ^(OP_BOOL_AND l=pequation r=pequation) { $value = $l.value + " & " + $r.value; }
| ^(OP_ELLIPSIS l=pequation) { $value = "... " + $l.value; }
| ^(OP_ELLIPSIS_RIGHT l=pequation) { $value = $l.value + " ..."; }
| ^(OP_EQUAL n=identifier x=pexpression2) { $value = $n.value + " = " + $x.value; }
| ^(OP_NOTEQUAL n=identifier x=pexpression2) { $value = $n.value + " != " + $x.value; }
| ^(OP_LESS n=identifier x=pexpression2) { $value = $n.value + " < " + $x.value; }
| ^(OP_LESSEQUAL n=identifier x=pexpression2) { $value = $n.value + " <= " + $x.value; }
| ^(OP_GREAT n=identifier x=pexpression2) { $value = $n.value + " > " + $x.value; }
| ^(OP_GREATEQUAL n=identifier x=pexpression2) { $value = $n.value + " >= " + $x.value; }
| n=identifier { $value = $n.value; }
| ^(OP_PARENTHESIZED l=pequation) { $value = "(" + $l.value + ")"; }
;
// Renders the right-hand side of a pattern constraint. The bitwise operators are written
// in their "$or" / "$xor" / "$and" spelling; shifts and arithmetic use the usual symbols.
// Unary minus and "~" are prefixed without a space, and a function application becomes
// "<name>(<operands>)".
pexpression2 returns [String value]
: ^(OP_OR l=pexpression2 r=pexpression2) { $value = $l.value + " \$or " + $r.value; }
| ^(OP_XOR l=pexpression2 r=pexpression2) { $value = $l.value + " \$xor " + $r.value; }
| ^(OP_AND l=pexpression2 r=pexpression2) { $value = $l.value + " \$and " + $r.value; }
| ^(OP_LEFT l=pexpression2 r=pexpression2) { $value = $l.value + " << " + $r.value; }
| ^(OP_RIGHT l=pexpression2 r=pexpression2) { $value = $l.value + " >> " + $r.value; }
| ^(OP_ADD l=pexpression2 r=pexpression2) { $value = $l.value + " + " + $r.value; }
| ^(OP_SUB l=pexpression2 r=pexpression2) { $value = $l.value + " - " + $r.value; }
| ^(OP_MULT l=pexpression2 r=pexpression2) { $value = $l.value + " * " + $r.value; }
| ^(OP_DIV l=pexpression2 r=pexpression2) { $value = $l.value + " / " + $r.value; }
| ^(OP_NEGATE l=pexpression2) { $value = "-" + $l.value; }
| ^(OP_INVERT l=pexpression2) { $value = "~" + $l.value; }
| ^(OP_APPLY n=identifier o=pexpression2_operands) { $value = $n.value + "(" + $o.value + ")"; }
| n=identifier { $value = $n.value; }
| i=integer { $value = $i.value; }
| ^(OP_PARENTHESIZED l=pexpression2) { $value = "(" + $l.value + ")"; }
;
// Joins zero or more operand expressions with ","; yields "" when there are none.
pexpression2_operands returns [String value]
@init { String comma = ""; $value = ""; }
: (e=pexpression2 { $value += comma + $e.value; comma = ","; })*
;
// Echoes a context block as "[ " <statements> " ]"; emits nothing for OP_NO_CONTEXT_BLOCK.
contextblock
: ^(OP_CONTEXT_BLOCK { ot("[ "); } statements { ot(" ]"); })
| OP_NO_CONTEXT_BLOCK
;
// Echoes a semantic section: "{" before the code block and "}" after it.
semantic
: ^(OP_SEMANTIC { out("{"); } code_block { out("}"); } )
;
// Body of a semantic section: a statement list, or OP_NOP (which emits nothing).
code_block
: statements
| OP_NOP
;
// Zero or more statements, each preceded by a single space written via ot().
statements
: ( { ot(" "); } statement)*
;
// Renders a label as "<name>" wrapped in single angle brackets.
label returns [String value]
: ^(OP_LABEL n=variable) { $value = "<" + $n.value + ">"; }
;
// Renders a section label as the name wrapped in double angle brackets.
section_label returns [String value]
: ^(OP_SECTION_LABEL n=variable) { $value = "<<" + $n.value + ">>"; }
;
// Dispatches on the statement kind. Most alternatives emit their own text; labels and
// section labels only return text, so it is written here via out().
statement
: assignment
| declaration
| funcall
| build_stmt
| crossbuild_stmt
| goto_stmt
| cond_stmt
| call_stmt
| export
| return_stmt
| l=label { out($l.value); }
| s=section_label { out($s.value); }
;
// Echoes "<lvalue> = <expr>;", prefixed with "local " when the tree is wrapped in OP_LOCAL.
assignment
: ^(OP_ASSIGN l=lvalue e=expr) { out($l.value + " = " + $e.value + ";"); }
| ^(OP_LOCAL OP_ASSIGN l=lvalue e=expr) { out("local " + $l.value + " = " + $e.value + ";"); }
;
// Echoes a local declaration: "local <name>:<size>;" or "local <name>;".
declaration
: ^(OP_LOCAL v=variable a=constant) { out("local " + $v.value + ":" + $a.value + ";"); }
| ^(OP_LOCAL v=variable) { out("local " + $v.value + ";"); }
;
// Assignment target: a bit range, a sized variable "<name>:<size>", a plain variable, or a dereference.
lvalue returns [String value]
: b=bitrange { $value = $b.value; }
| ^(OP_DECLARATIVE_SIZE v=variable c=constant) { $value = $v.value + ":" + $c.value; }
| v=variable { $value = $v.value; }
| s=sizedstar { $value = $s.value; }
;
// Renders a bit range as "<variable>[<a>,<b>]".
bitrange returns [String value]
: ^(OP_BITRANGE v=variable a=constant b=constant) { $value = $v.value + "[" + $a.value + "," + $b.value + "]"; }
;
// Renders a dereference. The optional "[<variable>]" and ":<constant>" parts follow the
// "*" when present, and the address expression always comes last after a space.
sizedstar returns [String value]
: ^(OP_DEREFERENCE v=variable c=constant e=expr) { $value = "*[" + $v.value + "]:" + $c.value + " " + $e.value; }
| ^(OP_DEREFERENCE v=variable e=expr) { $value = "*[" + $v.value + "] " + $e.value; }
| ^(OP_DEREFERENCE c=constant e=expr) { $value = "*:" + $c.value + " " + $e.value; }
| ^(OP_DEREFERENCE e=expr) { $value = "* " + $e.value; }
;
// Echoes a function application used as a statement: "<name>(<operands>);".
funcall
: e=expr_apply { out($e.value + ";"); }
;
// Echoes "build <variable>;".
build_stmt
: ^(OP_BUILD v=variable) { out("build " + $v.value + ";"); }
;
// Echoes "crossbuild <varnode>, <variable>;".
crossbuild_stmt
: ^(OP_CROSSBUILD v=varnode n=variable) { out("crossbuild " + $v.value + ", " + $n.value + ";"); }
;
// Echoes "goto <destination>;".
goto_stmt
: ^(OP_GOTO j=jumpdest) { out("goto " + $j.value + ";"); }
;
// Renders a jump destination: a symbol name, "[<expr>]" for a dynamic target, an integer
// for an absolute target, "<constant>[<variable>]" for a relative target, or a label.
jumpdest returns [String value]
: ^(OP_JUMPDEST_SYMBOL v=variable) { $value = $v.value; }
| ^(OP_JUMPDEST_DYNAMIC e=expr) { $value = "[" + $e.value + "]"; }
| ^(OP_JUMPDEST_ABSOLUTE i=integer) { $value = $i.value; }
| ^(OP_JUMPDEST_RELATIVE c=constant v=variable) { $value = $c.value + "[" + $v.value + "]"; }
| ^(OP_JUMPDEST_LABEL l=label) { $value = $l.value; }
;
// Echoes "if (<expr>) " via ot(), then the goto statement is echoed by goto_stmt.
cond_stmt
: ^(OP_IF e=expr { ot("if (" + $e.value + ") "); } goto_stmt)
;
// Echoes "call <destination>;".
call_stmt
: ^(OP_CALL j=jumpdest) { out("call " + $j.value + ";"); }
;
// Echoes "return [<expr>];" when an expression is present, otherwise "return;".
return_stmt
: ^(OP_RETURN e=expr) { out("return [" + $e.value + "];"); }
| OP_RETURN { out("return;"); }
;
// Echoes "export <expr>;".
export
: ^(OP_EXPORT e=expr) { out("export " + $e.value + ";"); }
;
// Renders a semantic expression. Every binary node becomes "<l> <op> <r>" with a single
// space on each side of the operator. The "s" and "f" prefixed operator spellings belong
// to the OP_S* and OP_F* nodes respectively. Unary !, ~ and - are prefixed without a
// space, while OP_FNEGATE is written as "f- " followed by the operand. The remaining
// alternatives delegate to the dereference, application, varnode and bit range rules,
// re-add parentheses, or render "<identifier>:<integer>" for OP_BITRANGE2.
expr returns [String value]
: ^(OP_BOOL_OR l=expr r=expr) { $value = $l.value + " || " + $r.value; }
| ^(OP_BOOL_XOR l=expr r=expr) { $value = $l.value + " ^^ " + $r.value; }
| ^(OP_BOOL_AND l=expr r=expr) { $value = $l.value + " && " + $r.value; }
| ^(OP_OR l=expr r=expr) { $value = $l.value + " | " + $r.value; }
| ^(OP_XOR l=expr r=expr) { $value = $l.value + " ^ " + $r.value; }
| ^(OP_AND l=expr r=expr) { $value = $l.value + " & " + $r.value; }
| ^(OP_EQUAL l=expr r=expr) { $value = $l.value + " == " + $r.value; }
| ^(OP_NOTEQUAL l=expr r=expr) { $value = $l.value + " != " + $r.value; }
| ^(OP_FEQUAL l=expr r=expr) { $value = $l.value + " f== " + $r.value; }
| ^(OP_FNOTEQUAL l=expr r=expr) { $value = $l.value + " f!= " + $r.value; }
| ^(OP_LESS l=expr r=expr) { $value = $l.value + " < " + $r.value; }
| ^(OP_GREATEQUAL l=expr r=expr) { $value = $l.value + " >= " + $r.value; }
| ^(OP_LESSEQUAL l=expr r=expr) { $value = $l.value + " <= " + $r.value; }
| ^(OP_GREAT l=expr r=expr) { $value = $l.value + " > " + $r.value; }
| ^(OP_SLESS l=expr r=expr) { $value = $l.value + " s< " + $r.value; }
| ^(OP_SGREATEQUAL l=expr r=expr) { $value = $l.value + " s>= " + $r.value; }
| ^(OP_SLESSEQUAL l=expr r=expr) { $value = $l.value + " s<= " + $r.value; }
| ^(OP_SGREAT l=expr r=expr) { $value = $l.value + " s> " + $r.value; }
| ^(OP_FLESS l=expr r=expr) { $value = $l.value + " f< " + $r.value; }
| ^(OP_FGREATEQUAL l=expr r=expr) { $value = $l.value + " f>= " + $r.value; }
| ^(OP_FLESSEQUAL l=expr r=expr) { $value = $l.value + " f<= " + $r.value; }
| ^(OP_FGREAT l=expr r=expr) { $value = $l.value + " f> " + $r.value; }
| ^(OP_LEFT l=expr r=expr) { $value = $l.value + " << " + $r.value; }
| ^(OP_RIGHT l=expr r=expr) { $value = $l.value + " >> " + $r.value; }
| ^(OP_SRIGHT l=expr r=expr) { $value = $l.value + " s>> " + $r.value; }
| ^(OP_ADD l=expr r=expr) { $value = $l.value + " + " + $r.value; }
| ^(OP_SUB l=expr r=expr) { $value = $l.value + " - " + $r.value; }
| ^(OP_FADD l=expr r=expr) { $value = $l.value + " f+ " + $r.value; }
| ^(OP_FSUB l=expr r=expr) { $value = $l.value + " f- " + $r.value; }
| ^(OP_MULT l=expr r=expr) { $value = $l.value + " * " + $r.value; }
| ^(OP_DIV l=expr r=expr) { $value = $l.value + " / " + $r.value; }
| ^(OP_REM l=expr r=expr) { $value = $l.value + " \% " + $r.value; }
| ^(OP_SDIV l=expr r=expr) { $value = $l.value + " s/ " + $r.value; }
| ^(OP_SREM l=expr r=expr) { $value = $l.value + " s\% " + $r.value; }
| ^(OP_FMULT l=expr r=expr) { $value = $l.value + " f* " + $r.value; }
| ^(OP_FDIV l=expr r=expr) { $value = $l.value + " f/ " + $r.value; }
| ^(OP_NOT l=expr) { $value = "!" + $l.value; }
| ^(OP_INVERT l=expr) { $value = "~" + $l.value; }
| ^(OP_NEGATE l=expr) { $value = "-" + $l.value; }
| ^(OP_FNEGATE l=expr) { $value = "f- " + $l.value; }
| s=sizedstar { $value = $s.value; }
| a=expr_apply { $value = $a.value; }
| v=varnode { $value = $v.value; }
| b=bitrange { $value = $b.value; }
| ^(OP_PARENTHESIZED l=expr) { $value = "(" + $l.value + ")"; }
| ^(OP_BITRANGE2 n=identifier i=integer) { $value = $n.value + ":" + $i.value; }
;
// Renders a function application as "<name>(<operands>)".
expr_apply returns [String value]
: ^(OP_APPLY n=identifier o=expr_operands) { $value = $n.value + "(" + $o.value + ")"; }
;
// Joins zero or more operand expressions with ","; yields "" when there are none.
expr_operands returns [String value]
@init {
String comma = "";
$value = "";
}
: (e=expr { $value += comma + $e.value; comma = ","; })*
;
// Renders a varnode reference: a bare symbol, "<symbol>:<size>" for a truncation, or an
// address-of form ("&:<size> <varnode>" or "& <varnode>").
varnode returns [String value]
: s=symbol { $value = $s.value; }
| ^(OP_TRUNCATION_SIZE s=symbol c=constant) { $value = $s.value + ":" + $c.value; }
| ^(OP_ADDRESS_OF ^(OP_SIZING_SIZE c=constant) v=varnode) { $value = "&:" + $c.value + " " + $v.value; }
| ^(OP_ADDRESS_OF v=varnode) { $value = "&" + " " + $v.value; }
;
// A symbol is either an identifier or an integer literal; its text is returned unchanged.
symbol returns [String value]
: n=identifier {$value = $n.value; }
| i=integer { $value = $i.value; }
;
// A variable is an identifier; its text is returned unchanged.
variable returns [String value]
: n=identifier { $value = $n.value; }
;
// A constant is an integer literal; its text is returned unchanged.
constant returns [String value]
: i=integer { $value = $i.value; }
;
// Text of the child of OP_QSTRING, wrapped in double quotes.
qstring returns [String value]
: ^(OP_QSTRING s=.) { $value = "\"" + $s.getText() + "\""; }
;
// Text of the child of OP_IDENTIFIER, or "_" for a wildcard node.
identifier returns [String value]
: ^(OP_IDENTIFIER s=.) { $value = $s.getText(); }
| OP_WILDCARD { $value = "_"; }
;
// Text of the child token of a hex, decimal or binary constant node, returned unchanged.
integer returns [String value]
: ^(OP_HEX_CONSTANT s=.) { $value = $s.getText(); }
| ^(OP_DEC_CONSTANT s=.) { $value = $s.getText(); }
| ^(OP_BIN_CONSTANT s=.) { $value = $s.getText(); }
;

View file

@ -0,0 +1,20 @@
lexer grammar SleighLexer;
options {
superClass = AbstractSleighLexer;
}
import BaseLexer, DisplayLexer, SemanticLexer;
/**
* See README.txt (near the bottom) for more information.
*
* This grammar exists solely to tease a combined token vocabulary from all of the lexers. The
* resulting vocabulary is used by all of the other grammars (parsers and lexers) except for
* BooleanExpression.
*
* The output of this grammar (except for the .tokens file) is discarded by build.gradle.
*/
// A dummy rule, present only so that this grammar declares a rule of its own. It reuses an
// existing token name to avoid adding an unnecessary name to the combined vocabulary.
UNKNOWN: '*****************************************';

View file

@ -0,0 +1,584 @@
parser grammar SleighParser;
options {
output = AST;
ASTLabelType = CommonTree;
tokenVocab = SleighLexer;
superClass = AbstractSleighParser;
}
import DisplayParser, SemanticParser;
@members {
// Sets the lexer on this parser via the superclass, then passes the same lexer on to the
// two imported delegate parsers (display and semantic).
@Override
public void setLexer(SleighLexer lexer) {
super.setLexer(lexer);
gDisplayParser.setLexer(lexer);
gSemanticParser.setLexer(lexer);
}
}
/**
* This is the root parser for a .slaspec file. Its root rule is spec.
*
* A spec is an endian definition followed by any number of definitions and constructor-like
* items, up to EOF. The rule calls bail("Abort") before matching anything if the environment
* already reports lexing errors, and again after the rule finishes if it reports parsing
* errors.
*/
spec
@after {
if (env.getParsingErrors() > 0) {
bail("Abort");
}
}
: {
if (env.getLexingErrors() > 0) {
bail("Abort");
}
}
endiandef
( definition
| constructorlike
)* EOF
;
// "define endian = big|little;" -> ^(OP_ENDIAN <endian>), anchored at the "define" token.
endiandef
: lc=KEY_DEFINE KEY_ENDIAN ASSIGN endian SEMI -> ^(OP_ENDIAN[$lc,"define endian"] endian)
;
// The endianness keyword, rewritten to OP_BIG or OP_LITTLE.
endian
: lc=KEY_BIG -> OP_BIG[$lc]
| lc=KEY_LITTLE -> OP_LITTLE[$lc]
;
// Any define/attach statement. The terminating SEMI is matched here and dropped from the
// tree by the "!" suffix.
definition
: (aligndef
| tokendef
| contextdef
| spacedef
| varnodedef
| bitrangedef
| pcodeopdef
| valueattach
| nameattach
| varattach) SEMI!
;
// "define alignment = <integer>" -> ^(OP_ALIGNMENT <integer>).
aligndef
: lc=KEY_DEFINE KEY_ALIGNMENT ASSIGN integer -> ^(OP_ALIGNMENT[$lc, "define alignment"] integer)
;
// "define token <name> (<integer>) <field definitions>" -> ^(OP_TOKEN <name> <integer> <fields>).
// The closing parenthesis token is handed to fielddefs, which uses it for its own root node.
tokendef
: lc=KEY_DEFINE KEY_TOKEN identifier LPAREN integer rp=RPAREN fielddefs[$rp] -> ^(OP_TOKEN[$lc, "define token"] identifier integer fielddefs)
;
// Zero or more token field definitions under one OP_FIELDDEFS node created from the token lc.
fielddefs[Token lc]
: fielddef* -> ^(OP_FIELDDEFS[lc, "field definitions"] fielddef*)
;
// "<name> = (<start>,<end>) <modifiers>" -> ^(OP_FIELDDEF <name> <start> <end> <modifiers>).
// The field name must be a strict identifier (keywords are not accepted here).
fielddef
: strict_id lc=ASSIGN LPAREN s=integer COMMA e=integer rp=RPAREN fieldmods[$rp] -> ^(OP_FIELDDEF[$lc, "field definition"] strict_id $s $e fieldmods)
;
// One or more field modifiers under OP_FIELD_MODS, or a single OP_NO_FIELD_MOD node when
// none are present. Both are created from the token it.
fieldmods[Token it]
: fieldmod+ -> ^(OP_FIELD_MODS[it, "field modifiers"] fieldmod+)
| -> OP_NO_FIELD_MOD[it, "<no field mod>"]
;
// A token field modifier keyword: signed, hex or dec.
fieldmod
: lc=KEY_SIGNED -> OP_SIGNED[$lc]
| lc=KEY_HEX -> OP_HEX[$lc]
| lc=KEY_DEC -> OP_DEC[$lc]
;
// Zero or more context field definitions under one OP_FIELDDEFS node created from the token lc.
contextfielddefs[Token lc]
: contextfielddef* -> ^(OP_FIELDDEFS[lc, "field definitions"] contextfielddef*)
;
// Same shape as fielddef, except that the name may be any identifier (keywords included)
// and the modifiers are context field modifiers.
contextfielddef
: identifier lc=ASSIGN LPAREN s=integer COMMA e=integer rp=RPAREN contextfieldmods[$rp] -> ^(OP_FIELDDEF[$lc, "field definition"] identifier $s $e contextfieldmods)
;
// One or more context field modifiers under OP_FIELD_MODS, or OP_NO_FIELD_MOD when none are present.
contextfieldmods[Token it]
: contextfieldmod+ -> ^(OP_FIELD_MODS[it, "context field modifiers"] contextfieldmod+)
| -> OP_NO_FIELD_MOD[it, "<no field mod>"]
;
// A context field modifier keyword: signed, noflow, hex or dec.
contextfieldmod
: lc=KEY_SIGNED -> OP_SIGNED[$lc]
| lc=KEY_NOFLOW -> OP_NOFLOW[$lc]
| lc=KEY_HEX -> OP_HEX[$lc]
| lc=KEY_DEC -> OP_DEC[$lc]
;
// "define context <name> <field definitions>" -> ^(OP_CONTEXT <name> <fields>).
// The "context" keyword token is handed to contextfielddefs for its root node.
contextdef
: lc=KEY_DEFINE rp=KEY_CONTEXT identifier contextfielddefs[$rp] -> ^(OP_CONTEXT[$lc, "define context"] identifier contextfielddefs)
;
// "define space <name> <modifiers>" -> ^(OP_SPACE <name> <modifiers>).
spacedef
: lc=KEY_DEFINE KEY_SPACE identifier spacemods[$lc] -> ^(OP_SPACE[$lc, "define space"] identifier spacemods)
;
// Zero or more space modifiers under one OP_SPACEMODS node created from the token lc.
spacemods[Token lc]
: spacemod* -> ^(OP_SPACEMODS[$lc, "space modifier"] spacemod*)
;
// A single space modifier: type, size, wordsize, or the "default" keyword.
spacemod
: typemod
| sizemod
| wordsizemod
| lc=KEY_DEFAULT -> OP_DEFAULT[$lc]
;
// "type = <type>" -> ^(OP_TYPE <type>).
typemod
: lc=KEY_TYPE ASSIGN type -> ^(OP_TYPE[$lc] type)
;
// A space type is written as an identifier.
type
: identifier
;
// "size = <integer>" -> ^(OP_SIZE <integer>).
sizemod
: lc=KEY_SIZE ASSIGN integer -> ^(OP_SIZE[$lc] integer)
;
// "wordsize = <integer>" -> ^(OP_WORDSIZE <integer>).
wordsizemod
: lc=KEY_WORDSIZE ASSIGN integer -> ^(OP_WORDSIZE[$lc] integer)
;
// "define <name> offset = <integer> size = <integer> <identifier list>"
// -> ^(OP_VARNODE <name> <offset> <size> <list>). The "=" after "size" anchors the list node.
varnodedef
: lc=KEY_DEFINE identifier KEY_OFFSET ASSIGN offset=integer KEY_SIZE rb=ASSIGN size=integer identifierlist[$rb]
-> ^(OP_VARNODE[$lc, "define varnode"] identifier $offset $size identifierlist)
;
// "define bitrange <bitrange>+" -> ^(OP_BITRANGES <bitrange>+).
bitrangedef
: lc=KEY_DEFINE KEY_BITRANGE bitranges -> ^(OP_BITRANGES[$lc, "define bitrange"] bitranges)
;
// One or more bitrange entries.
bitranges
: bitrange+
;
// "<a> = <b> [<i>,<j>]" -> ^(OP_BITRANGE <a> <b> <i> <j>), anchored at the "=" token.
bitrange
: a=identifier lc=ASSIGN b=identifier LBRACKET i=integer COMMA j=integer RBRACKET -> ^(OP_BITRANGE[$lc, "bitrange definition"] $a $b $i $j)
;
// "define pcodeop <identifier list>" -> ^(OP_PCODEOP <list>).
pcodeopdef
: lc=KEY_DEFINE rb=KEY_PCODEOP identifierlist[$rb] -> ^(OP_PCODEOP[$lc, "define pcodeop"] identifierlist)
;
// "attach values <identifier list> <integer list>" -> ^(OP_VALUES <ids> <values>).
valueattach
: lc=KEY_ATTACH rp=KEY_VALUES identifierlist[$rp] intblist[$rp] -> ^(OP_VALUES[$lc, "attach values"] identifierlist intblist)
;
// "attach names <identifier list> <string-or-identifier list>" -> ^(OP_NAMES <ids> <names>).
nameattach
: lc=KEY_ATTACH rp=KEY_NAMES a=identifierlist[$rp] b=stringoridentlist[$rp] -> ^(OP_NAMES[$lc, "attach names"] $a $b)
;
// "attach variables <identifier list> <identifier list>" -> ^(OP_VARIABLES <ids> <variables>).
varattach
: lc=KEY_ATTACH rp=KEY_VARIABLES a=identifierlist[$rp] b=identifierlist[$rp] -> ^(OP_VARIABLES[$lc, "attach variables"] $a $b)
;
// Either a bracketed list of one or more identifiers/wildcards or a single unbracketed one.
// Both forms produce an OP_IDENTIFIER_LIST node created from the token lc.
identifierlist[Token lc]
: LBRACKET id_or_wild+ RBRACKET -> ^(OP_IDENTIFIER_LIST[$lc, "identifier list"] id_or_wild+)
| id_or_wild -> ^(OP_IDENTIFIER_LIST[$lc, "identifier list"] id_or_wild)
;
// Same two forms as identifierlist, with elements that may also be quoted strings.
stringoridentlist[Token lc]
: LBRACKET stringorident+ RBRACKET -> ^(OP_STRING_OR_IDENT_LIST[$lc, "string or identifier list"] stringorident+)
| stringorident -> ^(OP_STRING_OR_IDENT_LIST[$lc, "string or identifier list"] stringorident)
;
// An identifier, a wildcard, or a quoted string.
stringorident
: id_or_wild
| qstring
;
// Either a bracketed list of integers/wildcards or a single unbracketed (possibly negated)
// integer. Both forms produce an OP_INTBLIST node.
intblist[Token lc]
: LBRACKET intbpart+ RBRACKET -> ^(OP_INTBLIST[$lc, "integer or wildcard list"] intbpart+)
| neginteger -> ^(OP_INTBLIST[$lc, "integer or wildcard list"] neginteger)
;
// One bracketed-list entry: a possibly negated integer, or "_" rewritten to OP_WILDCARD.
intbpart
: neginteger
| lc=UNDERSCORE -> OP_WILDCARD[$lc]
;
// An integer, optionally preceded by "-" (which wraps it in OP_NEGATE).
neginteger
: integer
| lc=MINUS integer -> ^(OP_NEGATE[$lc] integer)
;
// A constructor-like item: a macro definition, a "with" block, or a constructor.
constructorlike
: macrodef
| withblock
| constructor
;
// "macro <name>(<arguments>) <semantic body>" -> ^(OP_MACRO <name> <arguments> <body>).
// The opening parenthesis token anchors the arguments node.
macrodef
: lc=KEY_MACRO identifier lp=LPAREN arguments[$lp] RPAREN semanticbody -> ^(OP_MACRO[$lc, "macro"] identifier arguments semanticbody)
;
// A non-empty operand list under OP_ARGUMENTS, or an OP_EMPTY_LIST node when there are no arguments.
arguments[Token lc]
: oplist -> ^(OP_ARGUMENTS[$lc, "arguments"] oplist)
| -> ^(OP_EMPTY_LIST[$lc, "no arguments"])
;
// One or more comma-separated identifiers; the commas are dropped from the tree.
oplist
: identifier (COMMA! identifier)*
;
// "with <id>? : <bit pattern>? <context block> { <items> }"
// -> ^(OP_WITH <id-or-nil> <pattern-or-nil> <context block> <items>).
withblock
: lc=RES_WITH id_or_nil COLON bitpat_or_nil contextblock LBRACE constructorlikelist RBRACE
-> ^(OP_WITH[$lc, "with"] id_or_nil bitpat_or_nil contextblock constructorlikelist)
;
// An identifier, or an OP_NIL placeholder when it is omitted.
id_or_nil
: identifier
| -> ^(OP_NIL)
;
// A bit pattern, or an OP_NIL placeholder when it is omitted.
bitpat_or_nil
: bitpattern
| -> ^(OP_NIL)
;
// Either kind of item allowed inside a "with" block.
def_or_conslike
: definition
| constructorlike
;
// Zero or more items collected under one OP_CTLIST node.
constructorlikelist
: def_or_conslike* -> ^(OP_CTLIST def_or_conslike* )
;
// A constructor: start, bit pattern, optional context block and semantic section, collected
// under one OP_CONSTRUCTOR node.
constructor
: ctorstart bitpattern contextblock ctorsemantic -> ^(OP_CONSTRUCTOR ctorstart bitpattern contextblock ctorsemantic)
;
// The semantic section: a semantic body, or the "unimpl" keyword rewritten to OP_UNIMPL.
// Both are wrapped in OP_PCODE.
ctorsemantic
: semanticbody -> ^(OP_PCODE semanticbody)
| lc=KEY_UNIMPL -> ^(OP_PCODE[$lc] OP_UNIMPL[$lc])
;
// A pattern equation wrapped in OP_BIT_PATTERN.
bitpattern
: pequation -> ^(OP_BIT_PATTERN pequation)
;
// Constructor start: OP_SUBTABLE when a table identifier precedes the display section,
// OP_TABLE when only the display section is present.
ctorstart
: identifier display -> ^(OP_SUBTABLE identifier display)
| display -> ^(OP_TABLE display)
;
// "[ <context statements> ]" -> ^(OP_CONTEXT_BLOCK ...), or OP_NO_CONTEXT_BLOCK when absent.
contextblock
: lc=LBRACKET ctxstmts RBRACKET -> ^(OP_CONTEXT_BLOCK[$lc, "[...]"] ctxstmts)
| -> ^(OP_NO_CONTEXT_BLOCK)
;
// Zero or more context statements.
ctxstmts
: ctxstmt*
;
// A context assignment or a function call, each terminated by a SEMI that is dropped from the tree.
ctxstmt
: ctxassign SEMI!
| pfuncall SEMI!
;
// "<lvalue> = <pexpression>" -> ^(OP_ASSIGN <lvalue> <pexpression>).
ctxassign
: ctxlval lc=ASSIGN pexpression -> ^(OP_ASSIGN[$lc] ctxlval pexpression)
;
// The target of a context assignment is an identifier.
ctxlval
: identifier
;
// A function call inside a context block.
pfuncall
: pexpression_apply
;
// Pattern equation. The rule nesting sets the binding order, loosest to tightest:
// "|" (OP_BOOL_OR), ";" (OP_SEQUENCE), "&" (OP_BOOL_AND), ellipsis, atom.
// Each *_op rule rewrites the operator token to its OP_ node, and the "^" suffix makes
// that node the root of the subtree.
pequation
: pequation_or
;
pequation_or
: pequation_seq ( pequation_or_op^ pequation_seq )*
;
pequation_or_op
: lc=PIPE -> ^(OP_BOOL_OR[$lc])
;
pequation_seq
: pequation_and ( pequation_seq_op^ pequation_and )*
;
pequation_seq_op
: lc=SEMI -> ^(OP_SEQUENCE[$lc])
;
pequation_and
: pequation_ellipsis ( pequation_and_op^ pequation_ellipsis )*
;
pequation_and_op
: lc=AMPERSAND -> ^(OP_BOOL_AND[$lc])
;
// A leading "..." wraps the operand in OP_ELLIPSIS.
pequation_ellipsis
: lc=ELLIPSIS pequation_ellipsis_right -> ^(OP_ELLIPSIS[$lc] pequation_ellipsis_right)
| pequation_ellipsis_right
;
// A trailing "..." wraps the operand in OP_ELLIPSIS_RIGHT. The syntactic predicate selects
// this alternative only when an atom is actually followed by ELLIPSIS.
pequation_ellipsis_right
: (pequation_atomic ELLIPSIS)=> pequation_atomic lc=ELLIPSIS -> ^(OP_ELLIPSIS_RIGHT[$lc] pequation_atomic)
| pequation_atomic
;
// A constraint, or a parenthesized pattern equation (kept as OP_PARENTHESIZED).
pequation_atomic
: constraint
| lc=LPAREN pequation RPAREN -> ^(OP_PARENTHESIZED[$lc,"(...)"] pequation)
;
// A bare identifier, or "<identifier> <relop> <pexpression2>" with the operator node as root.
constraint
: identifier (constraint_op^ pexpression2)?
;
// Relational operator of a constraint. Note that "=" is rewritten to OP_EQUAL here.
constraint_op
: lc=ASSIGN -> ^(OP_EQUAL[$lc])
| lc=NOTEQUAL -> ^(OP_NOTEQUAL[$lc])
| lc=LESS -> ^(OP_LESS[$lc])
| lc=LESSEQUAL -> ^(OP_LESSEQUAL[$lc])
| lc=GREAT -> ^(OP_GREAT[$lc])
| lc=GREATEQUAL -> ^(OP_GREATEQUAL[$lc])
;
// Pattern expression used on the right-hand side of context assignments. The rule nesting
// sets the binding order, loosest to tightest: or, xor, and, shift, add/sub, mult/div,
// unary, term. Each *_op rule rewrites the operator token to its OP_ node, and the "^"
// suffix makes that node the root of the subtree. The or/xor/and levels accept both the
// symbol token (PIPE, CARET, AMPERSAND) and the SPEC_* token.
pexpression
: pexpression_or
;
pexpression_or
: pexpression_xor (pexpression_or_op^ pexpression_xor)*
;
pexpression_or_op
: lc=PIPE -> ^(OP_OR[$lc])
| lc=SPEC_OR -> ^(OP_OR[$lc])
;
pexpression_xor
: pexpression_and (pexpression_xor_op^ pexpression_and)*
;
pexpression_xor_op
: lc=CARET -> ^(OP_XOR[$lc])
| lc=SPEC_XOR -> ^(OP_XOR[$lc])
;
pexpression_and
: pexpression_shift (pexpression_and_op^ pexpression_shift)*
;
pexpression_and_op
: lc=AMPERSAND -> ^(OP_AND[$lc])
| lc=SPEC_AND -> ^(OP_AND[$lc])
;
pexpression_shift
: pexpression_add (pexpression_shift_op^ pexpression_add)*
;
pexpression_shift_op
: lc=LEFT -> ^(OP_LEFT[$lc])
| lc=RIGHT -> ^(OP_RIGHT[$lc])
;
pexpression_add
: pexpression_mult (pexpression_add_op^ pexpression_mult)*
;
pexpression_add_op
: lc=PLUS -> ^(OP_ADD[$lc])
| lc=MINUS -> ^(OP_SUB[$lc])
;
pexpression_mult
: pexpression_unary (pexpression_mult_op^ pexpression_unary)*
;
pexpression_mult_op
: lc=ASTERISK -> ^(OP_MULT[$lc])
| lc=SLASH -> ^(OP_DIV[$lc])
;
// A unary operator applies to a term only; without one, a function call or a term is accepted.
pexpression_unary
: pexpression_unary_op^ pexpression_term
| pexpression_func
;
pexpression_unary_op
: lc=MINUS -> ^(OP_NEGATE[$lc])
| lc=TILDE -> ^(OP_INVERT[$lc])
;
pexpression_func
: pexpression_apply
| pexpression_term
;
// "<name>(<operands>)" -> ^(OP_APPLY <name> <operands>?).
pexpression_apply
: identifier pexpression_operands -> ^(OP_APPLY identifier pexpression_operands?)
;
// Parenthesized, comma-separated operand list; parentheses and commas are dropped from the tree.
pexpression_operands
: LPAREN! (pexpression (COMMA! pexpression)* )? RPAREN!
;
// An identifier, an integer, or a parenthesized expression (kept as OP_PARENTHESIZED).
pexpression_term
: identifier
| integer
| lc=LPAREN pexpression RPAREN -> ^(OP_PARENTHESIZED[$lc, "(...)"] pexpression)
;
// Pattern expression used on the right-hand side of constraints. It has the same layering
// as pexpression, except that the or/xor/and levels accept only the SPEC_OR, SPEC_XOR and
// SPEC_AND tokens, not PIPE, CARET or AMPERSAND.
pexpression2
: pexpression2_or
;
pexpression2_or
: pexpression2_xor (pexpression2_or_op^ pexpression2_xor)*
;
pexpression2_or_op
: lc=SPEC_OR -> ^(OP_OR[$lc])
;
pexpression2_xor
: pexpression2_and (pexpression2_xor_op^ pexpression2_and)*
;
pexpression2_xor_op
: lc=SPEC_XOR -> ^(OP_XOR[$lc])
;
pexpression2_and
: pexpression2_shift (pexpression2_and_op^ pexpression2_shift)*
;
pexpression2_and_op
: lc=SPEC_AND -> ^(OP_AND[$lc])
;
pexpression2_shift
: pexpression2_add (pexpression2_shift_op^ pexpression2_add)*
;
pexpression2_shift_op
: lc=LEFT -> ^(OP_LEFT[$lc])
| lc=RIGHT -> ^(OP_RIGHT[$lc])
;
pexpression2_add
: pexpression2_mult (pexpression2_add_op^ pexpression2_mult)*
;
pexpression2_add_op
: lc=PLUS -> ^(OP_ADD[$lc])
| lc=MINUS -> ^(OP_SUB[$lc])
;
pexpression2_mult
: pexpression2_unary (pexpression2_mult_op^ pexpression2_unary)*
;
pexpression2_mult_op
: lc=ASTERISK -> ^(OP_MULT[$lc])
| lc=SLASH -> ^(OP_DIV[$lc])
;
// A unary operator applies to a term only; without one, a function call or a term is accepted.
pexpression2_unary
: pexpression2_unary_op^ pexpression2_term
| pexpression2_func
;
pexpression2_unary_op
: lc=MINUS -> ^(OP_NEGATE[$lc])
| lc=TILDE -> ^(OP_INVERT[$lc])
;
pexpression2_func
: pexpression2_apply
| pexpression2_term
;
// "<name>(<operands>)" -> ^(OP_APPLY <name> <operands>?).
pexpression2_apply
: identifier pexpression2_operands -> ^(OP_APPLY identifier pexpression2_operands?)
;
// Parenthesized, comma-separated operand list; parentheses and commas are dropped from the tree.
pexpression2_operands
: LPAREN! (pexpression2 (COMMA! pexpression2)* )? RPAREN!
;
// An identifier, an integer, or a parenthesized expression (kept as OP_PARENTHESIZED).
pexpression2_term
: identifier
| integer
| lc=LPAREN pexpression2 RPAREN -> ^(OP_PARENTHESIZED[$lc, "(...)"] pexpression2)
;
// A quoted string token wrapped in OP_QSTRING.
qstring
: lc=QSTRING -> ^(OP_QSTRING[$lc, "QSTRING"] QSTRING)
;
// An identifier or the "_" wildcard.
id_or_wild
: identifier
| wildcard
;
// "_" rewritten to OP_WILDCARD.
wildcard
: lc=UNDERSCORE -> OP_WILDCARD[$lc]
;
// Either a plain IDENTIFIER token or one of the keywords accepted as an identifier.
identifier
: strict_id
| key_as_id
;
// Keywords that are accepted wherever an identifier is allowed. Each keyword token is
// wrapped in an OP_IDENTIFIER node whose text is the token type name, with the keyword
// token itself as the only child.
key_as_id
: lc=KEY_ALIGNMENT -> ^(OP_IDENTIFIER[$lc, "KEY_ALIGNMENT"] KEY_ALIGNMENT)
| lc=KEY_ATTACH -> ^(OP_IDENTIFIER[$lc, "KEY_ATTACH"] KEY_ATTACH)
| lc=KEY_BIG -> ^(OP_IDENTIFIER[$lc, "KEY_BIG"] KEY_BIG)
| lc=KEY_BITRANGE -> ^(OP_IDENTIFIER[$lc, "KEY_BITRANGE"] KEY_BITRANGE)
| lc=KEY_BUILD -> ^(OP_IDENTIFIER[$lc, "KEY_BUILD"] KEY_BUILD)
| lc=KEY_CALL -> ^(OP_IDENTIFIER[$lc, "KEY_CALL"] KEY_CALL) // appeared in printpiece
| lc=KEY_CONTEXT -> ^(OP_IDENTIFIER[$lc, "KEY_CONTEXT"] KEY_CONTEXT)
| lc=KEY_CROSSBUILD -> ^(OP_IDENTIFIER[$lc, "KEY_CROSSBUILD"] KEY_CROSSBUILD)
| lc=KEY_DEC -> ^(OP_IDENTIFIER[$lc, "KEY_DEC"] KEY_DEC) // appeared in printpiece
| lc=KEY_DEFAULT -> ^(OP_IDENTIFIER[$lc, "KEY_DEFAULT"] KEY_DEFAULT)
| lc=KEY_DEFINE -> ^(OP_IDENTIFIER[$lc, "KEY_DEFINE"] KEY_DEFINE)
| lc=KEY_ENDIAN -> ^(OP_IDENTIFIER[$lc, "KEY_ENDIAN"] KEY_ENDIAN)
| lc=KEY_EXPORT -> ^(OP_IDENTIFIER[$lc, "KEY_EXPORT"] KEY_EXPORT)
| lc=KEY_GOTO -> ^(OP_IDENTIFIER[$lc, "KEY_GOTO"] KEY_GOTO)
| lc=KEY_HEX -> ^(OP_IDENTIFIER[$lc, "KEY_HEX"] KEY_HEX)
| lc=KEY_LITTLE -> ^(OP_IDENTIFIER[$lc, "KEY_LITTLE"] KEY_LITTLE)
| lc=KEY_LOCAL -> ^(OP_IDENTIFIER[$lc, "KEY_LOCAL"] KEY_LOCAL)
| lc=KEY_MACRO -> ^(OP_IDENTIFIER[$lc, "KEY_MACRO"] KEY_MACRO)
| lc=KEY_NAMES -> ^(OP_IDENTIFIER[$lc, "KEY_NAMES"] KEY_NAMES)
| lc=KEY_NOFLOW -> ^(OP_IDENTIFIER[$lc, "KEY_NOFLOW"] KEY_NOFLOW)
| lc=KEY_OFFSET -> ^(OP_IDENTIFIER[$lc, "KEY_OFFSET"] KEY_OFFSET)
| lc=KEY_PCODEOP -> ^(OP_IDENTIFIER[$lc, "KEY_PCODEOP"] KEY_PCODEOP)
| lc=KEY_RETURN -> ^(OP_IDENTIFIER[$lc, "KEY_RETURN"] KEY_RETURN)
| lc=KEY_SIGNED -> ^(OP_IDENTIFIER[$lc, "KEY_SIGNED"] KEY_SIGNED)
| lc=KEY_SIZE -> ^(OP_IDENTIFIER[$lc, "KEY_SIZE"] KEY_SIZE)
| lc=KEY_SPACE -> ^(OP_IDENTIFIER[$lc, "KEY_SPACE"] KEY_SPACE)
| lc=KEY_TOKEN -> ^(OP_IDENTIFIER[$lc, "KEY_TOKEN"] KEY_TOKEN)
| lc=KEY_TYPE -> ^(OP_IDENTIFIER[$lc, "KEY_TYPE"] KEY_TYPE)
| lc=KEY_UNIMPL -> ^(OP_IDENTIFIER[$lc, "KEY_UNIMPL"] KEY_UNIMPL)
| lc=KEY_VALUES -> ^(OP_IDENTIFIER[$lc, "KEY_VALUES"] KEY_VALUES)
| lc=KEY_VARIABLES -> ^(OP_IDENTIFIER[$lc, "KEY_VARIABLES"] KEY_VARIABLES)
| lc=KEY_WORDSIZE -> ^(OP_IDENTIFIER[$lc, "KEY_WORDSIZE"] KEY_WORDSIZE)
;
// A plain IDENTIFIER token (no keywords), wrapped in OP_IDENTIFIER.
strict_id
: lc=IDENTIFIER -> ^(OP_IDENTIFIER[$lc, "IDENTIFIER"] IDENTIFIER)
;
// An integer literal, wrapped in a node that records whether it was written in hex,
// decimal or binary. The original token is kept as the child.
integer
: lc=HEX_INT -> ^(OP_HEX_CONSTANT[$lc, "HEX_INT"] HEX_INT)
| lc=DEC_INT -> ^(OP_DEC_CONSTANT[$lc, "DEC_INT"] DEC_INT)
| lc=BIN_INT -> ^(OP_BIN_CONSTANT[$lc, "BIN_INT"] BIN_INT)
;

View file

@ -0,0 +1,32 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra;
import ghidra.framework.ModuleInitializer;
import ghidra.program.model.data.CharsetInfo;
/**
 * {@link ModuleInitializer} for the SoftwareModeling module. Running it registers the
 * user-defined charsets with {@link CharsetInfo}.
 */
public class SoftwareModelingInitializer implements ModuleInitializer {

	/** Name reported by {@link #getName()}. */
	private static final String MODULE_NAME = "SoftwareModeling Module";

	/**
	 * Returns the name of this initializer.
	 *
	 * @return the fixed module name
	 */
	@Override
	public String getName() {
		return MODULE_NAME;
	}

	/**
	 * Registers the user-defined charsets.
	 */
	@Override
	public void run() {
		CharsetInfo.reinitializeWithUserDefinedCharsets();
	}
}

View file

@ -0,0 +1,28 @@
/* ###
* IP: GHIDRA
* REVIEWED: YES
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.merge;
import ghidra.program.model.data.DataTypeManager;
/**
 * Implemented by objects that expose an associated {@link DataTypeManager}.
 */
public interface DataTypeManagerOwner {

	/**
	 * Returns the data type manager associated with this owner.
	 *
	 * @return the data type manager.
	 */
	DataTypeManager getDataTypeManager();
}

View file

@ -0,0 +1,198 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler;
import java.util.Collection;
import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseResult;
import ghidra.app.plugin.assembler.sleigh.sem.*;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressOverflowException;
import ghidra.program.model.lang.InstructionBlock;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.mem.MemoryAccessException;
/**
* The primary interface for performing assembly in Ghidra.
*
* Use the {@link Assemblers} class to obtain a suitable implementation for a given program or
* language.
*/
public interface Assembler {
/**
* Assemble a sequence of instructions and place them at the given address.
*
* This method is only valid if the assembler is bound to a program. An instance may optionally
* implement this method without a program binding. In that case, the returned instruction
* block will refer to pseudo instructions.
*
* NOTE: There must be an active transaction on the bound program for this method to succeed.
*
* @param at the location where the resulting instructions should be placed
* @param listing a new-line separated or array sequence of instructions
* @return the block of resulting instructions
* @throws AssemblySyntaxException a textual instruction is not well-formed
* @throws AssemblySemanticException a well-formed instruction cannot be assembled
* @throws MemoryAccessException there is an issue writing the result to program memory
* @throws AddressOverflowException the resulting block is beyond the valid address range
*/
public InstructionBlock assemble(Address at, String... listing) throws AssemblySyntaxException,
AssemblySemanticException, MemoryAccessException, AddressOverflowException;
/**
* Assemble a line instruction at the given address.
*
* This method is valid with or without a bound program. Even if bound, the program is not
* modified; however, the appropriate context information is taken from the bound program.
* Without a program, the language's default context is taken at the given location.
*
* @param at the location of the start of the instruction
* @param line the textual assembly code
* @return the binary machine code, suitable for placement at the given address
* @throws AssemblySyntaxException the textual instruction is not well-formed
* @throws AssemblySemanticException the well-formed instruction cannot be assembled
*/
public byte[] assembleLine(Address at, String line)
throws AssemblySyntaxException, AssemblySemanticException;
/**
* Assemble a line instruction at the given address, assuming the given context.
*
* This method works like {@link #assembleLine(Address, String)} except that it allows you to
* override the assumed context at that location.
*
* @param at the location of the start of the instruction
* @param line the textual assembly code
* @param ctx the context register value at the start of the instruction
* @return the binary machine code, suitable for placement at the given address
* @throws AssemblySyntaxException the textual instruction is not well-formed
* @throws AssemblySemanticException the well-formed instruction cannot be assembled
*/
public byte[] assembleLine(Address at, String line, AssemblyPatternBlock ctx)
throws AssemblySemanticException, AssemblySyntaxException;
/**
* Parse a line instruction.
*
* Generally, you should just use {@link #assembleLine(Address, String)}, but if you'd like
* access to the parse trees outside of an {@link AssemblySelector}, then this may be an
* acceptable option. Most notably, this is an excellent way to obtain suggestions for
* auto-completion.
*
* Each item in the returned collection is either a complete parse tree, or a syntax error.
* Because all parse paths are attempted, it's possible to get many mixed results. For example,
* the input line may be a valid instruction; however, there may be suggestions to continue the
* line toward another valid instruction.
*
* @param line the line (or partial line) to parse
* @return the results of parsing
*/
public Collection<AssemblyParseResult> parseLine(String line);
/**
* Resolve a given parse tree at the given address, assuming the given context.
*
* Each item in the returned collection is either a completely resolved instruction, or a
* semantic error. Because all resolutions are attempted, it's possible to get many mixed
* results.
*
* NOTE: The resolved instructions are given as masks and values. Where the mask does not
* cover, you can choose any value.
*
* @param parse a parse result giving a valid tree
* @param at the location of the start of the instruction
* @param ctx the context register value at the start of the instruction
* @return the results of semantic resolution
*/
public AssemblyResolutionResults resolveTree(AssemblyParseResult parse, Address at,
AssemblyPatternBlock ctx);
/**
* Resolve a given parse tree at the given address.
*
* Each item in the returned collection is either a completely resolved instruction, or a
* semantic error. Because all resolutions are attempted, it's possible to get many mixed
* results.
*
* NOTE: The resolved instructions are given as masks and values. Where the mask does not
* cover, you can choose any value.
*
* @param parse a parse result giving a valid tree
* @param at the location of the start of the instruction
* @return the results of semantic resolution
*/
public AssemblyResolutionResults resolveTree(AssemblyParseResult parse, Address at);
/**
* Parse and resolve a line of textual assembly at the given address.
*
* This method works like {@link #resolveLine(Address, String, AssemblyPatternBlock)}, except
* that it derives the context using {@link #getContextAt(Address)}.
* @param at the location of the start of the instruction
* @param line the textual assembly code
* @return the collection of semantic resolution results
* @throws AssemblySyntaxException the textual instruction is not well-formed
*/
public AssemblyResolutionResults resolveLine(Address at, String line)
throws AssemblySyntaxException;
/**
* Parse and resolve a line of textual assembly at the given address, assuming the given
* context.
*
* This method works like {@link #assembleLine(Address, String, AssemblyPatternBlock)}, except
* that it returns all possible resolutions for the parse trees that pass the
* {@link AssemblySelector}.
* @param at the location of the start of the instruction
* @param line the textual assembly code
* @param ctx the context register value at the start of the instruction
* @return the collection of semantic resolution results
* @throws AssemblySyntaxException the textual instruction is not well-formed
*/
public AssemblyResolutionResults resolveLine(Address at, String line, AssemblyPatternBlock ctx)
throws AssemblySyntaxException;
/**
* Place a resolved (and fully-masked) instruction into the bound program.
*
* This method is not valid without a program binding. Also, this method must be called during
* a program database transaction.
* @param res the resolved and fully-masked instruction
* @param at the location of the start of the instruction
* @return the new {@link Instruction} code unit
* @throws MemoryAccessException if there is an issue writing the result to program memory
*/
public Instruction patchProgram(AssemblyResolvedConstructor res, Address at)
throws MemoryAccessException;
/**
* Place raw instruction bytes into the bound program.
*
* This method is not valid without a program binding. Also, this method must be called during
* a program database transaction.
* @param insbytes the instruction data
* @param at the location of the start of the instruction
* @return the new {@link Instruction} code unit
* @throws MemoryAccessException if there is an issue writing the result to program memory
*/
public Instruction patchProgram(byte[] insbytes, Address at) throws MemoryAccessException;
/**
* Get the context at a given address.
*
* If there is a program binding, this will extract the actual context at the given address.
* Otherwise, it will obtain the default context at the given address for the language.
* @param addr the address
* @return the context
*/
public AssemblyPatternBlock getContextAt(Address addr);
}

View file

@ -0,0 +1,52 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler;
import ghidra.program.model.lang.Language;
import ghidra.program.model.lang.LanguageID;
import ghidra.program.model.listing.Program;
/**
 * A factory that produces {@link Assembler}s for one particular language
 */
public interface AssemblerBuilder {

    /**
     * Retrieve the identifier of the language targeted by this builder
     *
     * @return the ID of the language for which assemblers are built
     */
    LanguageID getLanguageID();

    /**
     * Retrieve the language targeted by this builder
     *
     * @return the language for which assemblers are built
     */
    Language getLanguage();

    /**
     * Create an assembler that uses the given selector callback
     *
     * @param selector the callback used to choose among candidate results
     * @return the newly built assembler
     */
    Assembler getAssembler(AssemblySelector selector);

    /**
     * Create an assembler that uses the given selector callback and is bound to a program
     *
     * @param selector the callback used to choose among candidate results
     * @param program the program to which the assembler is bound
     * @return the newly built assembler
     */
    Assembler getAssembler(AssemblySelector selector, Program program);
}

View file

@ -0,0 +1,121 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler;
import java.util.HashMap;
import java.util.Map;
import ghidra.app.plugin.assembler.sleigh.SleighAssemblerBuilder;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.program.model.address.Address;
import ghidra.program.model.lang.Language;
import ghidra.program.model.lang.LanguageID;
import ghidra.program.model.listing.Program;
/**
 * The entry point for obtaining an {@link Assembler} for a Ghidra-supported language.
 *
 * Typical usage has two steps: obtain an assembler for a language or a program, then invoke
 * {@link Assembler#assemble(Address, String)} (or one of its relatives) to perform assembly.
 * Callers needing finer control may supply an {@link AssemblySelector}, which influences
 * instruction selection and gives access to diagnostics such as detailed errors and code
 * completion.
 *
 * <pre>
 * {@code
 * Assembler asm = Assemblers.getAssembler(currentProgram);
 * asm.assemble(currentAddress, "ADD ...");
 * }
 * </pre>
 *
 * NOTE: builders are cached per language ID in a plain {@link HashMap}; no synchronization is
 * performed on that cache.
 */
public final class Assemblers {
    /** Builders already created, keyed by the ID of the language they serve */
    private static final Map<LanguageID, AssemblerBuilder> builders = new HashMap<>();

    /**
     * Look up, or create and cache, the builder for the given language.
     *
     * @param lang the language
     * @return the builder for that language
     * @throws UnsupportedOperationException if no builder is cached and the language is not a
     *             {@link SleighLanguage}
     */
    protected static AssemblerBuilder getBuilderForLang(Language lang) {
        LanguageID id = lang.getLanguageID();
        AssemblerBuilder cached = builders.get(id);
        if (cached != null) {
            return cached;
        }
        if (!(lang instanceof SleighLanguage)) {
            throw new UnsupportedOperationException(
                "Unsupported language type: " + lang.getClass());
        }
        AssemblerBuilder created = new SleighAssemblerBuilder((SleighLanguage) lang);
        builders.put(id, created);
        return created;
    }

    /**
     * Get an assembler for the given program, using the given selector.
     *
     * The returned assembler suits the program's language and is bound to the program, so calls
     * to its Assembler#assemble() function modify that program. The first request for a given
     * language may take some time, since the builder must be constructed.
     *
     * @param program the program for which an assembler is requested
     * @param selector a method to select a single result from many
     * @return the assembler bound to the given program
     */
    public static Assembler getAssembler(Program program, AssemblySelector selector) {
        return getBuilderForLang(program.getLanguage()).getAssembler(selector, program);
    }

    /**
     * Get an assembler for the given language, using the given selector.
     *
     * The returned assembler has no program binding: only calls to its
     * Assembler#assembleLine() method are valid. The first request for a given language may take
     * some time, since the builder must be constructed; later requests reuse the cached builder.
     *
     * @param lang the language for which an assembler is requested
     * @param selector a method to select a single result from many
     * @return the assembler for the given language
     */
    public static Assembler getAssembler(Language lang, AssemblySelector selector) {
        return getBuilderForLang(lang).getAssembler(selector);
    }

    /**
     * Get an assembler for the given program, using a default {@link AssemblySelector}.
     *
     * @see #getAssembler(Program, AssemblySelector)
     *
     * @param program the program
     * @return a suitable assembler
     */
    public static Assembler getAssembler(Program program) {
        AssemblySelector defaultSelector = new AssemblySelector();
        return getAssembler(program, defaultSelector);
    }

    /**
     * Get an assembler for the given language, using a default {@link AssemblySelector}.
     *
     * @see #getAssembler(Language, AssemblySelector)
     *
     * @param lang the language
     * @return a suitable assembler
     */
    public static Assembler getAssembler(Language lang) {
        AssemblySelector defaultSelector = new AssemblySelector();
        return getAssembler(lang, defaultSelector);
    }
}

View file

@ -0,0 +1,25 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler;
/**
 * An unchecked exception signalling a programmer error in the use of an assembler
 */
public class AssemblyError extends RuntimeException {

    /**
     * Construct an error carrying the given description
     *
     * @param message a description of the programmer error
     */
    public AssemblyError(String message) {
        super(message);
    }
}

View file

@ -0,0 +1,29 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler;
/**
 * A checked exception signalling that input given to the assembler was erroneous
 */
public class AssemblyException extends Exception {

    /**
     * Construct an exception carrying the given description
     *
     * @param message a description of the input error
     */
    public AssemblyException(String message) {
        super(message);
    }

    /**
     * Construct an exception carrying the given description and underlying cause
     *
     * @param message a description of the input error
     * @param cause the exception that led to this one
     */
    public AssemblyException(String message, Throwable cause) {
        super(message, cause);
    }
}

View file

@ -0,0 +1,25 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler;
/**
 * An {@link AssemblyError} raised when a programmer's selection of an instruction during
 * assembly is improper
 */
public class AssemblySelectionError extends AssemblyError {

    /**
     * Construct a selection error carrying the given description
     *
     * @param message a description of why the selection is improper
     */
    public AssemblySelectionError(String message) {
        super(message);
    }
}

View file

@ -0,0 +1,135 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler;
import java.util.*;
import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseResult;
import ghidra.app.plugin.assembler.sleigh.sem.*;
import ghidra.app.plugin.assembler.sleigh.util.SleighUtil;
/**
 * Provides a mechanism for pruning and selecting binary assembled instructions from the results
 * of parsing textual assembly instructions. There are two opportunities: After parsing, but before
 * semantic resolution, and after resolution. In the first opportunity, filtering is optional ---
 * the user may discard any or all parse trees. The second is required, since only one instruction
 * may be placed at the desired address --- the user must select one instruction among the many
 * results, and if a mask is present, decide on a value for the omitted bits.
 *
 * Extensions of this class are also suitable for collecting diagnostic information about attempted
 * assemblies. For example, an implementation may employ the syntax errors in order to produce
 * code completion suggestions in a GUI.
 *
 * The recorded errors describe the most recent line only: both error sets are reset at the start
 * of each call to {@link #filterParse(Collection)}.
 */
public class AssemblySelector {
    /** Syntax errors recorded by the most recent call to {@link #filterParse(Collection)} */
    protected Set<AssemblyParseResult> syntaxErrors = new TreeSet<>();
    /** Semantic errors recorded by calls to select since the last filterParse */
    protected Set<AssemblyResolvedError> semanticErrors = new TreeSet<>();

    /**
     * A comparator on instruction length (shortest first), then bits lexicographically
     */
    protected Comparator<AssemblyResolvedConstructor> compareBySizeThenBits = (a, b) -> {
        // Integer.compare avoids the overflow hazard of comparing by subtraction
        int result = Integer.compare(a.getInstructionLength(), b.getInstructionLength());
        if (result != 0) {
            return result;
        }
        return SleighUtil.compareArrays(a.getInstruction().getVals(),
            b.getInstruction().getVals());
    };

    /**
     * Filter a collection of parse trees.
     *
     * Generally, the assembly resolver considers every possible parsing of an assembly
     * instruction. If, for some reason, the user wishes to ignore certain trees (perhaps for
     * efficiency, or perhaps because a certain form of instruction is desired), entire parse
     * trees may be pruned here.
     *
     * It's possible that no trees pass the filter. In this case, this method ought to throw an
     * {@link AssemblySyntaxException}. Another option is to pass the erroneous result on for
     * semantic analysis, in which case, the error is simply copied into an erroneous semantic
     * result. Depending on preferences, this may simplify the overall filtering and
     * error-handling logic.
     *
     * By default, no filtering is applied. If all the trees produce syntax errors, an exception is
     * thrown. The exception carries a snapshot of the errors, so it is unaffected by later use of
     * this selector.
     *
     * @param parse the collection of parse results (errors and trees).
     * @return the filtered collection, optionally in-place.
     * @throws AssemblySyntaxException if the selector wishes to forward one or more syntax errors
     */
    public Collection<AssemblyParseResult> filterParse(Collection<AssemblyParseResult> parse)
            throws AssemblySyntaxException {
        // A new line is being processed: forget the errors recorded for any previous line, lest
        // they be reported again with this one.
        syntaxErrors.clear();
        semanticErrors.clear();

        boolean gotOne = false;
        for (AssemblyParseResult pr : parse) {
            if (pr.isError()) {
                syntaxErrors.add(pr);
            }
            else {
                gotOne = true;
            }
        }
        if (!gotOne) {
            // Hand the exception its own copy so that it does not alias our mutable state
            throw new AssemblySyntaxException(new TreeSet<AssemblyParseResult>(syntaxErrors));
        }
        return parse;
    }

    /**
     * Select an instruction from the possible results.
     *
     * Must select precisely one resolved constructor from the results given back by the assembly
     * resolver. Precisely one. That means the mask of the returned result must consist of all 1s.
     * Also, if no selection is suitable, an exception must be thrown.
     *
     * By default, this method selects the shortest non-erroneous result (ties broken by
     * comparing the instruction bits) and takes 0 for bits that fall outside the mask. If all
     * possible resolutions produce errors, an exception is thrown.
     *
     * NOTE(review): this default implementation does not itself consult {@code ctx}; presumably
     * the resolver has already excluded results with an incompatible context --- confirm.
     *
     * @param rr the collection of resolved constructors
     * @param ctx the applicable context.
     * @return a single resolved constructor with a full instruction mask.
     * @throws AssemblySemanticException if every given resolution is an error
     */
    public AssemblyResolvedConstructor select(AssemblyResolutionResults rr,
            AssemblyPatternBlock ctx) throws AssemblySemanticException {
        List<AssemblyResolvedConstructor> candidates = new ArrayList<>();
        // Separate the errors (recorded for diagnostics) from the usable results
        for (AssemblyResolution ar : rr) {
            if (ar.isError()) {
                semanticErrors.add((AssemblyResolvedError) ar);
                continue;
            }
            candidates.add((AssemblyResolvedConstructor) ar);
        }
        if (candidates.isEmpty()) {
            // Hand the exception its own copy so that it does not alias our mutable state
            throw new AssemblySemanticException(
                new TreeSet<AssemblyResolvedError>(semanticErrors));
        }
        // Pick the first minimal candidate, i.e., the same one a stable sort would put first
        AssemblyResolvedConstructor res = Collections.min(candidates, compareBySizeThenBits);
        // Just set the mask to ffs (effectively choosing 0 for the omitted bits)
        return AssemblyResolution.resolved(res.getInstruction().fillMask(), res.getContext(),
            "Selected", null);
    }
}

View file

@ -0,0 +1,54 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler;
import java.util.Collection;
import java.util.Collections;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedError;
/**
 * Thrown when all resolutions of an assembly instruction result in semantic errors.
 *
 * For SLEIGH, semantic errors amount to incompatible contexts
 */
public class AssemblySemanticException extends AssemblyException {
    /** The associated semantic errors; null when constructed from a message only */
    protected Set<AssemblyResolvedError> errors;

    /**
     * Construct a semantic exception with only a message, i.e., without associated errors
     * @param message a description of the problem
     */
    public AssemblySemanticException(String message) {
        super(message);
    }

    /**
     * Construct a semantic exception with the associated semantic errors
     *
     * The exception message consists of the errors' string forms, one per line. The given set is
     * stored as is, not copied.
     * @param errors the associated semantic errors
     */
    public AssemblySemanticException(Set<AssemblyResolvedError> errors) {
        super(StringUtils.join(errors, "\n"));
        this.errors = errors;
    }

    /**
     * Get the collection of associated semantic errors
     * @return an unmodifiable view of the errors; empty if none were associated
     */
    public Collection<AssemblyResolvedError> getErrors() {
        // The message-only constructor leaves errors null; report "no errors" rather than
        // failing with a NullPointerException.
        if (errors == null) {
            return Collections.emptySet();
        }
        return Collections.unmodifiableCollection(errors);
    }
}

View file

@ -0,0 +1,52 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler;
import java.util.Collection;
import java.util.Collections;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseResult;
/**
 * Thrown when all parses of an assembly instruction result in syntax errors.
 */
public class AssemblySyntaxException extends AssemblyException {
    /** The associated syntax errors; null when constructed from a message only */
    protected Set<AssemblyParseResult> errors;

    /**
     * Construct a syntax exception with only a message, i.e., without associated errors
     * @param message a description of the problem
     */
    public AssemblySyntaxException(String message) {
        super(message);
    }

    /**
     * Construct a syntax exception with the associated syntax errors
     *
     * The exception message consists of the errors' string forms, one per line. The given set is
     * stored as is, not copied.
     * @param errors the associated syntax errors
     */
    public AssemblySyntaxException(Set<AssemblyParseResult> errors) {
        super(StringUtils.join(errors, "\n"));
        this.errors = errors;
    }

    /**
     * Get the collection of associated syntax errors
     * @return an unmodifiable view of the errors; empty if none were associated
     */
    public Collection<AssemblyParseResult> getErrors() {
        // The message-only constructor leaves errors null; report "no errors" rather than
        // failing with a NullPointerException.
        if (errors == null) {
            return Collections.emptySet();
        }
        return Collections.unmodifiableCollection(errors);
    }
}

View file

@ -0,0 +1,257 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh;
import java.util.*;
import ghidra.app.plugin.assembler.*;
import ghidra.app.plugin.assembler.sleigh.parse.*;
import ghidra.app.plugin.assembler.sleigh.sem.*;
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.program.disassemble.Disassembler;
import ghidra.program.disassemble.DisassemblerMessageListener;
import ghidra.program.model.address.*;
import ghidra.program.model.lang.*;
import ghidra.program.model.listing.*;
import ghidra.program.model.mem.Memory;
import ghidra.program.model.mem.MemoryAccessException;
import ghidra.program.model.symbol.Symbol;
import ghidra.program.model.symbol.SymbolIterator;
import ghidra.util.task.TaskMonitor;
/**
 * An {@link Assembler} for a {@link SleighLanguage}.
 *
 * To obtain one of these, please use {@link SleighAssemblerBuilder}, or better yet, the static
 * methods of {@link Assemblers}.
 *
 * An instance may or may not be bound to a program. Without a binding, the operations that
 * modify a program ({@link #assemble(Address, String...)} and the {@code patchProgram} methods)
 * are not available.
 */
public class SleighAssembler implements Assembler {
    public static final int DEFAULT_MAX_RECURSION_DEPTH = 2; // TODO: Toss this

    protected static final DbgTimer dbg = DbgTimer.INACTIVE;

    protected AssemblySelector selector;
    // The next four are only set when the assembler is bound to a program
    protected Program program;
    protected Listing listing;
    protected Memory memory;
    protected Disassembler dis;
    protected AssemblyParser parser;
    protected AssemblyDefaultContext defaultContext;
    protected AssemblyContextGraph ctxGraph;
    protected SleighLanguage lang;

    /**
     * Construct a SleighAssembler bound to a program.
     *
     * @param selector a method of selecting one result from many
     * @param program the program to bind to (must have same language as parser)
     * @param parser the parser for the SLEIGH language
     * @param defaultContext the default context for the language
     * @param ctxGraph the context graph, passed on to the tree resolver
     */
    protected SleighAssembler(AssemblySelector selector, Program program, AssemblyParser parser,
            AssemblyDefaultContext defaultContext, AssemblyContextGraph ctxGraph) {
        this(selector, (SleighLanguage) program.getLanguage(), parser, defaultContext, ctxGraph);
        this.program = program;
        this.listing = program.getListing();
        this.memory = program.getMemory();
        this.dis = Disassembler.getDisassembler(program, TaskMonitor.DUMMY,
            DisassemblerMessageListener.IGNORE);
    }

    /**
     * Construct a SleighAssembler without a program binding.
     *
     * NOTE: This variant does not permit {@link #assemble(Address, String...)}.
     *
     * @param selector a method of selecting one result from many
     * @param lang the SLEIGH language (must be same as to create the parser)
     * @param parser the parser for the SLEIGH language
     * @param defaultContext the default context for the language
     * @param ctxGraph the context graph, passed on to the tree resolver
     */
    protected SleighAssembler(AssemblySelector selector, SleighLanguage lang, AssemblyParser parser,
            AssemblyDefaultContext defaultContext, AssemblyContextGraph ctxGraph) {
        this.selector = selector;
        this.lang = lang;
        this.parser = parser;
        this.defaultContext = defaultContext;
        this.ctxGraph = ctxGraph;
    }

    /**
     * {@inheritDoc}
     *
     * @throws AssemblySelectionError if the given instruction does not have a full mask
     * @throws AssemblyError if this assembler is not bound to a program
     */
    @Override
    public Instruction patchProgram(AssemblyResolvedConstructor res, Address at)
            throws MemoryAccessException {
        if (!res.getInstruction().isFullMask()) {
            throw new AssemblySelectionError("Selected instruction must have a full mask.");
        }
        return patchProgram(res.getInstruction().getVals(), at);
    }

    /**
     * {@inheritDoc}
     *
     * Clears the code units covering the given bytes, writes the bytes to memory, and then
     * disassembles at the given address.
     *
     * @throws AssemblyError if this assembler is not bound to a program
     */
    @Override
    public Instruction patchProgram(byte[] insbytes, Address at) throws MemoryAccessException {
        // Fail with a meaningful message, rather than a bare NullPointerException, when used
        // without a program binding.
        if (listing == null || memory == null || dis == null) {
            throw new AssemblyError("Cannot patch: this assembler is not bound to a program");
        }
        listing.clearCodeUnits(at, at.add(insbytes.length - 1), false);
        memory.setBytes(at, insbytes);
        dis.disassemble(at, new AddressSet(at));
        return listing.getInstructionAt(at);
    }

    /**
     * {@inheritDoc}
     *
     * Each given string is split on newlines. Every resulting line is assembled using the
     * program's disassembly context at the current address, patched into the program, and the
     * address is then advanced by the length of the assembled instruction.
     *
     * @throws AssemblyError if this assembler is not bound to a program
     */
    @Override
    public InstructionBlock assemble(Address at, String... assembly) throws AssemblySyntaxException,
            AssemblySemanticException, MemoryAccessException, AddressOverflowException {
        if (program == null) {
            throw new AssemblyError("Cannot assemble: this assembler is not bound to a program");
        }
        InstructionBlock block = new InstructionBlock(at);
        for (String part : assembly) {
            for (String line : part.split("\n")) {
                RegisterValue rv = program.getProgramContext().getDisassemblyContext(at);
                dbg.println(rv);
                AssemblyPatternBlock ctx = AssemblyPatternBlock.fromRegisterValue(rv);
                ctx = ctx.fillMask();
                byte[] insbytes = assembleLine(at, line, ctx);
                if (insbytes == null) {
                    return null;
                }
                Instruction ins = patchProgram(insbytes, at);
                block.addInstruction(ins);
                // The next line goes immediately after this instruction
                at = at.addNoWrap(insbytes.length);
            }
        }
        return block;
    }

    /**
     * {@inheritDoc}
     *
     * NOTE(review): this uses the language's default context at the address, even when a
     * program is bound, whereas {@link #resolveLine(Address, String)} uses
     * {@link #getContextAt(Address)} --- confirm the difference is intended.
     */
    @Override
    public byte[] assembleLine(Address at, String line)
            throws AssemblySyntaxException, AssemblySemanticException {
        AssemblyPatternBlock ctx = defaultContext.getDefaultAt(at);
        ctx = ctx.fillMask();
        return assembleLine(at, line, ctx);
    }

    @Override
    public Collection<AssemblyParseResult> parseLine(String line) {
        return parser.parse(line, getProgramLabels());
    }

    @Override
    public AssemblyResolutionResults resolveTree(AssemblyParseResult parse, Address at) {
        AssemblyPatternBlock ctx = getContextAt(at);
        ctx = ctx.fillMask();
        return resolveTree(parse, at, ctx);
    }

    /**
     * {@inheritDoc}
     *
     * If the given parse result is an error, the returned results consist of a single error
     * describing it.
     */
    @Override
    public AssemblyResolutionResults resolveTree(AssemblyParseResult parse, Address at,
            AssemblyPatternBlock ctx) {
        if (parse.isError()) {
            // Carry the syntax error forward as an erroneous semantic result
            AssemblyResolutionResults results = new AssemblyResolutionResults();
            AssemblyParseErrorResult err = (AssemblyParseErrorResult) parse;
            results.add(AssemblyResolution.error(err.describeError(), "Parsing", null));
            return results;
        }
        AssemblyParseAcceptResult acc = (AssemblyParseAcceptResult) parse;
        AssemblyTreeResolver tr =
            new AssemblyTreeResolver(lang, at.getOffset(), acc.getTree(), ctx, ctxGraph);
        return tr.resolve();
    }

    @Override
    public AssemblyResolutionResults resolveLine(Address at, String line)
            throws AssemblySyntaxException {
        return resolveLine(at, line, getContextAt(at).fillMask());
    }

    /**
     * {@inheritDoc}
     *
     * @throws AssemblyError if the given context is not fully specified
     * @throws AssemblySelectionError if the selector's parse filter returns an empty collection
     */
    @Override
    public AssemblyResolutionResults resolveLine(Address at, String line, AssemblyPatternBlock ctx)
            throws AssemblySyntaxException {
        if (!ctx.isFullMask()) {
            throw new AssemblyError(
                "Context must be fully-specified (full length, no shift, no unknowns)");
        }
        if (lang.getContextBaseRegister() != null &&
            ctx.length() < lang.getContextBaseRegister().getMinimumByteSize()) {
            throw new AssemblyError(
                "Context must be fully-specified (full length, no shift, no unknowns)");
        }
        Collection<AssemblyParseResult> parse = parseLine(line);
        parse = selector.filterParse(parse);
        if (parse.isEmpty()) {
            throw new AssemblySelectionError(
                "Must select at least one parse result. Report errors via AssemblySyntaxException");
        }
        AssemblyResolutionResults results = new AssemblyResolutionResults();
        for (AssemblyParseResult p : parse) {
            results.absorb(resolveTree(p, at, ctx));
        }
        return results;
    }

    /**
     * {@inheritDoc}
     *
     * @throws AssemblySelectionError if the selector returns null, an instruction without a full
     *             mask, or an instruction whose context cannot be combined with the given one
     */
    @Override
    public byte[] assembleLine(Address at, String line, AssemblyPatternBlock ctx)
            throws AssemblySemanticException, AssemblySyntaxException {
        AssemblyResolutionResults results = resolveLine(at, line, ctx);
        AssemblyResolvedConstructor res = selector.select(results, ctx);
        // Validate the selector's choice, since selectors may be user-provided
        if (res == null) {
            throw new AssemblySelectionError(
                "Must select exactly one instruction. Report errors via AssemblySemanticException");
        }
        if (!res.getInstruction().isFullMask()) {
            throw new AssemblySelectionError("Selected instruction must have a full mask.");
        }
        if (res.getContext().combine(ctx) == null) {
            throw new AssemblySelectionError("Selected instruction must have compatible context");
        }
        return res.getInstruction().getVals();
    }

    /**
     * A convenience to obtain a map of program labels strings to long values
     *
     * The map includes the language's registers, except those whose address space is named
     * "register", mapped to their offsets. If a program is bound, it also includes the
     * program's symbols, mapped to the offsets of their addresses. A symbol overrides a register
     * of the same name.
     *
     * TODO: Use a Map<String, Address> instead so that, if possible, symbol values can be checked
     * lest they be an invalid substitution for a given operand.
     *
     * @return the map
     */
    protected Map<String, Long> getProgramLabels() {
        Map<String, Long> labels = new HashMap<>();
        for (Register reg : lang.getRegisters()) {
            // TODO/HACK: There ought to be a better mechanism describing suitable symbolic
            // substitutions for a given operand.
            if (!"register".equals(reg.getAddressSpace().getName())) {
                labels.put(reg.getName(), (long) reg.getOffset());
            }
        }
        if (program != null) {
            final SymbolIterator it = program.getSymbolTable().getAllSymbols(false);
            while (it.hasNext()) {
                Symbol sym = it.next();
                labels.put(sym.getName(), sym.getAddress().getOffset());
            }
        }
        return labels;
    }

    @Override
    public AssemblyPatternBlock getContextAt(Address addr) {
        if (program != null) {
            // With a binding, use the actual disassembly context recorded in the program
            RegisterValue rv = program.getProgramContext().getDisassemblyContext(addr);
            return AssemblyPatternBlock.fromRegisterValue(rv);
        }
        return defaultContext.getDefaultAt(addr);
    }
}

View file

@ -0,0 +1,442 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh;
import java.util.*;
import org.apache.commons.collections4.MultiValuedMap;
import org.apache.commons.collections4.multimap.HashSetValuedHashMap;
import ghidra.app.plugin.assembler.*;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblySentential;
import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParser;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyContextGraph;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyDefaultContext;
import ghidra.app.plugin.assembler.sleigh.symbol.*;
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer.DbgCtx;
import ghidra.app.plugin.languages.sleigh.SleighLanguages;
import ghidra.app.plugin.languages.sleigh.SubtableEntryVisitor;
import ghidra.app.plugin.processors.sleigh.*;
import ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern;
import ghidra.app.plugin.processors.sleigh.symbol.*;
import ghidra.app.plugin.processors.sleigh.template.ConstructTpl;
import ghidra.app.plugin.processors.sleigh.template.HandleTpl;
import ghidra.program.model.lang.LanguageID;
import ghidra.program.model.listing.Program;
import ghidra.util.SystemUtilities;
/**
* An {@link AssemblerBuilder} capable of supporting almost any {@link SleighLanguage}
*
* To build an assembler, please use a static method of the {@link Assemblers} class.
*
* SLEIGH-based assembly is a bit of an experimental feature at this time. Nevertheless, it seems to
* have come along quite nicely. It's not quite as fast as disassembly, since after all, that's what
* SLEIGH was designed to do.
*
* Overall, the method is fairly simple, though its implementation is a bit more complex. First, we
* gather every pair of pattern and constructor by traversing the decision tree used by disassembly.
* We then use the "print pieces" to construct a context-free grammar. Each production is associated
* with the one-or-more constructors with the same sequence of print pieces. We then build a LALR(1)
* parser for the generated grammar. This now constitutes a generic parser for the given language.
* Note that this step takes some time, and may be better suited as a build-time step. Because
* SLEIGH specifications are not generally concerned with eliminating ambiguity of printed
* instructions (rather, it only does so for instruction bytes), we must consider that the grammar
* could be ambiguous. To handle this, the action/goto table is permitted multiple entries per cell,
* and we allow backtracking. There are also cases where tokens are not actually separated by
* spaces. For example, in the {@code ia.sinc} file, there is JMP ... and J^cc, meaning, the lexer
* must consider J as a token as well as JMP, introducing another source of possible backtracking.
* Despite that, parsing is completed fairly quickly.
*
* To assemble, we first parse the textual instruction, yielding zero or more parse trees. No parse
* trees implies an error. For each parse tree, we attempt to resolve the instruction bytes,
* starting at the leaves and working upwards while tracking and solving context changes. The
* context changes must be considered in reverse. We <em>read</em> the context register of the
* children (a disassembler would write). We then assume there is at most one variable in the
* expression, solve for it, and <em>write</em> the solution to the appropriate field (a
* disassembler would read). If no solution exists, a semantic error is logged. Since it's possible
* a production in the parse tree is associated with multiple constructors, different combinations
* of constructors are explored as we move upward in the tree. If all possible combinations yield
* semantic errors, then the overall result is an error.
*
* Some productions are "purely recursive," e.g., {@code :^instruction} lines in the SLEIGH. These
* are ignored during parser construction. Let such a production be given as I => I. When resolving
* the parse tree to bytes, and we encounter a production with I on the left hand side, we then
* consider the possible application of the production I => I and its consequential constructors.
* Ideally, we could repeat this indefinitely, stopping when all further applications result in
* semantic errors; however, there is no guarantee in the SLEIGH specification that such an
* algorithm will actually halt, so a maximum number (default of 1) of applications are attempted.
*
* After all the context changes and operands are resolved, we apply the constructor patterns and
* proceed up the tree. Thus, each branch yields zero or more "resolved constructors," which each
* specify two masked blocks of data: one for the instruction, and one for the context. These are
* passed up to the parent production, which, having obtained results from all its children,
* attempts to apply the corresponding constructors.
*
* Once we've resolved the root node, any resolved constructors returned are taken as successfully
* assembled instruction bytes. If applicable, the corresponding context registers are compared to
* the context at the target address in the program and filtered for compatibility.
*/
public class SleighAssemblerBuilder implements AssemblerBuilder {
// Timer used for debug/progress output; inactive when the system is in testing batch mode
protected static final DbgTimer dbg = SystemUtilities.isInTestingBatchMode() ? DbgTimer.INACTIVE : DbgTimer.ACTIVE;
// The SLEIGH language for which this builder produces assemblers
protected SleighLanguage lang;
// The grammar assembled by buildGrammar() from the language's subtables
protected AssemblyGrammar grammar;
// The default context for the language, created by buildContext()
protected AssemblyDefaultContext defaultContext;
// The context transition graph, created by buildContextGraph()
protected AssemblyContextGraph ctxGraph;
// The parser for the grammar, created by buildParser()
protected AssemblyParser parser;
// Guards generateAssembler() so that an already-generated assembler is not rebuilt
protected boolean generated = false;
// A cache for symbols converted during grammar construction
protected Map<String, AssemblySymbol> builtSymbols = new HashMap<>();
/**
* Construct an assembler builder for the given SLEIGH language
*
* Only the language is recorded here. The grammar, context, and parser are generated lazily by
* {@link #generateAssembler()}, which the {@code getAssembler} methods invoke.
*
* @param lang the language
*/
public SleighAssemblerBuilder(SleighLanguage lang) {
this.lang = lang;
}
/**
 * Do the actual work to construct an assembler from a SLEIGH language
 *
 * The steps are: build the grammar, verify it, build the default context, build the context
 * transition graph, and finally build the parser. Once they have all completed, the builder is
 * marked as generated and subsequent calls return immediately.
 *
 * The {@code generated} flag is deliberately set only <em>after</em> every step has succeeded.
 * If it were set up front, a build that failed part-way would be remembered as finished, and
 * later calls to {@code getAssembler} would hand out assemblers backed by a {@code null} or
 * partially-built grammar, context graph, or parser. With the flag set last, a failed build is
 * simply attempted again on the next call, and the caller sees the real error each time.
 *
 * @throws SleighException if there's an issue accessing the language
 * @throws RuntimeException wrapping any other exception raised by one of the build steps
 */
protected void generateAssembler() throws SleighException {
    if (generated) {
        return;
    }
    try {
        buildGrammar();
        grammar.verify();
        buildContext();
        buildContextGraph();
        buildParser();
    }
    catch (SleighException e) {
        // Not sure this can actually happen here, but if it does, pass it through unwrapped
        throw e;
    }
    catch (Exception e) {
        throw new RuntimeException(e);
    }
    // Only now is it safe to skip the work on later calls
    generated = true;
}
/**
 * Get the ID of the language for which this builder constructs assemblers
 *
 * @return the language ID, as reported by the SLEIGH language given at construction
 */
@Override
public LanguageID getLanguageID() {
    return this.lang.getLanguageID();
}
/**
 * Get the language for which this builder constructs assemblers
 *
 * @return the SLEIGH language given at construction
 */
@Override
public SleighLanguage getLanguage() {
    return this.lang;
}
/**
 * Get an assembler for the language alone, i.e., one that is not bound to a program
 *
 * The grammar, context, and parser are generated on demand before the assembler is created.
 *
 * @param selector the selector used by the returned assembler
 * @return the assembler
 */
@Override
public SleighAssembler getAssembler(AssemblySelector selector) {
    generateAssembler();
    return new SleighAssembler(selector, lang, parser, defaultContext, ctxGraph);
}

/**
 * Get an assembler bound to the given program
 *
 * The grammar, context, and parser are generated on demand before the assembler is created.
 *
 * @param selector the selector used by the returned assembler
 * @param program the program to which the assembler is bound
 * @return the assembler
 */
@Override
public SleighAssembler getAssembler(AssemblySelector selector, Program program) {
    generateAssembler();
    return new SleighAssembler(selector, program, parser, defaultContext, ctxGraph);
}
/**
 * Invert a varnode list to a map suitable for use with {@link AssemblyStringMapTerminal}
 *
 * Each named varnode is mapped to the position(s) at which it appears in the list. A name that
 * appears more than once maps to every one of its positions.
 *
 * @param vnlist the varnode list symbol
 * @return the inverted string map, from varnode name to list index
 */
protected MultiValuedMap<String, Integer> invVarnodeList(VarnodeListSymbol vnlist) {
    MultiValuedMap<String, Integer> inverted = new HashSetValuedHashMap<>();
    int position = 0;
    for (VarnodeSymbol entry : vnlist.getVarnodeTable()) {
        // nulls are _ in the spec, meaning the index is undefined; skip it but still count it
        if (entry != null) {
            inverted.put(entry.getName(), position);
        }
        position++;
    }
    return inverted;
}
/**
 * Invert a value map to a map suitable for use with {@link AssemblyNumericMapTerminal}
 *
 * Each value is mapped to its position in the symbol's table. If a value occurs more than once,
 * the last position wins, since the result holds a single index per value.
 *
 * @param vm the value map symbol
 * @return the inverted numeric map, from value to table index
 */
protected Map<Long, Integer> invValueMap(ValueMapSymbol vm) {
    Map<Long, Integer> inverted = new HashMap<>();
    int position = 0;
    for (long value : vm.getMap()) {
        inverted.put(value, position);
        position++;
    }
    return inverted;
}
/**
 * Invert a name table to a map suitable for use with {@link AssemblyStringMapTerminal}
 *
 * Each non-null name is mapped to the position(s) at which it appears in the table. Null
 * entries are skipped, but still occupy a position.
 *
 * @param ns the name symbol
 * @return the inverted string map, from name to table index
 */
protected MultiValuedMap<String, Integer> invNameSymbol(NameSymbol ns) {
    MultiValuedMap<String, Integer> inverted = new HashSetValuedHashMap<>();
    int position = 0;
    for (String entry : ns.getNameTable()) {
        if (entry != null) {
            inverted.put(entry, position);
        }
        position++;
    }
    return inverted;
}
/**
 * Convert the given operand symbol to an {@link AssemblySymbol}
 *
 * For subtables, this results in a non-terminal; for all others, the result is a terminal.
 * Converted symbols are cached by name in {@code builtSymbols}, so the same operand always
 * yields the same grammar symbol.
 *
 * @param cons the constructor to which the operand belongs
 * @param opsym the operand symbol to convert
 * @return the converted assembly grammar symbol
 * @throws RuntimeException if the operand's defining symbol is of an unrecognized type
 */
protected AssemblySymbol getSymbolFor(Constructor cons, OperandSymbol opsym) {
    TripleSymbol defsym = opsym.getDefiningSymbol();
    // An operand without a defining symbol is only valid in the local scope, so its name is
    // qualified by the constructor's parent table to keep it unique.
    boolean localOnly = (defsym == null);
    String key = localOnly ? cons.getParent().getName() + ":" + opsym.getName()
            : opsym.getName();

    AssemblySymbol cached = builtSymbols.get(key);
    if (cached != null) {
        return cached;
    }

    AssemblySymbol created;
    if (localOnly) {
        created = new AssemblyNumericTerminal(key, getBitSize(cons, opsym));
    }
    else if (defsym instanceof SubtableSymbol) {
        created = new AssemblyNonTerminal(key);
    }
    else if (defsym instanceof VarnodeListSymbol) {
        created =
            new AssemblyStringMapTerminal(key, invVarnodeList((VarnodeListSymbol) defsym));
    }
    else if (defsym instanceof VarnodeSymbol) {
        // Does this need to consume an operand? It seems not.
        created = new AssemblyStringTerminal(key);
    }
    else if (defsym instanceof ValueMapSymbol) {
        created = new AssemblyNumericMapTerminal(key, invValueMap((ValueMapSymbol) defsym));
    }
    else if (defsym instanceof NameSymbol) {
        created = new AssemblyStringMapTerminal(key, invNameSymbol((NameSymbol) defsym));
    }
    else {
        throw new RuntimeException("Unknown symbol for " + key + ": " + defsym);
    }

    builtSymbols.put(key, created);
    return created;
}
/**
 * Obtain the size in bits of a textual operand.
 *
 * Variables in pattern expressions carry no explicit size. The value exported by a
 * constructor's pcode may, however, be given an explicit size. So there is one special case:
 * when the constructor exports the very operand in question, that export's size is taken as
 * the operand's size. In every other case (no pcode template, nothing exported, or a different
 * operand exported) the size is unspecified and 0 is returned.
 *
 * For disassembly, this information is used simply to truncate the bits before they are
 * displayed. For assembly, we must do two things: 1) Ensure that the provided value fits in the
 * given size, and 2) Mask the goal when solving the pattern expression for the operand.
 *
 * NOTE(review): the export size is described elsewhere as being given in bytes, while this
 * method is documented as returning bits; confirm the unit of {@code HandleTpl.getSize()}.
 *
 * @param cons the constructor from which the production is being derived
 * @param opsym the operand symbol corresponding to the grammatical symbol, whose size we wish
 *            to determine.
 * @return the size of the operand in bits, or 0 if unspecified
 */
protected int getBitSize(Constructor cons, OperandSymbol opsym) {
    ConstructTpl template = cons.getTempl();
    // No pcode means no export, and hence no size specification
    HandleTpl exported = (template == null) ? null : template.getResult();
    if (exported == null) {
        return 0;
    }
    // Only an export of this same operand says anything about its size
    boolean exportsThisOperand = opsym.getIndex() == exported.getOffsetOperandIndex();
    return exportsThisOperand ? exported.getSize() : 0;
}
/**
 * Build a portion of the grammar representing a table of constructors
 *
 * Every constructor visited in the table contributes one production whose left-hand side is the
 * table's non-terminal. The right-hand side is derived from the constructor's print pieces: a
 * piece starting with a newline character refers to an operand (its second character encodes
 * the operand index as an offset from 'A'); any other non-empty piece is literal text.
 *
 * @param subtable the table
 * @return the partial grammar
 */
protected AssemblyGrammar buildSubGrammar(SubtableSymbol subtable) {
    final AssemblyGrammar partial = new AssemblyGrammar();
    final AssemblyNonTerminal tableSymbol = new AssemblyNonTerminal(subtable.getName());

    SubtableEntryVisitor productionCollector = new SubtableEntryVisitor() {
        @Override
        public int visit(DisjointPattern pattern, Constructor cons) {
            AssemblySentential<AssemblyNonTerminal> sentence = new AssemblySentential<>();
            List<Integer> operandIndices = new ArrayList<>();

            for (String piece : cons.getPrintPieces()) {
                if (piece.isEmpty()) {
                    continue;
                }

                if (piece.charAt(0) == '\n') {
                    // Operand reference: convert it to a grammar symbol
                    int opIndex = piece.charAt(1) - 'A';
                    AssemblySymbol sym = getSymbolFor(cons, cons.getOperand(opIndex));
                    if (sym.takesOperandIndex()) {
                        operandIndices.add(opIndex);
                    }
                    sentence.add(sym);
                    continue;
                }

                String text = piece.trim();
                if (text.isEmpty()) {
                    // The piece is nothing but whitespace
                    sentence.addWS();
                    continue;
                }

                // Literal text: whitespace is added before it if the piece had leading space,
                // and again if the text does not begin with a letter or digit...
                if (!piece.startsWith(text)) {
                    sentence.addWS();
                }
                if (!Character.isLetterOrDigit(text.charAt(0))) {
                    sentence.addWS();
                }
                sentence.add(new AssemblyStringTerminal(text));
                // ...and likewise after it, for trailing space and a non-alphanumeric ending.
                if (!piece.endsWith(text)) {
                    sentence.addWS();
                }
                if (!Character.isLetterOrDigit(text.charAt(text.length() - 1))) {
                    sentence.addWS();
                }
            }

            partial.addProduction(tableSymbol, sentence, pattern, cons, operandIndices);
            return CONTINUE;
        }
    };

    SleighLanguages.traverseConstructors(subtable, productionCollector);
    return partial;
}
/**
 * Build the full grammar for the language
 *
 * Each subtable in the language's symbol table contributes a sub-grammar, and the start symbol
 * is set to "instruction". All other recognized kinds of symbol contribute nothing here.
 *
 * @throws RuntimeException if the symbol table contains a symbol of an unexpected type
 */
protected void buildGrammar() {
    try (DbgCtx dc = dbg.start("Building grammar")) {
        grammar = new AssemblyGrammar();
        for (Symbol sym : lang.getSymbolTable().getSymbolList()) {
            if (sym instanceof SubtableSymbol) {
                grammar.combine(buildSubGrammar((SubtableSymbol) sym));
                continue;
            }
            // The following are all ignored:
            // - VarnodeSymbol: this just becomes a string terminal
            // - StartSymbol: we handle inst_start in semantic processing
            // - EndSymbol: we handle inst_next in semantic processing
            // - UseropSymbol: we don't do pcode
            // - OperandSymbol: these are terminals, or will be produced by their defining
            //   symbol
            // - ValueSymbol: these are now terminals
            boolean ignored = sym instanceof VarnodeSymbol || sym instanceof StartSymbol ||
                sym instanceof EndSymbol || sym instanceof UseropSymbol ||
                sym instanceof OperandSymbol || sym instanceof ValueSymbol;
            if (!ignored) {
                throw new RuntimeException("Unexpected type: " + sym.getClass());
            }
        }
        grammar.setStartName("instruction");
    }
}
/**
 * Build the default context for the language
 *
 * The result is stored in {@code defaultContext}.
 */
protected void buildContext() {
    this.defaultContext = new AssemblyDefaultContext(this.lang);
}
/**
 * Build the context transition graph for the language
 *
 * The graph is constructed from the language and the already-built grammar, and is stored in
 * {@code ctxGraph}. The work is wrapped in a debug timer context.
 */
protected void buildContextGraph() {
    try (DbgCtx timing = dbg.start("Building context graph")) {
        this.ctxGraph = new AssemblyContextGraph(this.lang, this.grammar);
    }
}
/**
 * Build the parser for the language
 *
 * The parser is constructed from the already-built grammar and is stored in {@code parser}.
 * The work is wrapped in a debug timer context.
 */
protected void buildParser() {
    try (DbgCtx timing = dbg.start("Building parser")) {
        this.parser = new AssemblyParser(this.grammar);
    }
}
/**
 * Get the built grammar for the language
 *
 * @return the grammar, or {@code null} if it has not been built yet
 */
protected AssemblyGrammar getGrammar() {
    return this.grammar;
}
/**
 * Get the built parser for the language
 *
 * @return the parser, or {@code null} if it has not been built yet
 */
protected AssemblyParser getParser() {
    return this.parser;
}
}

View file

@ -0,0 +1,165 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import java.util.Map;
import java.util.Set;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
import ghidra.app.plugin.processors.sleigh.expression.BinaryExpression;
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
/**
 * A solver that handles expressions of the form A [OP] B
 *
 * The general strategy is to evaluate both operands. If both are constant, the operator is
 * applied and the result is checked against the goal. If exactly one is constant, the goal is
 * "inverted" through the operator to obtain a goal for the other operand, which is then solved
 * recursively. If neither is constant, the decision is left to {@link #solveTwoSided}.
 *
 * @param <T> the type of expression solved (the operator)
 */
public abstract class AbstractBinaryExpressionSolver<T extends BinaryExpression>
        extends AbstractExpressionSolver<T> {

    public AbstractBinaryExpressionSolver(Class<T> tcls) {
        super(tcls);
    }

    /**
     * Treat anything other than a fully-defined value as unknown
     *
     * A value that is only partially defined is reported to the debug output before being
     * discarded; a fully-undefined one is discarded silently.
     *
     * @param val the value obtained for an operand, possibly null
     * @param partialMessage the debug message prefix to print if the value is partially defined
     * @return the value if it is fully defined, otherwise null
     */
    private MaskedLong constantOrNull(MaskedLong val, String partialMessage) {
        if (val == null || val.isFullyDefined()) {
            return val;
        }
        if (!val.isFullyUndefined()) {
            dbg.println(partialMessage + val);
        }
        return null;
    }

    @Override
    public AssemblyResolution solve(T exp, MaskedLong goal, Map<String, Long> vals,
            Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
            String description) throws NeedsBackfillException {
        MaskedLong left = solver.getValue(exp.getLeft(), vals, res, cur);
        MaskedLong right = solver.getValue(exp.getRight(), vals, res, cur);
        left = constantOrNull(left, "Partially-defined left value for binary solver: ");
        right = constantOrNull(right, "Partially-defined right value for binary solver: ");

        final boolean leftKnown = left != null;
        final boolean rightKnown = right != null;
        try {
            if (leftKnown && rightKnown) {
                // Nothing to solve; just verify the constant result against the goal
                return ConstantValueSolver.checkConstAgrees(compute(left, right), goal,
                    description);
            }
            if (leftKnown) {
                return solveRightSide(exp.getRight(), left, goal, vals, res, cur, hints,
                    description);
            }
            if (rightKnown) {
                return solveLeftSide(exp.getLeft(), right, goal, vals, res, cur, hints,
                    description);
            }
            // Each solver may provide a strategy for solving expression where both sides are
            // variable, e.g., two fields being concatenated via OR.
            return solveTwoSided(exp, goal, vals, res, cur, hints, description);
        }
        catch (NeedsBackfillException e) {
            // Must be passed up, not reported as an ordinary solver error
            throw e;
        }
        catch (SolverException e) {
            return AssemblyResolution.error(e.getMessage(), description, null);
        }
        catch (AssertionError e) {
            dbg.println("While solving: " + exp + " (" + description + ")");
            throw e;
        }
    }

    /**
     * Solve the left operand, given that the right operand's value is known
     */
    protected AssemblyResolution solveLeftSide(PatternExpression lexp, MaskedLong rval,
            MaskedLong goal, Map<String, Long> vals, Map<Integer, Object> res,
            AssemblyResolvedConstructor cur, Set<SolverHint> hints, String description)
            throws NeedsBackfillException, SolverException {
        MaskedLong leftGoal = computeLeft(rval, goal);
        return solver.solve(lexp, leftGoal, vals, res, cur, hints, description);
    }

    /**
     * Solve the right operand, given that the left operand's value is known
     */
    protected AssemblyResolution solveRightSide(PatternExpression rexp, MaskedLong lval,
            MaskedLong goal, Map<String, Long> vals, Map<Integer, Object> res,
            AssemblyResolvedConstructor cur, Set<SolverHint> hints, String description)
            throws NeedsBackfillException, SolverException {
        MaskedLong rightGoal = computeRight(lval, goal);
        return solver.solve(rexp, rightGoal, vals, res, cur, hints, description);
    }

    /**
     * Solve an expression in which neither operand's value is known
     *
     * By default, no strategy is available, and a backfill is requested instead.
     */
    protected AssemblyResolution solveTwoSided(T exp, MaskedLong goal, Map<String, Long> vals,
            Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
            String description) throws NeedsBackfillException, SolverException {
        throw new NeedsBackfillException("_two_sided_");
    }

    @Override
    public MaskedLong getValue(T exp, Map<String, Long> vals, Map<Integer, Object> res,
            AssemblyResolvedConstructor cur) throws NeedsBackfillException {
        MaskedLong left = solver.getValue(exp.getLeft(), vals, res, cur);
        MaskedLong right = solver.getValue(exp.getRight(), vals, res, cur);
        // The expression has a value only if both of its operands do
        if (left == null || right == null) {
            return null;
        }
        return compute(left, right);
    }

    /**
     * Compute the left-hand-side value given that the result and the right are known
     *
     * @param rval the right-hand-side value
     * @param goal the result
     * @return the left-hand-side value solution
     * @throws SolverException if the expression cannot be solved
     */
    public abstract MaskedLong computeLeft(MaskedLong rval, MaskedLong goal) throws SolverException;

    /**
     * Compute the right-hand-side value given that the result and the left are known
     *
     * The default implementation assumes the operator is commutative and so delegates to
     * {@link #computeLeft(MaskedLong, MaskedLong)}. Solvers for non-commutative operators must
     * override it.
     *
     * @param lval the left-hand-side value
     * @param goal the result
     * @return the right-hand-side value solution
     * @throws SolverException if the expression cannot be solved
     */
    public MaskedLong computeRight(MaskedLong lval, MaskedLong goal) throws SolverException {
        return computeLeft(lval, goal);
    }

    /**
     * Compute the result of applying the operator to the two given values
     *
     * @param lval the left-hand-side value
     * @param rval the right-hand-side value
     * @return the result
     */
    public abstract MaskedLong compute(MaskedLong lval, MaskedLong rval);

    @Override
    public int getInstructionLength(T exp, Map<Integer, Object> res) {
        // The expression spans as far as its longer operand
        return Math.max(solver.getInstructionLength(exp.getLeft(), res),
            solver.getInstructionLength(exp.getRight(), res));
    }

    @Override
    public MaskedLong valueForResolution(T exp, AssemblyResolvedConstructor rc) {
        return compute(solver.valueForResolution(exp.getLeft(), rc),
            solver.valueForResolution(exp.getRight(), rc));
    }
}

View file

@ -0,0 +1,104 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import java.util.Map;
import java.util.Set;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
/**
* The root type of an expression solver
*
* Each concrete solver handles one type of {@link PatternExpression}. It registers itself with
* a {@link RecursiveDescentSolver}, and keeps a reference to that general solver for use on
* sub-expressions.
*
* @param <T> the type of expression solved (the operator)
*/
public abstract class AbstractExpressionSolver<T extends PatternExpression> {
// The type of expression this solver handles; used as the key when registering
private Class<T> tcls;
// The general solver with which this solver is registered; unset until register() is called
protected RecursiveDescentSolver solver;
// Debug output for solvers; always the inactive timer here
protected final DbgTimer dbg = DbgTimer.INACTIVE;
/**
* Construct a solver that can solve expression of the given type
*
* @param tcls the type of expressions it can solve
*/
public AbstractExpressionSolver(Class<T> tcls) {
this.tcls = tcls;
}
/**
* Attempt to solve an expression for a given value
*
* @param exp the expression to solve
* @param goal the desired value of the expression
* @param vals values of defined symbols
* @param res the results of subconstructor resolutions (used for lengths)
* @param cur presumably the resolution built so far; {@link ContextFieldSolver} reads context
*            field values from it and treats null as "no value available" -- TODO confirm
* @param hints describes techniques applied by calling solvers
* @param description the description to give to resolved solutions
* @return the resolution
* @throws NeedsBackfillException if the expression refers to an undefined symbol
*/
public abstract AssemblyResolution solve(T exp, MaskedLong goal, Map<String, Long> vals,
Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
String description) throws NeedsBackfillException;
/**
* Attempt to get a constant value for the expression
*
* @param exp the expression
* @param vals values of defined symbols
* @param res the results of subconstructor resolutions (used for lengths)
* @param cur presumably the resolution built so far; {@link ContextFieldSolver} reads context
*            field values from it and treats null as "no value available" -- TODO confirm
* @return the constant value, or null if it depends on a variable
* @throws NeedsBackfillException if the expression refers to an undefined symbol
*/
public abstract MaskedLong getValue(T exp, Map<String, Long> vals, Map<Integer, Object> res,
AssemblyResolvedConstructor cur) throws NeedsBackfillException;
/**
* Determines the length of the subconstructor that would be returned had the expression not
* depended on an undefined symbol.
*
* This is used by the backfilling process to ensure values are written to the correct offset
*
* @param exp the expression
* @param res the results of subconstructor resolutions (used for lengths)
* @return the length of filled in token field(s).
*/
public abstract int getInstructionLength(T exp, Map<Integer, Object> res);
/**
* Compute the value of the expression given the (possibly-intermediate) resolution
*
* @param exp the expression to evaluate
* @param rc the resolution on which to evaluate it
* @return the result
*/
public abstract MaskedLong valueForResolution(T exp, AssemblyResolvedConstructor rc);
/**
* Register this particular solver with the general expression solver
*
* This also records the general solver in {@code solver}, so that this solver can delegate to
* it.
*
* @param general the general solver
*/
protected void register(RecursiveDescentSolver general) {
this.solver = general;
general.register(tcls, this);
}
}

View file

@ -0,0 +1,101 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import java.util.Map;
import java.util.Set;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
import ghidra.app.plugin.processors.sleigh.expression.UnaryExpression;
/**
 * A solver that handles expressions of the form [OP]A
 *
 * If the operand evaluates to a fully-defined constant, the operator is applied and the result
 * is checked against the goal. Otherwise, the goal is inverted through the operator and the
 * operand is solved recursively for that inverted goal.
 *
 * @param <T> the type of expression solved (the operator)
 */
public abstract class AbstractUnaryExpressionSolver<T extends UnaryExpression>
        extends AbstractExpressionSolver<T> {

    public AbstractUnaryExpressionSolver(Class<T> tcls) {
        super(tcls);
    }

    @Override
    public AssemblyResolution solve(T exp, MaskedLong goal, Map<String, Long> vals,
            Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
            String description) throws NeedsBackfillException {
        MaskedLong operand = solver.getValue(exp.getUnary(), vals, res, cur);
        // Neither compute nor computeInverse declares a SolverException, so unlike the binary
        // solver, only assertion failures get special treatment here.
        try {
            boolean operandIsConstant = operand != null && operand.isFullyDefined();
            MaskedLong computed = operandIsConstant ? compute(operand) : null;
            if (computed != null) {
                return ConstantValueSolver.checkConstAgrees(computed, goal, description);
            }
            return solver.solve(exp.getUnary(), computeInverse(goal), vals, res, cur, hints,
                description);
        }
        catch (AssertionError e) {
            dbg.println("While solving: " + exp + " (" + description + ")");
            throw e;
        }
    }

    @Override
    public MaskedLong getValue(T exp, Map<String, Long> vals, Map<Integer, Object> res,
            AssemblyResolvedConstructor cur) throws NeedsBackfillException {
        MaskedLong operand = solver.getValue(exp.getUnary(), vals, res, cur);
        return operand == null ? null : compute(operand);
    }

    /**
     * Compute the input value given that the result is known
     *
     * The default implementation assumes the operator is an involution, i.e., that it is its
     * own inverse, and so simply applies {@link #compute(MaskedLong)} to the goal.
     *
     * @param goal the result
     * @return the input value solution
     */
    public MaskedLong computeInverse(MaskedLong goal) {
        return compute(goal);
    }

    /**
     * Compute the result of applying the operator to the given value
     *
     * @param val the input value
     * @return the result
     */
    public abstract MaskedLong compute(MaskedLong val);

    @Override
    public int getInstructionLength(T exp, Map<Integer, Object> res) {
        // The operator adds nothing; the length is that of the operand
        return solver.getInstructionLength(exp.getUnary(), res);
    }

    @Override
    public MaskedLong valueForResolution(T exp, AssemblyResolvedConstructor rc) {
        return compute(solver.valueForResolution(exp.getUnary(), rc));
    }
}

View file

@ -0,0 +1,38 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import ghidra.app.plugin.processors.sleigh.expression.AndExpression;
/**
 * Solves expressions of the form A & B
 *
 * Only {@code computeLeft} is provided; the right-hand side is solved by the inherited default.
 */
public class AndExpressionSolver extends AbstractBinaryExpressionSolver<AndExpression> {

    public AndExpressionSolver() {
        super(AndExpression.class);
    }

    /**
     * Apply the AND operator to two values
     */
    @Override
    public MaskedLong compute(MaskedLong lval, MaskedLong rval) {
        final MaskedLong conjunction = lval.and(rval);
        return conjunction;
    }

    /**
     * Find the left operand from the goal and the right operand, via {@code MaskedLong.invAnd}
     */
    @Override
    public MaskedLong computeLeft(MaskedLong rval, MaskedLong goal) throws SolverException {
        final MaskedLong leftOperand = goal.invAnd(rval);
        return leftOperand;
    }
}

View file

@ -0,0 +1,70 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import java.util.Map;
import java.util.Set;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
import ghidra.app.plugin.processors.sleigh.expression.ConstantValue;
/**
* "Solves" constant expressions
*
* Essentially, this either evaluates successfully when asked for a constant value, or checks that
* the goal is equal to the constant. Otherwise, there is no solution.
*/
public class ConstantValueSolver extends AbstractExpressionSolver<ConstantValue> {
public ConstantValueSolver() {
super(ConstantValue.class);
}
@Override
public AssemblyResolution solve(ConstantValue cv, MaskedLong goal, Map<String, Long> vals,
Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
String description) {
MaskedLong value = getValue(cv, vals, res, cur);
return checkConstAgrees(value, goal, description);
}
@Override
public MaskedLong getValue(ConstantValue cv, Map<String, Long> vals, Map<Integer, Object> res,
AssemblyResolvedConstructor cur) {
return MaskedLong.fromLong(cv.getValue());
}
@Override
public int getInstructionLength(ConstantValue cv, Map<Integer, Object> res) {
return 0;
}
@Override
public MaskedLong valueForResolution(ConstantValue cv, AssemblyResolvedConstructor rc) {
return MaskedLong.fromLong(cv.getValue());
}
static AssemblyResolution checkConstAgrees(MaskedLong value, MaskedLong goal,
String description) {
if (!value.agrees(goal)) {
return AssemblyResolution.error(
"Constant value " + value + " does not agree with child requirements", description,
null);
}
return AssemblyResolution.nop(description, null);
}
}

View file

@ -0,0 +1,76 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import java.util.Map;
import java.util.Set;
import ghidra.app.plugin.assembler.sleigh.sem.*;
import ghidra.app.plugin.processors.sleigh.expression.ContextField;
/**
 * Solves expressions of a context register field
 *
 * Essentially, this just encodes the goal into the field, if it can be represented in the given
 * space and format. Otherwise, there is no solution.
 */
public class ContextFieldSolver extends AbstractExpressionSolver<ContextField> {

    public ContextFieldSolver() {
        super(ContextField.class);
    }

    @Override
    public AssemblyResolution solve(ContextField cf, MaskedLong goal, Map<String, Long> vals,
            Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
            String description) {
        assert cf.minValue() == 0; // In case someone decides to do signedness there.
        if (goal.isInRange(cf.maxValue(), cf.hasSignbit())) {
            // The goal fits, so the solution is a context-only pattern encoding it
            AssemblyPatternBlock encoded = AssemblyPatternBlock.fromContextField(cf, goal);
            return AssemblyResolution.contextOnly(encoded, description, null);
        }
        return AssemblyResolution.error("Value " + goal + " is not valid for " + cf,
            description, null);
    }

    @Override
    public MaskedLong getValue(ContextField cf, Map<String, Long> vals, Map<Integer, Object> res,
            AssemblyResolvedConstructor cur) {
        // Without a resolution, there is no context from which to read the field
        return cur == null ? null : valueForResolution(cf, cur);
    }

    @Override
    public int getInstructionLength(ContextField cf, Map<Integer, Object> res) {
        return 0; // this is a context field, not an instruction (token) field
    }

    @Override
    public MaskedLong valueForResolution(ContextField cf, AssemblyResolvedConstructor rc) {
        // Read the bytes containing the field, then shift the field down to bit 0
        int byteCount = cf.getByteEnd() - cf.getByteStart() + 1;
        MaskedLong field = rc.readContext(cf.getByteStart(), byteCount).shiftRight(cf.getShift());
        // Extend from the field's width according to its signedness
        if (cf.hasSignbit()) {
            return field.signExtend(cf.getEndBit() - cf.getStartBit() + 1);
        }
        return field.zeroExtend(cf.getEndBit() - cf.getStartBit() + 1);
    }
}

View file

@ -0,0 +1,39 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
/**
 * A set of built-in {@link SolverHint}s
 *
 * Solvers add these to the hint set they pass to sub-solvers when they are guessing a value, so
 * that a nested solver of the same kind can detect the guess and avoid guessing again.
 */
public enum DefaultSolverHint implements SolverHint {
/**
 * A multiplication solver is synthesizing goals with repetition
 */
GUESSING_REPETITION,
/**
 * A boolean OR solver, which matched a circular shift, is solving the value having guessed a
 * shift amount
 */
GUESSING_CIRCULAR_SHIFT_AMOUNT,
/**
 * A left-shift solver is solving the value having guessed a shift amount
 */
GUESSING_LEFT_SHIFT_AMOUNT,
/**
 * A right-shift solver is solving the value having guessed a shift amount
 */
GUESSING_RIGHT_SHIFT_AMOUNT;
}

View file

@ -0,0 +1,47 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import ghidra.app.plugin.processors.sleigh.expression.DivExpression;
/**
 * Solver for pattern expressions of the form {@code A / B}
 */
public class DivExpressionSolver extends AbstractBinaryExpressionSolver<DivExpression> {

    public DivExpressionSolver() {
        super(DivExpression.class);
    }

    /**
     * Evaluate the quotient using signed division.
     */
    @Override
    public MaskedLong compute(MaskedLong lval, MaskedLong rval) {
        MaskedLong quotient = lval.divideSigned(rval);
        return quotient;
    }

    /**
     * Solve {@code x / rval = goal} for x by multiplying the goal by the divisor.
     */
    @Override
    public MaskedLong computeLeft(MaskedLong rval, MaskedLong goal) throws SolverException {
        MaskedLong dividend = goal.multiply(rval);
        return dividend;
    }

    /**
     * Solve {@code lval / x = goal} for x.
     *
     * Only the case where the dividend equals the goal is handled, yielding a divisor of 1.
     *
     * @throws SolverException if the dividend and the goal differ
     */
    @Override
    public MaskedLong computeRight(MaskedLong lval, MaskedLong goal) throws SolverException {
        if (!lval.equals(goal)) {
            throw new SolverException(
                "Encountered a division of the form A / x = B, where A != B. x has many solutions not easily expressed with masking.");
        }
        return MaskedLong.fromLong(1);
    }
}

View file

@ -0,0 +1,68 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import java.util.Map;
import java.util.Set;
import ghidra.app.plugin.assembler.sleigh.sem.*;
import ghidra.app.plugin.processors.sleigh.expression.EndInstructionValue;
/**
 * "Solves" expressions of {@code inst_next}
 *
 * This solver never solves for a goal. It only supplies the value of {@code inst_next} when
 * that value is present among the symbol values, and requests backfill otherwise.
 *
 * @note This solver requires backfill.
 */
public class EndInstructionValueSolver extends AbstractExpressionSolver<EndInstructionValue> {

    public EndInstructionValueSolver() {
        super(EndInstructionValue.class);
    }

    /**
     * Not supported: {@code inst_next} is never a solvable variable.
     */
    @Override
    public AssemblyResolution solve(EndInstructionValue iv, MaskedLong goal, Map<String, Long> vals,
            Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
            String description) {
        String message =
            "INTERNAL: Should never be asked to solve for " + AssemblyTreeResolver.INST_NEXT;
        throw new AssertionError(message);
    }

    /**
     * Look up the value of {@code inst_next} among the given symbol values.
     *
     * @throws NeedsBackfillException if the value is not (yet) present
     */
    @Override
    public MaskedLong getValue(EndInstructionValue iv, Map<String, Long> vals,
            Map<Integer, Object> res, AssemblyResolvedConstructor cur)
            throws NeedsBackfillException {
        Long instNext = vals.get(AssemblyTreeResolver.INST_NEXT);
        if (instNext != null) {
            return MaskedLong.fromLong(instNext);
        }
        throw new NeedsBackfillException(AssemblyTreeResolver.INST_NEXT);
    }

    /**
     * @return always 0
     */
    @Override
    public int getInstructionLength(EndInstructionValue iv, Map<Integer, Object> res) {
        return 0;
    }

    /**
     * Not supported for this expression type.
     */
    @Override
    public MaskedLong valueForResolution(EndInstructionValue exp, AssemblyResolvedConstructor rc) {
        // Would need to pass in symbol values, and perhaps consider child resolutions.
        throw new UnsupportedOperationException(
            "The solver should never ask for this value given a resolved constructor.");
    }
}

View file

@ -0,0 +1,110 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import java.util.Map;
import java.util.Set;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
import ghidra.app.plugin.processors.sleigh.expression.LeftShiftExpression;
import ghidra.util.Msg;
/**
 * Solver for pattern expressions of the form {@code A << B}
 */
public class LeftShiftExpressionSolver extends AbstractBinaryExpressionSolver<LeftShiftExpression> {

    public LeftShiftExpressionSolver() {
        super(LeftShiftExpression.class);
    }

    @Override
    public MaskedLong compute(MaskedLong lval, MaskedLong rval) {
        return lval.shiftLeft(rval);
    }

    @Override
    public MaskedLong computeLeft(MaskedLong rval, MaskedLong goal) throws SolverException {
        return goal.invShiftLeft(rval);
    }

    /**
     * Solve {@code lval << x = goal} for x by trying every shift amount from 0 to 63.
     *
     * @throws SolverException unless exactly one shift amount agrees with the goal
     */
    @Override
    public MaskedLong computeRight(MaskedLong lval, MaskedLong goal) throws SolverException {
        int matches = 0;
        int lastMatch = -1;
        for (int amount = 0; amount < Long.SIZE; amount++) {
            if (lval.shiftLeft(amount).agrees(goal)) {
                matches++;
                lastMatch = amount;
            }
        }
        if (matches != 1) {
            throw new SolverException(
                "Cannot solve for the left shift amount: " + goal + " = " + lval + " << L");
        }
        return MaskedLong.fromLong(lastMatch);
    }

    /**
     * Solve when neither operand is known, by guessing the shift amount.
     *
     * Guesses start at the number of trailing zeros in the goal's value and count down to 0.
     * The first guess for which both operands solve and combine without conflict wins. If a
     * guess is already in progress further up, or every guess fails, this defers to the
     * superclass.
     */
    @Override
    protected AssemblyResolution solveTwoSided(LeftShiftExpression exp, MaskedLong goal,
            Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
            Set<SolverHint> hints, String description)
            throws NeedsBackfillException, SolverException {
        // Do not guess the same parameter recursively
        // NOTE: Nested left shifts ought to be written as a left shift by a sum
        boolean alreadyGuessing = hints.contains(DefaultSolverHint.GUESSING_LEFT_SHIFT_AMOUNT);
        if (!alreadyGuessing) {
            // Any shift beyond the count of zeros to the right would produce too many zeros
            int maxShift = Long.numberOfTrailingZeros(goal.val);
            // Without making assumptions about the maximum value of the left side, we cannot
            // make use of the leading zero count, at least AFAIK. Maybe to better restrict the
            // max???
            Set<SolverHint> guessHints =
                SolverHint.with(hints, DefaultSolverHint.GUESSING_LEFT_SHIFT_AMOUNT);
            int shift = maxShift;
            while (shift >= 0) {
                try {
                    return solveForGuessedShift(exp, goal, shift, vals, res, cur, hints,
                        guessHints, description);
                }
                catch (SolverException | UnsupportedOperationException e) {
                    Msg.trace(this, "Shift of " + shift + " resulted in " + e);
                    // try the next
                }
                shift--;
            }
        }
        return super.solveTwoSided(exp, goal, vals, res, cur, hints, description);
    }

    /**
     * Attempt a solution assuming the given shift amount.
     *
     * The left operand is solved with the guessing hint applied; the right operand is solved
     * with the caller's original hints.
     */
    private AssemblyResolution solveForGuessedShift(LeftShiftExpression exp, MaskedLong goal,
            int shift, Map<String, Long> vals, Map<Integer, Object> res,
            AssemblyResolvedConstructor cur, Set<SolverHint> hints, Set<SolverHint> guessHints,
            String description) throws NeedsBackfillException, SolverException {
        MaskedLong rightGoal = MaskedLong.fromLong(shift);
        MaskedLong leftGoal = computeLeft(rightGoal, goal);
        AssemblyResolution leftRes =
            solver.solve(exp.getLeft(), leftGoal, vals, res, cur, guessHints, description);
        if (leftRes.isError()) {
            throw new SolverException("Solving left failed");
        }
        AssemblyResolution rightRes =
            solver.solve(exp.getRight(), rightGoal, vals, res, cur, hints, description);
        if (rightRes.isError()) {
            throw new SolverException("Solving right failed");
        }
        AssemblyResolvedConstructor combined = ((AssemblyResolvedConstructor) leftRes)
                .combine((AssemblyResolvedConstructor) rightRes);
        if (combined == null) {
            throw new SolverException("Left and right solutions conflict for shift=" + shift);
        }
        return combined;
    }
}

View file

@ -0,0 +1,33 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import ghidra.app.plugin.processors.sleigh.expression.MinusExpression;
/**
 * Solver for pattern expressions of the form {@code -A}
 */
public class MinusExpressionSolver extends AbstractUnaryExpressionSolver<MinusExpression> {

    public MinusExpressionSolver() {
        super(MinusExpression.class);
    }

    /**
     * Negate the given value.
     */
    @Override
    public MaskedLong compute(MaskedLong val) {
        MaskedLong negated = val.negate();
        return negated;
    }
}

View file

@ -0,0 +1,205 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import java.util.Map;
import java.util.Set;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
import ghidra.app.plugin.processors.sleigh.expression.MultExpression;
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
/**
 * Solves expressions of the form A * B
 *
 * Besides plain inversion of the multiplication, this solver also tries goals synthesized by
 * repeating the known bits of the original goal, to handle multiplication used for repeating
 * fields.
 */
public class MultExpressionSolver extends AbstractBinaryExpressionSolver<MultExpression> {
/**
 * A single solution attempt, which may fail by exception
 */
private interface SolverFunc {
AssemblyResolution solve() throws NeedsBackfillException, SolverException;
}
/**
 * Runs solution attempts and remembers the first backfill outcome and the first error outcome,
 * so that the most useful failure can be reported if no attempt succeeds
 */
private static class ResultTracker {
// Only one of these backfill things may be set, and only once
AssemblyResolution firstBackfillRes = null;
NeedsBackfillException firstBackfillExc = null;
// Only one of these error things may be set, and only once
AssemblyResolution firstErrorRes = null;
Throwable firstErrorExc = null;
/**
 * Run one attempt
 *
 * @param func the attempt
 * @return the solution, if it is neither a backfill nor an error; otherwise null, with the
 *         first backfill and first error (resolution or exception) recorded
 */
AssemblyResolution trySolverFunc(SolverFunc func) {
try {
AssemblyResolution sol = func.solve();
if (sol == null) {
return null;
}
if (sol.isBackfill()) {
if (firstBackfillRes == null && firstBackfillExc == null) {
firstBackfillRes = sol;
}
}
else if (sol.isError()) {
if (firstErrorRes == null && firstErrorExc == null) {
firstErrorRes = sol;
}
}
else {
return sol;
}
}
// NeedsBackfillException extends SolverException, so it must be caught first
catch (NeedsBackfillException e) {
if (firstBackfillRes == null && firstBackfillExc == null) {
firstBackfillExc = e;
}
}
catch (SolverException | UnsupportedOperationException e) {
if (firstErrorRes == null && firstErrorExc == null) {
firstErrorExc = e;
}
}
return null;
}
/**
 * Report the best recorded failure
 *
 * Backfill outcomes take precedence over errors. Recorded exceptions are rethrown; recorded
 * resolutions are returned. If nothing was recorded, a new {@link SolverException} describing
 * the unsolvable multiplication is thrown.
 *
 * @param rval the known operand, used only in the fallback error message
 * @param goal the goal, used only in the fallback error message
 */
AssemblyResolution returnBest(MaskedLong rval, MaskedLong goal)
throws NeedsBackfillException, SolverException {
if (firstBackfillExc != null) {
throw firstBackfillExc;
}
if (firstBackfillRes != null) {
return firstBackfillRes;
}
if (firstErrorExc != null && firstErrorExc instanceof SolverException) {
throw (SolverException) firstErrorExc;
}
if (firstErrorExc != null && firstErrorExc instanceof UnsupportedOperationException) {
throw (UnsupportedOperationException) firstErrorExc;
}
// trySolverFunc only records the two exception types handled above
if (firstErrorExc != null) {
throw new AssertionError();
}
if (firstErrorRes != null) {
return firstErrorRes;
}
throw new SolverException(
"Encountered unsolvable multiplication: " + rval + "*x = " + goal);
}
}
public MultExpressionSolver() {
super(MultExpression.class);
}
/**
 * Try solving the unknown operand against a synthesized (repeated) goal
 *
 * The candidate operand value is the synthesized goal divided (unsigned) by the known operand.
 * It is accepted only if multiplying it back agrees with the original goal.
 *
 * @param lexp the expression for the unknown operand
 * @param rval the known operand
 * @param repGoal the synthesized goal
 * @param goal the original goal
 * @return the solver's result for the candidate, or null if the candidate does not reproduce
 *         the original goal
 */
protected AssemblyResolution tryRep(PatternExpression lexp, MaskedLong rval, MaskedLong repGoal,
MaskedLong goal, Map<String, Long> vals, Map<Integer, Object> res,
AssemblyResolvedConstructor cur, Set<SolverHint> hints, String description)
throws NeedsBackfillException {
MaskedLong lval = repGoal.divideUnsigned(rval);
if (lval.multiply(rval).agrees(goal)) {
return solver.solve(lexp, lval, vals, res, cur, hints, description);
}
return null;
}
@Override
protected AssemblyResolution solveLeftSide(PatternExpression lexp, MaskedLong rval,
MaskedLong goal, Map<String, Long> vals, Map<Integer, Object> res,
AssemblyResolvedConstructor cur, Set<SolverHint> hints, String description)
throws NeedsBackfillException, SolverException {
// Try the usual case first
ResultTracker tracker = new ResultTracker();
AssemblyResolution sol = tracker.trySolverFunc(() -> {
return super.solveLeftSide(lexp, rval, goal, vals, res, cur, hints, description);
});
if (sol != null) {
return sol;
}
// Do not guess repetition again if a guess is already in progress
if (hints.contains(DefaultSolverHint.GUESSING_REPETITION)) {
return tracker.returnBest(rval, goal);
}
// Handle case of using multiplication for repeating fields
int unksToRight = Long.numberOfTrailingZeros(goal.msk);
int unksToLeft = Long.numberOfLeadingZeros(goal.msk);
int numBitsKnown = Long.SIZE - unksToRight - unksToLeft;
// Proceed only if the set mask bits form one contiguous run
if (Long.bitCount(goal.msk) == numBitsKnown) { // All bits counted
Set<SolverHint> hintsWithRepetition =
SolverHint.with(hints, DefaultSolverHint.GUESSING_REPETITION);
// Assume right truncation
// Need to fill all bits to the right in order to divide
int reps = (unksToRight + numBitsKnown - 1) / numBitsKnown;
long repMsk = goal.msk;
long repVal = goal.val;
for (int i = 0; i < reps; i++) {
repMsk = (repMsk >>> numBitsKnown) | repMsk;
repVal = (repVal >>> numBitsKnown) | repVal;
}
if (reps > 0) {
MaskedLong repRightGoal = MaskedLong.fromMaskAndValue(repMsk, repVal);
sol = tracker.trySolverFunc(() -> {
return tryRep(lexp, rval, repRightGoal, goal, vals, res, cur,
hintsWithRepetition, description);
});
if (sol != null) {
return sol;
}
}
// Assume right and left truncation
// Fill value bits all the way to left, then try adding one mask bit at a time
reps = (unksToLeft + numBitsKnown - 1) / numBitsKnown;
for (int i = 0; i < reps; i++) {
repVal = (repVal << numBitsKnown) | repVal;
}
for (int i = unksToLeft - 1; i >= 0; i--) {
repMsk = -1L >>> i;
MaskedLong repLeftGoal = MaskedLong.fromMaskAndValue(repMsk, repVal);
sol = tracker.trySolverFunc(() -> {
return tryRep(lexp, rval, repLeftGoal, goal, vals, res, cur,
hintsWithRepetition, description);
});
if (sol != null) {
return sol;
}
}
}
return tracker.returnBest(rval, goal);
}
/**
 * Solve for the right operand by reusing the left-side procedure with the operands swapped
 */
@Override
protected AssemblyResolution solveRightSide(PatternExpression rexp, MaskedLong lval,
MaskedLong goal, Map<String, Long> vals, Map<Integer, Object> res,
AssemblyResolvedConstructor cur, Set<SolverHint> hints, String description)
throws NeedsBackfillException, SolverException {
return solveLeftSide(rexp, lval, goal, vals, res, cur, hints, description);
}
/**
 * Solve {@code x * rval = goal} for x
 *
 * The candidate from {@code invMultiplyUnsigned} is verified by multiplying it back.
 *
 * @throws SolverException if the candidate does not reproduce the goal
 */
@Override
public MaskedLong computeLeft(MaskedLong rval, MaskedLong goal) throws SolverException {
MaskedLong lval = goal.invMultiplyUnsigned(rval);
if (lval.multiply(rval).agrees(goal)) {
return lval;
}
throw new SolverException(
"Encountered unsolvable multiplication: " + rval + "*x = " + goal);
}
@Override
public MaskedLong compute(MaskedLong lval, MaskedLong rval) {
return lval.multiply(rval);
}
}

View file

@ -0,0 +1,50 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
/**
 * An exception to indicate that the solution of an expression is not yet known
 *
 * Furthermore, it cannot be determined whether or not the expression is even solvable. When this
 * exception is thrown, a backfill record is placed on the encoded resolution indicating that the
 * resolver must attempt to solve the expression again, once the encoding is otherwise complete.
 * This is needed, most notably, when an encoding depends on the address of the <em>next</em>
 * instruction, because the length of the current instruction is not known until resolution has
 * finished.
 *
 * Backfill becomes a possibility when an expression depends on a symbol that is not (yet) defined.
 * Thus, as a matter of good record keeping, the exception takes the name of the missing symbol.
 */
public class NeedsBackfillException extends SolverException {
    // Assigned once at construction; final so the exception's state cannot change afterward
    private final String symbol;

    /**
     * Construct a backfill exception, resulting from the given missing symbol name
     * @param symbol the missing symbol name
     */
    public NeedsBackfillException(String symbol) {
        super("The symbol '" + symbol + "' is not yet available");
        this.symbol = symbol;
    }

    /**
     * Retrieve the missing symbol name from the original solution attempt
     * @return the missing symbol name
     */
    public String getSymbol() {
        return symbol;
    }
}

View file

@ -0,0 +1,33 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import ghidra.app.plugin.processors.sleigh.expression.NotExpression;
/**
 * Solver for pattern expressions of the form {@code ~A}
 */
public class NotExpressionSolver extends AbstractUnaryExpressionSolver<NotExpression> {

    public NotExpressionSolver() {
        super(NotExpression.class);
    }

    /**
     * Apply {@code not} to the given value.
     */
    @Override
    public MaskedLong compute(MaskedLong val) {
        MaskedLong inverted = val.not();
        return inverted;
    }
}

View file

@ -0,0 +1,145 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import java.util.Map;
import java.util.Set;
import com.google.common.collect.ImmutableList;
import ghidra.app.plugin.assembler.sleigh.sem.*;
import ghidra.app.plugin.processors.sleigh.Constructor;
import ghidra.app.plugin.processors.sleigh.expression.OperandValue;
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol;
import ghidra.app.plugin.processors.sleigh.symbol.TripleSymbol;
//Based on OperandValue#getValue()
/**
 * Solves expressions of an operand value
 *
 * These are a sort of named sub-expression, but they may also specify a shift in encoding.
 */
public class OperandValueSolver extends AbstractExpressionSolver<OperandValue> {

    public OperandValueSolver() {
        super(OperandValue.class);
    }

    /**
     * Obtains the "defining expression"
     *
     * This is either the symbol's assigned defining expression, or the expression associated
     * with its defining symbol.
     *
     * @param sym the operand symbol
     * @return the defining expression, or null if neither is available
     */
    protected PatternExpression getDefiningExpression(OperandSymbol sym) {
        PatternExpression patexp = sym.getDefiningExpression();
        if (patexp != null) {
            return patexp;
        }
        TripleSymbol defSym = sym.getDefiningSymbol();
        if (defSym == null) {
            return null;
        }
        return defSym.getPatternExpression();
    }

    /**
     * Solve the operand by solving its defining expression, then shifting the result by the
     * operand's computed offset.
     *
     * An operand without a defining expression can only satisfy a goal of zero.
     */
    @Override
    public AssemblyResolution solve(OperandValue ov, MaskedLong goal, Map<String, Long> vals,
            Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
            String description) throws NeedsBackfillException {
        Constructor cons = ov.getConstructor();
        OperandSymbol sym = cons.getOperand(ov.getIndex());
        PatternExpression patexp = getDefiningExpression(sym);
        if (patexp == null) {
            if (goal.equals(MaskedLong.ZERO)) {
                return AssemblyResolution.nop(description, null);
            }
            return AssemblyResolution.error("Operand " + sym.getName() +
                " is undefined and does not agree with child requirements", description, null);
        }
        AssemblyResolution result = solver.solve(patexp, goal, vals, res, cur, hints, description);
        if (result.isError()) {
            // Wrap the error so the message names the operand being solved
            AssemblyResolvedError err = (AssemblyResolvedError) result;
            return AssemblyResolution.error(err.getError(),
                "Solution to " + sym.getName() + " := " + goal + " = " + patexp,
                ImmutableList.of(result));
        }
        // TODO: Shifting here seems like a hack to me.
        // I assume this only comes at the top of an expression
        AssemblyResolvedConstructor con = (AssemblyResolvedConstructor) result;
        int shamt = AssemblyTreeResolver.computeOffset(sym, cons, res);
        return con.shift(shamt);
    }

    /**
     * Get the value of the operand's defining expression, evaluated against the current
     * resolution truncated by the operand's computed offset.
     *
     * @return the value, or {@link MaskedLong#ZERO} if the operand has no defining expression
     */
    @Override
    public MaskedLong getValue(OperandValue ov, Map<String, Long> vals, Map<Integer, Object> res,
            AssemblyResolvedConstructor cur) throws NeedsBackfillException {
        Constructor cons = ov.getConstructor();
        OperandSymbol sym = cons.getOperand(ov.getIndex());
        PatternExpression patexp = getDefiningExpression(sym);
        if (patexp == null) {
            return MaskedLong.ZERO;
        }
        int shamt = AssemblyTreeResolver.computeOffset(sym, cons, res);
        AssemblyResolvedConstructor truncated = cur == null ? null : cur.truncate(shamt);
        return solver.getValue(patexp, vals, res, truncated);
    }

    /**
     * Get the instruction length implied by the operand: the length of its defining expression
     * plus the operand's computed offset.
     *
     * @return the length, or 0 if the operand has no directly-assigned defining expression
     */
    @Override
    public int getInstructionLength(OperandValue ov, Map<Integer, Object> res) {
        Constructor cons = ov.getConstructor();
        OperandSymbol sym = cons.getOperand(ov.getIndex());
        // NOTE(review): unlike the other methods, this consults only the directly-assigned
        // expression and does not fall back to the defining symbol. Confirm that is intended
        // before switching to getDefiningExpression(sym).
        PatternExpression patexp = sym.getDefiningExpression();
        if (patexp == null) {
            return 0;
        }
        int length = solver.getInstructionLength(patexp, res);
        int shamt = AssemblyTreeResolver.computeOffset(sym, cons, res);
        return length + shamt;
    }

    /**
     * Get the value of the operand's defining expression for a resolved constructor.
     *
     * No shift is applied to the resolved constructor here.
     *
     * @return the value, or {@link MaskedLong#ZERO} if the operand has no defining expression
     */
    @Override
    public MaskedLong valueForResolution(OperandValue ov, AssemblyResolvedConstructor rc) {
        Constructor cons = ov.getConstructor();
        OperandSymbol sym = cons.getOperand(ov.getIndex());
        PatternExpression patexp = getDefiningExpression(sym);
        if (patexp == null) {
            return MaskedLong.ZERO; // TODO: ZERO or UNKS?
        }
        // TODO: Can just shift the rc to the left the appropriate number of bytes.
        // Would only affect the instruction block.
        // Since I'm using this just for context, ignore shifting for now.
        //int shamt = AssemblyTreeResolver.computeOffset(sym, cons, rc.children);
        // Children would be null here, anyway.
        return solver.valueForResolution(patexp, rc);
        // NOTE: To be paranoid, I could check for the existence of TokenField in the expression
        // And also check if a shift would be performed.
    }
}

View file

@ -0,0 +1,356 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import java.util.*;
import ghidra.app.plugin.assembler.sleigh.sem.*;
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer.DbgCtx;
import ghidra.app.plugin.processors.sleigh.ParserWalker;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.plugin.processors.sleigh.expression.*;
import ghidra.program.model.mem.MemoryAccessException;
import ghidra.util.Msg;
import ghidra.xml.XmlPullParser;
/**
* Solves expressions of the form A | B
*/
public class OrExpressionSolver extends AbstractBinaryExpressionSolver<OrExpression> {
/**
 * A placeholder expression: its value is always 0, it restores nothing from XML, and its
 * string form is null.
 *
 * NOTE(review): presumably used as a marker while collecting catenation components; confirm
 * against the collectComponents methods.
 */
static final PatternExpression DUMMY = new PatternExpression() {
@Override
public long getValue(ParserWalker walker) throws MemoryAccessException {
return 0;
}
@Override
public void restoreXml(XmlPullParser parser, SleighLanguage lang) {
// Dummy intentionally left empty
}
@Override
public String toString() {
return null;
}
};
/**
 * Construct the solver, passing {@code OrExpression.class} to the superclass constructor.
 */
public OrExpressionSolver() {
super(OrExpression.class);
}
/**
 * Evaluate the OR of the two operand values.
 */
@Override
public MaskedLong compute(MaskedLong lval, MaskedLong rval) {
    MaskedLong combined = lval.or(rval);
    return combined;
}
/**
 * Solve {@code x | rval = goal} for x by inverting the OR on the goal.
 */
@Override
public MaskedLong computeLeft(MaskedLong rval, MaskedLong goal) throws SolverException {
    MaskedLong required = goal.invOr(rval);
    return required;
}
/**
 * Try to solve the OR as a catenation of independent fields
 *
 * The component expressions are collected into a map keyed by bit position, which is then
 * walked in ascending key order. Each expression is solved against the slice of the goal lying
 * between its own key and the next key, and the partial solutions are combined.
 *
 * @return the combined solution, or the first error resolution produced by a component
 * @throws SolverException if the components cannot be collected, or if the component solutions
 *             conflict
 */
protected AssemblyResolution tryCatenationExpression(OrExpression exp, MaskedLong goal,
Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
Set<SolverHint> hints, String description) throws SolverException {
/*
 * If OR is being used to concatenate fields, then we can solve with some symbolic
 * manipulation. We'll descend to see if this is a tree of ORs with SHIFTs or fields at the
 * leaves. If it is, we can derive the layout of the composite field and solve each
 * component independently.
 */
Map<Long, PatternExpression> fields = new TreeMap<>();
// Seed position 0 before collecting, and terminate the layout at position 64 afterward
fields.put(0L, new ConstantValue(0));
collectComponentsOr(exp, 0, fields, vals, res, cur);
fields.put(64L, new ConstantValue(0));
// lo and fieldExp lag one entry behind the iteration: they describe the field which ends at
// the current entry's key
long lo = 0;
PatternExpression fieldExp = null;
AssemblyResolvedConstructor result = AssemblyResolution.nop(description, null);
try (DbgCtx dc = dbg.start("Trying solution of field catenation")) {
dbg.println("Original: " + goal + ":= " + exp);
for (Map.Entry<Long, PatternExpression> ent : fields.entrySet()) {
long hi = ent.getKey();
if (hi == 0) {
// The first entry only establishes the lowest field; there is nothing to solve yet
fieldExp = ent.getValue();
continue;
}
dbg.println("Part(" + hi + ":" + lo + "]:= " + fieldExp);
// Discard the bits at and above hi, then move bit lo down to bit 0
MaskedLong part = goal.shiftLeft(64 - hi).shiftRightPositional(64 - hi + lo);
dbg.println("Solving: " + part + ":= " + fieldExp);
AssemblyResolution sol = solver.solve(fieldExp, part, vals, res, cur, hints,
description + " with shift " + lo);
if (sol.isError()) {
return sol;
}
result = result.combine((AssemblyResolvedConstructor) sol);
if (result == null) {
throw new SolverException("Solutions to individual fields produced conflict");
}
lo = hi;
fieldExp = ent.getValue();
}
}
return result;
}
/**
 * Try to solve the OR as a circular shift
 *
 * The expression must match one of the patterns {@code (f << (C - g)) | (f >> g)} or
 * {@code (f >> (C - g)) | (f << g)}, with the subtraction on either operand, where {@code C}
 * evaluates to a fully-defined value giving the size of the shift.
 *
 * @return the solution from {@link #solveLeftCircularShift}
 * @throws SolverException if the expression is not a circular shift of known size, or if the
 *             circular shift cannot be solved
 */
protected AssemblyResolution tryCircularShiftExpression(OrExpression exp, MaskedLong goal,
        Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
        Set<SolverHint> hints, String description) throws SolverException {
    // If OR is being used to accomplish a circular shift, then we can apply a clever solver.
    // We'll match against the patterns: (f << (C - g)) | (f >> g)
    //                                   (f >> (C - g)) | (f << g)
    int leftdir; // 0 is left, 1 is right
    // "left" and "right" are about to get really overloaded....
    if ((exp.getLeft() instanceof LeftShiftExpression &&
        exp.getRight() instanceof RightShiftExpression)) {
        leftdir = 0;
    }
    else if (exp.getLeft() instanceof RightShiftExpression &&
        exp.getRight() instanceof LeftShiftExpression) {
        leftdir = 1;
    }
    else {
        throw new SolverException("Not a circular shift");
    }
    BinaryExpression left = (BinaryExpression) exp.getLeft();
    BinaryExpression right = (BinaryExpression) exp.getRight();
    // Both shifts must operate on the same value expression
    PatternExpression expValu1 = left.getLeft();
    PatternExpression expValu2 = right.getLeft();
    if (!expValu1.equals(expValu2)) {
        throw new SolverException("Not a circular shift");
    }
    PatternExpression expShift = null;
    int size = -1, dir = -1;
    PatternExpression s1 = left.getRight();
    PatternExpression s2 = right.getRight();
    if (s1 instanceof SubExpression) {
        SubExpression sub = (SubExpression) s1;
        expShift = sub.getRight();
        if (expShift.equals(s2)) {
            PatternExpression c = sub.getLeft();
            MaskedLong cc = solver.getValue(c, vals, res, cur);
            // The value may be unavailable (null); treat that like an unknown size so the
            // other operand can still be checked
            if (cc != null && cc.isFullyDefined()) {
                // the left side has the subtraction, so the overall shift is the opposite
                // of the direction of the shift on the left
                dir = 1 - leftdir;
                size = (int) cc.longValue();
            }
        }
    }
    if (dir == -1 && s2 instanceof SubExpression) {
        SubExpression sub = (SubExpression) s2;
        expShift = sub.getRight();
        if (expShift.equals(s1)) {
            PatternExpression c = sub.getLeft();
            MaskedLong cc = solver.getValue(c, vals, res, cur);
            if (cc != null && cc.isFullyDefined()) {
                // the right side has the subtraction, so the overall shift is the same
                // as the direction of the shift on the left
                dir = leftdir;
                size = (int) cc.longValue();
            }
        }
    }
    if (dir == -1) {
        throw new SolverException("Not a circular shift (or of known size)");
    }
    // At this point, I know it's a circular shift
    dbg.println("Identified circular shift: value:= " + expValu1 + ", shift:= " + expShift +
        ", size:= " + size + ", dir:= " + (dir == 1 ? "right" : "left"));
    return solveLeftCircularShift(expValu1, expShift, size, dir, goal, vals, res, cur, hints,
        description);
}
/**
 * Solve a circular shift of the value {@code f} by the amount {@code g}
 *
 * If exactly one of the value and the shift amount is fully defined, the other is solved
 * directly. If neither is, shift amounts from 0 up to (but excluding) {@code size} are guessed
 * in turn, and the first guess for which both parts solve without conflict is returned.
 *
 * @param expValue the expression for the shifted value, {@code f}
 * @param expShift the expression for the shift amount, {@code g}
 * @param size the size of the circular shift
 * @param dir the direction of the shift: 0 for left, 1 for right
 * @param goal the required result of the shift
 * @return the solution
 * @throws NeedsBackfillException if a required value is not yet available
 * @throws SolverException if a guess is already in progress, or if no guess succeeds
 */
protected AssemblyResolution solveLeftCircularShift(PatternExpression expValue,
        PatternExpression expShift, int size, int dir, MaskedLong goal, Map<String, Long> vals,
        Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
        String description) throws NeedsBackfillException, SolverException {
    MaskedLong valValue = solver.getValue(expValue, vals, res, cur);
    MaskedLong valShift = solver.getValue(expShift, vals, res, cur);
    // Only fully-defined values may be treated as constants; anything else is an unknown
    if (valValue != null && !valValue.isFullyDefined()) {
        if (!valValue.isFullyUndefined()) {
            dbg.println("Partially-defined f for left circular shift solver: " + valValue);
        }
        valValue = null;
    }
    // Same rule as for f: discard the shift amount unless it is fully defined
    if (valShift != null && !valShift.isFullyDefined()) {
        if (!valShift.isFullyUndefined()) {
            dbg.println("Partially-defined g for left circular shift solver: " + valShift);
        }
        valShift = null;
    }
    if (valValue != null && valShift != null) {
        throw new AssertionError("Should not have constants when solving special forms");
    }
    else if (valValue != null) {
        return solver.solve(expShift, computeCircShiftG(valValue, size, dir, goal), vals, res,
            cur, hints, description);
    }
    else if (valShift != null) {
        return solver.solve(expValue, computeCircShiftF(valShift, size, dir, goal), vals, res,
            cur, hints, description);
    }
    // Oiy. Try guessing the shift amount, starting at 0
    if (hints.contains(DefaultSolverHint.GUESSING_CIRCULAR_SHIFT_AMOUNT)) {
        throw new SolverException("Already guessing circular shift amount. " +
            "Try to express a double-shift as a shift by sum.");
    }
    Set<SolverHint> hintsWithCircularShift =
        SolverHint.with(hints, DefaultSolverHint.GUESSING_CIRCULAR_SHIFT_AMOUNT);
    for (int shift = 0; shift < size; shift++) {
        try {
            MaskedLong reqShift = MaskedLong.fromLong(shift);
            MaskedLong reqValue = computeCircShiftF(reqShift, size, dir, goal);
            // The value is solved with the guessing hint; the shift with the original hints
            AssemblyResolution resValue = solver.solve(expValue, reqValue, vals, res, cur,
                hintsWithCircularShift, description);
            if (resValue.isError()) {
                AssemblyResolvedError err = (AssemblyResolvedError) resValue;
                throw new SolverException("Solving f failed: " + err.getError());
            }
            AssemblyResolution resShift =
                solver.solve(expShift, reqShift, vals, res, cur, hints, description);
            if (resShift.isError()) {
                AssemblyResolvedError err = (AssemblyResolvedError) resShift;
                throw new SolverException("Solving g failed: " + err.getError());
            }
            AssemblyResolvedConstructor solValue = (AssemblyResolvedConstructor) resValue;
            AssemblyResolvedConstructor solShift = (AssemblyResolvedConstructor) resShift;
            AssemblyResolvedConstructor sol = solValue.combine(solShift);
            if (sol == null) {
                throw new SolverException(
                    "value and shift solutions conflict for shift=" + shift);
            }
            return sol;
        }
        catch (SolverException | UnsupportedOperationException e) {
            Msg.trace(this, "Shift of " + shift + " resulted in " + e);
            // try the next
        }
    }
    throw new SolverException(
        "Could not solve circular shift with variable bits and shift amount");
}
/**
 * Compute the shift amount g that circularly shifts a known value f onto the goal
 *
 * Candidate amounts are tried in increasing order, and the first one for which the shifted
 * {@code fval} agrees with {@code goal} is returned.
 *
 * @param fval the known value being shifted
 * @param size the size argument handed to {@code shiftCircular}; also the exclusive upper bound
 *            on the shift amounts tried
 * @param dir the direction argument handed to {@code shiftCircular}
 * @param goal the desired result of the circular shift
 * @return the smallest agreeing shift amount, in the range [0, size)
 * @throws SolverException if no amount in [0, size) produces a value agreeing with the goal
 */
protected MaskedLong computeCircShiftG(MaskedLong fval, int size, int dir, MaskedLong goal)
        throws SolverException {
    for (int i = 0; i < size; i++) {
        if (fval.shiftCircular(i, size, dir).agrees(goal)) {
            return MaskedLong.fromLong(i);
        }
    }
    throw new SolverException("Cannot solve for the circular shift amount");
}
/**
 * Compute the value f that, circularly shifted by a known amount, yields the goal
 *
 * The goal is shifted by the same amount with the direction argument flipped
 * ({@code 1 - dir}).
 *
 * @param gval the known shift amount
 * @param size the size argument handed to {@code shiftCircular}
 * @param dir the direction of the shift being solved
 * @param goal the desired result of the circular shift
 * @return the required value of the shifted operand
 */
protected MaskedLong computeCircShiftF(MaskedLong gval, int size, int dir, MaskedLong goal) {
    // Undo the shift: apply it to the goal in the opposite direction
    int oppositeDir = 1 - dir;
    MaskedLong required = goal.shiftCircular(gval, size, oppositeDir);
    return required;
}
/**
 * Solve an OR expression where neither operand could be folded to a constant
 *
 * Two special forms are attempted in order: field catenation, then circular shift. Any
 * exception raised by an attempt is written to the debug output and the next form is tried.
 *
 * @throws SolverException if neither special form yields a solution
 */
@Override
protected AssemblyResolution solveTwoSided(OrExpression exp, MaskedLong goal,
        Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
        Set<SolverHint> hints, String description)
        throws NeedsBackfillException, SolverException {
    // First attempt: treat the OR as a catenation of fields
    try {
        return tryCatenationExpression(exp, goal, vals, res, cur, hints, description);
    }
    catch (Exception catenationFailure) {
        dbg.println("while solving: " + goal + "=:" + exp);
        dbg.println(catenationFailure.getMessage());
    }

    // Second attempt: treat the OR as a circular shift
    try {
        return tryCircularShiftExpression(exp, goal, vals, res, cur, hints, description);
    }
    catch (Exception circularShiftFailure) {
        dbg.println("while solving: " + goal + "=:" + exp);
        dbg.println(circularShiftFailure.getMessage());
    }

    throw new SolverException("Could not solve two-sided OR");
}
/**
 * Record the components of an expression, keyed by their accumulated shift
 *
 * OR and shift expressions are descended into via the dedicated helpers; any other expression
 * is stored in {@code components} under the shift accumulated so far.
 *
 * @param exp the (sub-)expression to decompose
 * @param shift the shift accumulated from the enclosing shift expressions
 * @param components the map receiving each component, keyed by shift
 * @throws SolverException if a helper encounters a shift amount that is not a known constant
 */
void collectComponents(PatternExpression exp, long shift,
        Map<Long, PatternExpression> components, Map<String, Long> vals,
        Map<Integer, Object> res, AssemblyResolvedConstructor cur) throws SolverException {
    if (exp instanceof OrExpression) {
        collectComponentsOr((OrExpression) exp, shift, components, vals, res, cur);
        return;
    }
    if (exp instanceof LeftShiftExpression) {
        collectComponentsLeft((LeftShiftExpression) exp, shift, components, vals, res, cur);
        return;
    }
    if (exp instanceof RightShiftExpression) {
        collectComponentsRight((RightShiftExpression) exp, shift, components, vals, res, cur);
        return;
    }
    // Anything else is a component in its own right
    assert shift < 64;
    components.put(shift, exp);
}
/**
 * Collect the components of both operands of an OR, at the same accumulated shift
 *
 * @param exp the OR expression to descend into
 * @param shift the shift accumulated so far, applied unchanged to both operands
 * @param components the map receiving each component, keyed by shift
 * @throws SolverException if either operand cannot be decomposed
 */
void collectComponentsOr(OrExpression exp, long shift, Map<Long, PatternExpression> components,
        Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur)
        throws SolverException {
    PatternExpression leftOperand = exp.getLeft();
    collectComponents(leftOperand, shift, components, vals, res, cur);
    PatternExpression rightOperand = exp.getRight();
    collectComponents(rightOperand, shift, components, vals, res, cur);
}
/**
 * Collect the components of a left shift, adding its amount to the accumulated shift
 *
 * The shift amount (right operand) must evaluate to a fully-defined constant.
 *
 * @param exp the left-shift expression to descend into
 * @param shift the shift accumulated so far
 * @param components the map receiving each component, keyed by shift
 * @throws SolverException if the shift amount needs backfill, is unknown, or is only partially
 *             defined
 */
void collectComponentsLeft(LeftShiftExpression exp, long shift,
        Map<Long, PatternExpression> components, Map<String, Long> vals,
        Map<Integer, Object> res, AssemblyResolvedConstructor cur) throws SolverException {
    MaskedLong amount;
    try {
        amount = solver.getValue(exp.getRight(), vals, res, cur);
    }
    catch (NeedsBackfillException e) {
        throw new SolverException("Variable shifts break field catenation solver", e);
    }
    boolean isConstant = amount != null && amount.isFullyDefined();
    if (!isConstant) {
        throw new SolverException("Variable shifts break field catenation solver");
    }
    long position = shift + amount.val;
    collectComponents(exp.getLeft(), position, components, vals, res, cur);
}
/**
 * Collect the components of a right shift, subtracting its amount from the accumulated shift
 *
 * The shift amount (right operand) must evaluate to a fully-defined constant.
 *
 * @param exp the right-shift expression to descend into
 * @param shift the shift accumulated so far
 * @param components the map receiving each component, keyed by shift
 * @throws SolverException if the shift amount needs backfill, is unknown, or is only partially
 *             defined
 */
void collectComponentsRight(RightShiftExpression exp, long shift,
        Map<Long, PatternExpression> components, Map<String, Long> vals,
        Map<Integer, Object> res, AssemblyResolvedConstructor cur) throws SolverException {
    MaskedLong amount;
    try {
        amount = solver.getValue(exp.getRight(), vals, res, cur);
    }
    catch (NeedsBackfillException e) {
        throw new SolverException("Variable shifts break field catenation solver", e);
    }
    boolean isConstant = amount != null && amount.isFullyDefined();
    if (!isConstant) {
        throw new SolverException("Variable shifts break field catenation solver");
    }
    long position = shift - amount.val;
    collectComponents(exp.getLeft(), position, components, vals, res, cur);
}
}

View file

@ -0,0 +1,38 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import ghidra.app.plugin.processors.sleigh.expression.PlusExpression;
/**
 * Solver for sums, i.e., expressions of the form A + B
 */
public class PlusExpressionSolver extends AbstractBinaryExpressionSolver<PlusExpression> {

    /**
     * Construct the solver, bound to {@link PlusExpression}
     */
    public PlusExpressionSolver() {
        super(PlusExpression.class);
    }

    /**
     * Given the right operand and the goal, compute the required left operand: goal - rval
     */
    @Override
    public MaskedLong computeLeft(MaskedLong rval, MaskedLong goal) {
        MaskedLong required = goal.subtract(rval);
        return required;
    }

    /**
     * Evaluate the sum of two known operands
     */
    @Override
    public MaskedLong compute(MaskedLong lval, MaskedLong rval) {
        MaskedLong sum = lval.add(rval);
        return sum;
    }
}

View file

@ -0,0 +1,211 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import java.util.*;
import com.google.common.collect.ImmutableSet;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
/**
* This singleton class seeks solutions to {@link PatternExpression}s
*
* It is called naive, because it does not perform algebraic transformations. Rather, it attempts to
* fold constants, assuming there is a single variable in the expression, modifying the goal as it
* descends toward that variable. If it finds a variable, i.e., token or context field, it encodes
* the solution, positioned in the field. If the expression is constant, it checks that the goal
* agrees. If not, an error is returned.
*
* Each operation on an expression is dispatched, by the expression's exact class, to the solver
* registered for that class.
*
* TODO: This whole mechanism ought to just be factored directly into {@link PatternExpression}.
*/
public class RecursiveDescentSolver {
// Debug output used by the methods below; bound to DbgTimer.INACTIVE here
protected static final DbgTimer dbg = DbgTimer.INACTIVE;
// The singleton, constructed during class initialization and returned by getSolver()
private static final RecursiveDescentSolver solver = new RecursiveDescentSolver();
// A mapping from each subclass of PatternExpression to the appropriate solver
protected Map<Class<?>, AbstractExpressionSolver<?>> registry = new HashMap<>();
{
// Register all the solvers. Just one instance will do.
// This instance initializer follows the registry field, so the map exists before use.
new AndExpressionSolver().register(this);
new ConstantValueSolver().register(this);
new ContextFieldSolver().register(this);
new DivExpressionSolver().register(this);
new EndInstructionValueSolver().register(this);
new LeftShiftExpressionSolver().register(this);
new MinusExpressionSolver().register(this);
new MultExpressionSolver().register(this);
new NotExpressionSolver().register(this);
new OperandValueSolver().register(this);
new OrExpressionSolver().register(this);
new PlusExpressionSolver().register(this);
new RightShiftExpressionSolver().register(this);
new StartInstructionValueSolver().register(this);
new SubExpressionSolver().register(this);
new TokenFieldSolver().register(this);
new XorExpressionSolver().register(this);
}
/**
* Obtain an instance of the naive solver
*
* @return the singleton instance
*/
public static RecursiveDescentSolver getSolver() {
return solver;
}
/**
* Register a solver for a particular subclass of {@link PatternExpression}
*
* A later registration for the same class replaces the earlier one.
*
* @param tcls the subclass the solver can handle
* @param s the solver for the subclass
*/
protected <T extends PatternExpression> void register(Class<T> tcls,
AbstractExpressionSolver<T> s) {
registry.put(tcls, s);
}
/**
* Retrieve the registered solver for a given subclass of {@link PatternExpression}
*
* The lookup uses the exact class given; superclasses are not consulted.
*
* @param tcls the subclass to solve
* @return the registered solver
* @throws RuntimeException if no solver is registered for the class
*/
protected <T extends PatternExpression> AbstractExpressionSolver<T> getRegistered(
Class<?> tcls) {
// The cast is unchecked: the registry's value type carries a wildcard
@SuppressWarnings("unchecked")
AbstractExpressionSolver<T> s = (AbstractExpressionSolver<T>) registry.get(tcls);
if (s == null) {
throw new RuntimeException("No registered solver for class " + tcls);
}
return s;
}
/**
* Solve a given expression, passing hints
*
* @param exp the expression to solve
* @param goal the desired output (modulo a mask) of the expression
* @param vals any defined symbols (usually {@code inst_start}, and {@code inst_next})
* @param res resolved subconstructors, by operand index (see
*            {@link #solve(PatternExpression, MaskedLong, Map, Map, AssemblyResolvedConstructor, String)})
* @param cur passed through to the registered solver. NOTE(review): presumably the resolution
*            built so far, from which field values may be read -- confirm
* @param hints describes techniques applied by calling solvers
* @param description a description to attach to the encoded solution
* @return the encoded solution. NOTE(review): this method does not declare
*         {@code SolverException}; callers in this package test the result with
*         {@link AssemblyResolution#isError()} -- confirm that is how failure is reported
* @throws NeedsBackfillException a solution may exist, but a required symbol is missing
* @throws UnsupportedOperationException rethrown, after a debug message, if the registered solver
*             raises it
*/
protected AssemblyResolution solve(PatternExpression exp, MaskedLong goal,
Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
Set<SolverHint> hints, String description) throws NeedsBackfillException {
try {
return getRegistered(exp.getClass()).solve(exp, goal, vals, res, cur, hints,
description);
}
catch (UnsupportedOperationException e) {
// Note which expression could not be solved, then let the caller deal with it
dbg.println("Error solving " + exp + " = " + goal);
throw e;
}
}
/**
* Solve a given expression, assuming it outputs a given masked value
*
* From a simplified perspective, we need only the expression and the desired value to solve it.
* Generally speaking, the expression may only contain a single variable, and the encoded
* result represents that single variable. It must be absorbed into the overall instruction
* and/or context encoding.
*
* More realistically, however, these expressions may depend on quite a bit of extra
* information. For example, PC-relative encodings (i.e., those involving {@code inst_start} or
* {@code inst_next}) need to know the starting address of the resulting instruction.
* {@code inst_start} must be provided to the solver by the assembler. {@code inst_next} cannot
* be known until the instruction length is known. Thus, expressions using it always result in a
* {@link NeedsBackfillException}. The symbols, when known, are provided to the solver via the
* {@code vals} parameter.
*
* Expressions involving {@code OperandValue}s are a little more complicated, because they
* specify an offset that affects its encoding in the instruction. To compute this offset, the
* lengths of other surrounding operands must be known. Thus, when solving a context change for
* a given constructor, its resolved subconstructors must be provided to the solver via the
* {@code res} parameter.
*
* This overload solves with an empty set of hints.
*
* @param exp the expression to solve
* @param goal the desired output (modulo a mask) of the expression
* @param vals any defined symbols (usually {@code inst_start}, and {@code inst_next})
* @param res resolved subconstructors, by operand index (see method details)
* @param cur passed through to the registered solver. NOTE(review): presumably the resolution
*            built so far, from which field values may be read -- confirm
* @param description a description to attach to the encoded solution
* @return the encoded solution. NOTE(review): this method does not declare
*         {@code SolverException}; callers in this package test the result with
*         {@link AssemblyResolution#isError()} -- confirm that is how failure is reported
* @throws NeedsBackfillException a solution may exist, but a required symbol is missing
*/
public AssemblyResolution solve(PatternExpression exp, MaskedLong goal, Map<String, Long> vals,
Map<Integer, Object> res, AssemblyResolvedConstructor cur, String description)
throws NeedsBackfillException {
return solve(exp, goal, vals, res, cur, ImmutableSet.of(), description);
}
/**
* Attempt to fold a given expression (or sub-expression) into a single constant.
*
* The folded value is also written to the debug output.
*
* @param exp the (sub-)expression to fold
* @param vals any defined symbols (usually {@code inst_start}, and {@code inst_next})
* @param res resolved subconstructors, by operand index (see
*            {@link #solve(PatternExpression, MaskedLong, Map, Map, AssemblyResolvedConstructor, String)})
* @param cur passed through to the registered solver. NOTE(review): presumably the resolution
*            built so far, from which field values may be read -- confirm
* @return the masked solution, as computed by the registered solver
* @throws NeedsBackfillException it may be folded, but a required symbol is missing
*/
protected <T extends PatternExpression> MaskedLong getValue(T exp, Map<String, Long> vals,
Map<Integer, Object> res, AssemblyResolvedConstructor cur)
throws NeedsBackfillException {
MaskedLong value = getRegistered(exp.getClass()).getValue(exp, vals, res, cur);
dbg.println("Expression: " + value + " =: " + exp);
return value;
}
/**
* Determine the length of the instruction part of the encoded solution to the given expression
*
* This is used to keep operands in their appropriate position when backfilling becomes
* applicable. Normally, the instruction length is taken from the encoding of a solution, but if
* the solution cannot be determined yet, the instruction length must still be obtained.
*
* The length can be determined by finding token fields in the expression.
*
* @param exp the expression, presumably containing a token field
* @param res resolved subconstructors, by operand index (see
*            {@link #solve(PatternExpression, MaskedLong, Map, Map, AssemblyResolvedConstructor, String)})
* @return the anticipated length, in bytes, of the instruction encoding
*/
public int getInstructionLength(PatternExpression exp, Map<Integer, Object> res) {
return getRegistered(exp.getClass()).getInstructionLength(exp, res);
}
/**
* Compute the value of an expression given a (possibly-intermediate) resolution
*
* @param exp the expression to evaluate
* @param rc the resolution on which to evaluate it
* @return the result
*/
public MaskedLong valueForResolution(PatternExpression exp, AssemblyResolvedConstructor rc) {
return getRegistered(exp.getClass()).valueForResolution(exp, rc);
}
}

View file

@ -0,0 +1,110 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import java.util.Map;
import java.util.Set;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor;
import ghidra.app.plugin.processors.sleigh.expression.RightShiftExpression;
import ghidra.util.Msg;
/**
 * Solver for right shifts, i.e., expressions of the form A >> B
 */
public class RightShiftExpressionSolver
        extends AbstractBinaryExpressionSolver<RightShiftExpression> {

    /**
     * Construct the solver, bound to {@link RightShiftExpression}
     */
    public RightShiftExpressionSolver() {
        super(RightShiftExpression.class);
    }

    /**
     * Evaluate the shift for two known operands
     */
    @Override
    public MaskedLong compute(MaskedLong lval, MaskedLong rval) {
        return lval.shiftRight(rval);
    }

    /**
     * Given the shift amount and the goal, compute the required left operand by inverting the
     * shift
     */
    @Override
    public MaskedLong computeLeft(MaskedLong rval, MaskedLong goal) throws SolverException {
        return goal.invShiftRight(rval);
    }

    /**
     * Given the left operand and the goal, find the shift amount
     *
     * Every amount from 0 to 63 is tested; the amount is accepted only if it is the sole one
     * whose result agrees with the goal.
     *
     * @throws SolverException if zero or several amounts agree with the goal
     */
    @Override
    public MaskedLong computeRight(MaskedLong lval, MaskedLong goal) throws SolverException {
        // Bit i of candidates is set iff shifting by i agrees with the goal
        long candidates = 0;
        for (int amount = 0; amount < 64; amount++) {
            if (lval.shiftRight(amount).agrees(goal)) {
                candidates |= 1L << amount;
            }
        }
        if (Long.bitCount(candidates) != 1) {
            throw new SolverException(
                "Cannot solve for the right shift amount: " + goal + " = " + lval + " >> R");
        }
        return MaskedLong.fromLong(Long.numberOfTrailingZeros(candidates));
    }

    /**
     * Solve a right shift where neither operand could be folded to a constant
     *
     * Unless a calling solver is already guessing a right-shift amount, each amount from 0 up to
     * the number of leading zeros in the goal's value is tried: the left operand is solved for
     * the inverted goal, the right operand for the guessed amount, and the two solutions are
     * combined. The first guess that works is returned; otherwise, the superclass strategy is
     * used.
     */
    @Override
    protected AssemblyResolution solveTwoSided(RightShiftExpression exp, MaskedLong goal,
            Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
            Set<SolverHint> hints, String description)
            throws NeedsBackfillException, SolverException {
        // Do the similar thing as in {@link LeftShiftExpressionSolver}
        // Do not guess the same parameter recursively
        // NOTE: Nested right shifts ought to be written as a right shift by a sum
        if (!hints.contains(DefaultSolverHint.GUESSING_RIGHT_SHIFT_AMOUNT)) {
            int maxShift = Long.numberOfLeadingZeros(goal.val);
            Set<SolverHint> hintsWithRShift =
                SolverHint.with(hints, DefaultSolverHint.GUESSING_RIGHT_SHIFT_AMOUNT);
            for (int shift = 0; shift <= maxShift; shift++) {
                try {
                    MaskedLong guessedAmount = MaskedLong.fromLong(shift);
                    MaskedLong requiredLeft = computeLeft(guessedAmount, goal);

                    AssemblyResolution leftResult = solver.solve(exp.getLeft(), requiredLeft,
                        vals, res, cur, hintsWithRShift, description);
                    if (leftResult.isError()) {
                        throw new SolverException("Solving left failed");
                    }

                    AssemblyResolution rightResult = solver.solve(exp.getRight(), guessedAmount,
                        vals, res, cur, hints, description);
                    if (rightResult.isError()) {
                        throw new SolverException("Solving right failed");
                    }

                    AssemblyResolvedConstructor leftSolution =
                        (AssemblyResolvedConstructor) leftResult;
                    AssemblyResolvedConstructor rightSolution =
                        (AssemblyResolvedConstructor) rightResult;
                    AssemblyResolvedConstructor combined = leftSolution.combine(rightSolution);
                    if (combined == null) {
                        throw new SolverException(
                            "Left and right solutions conflict for shift=" + shift);
                    }
                    return combined;
                }
                catch (SolverException | UnsupportedOperationException e) {
                    Msg.trace(this, "Shift of " + shift + " resulted in " + e);
                    // try the next
                }
            }
        }
        return super.solveTwoSided(exp, goal, vals, res, cur, hints, description);
    }
}

View file

@ -0,0 +1,29 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
/**
 * A checked exception signalling that an expression has no solution
 */
public class SolverException extends Exception {

    /**
     * Create the exception with an explanatory message
     *
     * @param message why no solution is possible
     */
    public SolverException(String message) {
        super(message);
    }

    /**
     * Create the exception with an explanatory message and the underlying cause
     *
     * @param message why no solution is possible
     * @param cause the exception that led to this one
     */
    public SolverException(String message, Throwable cause) {
        super(message, cause);
    }
}

View file

@ -0,0 +1,42 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import java.util.Set;
import com.google.common.collect.ImmutableSet;
/**
* A type for solver hints
*
* Hints inform "sub-"solvers of the techniques already being applied by the calling solvers. This
* helps prevent situations where, e.g., two multiplication solvers (applied to repeated or nested
* multiplication) both attempt to synthesize new goals for repetition. This sort of expression is
* common when decoding immediates in the AArch64 specification.
*
* Using an interface implemented by an enumeration (instead of just using the enumeration directly)
* eases expansion by extension without modifying the core code.
*
* @see DefaultSolverHint
*/
public interface SolverHint {
static Set<SolverHint> with(Set<SolverHint> set, SolverHint... plus) {
ImmutableSet.Builder<SolverHint> hints = ImmutableSet.builder();
hints.addAll(set);
hints.add(plus);
return hints.build();
}
}

View file

@ -0,0 +1,62 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import java.util.Map;
import java.util.Set;
import ghidra.app.plugin.assembler.sleigh.sem.*;
import ghidra.app.plugin.processors.sleigh.expression.StartInstructionValue;
/**
 * "Solves" expressions of {@code inst_start}
 *
 * Behaves like the constant solver, except that the constant is the value of {@code inst_start},
 * which is given by the assembly address.
 */
public class StartInstructionValueSolver extends AbstractExpressionSolver<StartInstructionValue> {

    /**
     * Construct the solver, bound to {@link StartInstructionValue}
     */
    public StartInstructionValueSolver() {
        super(StartInstructionValue.class);
    }

    /**
     * Always fails: {@code inst_start} is never something to solve for
     */
    @Override
    public AssemblyResolution solve(StartInstructionValue iv, MaskedLong goal,
            Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
            Set<SolverHint> hints, String description) {
        String message =
            "INTERNAL: Should never be asked to solve for " + AssemblyTreeResolver.INST_START;
        throw new AssertionError(message);
    }

    /**
     * Look up the value of {@code inst_start} among the defined symbols
     */
    @Override
    public MaskedLong getValue(StartInstructionValue iv, Map<String, Long> vals,
            Map<Integer, Object> res, AssemblyResolvedConstructor cur) {
        long instStart = vals.get(AssemblyTreeResolver.INST_START);
        return MaskedLong.fromLong(instStart);
    }

    /**
     * The symbol contributes nothing to the instruction length
     */
    @Override
    public int getInstructionLength(StartInstructionValue exp, Map<Integer, Object> res) {
        return 0;
    }

    /**
     * Not supported: the symbol values are not available from a resolved constructor
     */
    @Override
    public MaskedLong valueForResolution(StartInstructionValue exp,
            AssemblyResolvedConstructor rc) {
        // Would need to pass in symbol values.
        String message = "The solver should never ask for this value given a resolved constructor.";
        throw new UnsupportedOperationException(message);
    }
}

View file

@ -0,0 +1,43 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import ghidra.app.plugin.processors.sleigh.expression.SubExpression;
/**
 * Solver for differences, i.e., expressions of the form A - B
 */
public class SubExpressionSolver extends AbstractBinaryExpressionSolver<SubExpression> {

    /**
     * Construct the solver, bound to {@link SubExpression}
     */
    public SubExpressionSolver() {
        super(SubExpression.class);
    }

    /**
     * Given the right operand and the goal, compute the required left operand: rval + goal
     */
    @Override
    public MaskedLong computeLeft(MaskedLong rval, MaskedLong goal) throws SolverException {
        MaskedLong requiredLeft = rval.add(goal);
        return requiredLeft;
    }

    /**
     * Given the left operand and the goal, compute the required right operand: lval - goal
     */
    @Override
    public MaskedLong computeRight(MaskedLong lval, MaskedLong goal) throws SolverException {
        MaskedLong requiredRight = lval.subtract(goal);
        return requiredRight;
    }

    /**
     * Evaluate the difference of two known operands
     */
    @Override
    public MaskedLong compute(MaskedLong lval, MaskedLong rval) {
        MaskedLong difference = lval.subtract(rval);
        return difference;
    }
}

View file

@ -0,0 +1,79 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import java.util.Map;
import java.util.Set;
import ghidra.app.plugin.assembler.sleigh.sem.*;
import ghidra.app.plugin.processors.sleigh.expression.TokenField;
/**
 * Solves expressions of a token (instruction encoding) field
 *
 * In essence, the goal is simply encoded into the field, provided it can be represented in the
 * given space and format. Otherwise, there is no solution.
 */
public class TokenFieldSolver extends AbstractExpressionSolver<TokenField> {

    /**
     * Construct the solver, bound to {@link TokenField}
     */
    public TokenFieldSolver() {
        super(TokenField.class);
    }

    /**
     * Encode the goal into the token field
     *
     * @return an instruction-only resolution holding the encoded field, or an error resolution if
     *         the goal is out of the field's range
     */
    @Override
    public AssemblyResolution solve(TokenField tf, MaskedLong goal, Map<String, Long> vals,
            Map<Integer, Object> res, AssemblyResolvedConstructor cur, Set<SolverHint> hints,
            String description) {
        assert tf.minValue() == 0; // In case someone decides to do signedness there.
        if (goal.isInRange(tf.maxValue(), tf.hasSignbit())) {
            AssemblyPatternBlock encoded = AssemblyPatternBlock.fromTokenField(tf, goal);
            return AssemblyResolution.instrOnly(encoded, description, null);
        }
        return AssemblyResolution.error("Value " + goal + " is not valid for " + tf,
            description, null);
    }

    /**
     * Read the field's value from the current resolution, if there is one
     *
     * @return the field's value, or null when {@code cur} is null
     */
    @Override
    public MaskedLong getValue(TokenField tf, Map<String, Long> vals, Map<Integer, Object> res,
            AssemblyResolvedConstructor cur) {
        return cur == null ? null : valueForResolution(tf, cur);
    }

    /**
     * The instruction must be long enough to reach the field's last byte
     */
    @Override
    public int getInstructionLength(TokenField tf, Map<Integer, Object> res) {
        return tf.getByteEnd() + 1;
    }

    /**
     * Decode the field's value out of a resolution's instruction bytes
     *
     * The field's bytes are read, byte-swapped unless the field is big endian, shifted right by
     * the field's shift, and finally sign- or zero-extended from the field's width in bits.
     */
    @Override
    public MaskedLong valueForResolution(TokenField tf, AssemblyResolvedConstructor rc) {
        int byteCount = tf.getByteEnd() - tf.getByteStart() + 1;
        MaskedLong raw = rc.readInstruction(tf.getByteStart(), byteCount);
        MaskedLong ordered = tf.isBigEndian() ? raw : raw.byteSwap(byteCount);
        MaskedLong aligned = ordered.shiftRight(tf.getShift());
        if (tf.hasSignbit()) {
            return aligned.signExtend(tf.getBitEnd() - tf.getBitStart() + 1);
        }
        return aligned.zeroExtend(tf.getBitEnd() - tf.getBitStart() + 1);
    }
}

View file

@ -0,0 +1,38 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.expr;
import ghidra.app.plugin.processors.sleigh.expression.XorExpression;
/**
 * Solver for exclusive ORs, i.e., expressions of the form A $xor B
 */
public class XorExpressionSolver extends AbstractBinaryExpressionSolver<XorExpression> {

    /**
     * Construct the solver, bound to {@link XorExpression}
     */
    public XorExpressionSolver() {
        super(XorExpression.class);
    }

    /**
     * Given one operand and the goal, compute the required other operand: goal $xor other
     */
    @Override
    public MaskedLong computeLeft(MaskedLong other, MaskedLong goal) {
        MaskedLong required = goal.xor(other);
        return required;
    }

    /**
     * Evaluate the exclusive OR of two known operands
     */
    @Override
    public MaskedLong compute(MaskedLong lval, MaskedLong rval) {
        MaskedLong result = lval.xor(rval);
        return result;
    }
}

View file

@ -0,0 +1,280 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.grammars;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.collections4.MultiValuedMap;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNonTerminal;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblySymbol;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyTerminal;
import ghidra.generic.util.datastruct.TreeSetValuedTreeMap;
/**
* Defines a context-free grammar, usually for the purpose of parsing mnemonic assembly instructions
*
* As in classic computer science, a CFG consists of productions of non-terminals and terminals.
* The left-hand side of a production must be a single non-terminal, but the right-hand side
* may be any string of symbols. To avoid overloading the term "String," here we call it a
* "Sentential."
*
* To define a grammar, simply construct an appropriate subclass (probably {@link AssemblyGrammar})
* and call {@link #addProduction(AbstractAssemblyProduction)} or
* {@link #addProduction(AssemblyNonTerminal, AssemblySentential)}. The grammar object will collect
* the non-terminals and terminals.
*
* By default, the start symbol is taken from the left-hand side of the first production added to
* the grammar.
*
* @param <NT> the type of non-terminals
* @param <T> the type of terminals
* @param <P> the type of productions, which must have the same types of (non-)terminals.
*/
public abstract class AbstractAssemblyGrammar<NT extends AssemblyNonTerminal, P extends AbstractAssemblyProduction<NT>>
implements Iterable<P> {
// Productions, each stored under the name returned by its getName() (see addProduction)
protected final MultiValuedMap<String, P> productions = new TreeSetValuedTreeMap<>();
// Productions in the order they were first added; a production's idx is its position here
protected final List<P> prodList = new ArrayList<>();
// Non-terminals encountered in added productions (left-hand sides included), by name
protected final Map<String, NT> nonterminals = new TreeMap<>();
// Terminals encountered while iterating added productions, by name
protected final Map<String, AssemblyTerminal> terminals = new TreeMap<>();
// Every symbol, terminal or not, encountered in added productions, by name
protected final Map<String, AssemblySymbol> symbols = new TreeMap<>();
// Name of the start symbol; null until a production is added or a start is set explicitly
protected String startName;
/**
* Construct a production of this grammar's production type
*
* Because a subclass may have a different type of production, it must provide a mechanism for
* constructing an appropriate production given just the LHS and RHS. This is called by
* {@link #addProduction(AssemblyNonTerminal, AssemblySentential)}.
*
* @param lhs the left-hand side of the production
* @param rhs the right-hand side of the production
* @return the constructed production
*/
protected abstract P newProduction(NT lhs, AssemblySentential<NT> rhs);
/**
 * Construct a production from its two sides and add it to the grammar
 *
 * @param lhs the left-hand side
 * @param rhs the right-hand side
 */
public void addProduction(NT lhs, AssemblySentential<NT> rhs) {
    addProduction(newProduction(lhs, rhs));
}
/**
 * Add a production to the grammar
 *
 * If the production is new to the grammar, it is appended to the ordered list and assigned its
 * index. In either case, its left-hand side and every symbol obtained by iterating it are
 * recorded, and, if no start symbol is set yet, its left-hand side becomes the start symbol.
 *
 * @param prod the production
 */
public void addProduction(P prod) {
    boolean isNew = productions.put(prod.getName(), prod);
    if (isNew) {
        prod.idx = prodList.size();
        prodList.add(prod);
    }

    NT lhs = prod.getLHS();
    if (startName == null) {
        // The first left-hand side seen is the default start symbol
        setStart(lhs);
    }
    String lhsName = lhs.getName();
    symbols.put(lhsName, lhs);
    nonterminals.put(lhsName, lhs);

    for (AssemblySymbol sym : prod) {
        if (!(sym instanceof AssemblyNonTerminal)) {
            AssemblyTerminal terminal = (AssemblyTerminal) sym;
            String terminalName = terminal.getName();
            symbols.put(terminalName, terminal);
            terminals.put(terminalName, terminal);
            continue;
        }
        @SuppressWarnings("unchecked")
        NT nonTerminal = (NT) sym;
        String nonTerminalName = nonTerminal.getName();
        symbols.put(nonTerminalName, nonTerminal);
        nonterminals.put(nonTerminalName, nonTerminal);
    }
}
/**
 * Check if the given production is purely recursive, i.e., of the form I => I
 *
 * That is, it has exactly one symbol, and that symbol equals its own left-hand side.
 *
 * @param prod the production to check
 * @return true iff the production is purely recursive
 */
protected boolean isPureRecursive(P prod) {
    return prod.size() == 1 && prod.getLHS().equals(prod.getRHS().get(0));
}
/**
 * Change the start symbol for the grammar
 *
 * @param nt the new start symbol, or null to clear the start symbol
 */
public void setStart(AssemblyNonTerminal nt) {
    if (nt == null) {
        setStartName(null);
    }
    else {
        setStartName(nt.getName());
    }
}
/**
 * Change the start symbol for the grammar, by name
 *
 * The name is stored as given; it is not checked against the known non-terminals.
 *
 * @param startName the name of the new start symbol
 */
public void setStartName(String startName) {
    this.startName = startName;
}
/**
 * Get the start symbol for the grammar
 *
 * @return the start symbol, or null if no start symbol is set or the start name does not match
 *         any non-terminal in this grammar
 */
public NT getStart() {
    // The non-terminals are kept in a naturally-ordered TreeMap, which rejects null keys
    if (startName == null) {
        return null;
    }
    return nonterminals.get(startName);
}
/**
 * Get the name of the start symbol for the grammar
 *
 * @return the name of the start symbol, or null if none is set
 */
public String getStartName() {
    return this.startName;
}
/**
 * Look up a non-terminal by name
 *
 * @param name the name of the desired non-terminal
 * @return the non-terminal, or null if it is not in this grammar
 */
public NT getNonTerminal(String name) {
    NT found = nonterminals.get(name);
    return found;
}
/**
 * Look up a terminal by name
 *
 * @param name the name of the desired terminal
 * @return the terminal, or null if it is not in this grammar
 */
public AssemblyTerminal getTerminal(String name) {
    AssemblyTerminal found = terminals.get(name);
    return found;
}
/**
 * Add all the productions of a given grammar to this one
 *
 * Each production of the other grammar is passed, in that grammar's order, to
 * {@link #addProduction}. The production objects themselves are added, not copies.
 *
 * @param that the grammar whose productions to add
 */
public void combine(AbstractAssemblyGrammar<NT, P> that) {
    that.prodList.forEach(this::addProduction);
}
/**
 * Print the productions of this grammar to the given stream, one per line
 *
 * @param out the stream
 */
public void print(PrintStream out) {
    Iterator<P> prodIt = prodList.iterator();
    while (prodIt.hasNext()) {
        out.println(prodIt.next());
    }
}
/**
 * Check that the grammar is consistent
 *
 * The grammar is consistent if every non-terminal appearing in the grammar, also appears as
 * the left-hand side of some production. If not, such non-terminals are said to be undefined.
 * The start symbol is checked first: a grammar with no start symbol, or whose start symbol
 * has no defining production, is inconsistent.
 *
 * @throws AssemblyGrammarException the grammar is inconsistent, i.e., it has no defined start
 *             symbol or contains undefined non-terminals.
 */
public void verify() throws AssemblyGrammarException {
    // Test for a missing start symbol explicitly rather than handing a null key to the
    // production map: naturally-ordered sorted maps answer a null key with a
    // NullPointerException instead of "false", which would mask the real problem.
    if (startName == null || !productions.containsKey(startName)) {
        throw new AssemblyGrammarException("Start symbol has no defining production");
    }
    for (P prod : productions.values()) {
        for (AssemblySymbol sym : prod) {
            if (sym instanceof AssemblyNonTerminal) {
                AssemblyNonTerminal nt = (AssemblyNonTerminal) sym;
                if (!(productions.containsKey(nt.getName()))) {
                    throw new AssemblyGrammarException("Grammar has non-terminal '" +
                        nt.getName() + "' without a defining production");
                }
            }
        }
    }
}
/**
 * Traverse the productions
 *
 * The iterator walks the production list through an unmodifiable view.
 *
 * @return an iterator over the productions
 */
@Override
public Iterator<P> iterator() {
    List<P> readOnly = Collections.unmodifiableList(prodList);
    return readOnly.iterator();
}
/**
 * Get the non-terminals
 *
 * @return an unmodifiable view of the non-terminals known to this grammar
 */
public Collection<NT> nonTerminals() {
    Collection<NT> known = nonterminals.values();
    return Collections.unmodifiableCollection(known);
}
/**
 * Get the terminals
 *
 * @return an unmodifiable view of the terminals known to this grammar
 */
public Collection<AssemblyTerminal> terminals() {
    Collection<AssemblyTerminal> known = terminals.values();
    return Collections.unmodifiableCollection(known);
}
/**
 * Get all productions where the left-hand side non-terminal has the given name
 *
 * @param name the name of the non-terminal
 * @return all productions "defining" the named non-terminal, or an empty set if there are
 *         none
 */
public Collection<P> productionsOf(String name) {
    if (productions.containsKey(name)) {
        return productions.get(name);
    }
    return Collections.emptySet();
}
/**
 * Get all productions where the left-hand side is the given non-terminal
 *
 * The lookup is by the non-terminal's name.
 *
 * @param nt the non-terminal whose defining productions to find
 * @return all productions "defining" the given non-terminal
 */
public Collection<P> productionsOf(AssemblyNonTerminal nt) {
    final String ntName = nt.getName();
    return productionsOf(ntName);
}
/**
 * Check if the grammar contains any symbol with the given name
 *
 * @param name the name to find
 * @return true iff a terminal or non-terminal has the given name
 */
public boolean contains(String name) {
    return this.symbols.containsKey(name);
}
}

View file

@ -0,0 +1,141 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.grammars;
import java.util.List;
import org.apache.commons.collections4.list.AbstractListDecorator;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNonTerminal;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblySymbol;
/**
* Defines a production in a context-free grammar, usually for parsing mnemonic assembly
*
* @see AbstractAssemblyGrammar
* @param <NT> the type of non-terminals
*/
public abstract class AbstractAssemblyProduction<NT extends AssemblyNonTerminal>
        extends AbstractListDecorator<AssemblySymbol>
        implements Comparable<AbstractAssemblyProduction<NT>> {
    // The non-terminal this production defines
    private final NT lhs;
    // The symbols the left-hand side expands to; also the list exposed by this decorator
    private final AssemblySentential<NT> rhs;
    // Position of this production in its grammar; remains -1 until a grammar assigns it
    int idx = -1;

    /**
     * Construct a production with the given LHS and RHS
     *
     * The right-hand side is finished before it is stored.
     *
     * @param lhs the left-hand side
     * @param rhs the right-hand side
     */
    public AbstractAssemblyProduction(NT lhs, AssemblySentential<NT> rhs) {
        rhs.finish();
        this.lhs = lhs;
        this.rhs = rhs;
    }

    /**
     * Expose the right-hand side as the list backing this production
     */
    @Override
    protected List<AssemblySymbol> decorated() {
        return this.rhs;
    }

    /**
     * Get the index of the production
     *
     * Instead of using deep comparison, the index is often used as the identity of the
     * production within a grammar.
     *
     * @return the index
     */
    public int getIndex() {
        return this.idx;
    }

    /**
     * Get the left-hand side
     *
     * @return the LHS
     */
    public NT getLHS() {
        return this.lhs;
    }

    /**
     * Get the right-hand side
     *
     * @return the RHS
     */
    public AssemblySentential<NT> getRHS() {
        return this.rhs;
    }

    /**
     * Render as "index. LHS => RHS"
     */
    @Override
    public String toString() {
        return idx + ". " + lhs + " => " + rhs;
    }

    /**
     * Two productions are equal when both sides are equal; the index is not considered
     */
    @Override
    public boolean equals(Object that) {
        if (!(that instanceof AbstractAssemblyProduction)) {
            return false;
        }
        AbstractAssemblyProduction<?> other = (AbstractAssemblyProduction<?>) that;
        return this.lhs.equals(other.lhs) && this.rhs.equals(other.rhs);
    }

    /**
     * Order by left-hand side, then by right-hand side
     */
    @Override
    public int compareTo(AbstractAssemblyProduction<NT> that) {
        int byLeft = this.lhs.compareTo(that.lhs);
        return byLeft != 0 ? byLeft : this.rhs.compareTo(that.rhs);
    }

    /**
     * Combine the hashes of both sides; the index is not considered
     */
    @Override
    public int hashCode() {
        return lhs.hashCode() * 31 + rhs.hashCode();
    }

    /**
     * Take a portion of the right-hand side
     */
    @Override
    public AssemblySentential<NT> subList(int fromIndex, int toIndex) {
        return this.rhs.subList(fromIndex, toIndex);
    }

    /**
     * Get the "name" of this production
     *
     * This is mostly just notional and for debugging. The name is taken as the name of the LHS.
     *
     * @return the name of the LHS
     */
    public String getName() {
        return this.lhs.getName();
    }
}

View file

@ -0,0 +1,35 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.grammars;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyExtendedNonTerminal;
/**
* Defines an "extended" grammar
*
* "Extended grammar" as in a grammar extended with state numbers from an LR0 parser.
* See <a href="http://web.cs.dal.ca/~sjackson/lalr1.html">LALR(1) Parsing</a> from Stephen Jackson
* of Dalhousie University, Halifax, Nova Scotia, Canada.
*/
public class AssemblyExtendedGrammar
extends AbstractAssemblyGrammar<AssemblyExtendedNonTerminal, AssemblyExtendedProduction> {
/**
* Unsupported for extended grammars
*
* This factory only receives the two sides of a production. Callers are expected to build
* extended productions themselves and add them to the grammar.
*
* @param lhs ignored
* @param rhs ignored
* @return never returns normally
* @throws UnsupportedOperationException always
*/
@Override
protected AssemblyExtendedProduction newProduction(AssemblyExtendedNonTerminal lhs,
AssemblySentential<AssemblyExtendedNonTerminal> rhs) {
throw new UnsupportedOperationException("Please construct extended productions yourself");
}
}

View file

@ -0,0 +1,65 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.grammars;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyExtendedNonTerminal;
/**
* Defines a production of an "extended" grammar
*
* @see AssemblyExtendedGrammar
*/
public class AssemblyExtendedProduction
        extends AbstractAssemblyProduction<AssemblyExtendedNonTerminal> {
    // End state of the last symbol on the right-hand side
    private final int finalState;
    // The plain production this extended production was derived from
    private final AssemblyProduction ancestor;

    /**
     * Construct an extended production based on the given ancestor
     *
     * @param lhs the extended left-hand side
     * @param rhs the extended right-hand side
     * @param finalState the end state of the final symbol of the RHS
     * @param ancestor the original production from which this extended production is derived
     */
    public AssemblyExtendedProduction(AssemblyExtendedNonTerminal lhs,
            AssemblySentential<AssemblyExtendedNonTerminal> rhs, int finalState,
            AssemblyProduction ancestor) {
        super(lhs, rhs);
        this.ancestor = ancestor;
        this.finalState = finalState;
    }

    /**
     * Get the extended left-hand side; simply defers to the superclass
     */
    @Override
    public AssemblyExtendedNonTerminal getLHS() {
        return super.getLHS();
    }

    /**
     * Get the final state of this production
     *
     * @return the end state of the last symbol of the RHS
     */
    public int getFinalState() {
        return this.finalState;
    }

    /**
     * Get the original production from which this production was derived
     *
     * @return the original production
     */
    public AssemblyProduction getAncestor() {
        return this.ancestor;
    }
}

View file

@ -0,0 +1,120 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.grammars;
import java.util.*;
import org.apache.commons.collections4.map.LazyMap;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyConstructorSemantic;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNonTerminal;
import ghidra.app.plugin.processors.sleigh.Constructor;
import ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern;
/**
* Defines a context free grammar, used to parse mnemonic assembly instructions
*
* This stores the CFG and the associated semantics for each production. It also has mechanisms for
* tracking "purely recursive" productions. These are productions of the form I => I, and they
* necessarily create ambiguity. Thus, when constructing a parser, it is useful to identify them
* early.
*/
public class AssemblyGrammar
extends AbstractAssemblyGrammar<AssemblyNonTerminal, AssemblyProduction> {
// a nested map of semantics by production, by constructor
protected final Map<AssemblyProduction, Map<Constructor, AssemblyConstructorSemantic>> semantics =
LazyMap.lazyMap(new TreeMap<>(), () -> new TreeMap<>());
// a map of purely recursive, e.g., I => I, productions by name of LHS
protected final Map<String, AssemblyProduction> pureRecursive = new TreeMap<>();
@Override
protected AssemblyProduction newProduction(AssemblyNonTerminal lhs,
AssemblySentential<AssemblyNonTerminal> rhs) {
return new AssemblyProduction(lhs, rhs);
}
@Override
public void addProduction(AssemblyProduction prod) {
if (isPureRecursive(prod)) {
pureRecursive.put(prod.getLHS().getName(), prod);
}
else {
super.addProduction(prod);
}
}
/**
* Add a production associated with a SLEIGH constructor semantic
* @param lhs the left-hand side
* @param rhs the right-hand side
* @param pattern the pattern associated with the constructor
* @param cons the SLEIGH constructor
* @param indices the indices of RHS non-terminals that represent an operand in the constructor
*/
public void addProduction(AssemblyNonTerminal lhs, AssemblySentential<AssemblyNonTerminal> rhs,
DisjointPattern pattern, Constructor cons, List<Integer> indices) {
AssemblyProduction prod = newProduction(lhs, rhs);
addProduction(prod);
Map<Constructor, AssemblyConstructorSemantic> map = semantics.get(prod);
AssemblyConstructorSemantic sem = map.get(cons);
if (sem == null) {
sem = new AssemblyConstructorSemantic(cons, indices);
map.put(cons, sem);
}
else if (!indices.equals(sem.getOperandIndices())) {
throw new IllegalStateException(
"Productions of the same constructor must have same operand indices");
}
sem.addPattern(pattern);
}
/**
* Get the semantics associated with a given production
* @param prod the production
* @return all semantics associated with the given production
*/
public Collection<AssemblyConstructorSemantic> getSemantics(AssemblyProduction prod) {
return Collections.unmodifiableCollection(semantics.get(prod).values());
}
@Override
public void combine(AbstractAssemblyGrammar<AssemblyNonTerminal, AssemblyProduction> that) {
super.combine(that);
if (that instanceof AssemblyGrammar) {
AssemblyGrammar ag = (AssemblyGrammar) that;
this.semantics.putAll(ag.semantics);
this.pureRecursive.putAll(ag.pureRecursive);
}
}
/**
* Get all productions in the grammar that are purely recursive
* @return
*/
public Collection<AssemblyProduction> getPureRecursive() {
return pureRecursive.values();
}
/**
* Obtain, if present, the purely recursive production having the given LHS
* @param lhs the left-hand side
* @return the desired production, or null
*/
public AssemblyProduction getPureRecursion(AssemblyNonTerminal lhs) {
return pureRecursive.get(lhs.getName());
}
}

View file

@ -0,0 +1,31 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.grammars;
import ghidra.app.plugin.assembler.AssemblyException;
/**
* An exception to identify errors associated with grammar construction
*/
public class AssemblyGrammarException extends AssemblyException {
/**
* Construct an exception with the given message
* @param msg a description of the grammar error
*/
public AssemblyGrammarException(String msg) {
super(msg);
}
/**
* Construct an exception with the given message and underlying cause
* @param msg a description of the grammar error
* @param cause the exception that led to this one
*/
public AssemblyGrammarException(String msg, Throwable cause) {
super(msg, cause);
}
}

View file

@ -0,0 +1,31 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.grammars;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNonTerminal;
/**
* Defines a production for parsing mnemonic assembly
*
* @see AssemblyGrammar
*/
public class AssemblyProduction extends AbstractAssemblyProduction<AssemblyNonTerminal> {
/**
* Construct a production with the given LHS and RHS
* @param lhs the left-hand side
* @param rhs the right-hand side
*/
public AssemblyProduction(AssemblyNonTerminal lhs,
AssemblySentential<AssemblyNonTerminal> rhs) {
super(lhs, rhs);
}
}

View file

@ -0,0 +1,227 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.grammars;
import java.util.*;
import org.apache.commons.collections4.list.AbstractListDecorator;
import ghidra.app.plugin.assembler.sleigh.symbol.*;
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseToken;
/**
* A "string" of symbols
*
* To avoid overloading the word "String", we call this a "sentential". Technically, to be a
* "sentential" in the classic sense, it must be a possible element in the derivation of a sentence
* in the grammar starting with the start symbol. We ignore that if only for the sake of naming.
*
* @param <NT> the type of non-terminals
*/
public class AssemblySentential<NT extends AssemblyNonTerminal> extends
        AbstractListDecorator<AssemblySymbol> implements Comparable<AssemblySentential<NT>> {
    // The backing list; swapped for an unmodifiable view by finish()
    private List<AssemblySymbol> symbols;
    private boolean finished = false;
    public static final AssemblyStringTerminal WHITE_SPACE = new WhiteSpace();

    /**
     * Construct a string from the given list of symbols
     *
     * The list is copied; later changes to it do not affect this string.
     *
     * @param symbols the symbols, in order
     */
    public AssemblySentential(List<? extends AssemblySymbol> symbols) {
        this.symbols = new ArrayList<>(symbols);
    }

    @Override
    protected List<AssemblySymbol> decorated() {
        return symbols;
    }

    /**
     * Construct a blank string
     *
     * This is suitable as a blank start, to add new symbols, or to use directly as the RHS,
     * effectively creating an "epsilon" production.
     */
    public AssemblySentential() {
        this.symbols = new ArrayList<>();
    }

    /**
     * Construct a string from any number of symbols
     *
     * The symbols are copied into a growable list, so, like the other constructors, the result
     * accepts further symbols (e.g., via {@link #addWS()}) until it is finished, and is not
     * affected by later changes to the given array.
     *
     * @param syms the symbols, in order
     */
    public AssemblySentential(AssemblySymbol... syms) {
        // Arrays.asList alone is fixed-size and backed by the caller's array
        this.symbols = new ArrayList<>(Arrays.asList(syms));
    }

    /**
     * Render the symbols separated by spaces, or "e" (epsilon) if there are none
     */
    @Override
    public String toString() {
        if (symbols.size() == 0) {
            return "e";
        }
        Iterator<? extends AssemblySymbol> symIt = symbols.iterator();
        StringBuilder sb = new StringBuilder();
        sb.append(symIt.next());
        while (symIt.hasNext()) {
            sb.append(" ");
            sb.append(symIt.next());
        }
        return sb.toString();
    }

    /**
     * Compare symbol by symbol; if one string is a prefix of the other, the shorter comes first
     */
    @Override
    public int compareTo(AssemblySentential<NT> that) {
        int result;
        int min = Math.min(this.symbols.size(), that.symbols.size());
        for (int i = 0; i < min; i++) {
            AssemblySymbol a = this.symbols.get(i);
            AssemblySymbol b = that.symbols.get(i);
            result = a.compareTo(b);
            if (result != 0) {
                return result;
            }
        }
        if (that.symbols.size() > min) {
            return -1;
        }
        if (this.symbols.size() > min) {
            return 1;
        }
        return 0;
    }

    /**
     * Hash the symbols in order
     *
     * NOTE: the accumulator starts at 0, so the value differs from the one
     * {@link List#hashCode()} specifies for a list with the same elements.
     */
    @Override
    public int hashCode() {
        int result = 0;
        for (AssemblySymbol sym : symbols) {
            result *= 31;
            result += sym.hashCode();
        }
        return result;
    }

    /**
     * A "whitespace" terminal
     *
     * This terminal represents "optional" whitespace. "Optional" because in certain circumstances,
     * whitespace is not actually required, i.e., before or after a special character.
     */
    private static class WhiteSpace extends AssemblyStringTerminal {
        private WhiteSpace() {
            super(" ");
        }

        @Override
        public String toString() {
            return "_";
        }

        /**
         * Match a (possibly empty) run of whitespace at the given position
         *
         * An empty match is refused only when the position lies between two letters or digits,
         * i.e., where whitespace is needed to separate them. If the input ends immediately
         * after a letter or digit, a {@link TruncatedWhiteSpaceParseToken} is returned.
         *
         * @param buffer the input text
         * @param pos the position at which to match
         * @param grammar the grammar, passed on to the resulting token
         * @param labels not used by this terminal
         * @return a single whitespace token, or an empty set if required whitespace is absent
         */
        @Override
        public Collection<AssemblyParseToken> match(String buffer, int pos, AssemblyGrammar grammar,
                Map<String, Long> labels) {
            if (buffer.length() == 0) {
                return Collections.singleton(new WhiteSpaceParseToken(grammar, this, ""));
            }
            int b = pos;
            while (b < buffer.length() && Character.isWhitespace(buffer.charAt(b))) {
                b++;
            }
            if (b == pos) {
                // No whitespace was consumed
                if (pos == buffer.length()) {
                    // The buffer is non-empty here, so there is a preceding character
                    if (Character.isLetterOrDigit(buffer.charAt(b - 1))) {
                        return Collections.singleton(
                            new TruncatedWhiteSpaceParseToken(grammar, this));
                    }
                    return Collections.singleton(new WhiteSpaceParseToken(grammar, this, ""));
                }
                // At the very start of the input there is no preceding character to separate
                // from, so only inspect buffer[b - 1] when pos > 0
                if (pos > 0 && Character.isLetterOrDigit(buffer.charAt(b)) &&
                    Character.isLetterOrDigit(buffer.charAt(b - 1))) {
                    return Collections.emptySet();
                }
            }
            return Collections.singleton(
                new WhiteSpaceParseToken(grammar, this, buffer.substring(pos, b)));
        }

        @Override
        public Collection<String> getSuggestions(String got, Map<String, Long> labels) {
            return Collections.singleton(" ");
        }
    }

    /**
     * The token consumed by a whitespace terminal
     */
    public static class WhiteSpaceParseToken extends AssemblyParseToken {
        public WhiteSpaceParseToken(AssemblyGrammar grammar, AssemblyTerminal term, String str) {
            super(grammar, term, str);
        }
    }

    /**
     * The token consumed by a whitespace terminal when it anticipates the end of input
     *
     * "Expected" tokens given by a parse machine when this is the last token it has consumed are
     * not valid suggestions. The machine should instead suggest a whitespace character.
     */
    public static class TruncatedWhiteSpaceParseToken extends WhiteSpaceParseToken {
        public TruncatedWhiteSpaceParseToken(AssemblyGrammar grammar, AssemblyTerminal term) {
            super(grammar, term, "");
        }
    }

    /**
     * Add "optional" whitespace, if not already preceded by whitespace
     *
     * @return true if whitespace was added
     */
    public boolean addWS() {
        WhiteSpace last = lastWhiteSpace();
        if (last != null) {
            return false;
        }
        return add(WHITE_SPACE);
    }

    // If the right-most symbol is whitespace, return it
    private WhiteSpace lastWhiteSpace() {
        if (symbols.size() == 0) {
            return null;
        }
        AssemblySymbol last = symbols.get(symbols.size() - 1);
        if (last instanceof WhiteSpace) {
            return (WhiteSpace) last;
        }
        return null;
    }

    /**
     * Make the string immutable
     *
     * The symbols are left exactly as they are (no whitespace is trimmed); the backing list is
     * merely replaced by an unmodifiable view. Calling this more than once has no further
     * effect.
     */
    public void finish() {
        if (finished) {
            return;
        }
        symbols = Collections.unmodifiableList(symbols);
        finished = true;
    }

    /**
     * Copy a portion of this string into a new, unfinished string
     */
    @Override
    public AssemblySentential<NT> subList(int fromIndex, int toIndex) {
        return new AssemblySentential<>(symbols.subList(fromIndex, toIndex));
    }
}

View file

@ -0,0 +1,210 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.parse;
import java.io.PrintStream;
import java.util.*;
import org.apache.commons.collections4.MultiValuedMap;
import ghidra.app.plugin.assembler.sleigh.grammars.AbstractAssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.grammars.AbstractAssemblyProduction;
import ghidra.app.plugin.assembler.sleigh.symbol.*;
import ghidra.generic.util.datastruct.TreeSetValuedTreeMap;
/**
* A class to compute the first and follow of every non-terminal in a grammar
*
* See Alfred V. Aho, Monica S. Lam, Ravi Sethi, Jeffrey D. Ullman, <i>Compilers: Principles,
* Techniques, &amp; Tools</i>. Boston, MA: Pearson, 2007, pp. 220-2.
*/
public class AssemblyFirstFollow {
    private final AbstractAssemblyGrammar<?, ?> grammar;
    // non-terminals which may derive epsilon
    private final Set<AssemblyNonTerminal> nullable = new TreeSet<>();
    // terminals that may begin a derivation of each non-terminal
    private final MultiValuedMap<AssemblyNonTerminal, AssemblyTerminal> first =
        new TreeSetValuedTreeMap<>();
    // terminals that may appear immediately after each non-terminal
    private final MultiValuedMap<AssemblyNonTerminal, AssemblyTerminal> follow =
        new TreeSetValuedTreeMap<>();

    /**
     * Compute the first and follow sets for every non-terminal in the given grammar
     *
     * The nullable set is computed first, then the first sets, then the follow sets.
     *
     * @param grammar the grammar
     */
    public AssemblyFirstFollow(AbstractAssemblyGrammar<?, ?> grammar) {
        this.grammar = grammar;
        computeNullable();
        computeFirsts();
        computeFollows();
    }

    /**
     * Compute the nullable set
     *
     * Repeats until a full pass adds nothing: a left-hand side is nullable when every symbol
     * of one of its productions is nullable (trivially so for an empty right-hand side).
     */
    protected void computeNullable() {
        boolean grew;
        do {
            grew = false;
            for (AbstractAssemblyProduction<?> prod : grammar) {
                if (nullable.containsAll(prod)) {
                    grew |= nullable.add(prod.getLHS());
                }
            }
        }
        while (grew);
    }

    /**
     * Compute the first set for each non-terminal
     *
     * Repeats until a full pass over the productions adds nothing.
     */
    protected void computeFirsts() {
        boolean grew;
        do {
            grew = false;
            for (AbstractAssemblyProduction<?> prod : grammar) {
                grew |= growFirst(prod);
            }
        }
        while (grew);
    }

    // Apply the first-set rules to one production; returns true if First[LHS] grew.
    // [A] => 'a' ALPHA implies 'a' in First[A]
    // [A] => ALPHA [X] BETA implies First[A] includes First[X] when ALPHA is nullable
    private boolean growFirst(AbstractAssemblyProduction<?> prod) {
        boolean grew = false;
        // Walk from the left over nullable non-terminals, adding the first of each symbol,
        // and stop after a terminal or a non-nullable non-terminal
        for (AssemblySymbol sym : prod) {
            if (sym instanceof AssemblyNonTerminal) {
                grew |= first.putAll(prod.getLHS(), first.get((AssemblyNonTerminal) sym));
                if (!nullable.contains(sym)) {
                    return grew;
                }
            }
            else if (sym instanceof AssemblyTerminal) {
                grew |= first.put(prod.getLHS(), (AssemblyTerminal) sym);
                return grew;
            }
        }
        return grew;
    }

    /**
     * Compute the follow set for each non-terminal
     *
     * Repeats until a full pass over the productions adds nothing. Note that the end-of-input
     * marker is not seeded after the start symbol here.
     */
    protected void computeFollows() {
        // Put EOI after the start symbol
        // follow.put(grammar.getStart(), AssemblyEOI.EOI);
        boolean grew;
        do {
            grew = false;
            for (AbstractAssemblyProduction<?> prod : grammar) {
                // Scan the production from the left for non-terminals
                for (int i = 0; i < prod.size(); i++) {
                    AssemblySymbol candidate = prod.get(i);
                    if (candidate instanceof AssemblyNonTerminal) {
                        grew |= growFollow((AssemblyNonTerminal) candidate, prod, i + 1);
                    }
                }
            }
        }
        while (grew);
    }

    // Apply the follow-set rules for non-terminal x, given the symbols of prod from index
    // 'from' onward; returns true if Follow[x] grew.
    // [A] => ... [X] ALPHA [B] ... implies Follow[X] includes First(ALPHA) and First[B]
    // [A] => ... [X] ALPHA implies Follow[X] includes Follow[A] when ALPHA is nullable
    private boolean growFollow(AssemblyNonTerminal x, AbstractAssemblyProduction<?> prod,
            int from) {
        boolean grew = false;
        // Walk right, adding the first of each symbol, until a terminal or a non-nullable
        // non-terminal ends the walk
        for (int j = from; j < prod.size(); j++) {
            AssemblySymbol right = prod.get(j);
            if (right instanceof AssemblyNonTerminal) {
                grew |= follow.putAll(x, first.get((AssemblyNonTerminal) right));
                if (!nullable.contains(right)) {
                    return grew;
                }
            }
            else if (right instanceof AssemblyTerminal) {
                grew |= follow.put(x, (AssemblyTerminal) right);
                return grew;
            }
        }
        // Reached the end without meeting a non-nullable symbol: whatever follows the
        // left-hand side also follows x
        grew |= follow.putAll(x, follow.get(prod.getLHS()));
        return grew;
    }

    /**
     * Get the nullable set
     *
     * That is the set of all non-terminals, which through some derivation, can produce epsilon.
     *
     * @return an unmodifiable view of the set
     */
    public Collection<AssemblyNonTerminal> getNullable() {
        return Collections.unmodifiableSet(this.nullable);
    }

    /**
     * Get the first set for a given non-terminal
     *
     * That is the set of all terminals, which through some derivation from the given non-terminal,
     * can appear first in a sentential form.
     *
     * @param nt the non-terminal
     * @return an unmodifiable view of the set
     */
    public Collection<AssemblyTerminal> getFirst(AssemblyNonTerminal nt) {
        Collection<AssemblyTerminal> firsts = first.get(nt);
        return Collections.unmodifiableCollection(firsts);
    }

    /**
     * Get the follow set for a given non-terminal
     *
     * That is the set of all terminals, which through some derivation from the start symbol, can
     * appear immediately after the given non-terminal in a sentential form.
     *
     * @param nt the non-terminal
     * @return an unmodifiable view of the set
     */
    public Collection<AssemblyTerminal> getFollow(AssemblyNonTerminal nt) {
        Collection<AssemblyTerminal> follows = follow.get(nt);
        return Collections.unmodifiableCollection(follows);
    }

    /**
     * For debugging, print out the computed sets to the given stream
     *
     * @param out the stream
     */
    public void print(PrintStream out) {
        out.print("Nullable: ");
        for (AssemblyNonTerminal nullableNT : nullable) {
            out.print(nullableNT + " ");
        }
        out.println();

        out.println("Firsts:");
        for (AssemblyNonTerminal owner : grammar.nonTerminals()) {
            out.print(owner + "\t");
            for (AssemblyTerminal member : first.get(owner)) {
                out.print(member + " ");
            }
            out.println();
        }

        out.println("Follows:");
        for (AssemblyNonTerminal owner : grammar.nonTerminals()) {
            out.print(owner + "\t");
            for (AssemblyTerminal member : follow.get(owner)) {
                out.print(member + " ");
            }
            out.println();
        }
    }
}

View file

@ -0,0 +1,55 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.parse;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseBranch;
/**
* A successful result from parsing
*/
public class AssemblyParseAcceptResult extends AssemblyParseResult {
    // The parse tree produced by the successful parse
    private final AssemblyParseBranch tree;

    /**
     * Construct a successful result around the given tree
     *
     * @param tree the parse tree
     * @see AssemblyParseResult#accept(AssemblyParseBranch)
     */
    protected AssemblyParseAcceptResult(AssemblyParseBranch tree) {
        this.tree = tree;
    }

    /**
     * An accept result is never an error
     */
    @Override
    public boolean isError() {
        return false;
    }

    /**
     * Get the tree
     *
     * @return the tree
     */
    public AssemblyParseBranch getTree() {
        return this.tree;
    }

    /**
     * Render the tree by printing it into an in-memory buffer
     */
    @Override
    public String toString() {
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        PrintStream printer = new PrintStream(sink);
        tree.print(printer);
        // Decodes with the platform default charset, the same one the PrintStream encoded with
        return sink.toString();
    }
}

View file

@ -0,0 +1,203 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.parse;
import java.util.Collection;
import org.apache.commons.collections4.MultiValuedMap;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyProduction;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyEOI;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNonTerminal;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblySymbol;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyTerminal;
import ghidra.app.plugin.assembler.sleigh.util.TableEntryKey;
import ghidra.generic.util.datastruct.TreeSetValuedTreeMap;
/**
* The Action/Goto table for a LALR(1) parser
*
* This table is unconventional in that it permits a single cell to be populated by more than one
* action. Typically, such a situation would indicate an ambiguity, or the need for a longer
* look-ahead value. Because we do not presume to control the grammar (which was automatically
* derived from another source), the parsing algorithm will simply branch, eventually trying both
* options.
*/
public class AssemblyParseActionGotoTable {
    // The sparse table itself: each (state, symbol) cell maps to every action stored in it
    protected final MultiValuedMap<TableEntryKey, Action> map = new TreeSetValuedTreeMap<>();
    // For each state, the terminal columns that have at least one entry (optimization)
    protected final MultiValuedMap<Integer, AssemblyTerminal> possibleTerms =
        new TreeSetValuedTreeMap<>();

    /**
     * Add an action entry to the given cell
     *
     * If the column is a terminal, it is also recorded as an expected terminal for the state.
     * @param fromState the state (row) in the table
     * @param next the symbol (column) in the table
     * @param action the entry to add to the cell
     * @return true, if the given entry was not already present
     */
    public boolean put(int fromState, AssemblySymbol next, Action action) {
        if (next instanceof AssemblyTerminal) {
            AssemblyTerminal terminal = (AssemblyTerminal) next;
            possibleTerms.put(fromState, terminal);
        }
        TableEntryKey cell = new TableEntryKey(fromState, next);
        return map.put(cell, action);
    }

    /**
     * Add a SHIFT (S<i>n</i>) entry to the given cell
     * @param fromState the state (row) in the table
     * @param next the terminal (column) in the table
     * @param newState the state (<i>n</i>) after the shift is applied
     * @return true, if the given entry was not already present
     */
    public boolean putShift(int fromState, AssemblyTerminal next, int newState) {
        Action shift = new ShiftAction(newState);
        return put(fromState, next, shift);
    }

    /**
     * Add a REDUCE (R<i>n</i>) entry to the given cell
     * @param fromState the state (row) in the table
     * @param next the terminal (column) in the table
     * @param prod the production (having index <i>n</i>) associated with the reduction
     * @return true, if the given entry was not already present
     */
    public boolean putReduce(int fromState, AssemblyTerminal next, AssemblyProduction prod) {
        Action reduce = new ReduceAction(prod);
        return put(fromState, next, reduce);
    }

    /**
     * Add a GOTO entry to the given cell
     * @param fromState the state (row) in the table
     * @param next the non-terminal (column) in the table
     * @param newState the target state
     * @return true, if the given entry was not already present
     */
    public boolean putGoto(int fromState, AssemblyNonTerminal next, int newState) {
        Action go = new GotoAction(newState);
        return put(fromState, next, go);
    }

    /**
     * Add an ACCEPT entry for the given state at the end of input
     * @param fromState the state (row) in the table
     * @return true, if the state does not already accept on end of input
     */
    public boolean putAccept(int fromState) {
        return put(fromState, AssemblyEOI.EOI, AcceptAction.ACCEPT);
    }

    /**
     * Get the terminals that are expected, i.e., have entries for the given state
     * @param fromState the state (row) in the table
     * @return the collection of populated columns (terminals) for the given state
     */
    public Collection<AssemblyTerminal> getExpected(int fromState) {
        return possibleTerms.get(fromState);
    }

    /**
     * Get all entries in a given cell
     * @param fromState the state (row) in the table
     * @param next the symbol (column) in the table
     * @return all action entries in the given cell
     */
    public Collection<Action> get(int fromState, AssemblySymbol next) {
        TableEntryKey cell = new TableEntryKey(fromState, next);
        return map.get(cell);
    }

    /**
     * An action in the Action/Goto table
     *
     * Identity, hashing, and ordering are all derived from the action's string form, so every
     * subclass must provide a {@link #toString()} that is unique to the action it represents.
     */
    public static abstract class Action implements Comparable<Action> {
        @Override
        public int hashCode() {
            String text = toString();
            return text.hashCode();
        }

        @Override
        public boolean equals(Object that) {
            return that instanceof Action && this.toString().equals(that.toString());
        }

        @Override
        public int compareTo(Action that) {
            String mine = this.toString();
            String theirs = that.toString();
            return mine.compareTo(theirs);
        }
    }

    /**
     * A SHIFT (S<i>n</i>) entry
     */
    public static class ShiftAction extends Action {
        // The state (n) entered after the shift
        protected int newStateNum;

        public ShiftAction(int newStateNum) {
            this.newStateNum = newStateNum;
        }

        @Override
        public String toString() {
            return "S" + newStateNum;
        }
    }

    /**
     * A REDUCE (R<i>n</i>) entry
     */
    public static class ReduceAction extends Action {
        // The production (having index n) to reduce by
        protected AssemblyProduction prod;

        public ReduceAction(AssemblyProduction prod) {
            this.prod = prod;
        }

        @Override
        public String toString() {
            return "R" + prod.getIndex();
        }
    }

    /**
     * A GOTO (G<i>n</i>) entry
     */
    public static class GotoAction extends Action {
        // The state (n) entered after the goto
        protected int newStateNum;

        public GotoAction(int newStateNum) {
            this.newStateNum = newStateNum;
        }

        @Override
        public String toString() {
            return "G" + newStateNum;
        }
    }

    /**
     * An ACCEPT (acc) entry
     */
    public static class AcceptAction extends Action {
        // The shared instance stored by putAccept
        public static final AcceptAction ACCEPT = new AcceptAction();

        @Override
        public String toString() {
            return "acc";
        }
    }
}

View file

@ -0,0 +1,69 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.parse;
import java.util.Collections;
import java.util.Set;
/**
* An unsuccessful result from parsing
*/
public class AssemblyParseErrorResult extends AssemblyParseResult {
    // What was left of the input when parsing failed
    private final String buffer;
    // Tokens that would have let parsing continue
    private final Set<String> suggestions;

    /**
     * Construct an error result
     * @param got the leftover contents of the input buffer when the error occurred
     * @param suggestions the tokens that would have allowed parsing to continue
     * @see AssemblyParseResult#error(String, Set)
     */
    protected AssemblyParseErrorResult(String got, Set<String> suggestions) {
        this.suggestions = suggestions;
        this.buffer = got;
    }

    @Override
    public boolean isError() {
        return true;
    }

    /**
     * Get a description of the error
     * @return a message naming the suggested tokens and the leftover input
     */
    public String describeError() {
        StringBuilder message = new StringBuilder("Syntax Error: Expected ");
        message.append(suggestions);
        message.append(". Got ");
        message.append(buffer);
        return message.toString();
    }

    /**
     * Get a set of suggested tokens that would have allowed parsing to continue
     * @return an unmodifiable view of the suggestions
     */
    public Set<String> getSuggestions() {
        Set<String> readOnly = Collections.unmodifiableSet(suggestions);
        return readOnly;
    }

    /**
     * Get the leftover contents of the input buffer when the error occurred
     * @return the leftover input
     */
    public String getBuffer() {
        return buffer;
    }

    /**
     * Render this result using the same text as {@link #describeError()}
     */
    @Override
    public String toString() {
        return describeError();
    }
}

View file

@ -0,0 +1,396 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.parse;
import java.util.*;
import generic.util.DequePush;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyProduction;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblySentential;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblySentential.TruncatedWhiteSpaceParseToken;
import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseActionGotoTable.*;
import ghidra.app.plugin.assembler.sleigh.symbol.*;
import ghidra.app.plugin.assembler.sleigh.tree.*;
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer.DbgCtx;
import ghidra.app.plugin.assembler.sleigh.util.SleighUtil;
/**
* A class that implements the LALR(1) parsing algorithm
*
* Instances of this class store a parse state. In order to work correctly, the class must be
* given a properly-constructed Action/Goto table.
*
* This implementation is somewhat unconventional. First, instead of strictly tokenizing and then
* parsing, each terminal is given the opportunity to match a token in the input. If none match, it
* results in a syntax error (equivalent to the token type having an empty cell in the classical
* algorithm). If more than one match, the parser branches. Also, because a single cell may also
* contain multiple actions, the parser could branch again. Thus, if a sentence is ambiguous, this
* algorithm will identify all possible parse trees, including ones where the input is tokenized
* differently than in other trees.
*/
public class AssemblyParseMachine implements Comparable<AssemblyParseMachine> {
private static final int ERROR_NONE = 0;
private static final int ERROR_SYNTAX = 1;
// The parser, containing the Action/Goto table
protected final AssemblyParser parser;
// The formal output of the parser
protected final List<Integer> output = new ArrayList<>(); // for checking, debugging...?
// The formal stack of the parser
protected final Stack<Integer> stack = new Stack<>();
// The stack of trees actually used by the assembler
protected final Stack<AssemblyParseTreeNode> treeStack = new Stack<>();
// The formal input buffer of the parser
protected final String buffer;
// The position in the buffer where we are parsing.
protected int pos;
// The last token we consumed (i.e., last terminal pushed to the stack)
protected AssemblyParseToken lastTok;
// A set of labels that identify valid tokens for some terminals
protected final Map<String, Long> labels; // used for label -> number substitution
protected boolean accepted = false; // the machine is in the accepted state
protected int error = ERROR_NONE; // non-zero if the machine is in an error state
protected String got; // if in error, provides the remaining characters in the buffer
protected Collection<AssemblyTerminal> expected; // if in error, provides suggestions
protected final int id; // convenient ID for debug printing
static int nextMachineId = 0;
static final DbgTimer dbg = DbgTimer.INACTIVE;
/**
 * Construct a new parse state
 *
 * The state stack is seeded with state 0 and each machine receives the next debug ID.
 * @param parser the parser driving this machine
 * @param input the full input line
 * @param pos the position in the line identifying the next characters to parse
 * @param lastTok the last token consumed before this state, or null if there is none
 * @param labels a map of valid tokens to number for numeric terminals
 */
public AssemblyParseMachine(AssemblyParser parser, String input, int pos,
        AssemblyParseToken lastTok, Map<String, Long> labels) {
    this.id = nextMachineId++;
    this.parser = parser;
    this.labels = labels;
    this.buffer = input;
    this.pos = pos;
    this.lastTok = lastTok;
    this.stack.push(0);
}
/* ********************************************************************************************
* Equality, comparison, etc.
*/
// NOTE: Buffer is ignored. Machines parsing different buffers should NEVER be in the same
// collection.
@Override
public int hashCode() {
    // Folded in order: position, output production indices, tree-stack nodes, accepted, error
    int hash = pos;
    for (int prodIndex : output) {
        hash = 31 * hash + prodIndex;
    }
    // The state stack is intentionally left out: per the original author's note, hashing it
    // does not distinguish among multiple matches on a single terminal. The tree stack is
    // hashed instead.
    for (AssemblyParseTreeNode node : treeStack) {
        hash = 31 * hash + node.hashCode();
    }
    hash = 31 * hash + (accepted ? 1 : 0);
    hash = 31 * hash + error;
    return hash;
}
/**
 * Check whether the given object is a machine in the same parse state
 *
 * Two machines are equal when they agree on position, output, state stack, tree stack,
 * accepted flag, and error code. The buffer is not compared.
 * @param that the object to compare against
 * @return true if that is an equivalent machine
 */
@Override
public boolean equals(Object that) {
    if (this == that) {
        return true;
    }
    if (!(that instanceof AssemblyParseMachine)) {
        return false;
    }
    AssemblyParseMachine apm = (AssemblyParseMachine) that;
    if (this.pos != apm.pos || this.accepted != apm.accepted || this.error != apm.error) {
        return false;
    }
    if (!this.output.equals(apm.output)) {
        return false;
    }
    if (!this.stack.equals(apm.stack)) {
        return false;
    }
    // hashCode() folds in the tree stack, so equality has to consider it as well. Otherwise
    // two "equal" machines could hash differently, which violates the Object contract and
    // makes hash-based collections of machines behave unpredictably.
    return this.treeStack.equals(apm.treeStack);
}
/**
 * Order machines by position, then state stack, then output, then accepted flag (not accepted
 * first), then error code
 *
 * The tree stack and buffer are not considered.
 * @param that the machine to compare against
 * @return negative, zero, or positive per the {@link Comparable} contract
 */
@Override
public int compareTo(AssemblyParseMachine that) {
    // Integer.compare avoids the overflow hazard of comparing by subtraction
    int result = Integer.compare(this.pos, that.pos);
    if (result != 0) {
        return result;
    }
    result = SleighUtil.compareInOrder(this.stack, that.stack);
    if (result != 0) {
        return result;
    }
    result = SleighUtil.compareInOrder(this.output, that.output);
    if (result != 0) {
        return result;
    }
    result = Boolean.compare(this.accepted, that.accepted);
    if (result != 0) {
        return result;
    }
    return Integer.compare(this.error, that.error);
}
/* *******************************************************************************************/
/**
 * Duplicate this machine state
 *
 * This is used extensively when branching. The duplicate gets its own output list and stacks,
 * but the tree nodes and the label map are shared by reference. The error details (got and
 * expected) are not carried over.
 * @return the duplicate
 */
public AssemblyParseMachine copy() {
    AssemblyParseMachine dup = new AssemblyParseMachine(parser, buffer, pos, lastTok, labels);
    // The constructor seeds the state stack, so empty it before mirroring ours
    dup.stack.clear();
    dup.stack.addAll(stack);
    dup.treeStack.clear();
    dup.treeStack.addAll(treeStack);
    dup.output.clear();
    dup.output.addAll(output);
    dup.error = error;
    dup.accepted = accepted;
    dbg.println("Copied " + id + " to " + dup.id);
    return dup;
}
/**
 * Perform a given action and continue parsing, exhausting all results after the action
 *
 * SHIFT, REDUCE, and ACCEPT actions are each applied to a copy of this machine; this machine
 * itself is left untouched. Any other kind of action is ignored.
 *
 * The visited collection prevents infinite loops or stack overflows resulting from "consuming"
 * epsilon and going to the same state. Such loops may involve many states. It holds the
 * machines themselves so that, when a loop is detected, the ID of the first visit can be
 * printed.
 * @param a the action
 * @param tok the token given by the terminal (column) of the entry containing this action
 * @param results a place to store all the parsing results (each must be accept or error state)
 * @param visited a collection of machine states already visited
 */
protected void doAction(Action a, AssemblyParseToken tok, Set<AssemblyParseMachine> results,
        Deque<AssemblyParseMachine> visited) {
    try (DbgCtx dc = dbg.start("Action: " + a)) {
        if (a instanceof ShiftAction) {
            applyShift((ShiftAction) a, tok, results, visited);
        }
        else if (a instanceof ReduceAction) {
            applyReduce((ReduceAction) a, results, visited);
        }
        else if (a instanceof AcceptAction) {
            applyAccept(results);
        }
    }
}

// Shift: push the new state and the token onto a copy, advance past the token, keep parsing
private void applyShift(ShiftAction shift, AssemblyParseToken tok,
        Set<AssemblyParseMachine> results, Deque<AssemblyParseMachine> visited) {
    AssemblyParseMachine shifted = copy();
    shifted.stack.push(shift.newStateNum);
    shifted.treeStack.push(tok);
    shifted.lastTok = tok;
    shifted.pos += tok.getString().length();
    shifted.exhaust(results, visited);
}

// Reduce: pop one entry per RHS symbol into a new branch, then follow every GOTO for the LHS
private void applyReduce(ReduceAction reduce, Set<AssemblyParseMachine> results,
        Deque<AssemblyParseMachine> visited) {
    AssemblyProduction prod = reduce.prod;
    AssemblyParseBranch branch = new AssemblyParseBranch(parser.grammar, prod);
    AssemblyParseMachine reduced = copy();
    reduced.output.add(prod.getIndex());
    dbg.println("Prod: " + prod);
    // Only the number of symbols matters here; the symbols themselves are not inspected
    for (@SuppressWarnings("unused")
    AssemblySymbol sym : prod) {
        reduced.stack.pop();
        branch.addChild(reduced.treeStack.pop());
    }
    Collection<Action> gotos =
        reduced.parser.actions.get(reduced.stack.peek(), prod.getLHS());
    for (Action entry : gotos) {
        GotoAction go = (GotoAction) entry;
        dbg.println("Goto: " + go);
        AssemblyParseMachine after = reduced.copy();
        after.stack.push(go.newStateNum);
        after.treeStack.push(branch);
        after.exhaust(results, visited);
    }
}

// Accept: record a copy of this machine, flagged as accepted, among the results
private void applyAccept(Set<AssemblyParseMachine> results) {
    AssemblyParseMachine done = copy();
    done.accepted = true;
    results.add(done);
}
/**
 * Consume a given terminal (and corresponding token) and continue parsing
 *
 * Every action in the cell for the current state and the given terminal is performed.
 * @param t the terminal
 * @param tok the corresponding token
 * @param results a place to store all the parsing results
 * @param visited a collection of machine states already visited
 */
protected void consume(AssemblyTerminal t, AssemblyParseToken tok,
        Set<AssemblyParseMachine> results, Deque<AssemblyParseMachine> visited) {
    try (DbgCtx dc = dbg.start("Matched " + t + " " + tok)) {
        int state = stack.peek();
        Collection<Action> cell = parser.actions.get(state, t);
        assert !cell.isEmpty();
        dbg.println("Actions: " + cell);
        for (Action action : cell) {
            doAction(action, tok, results, visited);
        }
    }
}
/**
 * Look for previous machine states having the same stack and position
 *
 * Finding one implies we have gone in a loop without consuming anything, so the caller needs
 * to prune. The given machine itself is never reported as its own loop.
 * @param machine the machine state to check
 * @param visited the stack of previous machine states
 * @return if there is a loop, the machine state proving it, null otherwise
 */
protected static AssemblyParseMachine findLoop(AssemblyParseMachine machine,
        Collection<AssemblyParseMachine> visited) {
    for (AssemblyParseMachine prior : visited) {
        boolean samePlace = prior != machine && prior.pos == machine.pos;
        if (samePlace && prior.stack.equals(machine.stack)) {
            return prior;
        }
    }
    return null;
}
/**
 * Render the state stack, tree stack, buffer, and position, for debugging
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(stack).append(":").append(treeStack).append(":").append(buffer);
    text.append(" (").append(pos).append(")");
    return text.toString();
}
/**
 * Parse (or continue parsing) all possible trees from this machine state
 *
 * Every terminal expected in the current state is offered the input at the current position,
 * and each token it matches is consumed on its own branch. If any expected terminal matched
 * nothing, a syntax-error copy of this machine is added to the results as well.
 * @param results a place to store all the parsing results
 * @param visited a collection of machine states already visited
 */
protected void exhaust(Set<AssemblyParseMachine> results, Deque<AssemblyParseMachine> visited) {
    try (DbgCtx dc = dbg.start("Exhausting machine " + id)) {
        dbg.println("Machine: " + this);
        AssemblyParseMachine earlier = findLoop(this, visited);
        if (earlier != null) {
            dbg.println("Pruned. Loop of " + earlier.id);
            return;
        }
        try (DequePush<?> push = DequePush.push(visited, this)) {
            if (error != ERROR_NONE) {
                throw new AssertionError("INTERNAL: Tried to step a machine with errors");
            }
            if (accepted) {
                // Gratuitous inputs should be detected by getTree
                throw new AssertionError("INTERNAL: Tried to step an accepted machine");
            }
            Collection<AssemblyTerminal> candidates = parser.actions.getExpected(stack.peek());
            if (candidates.isEmpty()) {
                throw new RuntimeException("Encountered a state with no actions");
            }
            // Begin with every candidate and strike each one that matches some token
            Set<AssemblyTerminal> unmatched = new TreeSet<>(candidates);
            for (AssemblyTerminal term : candidates) {
                for (AssemblyParseToken tok : term.match(buffer, pos, parser.grammar, labels)) {
                    unmatched.remove(term);
                    assert buffer.regionMatches(pos, tok.getString(), 0,
                        tok.getString().length());
                    consume(term, tok, results, visited);
                }
            }
            if (unmatched.isEmpty()) {
                return;
            }
            AssemblyParseMachine failed = copy();
            final Collection<AssemblyTerminal> suggestions;
            if (failed.lastTok instanceof TruncatedWhiteSpaceParseToken) {
                // After a truncated whitespace token, whitespace is the only suggestion
                suggestions = new TreeSet<>();
                suggestions.add(AssemblySentential.WHITE_SPACE);
            }
            else {
                suggestions = unmatched;
            }
            dbg.println("Syntax Error: ");
            dbg.println(" Expected: " + suggestions);
            String remaining = buffer.substring(pos);
            dbg.println(" Got: " + remaining);
            failed.error = ERROR_SYNTAX;
            failed.got = remaining;
            failed.expected = suggestions;
            results.add(failed);
        }
    }
}
/**
 * Parse (or continue parsing) all possible trees from this machine state
 *
 * This is the entry point: it starts with an empty result set and an empty visit history.
 * @return the set of all possible trees and errors
 */
public Set<AssemblyParseMachine> exhaust() {
    Deque<AssemblyParseMachine> history = new LinkedList<>();
    Set<AssemblyParseMachine> outcomes = new LinkedHashSet<>();
    exhaust(outcomes, history);
    return outcomes;
}
/**
 * If in the accepted state, get the resulting parse tree for this machine
 *
 * The tree stack is only inspected, never popped, so this may be called repeatedly and does
 * not alter the machine's hash code.
 * @return the parse tree
 * @throws AssertionError if the machine has not accepted, has not emptied its buffer, does not
 *             have the end-of-input marker on top of its tree stack, or holds anything other
 *             than the root branch beneath that marker
 */
public AssemblyParseBranch getTree() {
    if (!accepted) {
        throw new AssertionError("INTERNAL: Machine has not accepted its buffer");
    }
    if (pos != buffer.length()) {
        throw new AssertionError("INTERNAL: Machine has not emptied its buffer");
    }
    // Peek instead of pop: popping would corrupt this machine on a failed check, break a
    // second call, and change hashCode() while the machine may sit in a hash-based set
    if (!treeStack.peek().getSym().equals(AssemblyEOI.EOI)) {
        throw new AssertionError("INTERNAL: Machine has not encountered end of input marker");
    }
    // Exactly two entries must be present: the root branch and the end-of-input marker
    if (treeStack.size() != 2) {
        throw new AssertionError("INTERNAL: More than root branch remains on machine stack");
    }
    return (AssemblyParseBranch) treeStack.get(0);
}
}

View file

@ -0,0 +1,57 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.parse;
import java.util.Set;
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseBranch;
/**
* A result of parsing a sentence
*
* If the sentence was accepted, this yields a parse tree. If not, this describes the error and
* provides suggestions to correct the error.
*/
public abstract class AssemblyParseResult implements Comparable<AssemblyParseResult> {
    /**
     * Construct a successful parse result
     * @param tree the tree output by the parser
     * @return the accept result wrapping the given tree
     */
    public static AssemblyParseAcceptResult accept(AssemblyParseBranch tree) {
        AssemblyParseAcceptResult success = new AssemblyParseAcceptResult(tree);
        return success;
    }

    /**
     * Construct an error parse result
     * @param got the input buffer when the error occurred
     * @param suggestions a subset of strings that would have allowed parsing to proceed
     * @return the error result describing the failure
     */
    public static AssemblyParseErrorResult error(String got, Set<String> suggestions) {
        AssemblyParseErrorResult failure = new AssemblyParseErrorResult(got, suggestions);
        return failure;
    }

    /**
     * Check if the parse result is successful or an error
     * @return true if the result describes an error
     */
    public abstract boolean isError();

    /**
     * Order results by their string forms
     */
    @Override
    public int compareTo(AssemblyParseResult that) {
        String mine = this.toString();
        String theirs = that.toString();
        return mine.compareTo(theirs);
    }
}

View file

@ -0,0 +1,128 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.parse;
import java.util.*;
import org.apache.commons.collections4.set.AbstractSetDecorator;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.util.SleighUtil;
/**
* A state in an LR(0) parsing machine
*
* Each state consists of a kernel and an implied closure. Only the kernel is necessary to define
* the state, but the whole closure must be considered when deriving new states.
*/
public class AssemblyParseState extends AbstractSetDecorator<AssemblyParseStateItem>
        implements Comparable<AssemblyParseState> {
    private final AssemblyGrammar grammar;
    // The defining items of this state, in insertion order; also the set being decorated
    private final Set<AssemblyParseStateItem> kernel = new LinkedHashSet<>();
    // Lazily computed by getClosure(); null until first requested
    private Set<AssemblyParseStateItem> closure;

    /**
     * Construct a new state associated with the given grammar
     * @param grammar the grammar
     */
    public AssemblyParseState(AssemblyGrammar grammar) {
        this.grammar = grammar;
    }

    /**
     * Construct a new state associated with the given grammar, seeded with the given item
     * @param grammar the grammar
     * @param item an item in the state
     */
    public AssemblyParseState(AssemblyGrammar grammar, AssemblyParseStateItem item) {
        this(grammar);
        kernel.add(item);
    }

    /**
     * Expose the kernel as the decorated set, so set operations on this state act on the kernel
     */
    @Override
    protected Set<AssemblyParseStateItem> decorated() {
        return kernel;
    }

    /**
     * Get the closure of this state, caching the result
     *
     * The closure starts as the kernel and is expanded one step at a time until a pass adds
     * nothing new. NOTE: the cached closure is not invalidated if the kernel changes afterward.
     * @return the closure
     */
    public Set<AssemblyParseStateItem> getClosure() {
        if (closure != null) {
            return closure;
        }
        closure = new LinkedHashSet<>(kernel);
        Set<AssemblyParseStateItem> newItems = new LinkedHashSet<>();
        do {
            newItems.clear();
            for (AssemblyParseStateItem item : closure) {
                newItems.addAll(item.getClosure(grammar));
            }
        }
        while (closure.addAll(newItems));
        return closure;
    }

    /**
     * Two states are equal when their kernels contain the same items, in any order
     */
    @Override
    public boolean equals(Object that) {
        if (this == that) {
            return true;
        }
        if (!(that instanceof AssemblyParseState)) {
            return false;
        }
        return this.kernel.equals(((AssemblyParseState) that).kernel);
    }

    /**
     * Order states by kernel size, then by their kernel items taken in sorted order
     */
    @Override
    public int compareTo(AssemblyParseState that) {
        int result = Integer.compare(this.kernel.size(), that.kernel.size());
        if (result != 0) {
            return result;
        }
        // The kernel is insertion-ordered, not sorted. Compare sorted copies so that two states
        // holding the same items always compare as 0, in agreement with equals().
        Set<AssemblyParseStateItem> mine = new TreeSet<>(this.kernel);
        Set<AssemblyParseStateItem> theirs = new TreeSet<>(that.kernel);
        return SleighUtil.compareInOrder(mine, theirs);
    }

    /**
     * List the kernel items, one per line, preceded by a blank line; empty string if no items
     */
    @Override
    public String toString() {
        if (kernel.isEmpty()) {
            return "";
        }
        StringBuilder sb = new StringBuilder("\n"); // Leading blank line helps with debugging
        for (AssemblyParseStateItem item : kernel) {
            sb.append("\n");
            sb.append(item);
        }
        return sb.toString();
    }

    /**
     * Hash the kernel independently of insertion order, in agreement with equals()
     */
    @Override
    public int hashCode() {
        // Set.hashCode() sums the item hashes, so equal kernels always hash alike
        return kernel.hashCode();
    }
}

View file

@ -0,0 +1,181 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.parse;
import java.util.*;
import ghidra.app.plugin.assembler.sleigh.grammars.*;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNonTerminal;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblySymbol;
/**
* An item in the state of an LR(0) parser
*
* An item is a production with a dot indicating a position while parsing
*/
public class AssemblyParseStateItem implements Comparable<AssemblyParseStateItem> {
    private final AssemblyProduction prod;
    // The number of RHS symbols to the left of the dot
    private final int pos;

    /**
     * Construct a new item starting at the far left of the given production
     * @param prod the production
     */
    public AssemblyParseStateItem(AssemblyProduction prod) {
        this(prod, 0);
    }

    /**
     * Construct a new item starting immediately before the symbol at the given position in the
     * given production
     * @param prod the production
     * @param pos the position of the dot
     * @throws AssertionError if the dot would lie beyond the end of the production
     */
    public AssemblyParseStateItem(AssemblyProduction prod, int pos) {
        if (pos > prod.size()) {
            throw new AssertionError("INTERNAL: Attempt to advance beyond end of RHS");
        }
        this.prod = prod;
        this.pos = pos;
    }

    /**
     * Advance the dot by one position, producing a new item
     * @return the new item
     */
    public AssemblyParseStateItem read() {
        return new AssemblyParseStateItem(prod, pos + 1);
    }

    /**
     * Get the symbol immediately to the right of the dot
     *
     * This is the symbol which must be matched to advance the dot.
     * @return the symbol, or null if the item is completed, i.e., the dot is at the far right
     */
    public AssemblySymbol getNext() {
        if (completed()) {
            return null;
        }
        return prod.get(pos);
    }

    /**
     * "Fill" one step out to close a state containing this item
     *
     * To compute the full closure, you must continue stepping out until no new items are
     * generated. If the dot is at the far right, or the next symbol is not a non-terminal, this
     * contributes nothing.
     * @param grammar the grammar containing the production
     * @return a subset of items in the closure of a state containing this item
     */
    public Collection<AssemblyParseStateItem> getClosure(AssemblyGrammar grammar) {
        AssemblySymbol next = getNext();
        // instanceof is false for null, so this also covers a completed item
        if (!(next instanceof AssemblyNonTerminal)) {
            return Collections.emptySet();
        }
        AssemblyNonTerminal nt = (AssemblyNonTerminal) next;
        Set<AssemblyParseStateItem> result = new TreeSet<>();
        for (AssemblyProduction subst : grammar.productionsOf(nt)) {
            result.add(new AssemblyParseStateItem(subst, 0));
        }
        return result;
    }

    /**
     * Two items are equal when they share a production index and a dot position
     */
    @Override
    public boolean equals(Object that) {
        if (!(that instanceof AssemblyParseStateItem)) {
            return false;
        }
        AssemblyParseStateItem apsi = (AssemblyParseStateItem) that;
        return this.prod.getIndex() == apsi.prod.getIndex() && this.pos == apsi.pos;
    }

    /**
     * Order items by production index, then by dot position
     */
    @Override
    public int compareTo(AssemblyParseStateItem that) {
        // Integer.compare avoids the overflow hazard of comparing by subtraction
        int result = Integer.compare(this.prod.getIndex(), that.prod.getIndex());
        if (result != 0) {
            return result;
        }
        return Integer.compare(this.pos, that.pos);
    }

    @Override
    public int hashCode() {
        return 31 * prod.getIndex() + pos;
    }

    /**
     * Render as "index. LHS => before * after", where "*" marks the dot
     */
    @Override
    public String toString() {
        AssemblySentential<?> prec = prod.subList(0, pos);
        AssemblySentential<?> proc = prod.subList(pos, prod.size());
        StringBuilder sb = new StringBuilder();
        sb.append(prod.getIndex()).append(". ").append(prod.getLHS()).append(" => ");
        if (prec.size() != 0) {
            sb.append(prec).append(" ");
        }
        sb.append("*");
        if (proc.size() != 0) {
            sb.append(" ").append(proc);
        }
        return sb.toString();
    }

    /**
     * Check if this item is completed
     *
     * The item is completed if all symbols have been matched, i.e., the dot is at the far right
     * of the production.
     * @return true iff the item is completed
     */
    public boolean completed() {
        return (pos == prod.size());
    }

    /**
     * Get the position of the dot
     *
     * The position is the number of symbols to the left of the dot.
     * @return the position
     */
    public int getPos() {
        return pos;
    }

    /**
     * Get the production associated with this item
     * @return the production
     */
    public AssemblyProduction getProduction() {
        return prod;
    }
}

View file

@ -0,0 +1,67 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.parse;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Consumer;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblySymbol;
import ghidra.app.plugin.assembler.sleigh.util.TableEntry;
import ghidra.app.plugin.assembler.sleigh.util.TableEntryKey;
/**
* The transition table defining an LR(0) parsing machine
*/
public class AssemblyParseTransitionTable {
    // The (sparse) table: each (state, symbol) key maps to a destination state, sorted by key
    private final Map<TableEntryKey, Integer> map = new TreeMap<>();

    /**
     * Put an entry into the state machine
     *
     * Generally, if this returns non-null, something is probably wrong with your LR(0) machine
     * generator, because the cell was already populated.
     * @param fromState the source state
     * @param next the symbol that is matched
     * @param newState the destination state
     * @return the destination previously stored in the cell, or null if there was none
     */
    public Integer put(int fromState, AssemblySymbol next, int newState) {
        TableEntryKey cell = new TableEntryKey(fromState, next);
        return map.put(cell, newState);
    }

    /**
     * Get an entry from the state machine
     * @param fromState the source state
     * @param next the symbol that has been matched
     * @return the destination state, or null if the cell is empty
     */
    public Integer get(int fromState, AssemblySymbol next) {
        TableEntryKey cell = new TableEntryKey(fromState, next);
        return map.get(cell);
    }

    /**
     * Traverse every entry in the table, invoking {@link Consumer#accept(Object)} on each
     * @param consumer the callback
     */
    public void forEach(Consumer<TableEntry<Integer>> consumer) {
        for (Map.Entry<TableEntryKey, Integer> ent : map.entrySet()) {
            TableEntryKey cell = ent.getKey();
            TableEntry<Integer> row =
                new TableEntry<>(cell.getState(), cell.getSym(), ent.getValue());
            consumer.accept(row);
        }
    }
}

View file

@ -0,0 +1,527 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.parse;
import java.io.PrintStream;
import java.util.*;
import java.util.function.Consumer;
import org.apache.commons.collections4.map.LazyMap;
import org.apache.commons.lang3.StringUtils;
import ghidra.app.plugin.assembler.sleigh.grammars.*;
import ghidra.app.plugin.assembler.sleigh.symbol.*;
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer.DbgCtx;
import ghidra.app.plugin.assembler.sleigh.util.TableEntry;
/**
* A class to encapsulate LALR(1) parsing for a given grammar
*
* This class constructs the Action/Goto table (and all the other trappings) of a LALR(1) parser
* and provides a {@link #parse(String)} method to parse actual sentences.
*
* This implementation is somewhat unconventional in that it permits ambiguous grammars. Instead of
* complaining, it produces the set of all possible parse trees. Of course, this comes at the cost
* of some efficiency.
*
* See Alfred V. Aho, Monica S. Lam, Ravi Sethi, Jeffrey D. Ullman, <i>Compilers: Principles,
* Techniques, &amp; Tools</i>. Boston, MA: Pearson, 2007.
*
* See Jackson, Stephen. <a href="http://web.cs.dal.ca/~sjackson/lalr1.html">LALR(1) Parsing</a>.
* Halifax, Nova Scotia, Canada: Dalhousie University.
* &lt;http://web.cs.dal.ca/~sjackson/lalr1.html&gt;
*/
public class AssemblyParser {
protected final AssemblyGrammar grammar; // The input grammar
protected final AssemblyFirstFollow ff; // The first and follow sets for the input grammar
// LR(0) fodder
protected final ArrayList<AssemblyParseState> states = new ArrayList<>();
protected final AssemblyParseTransitionTable table = new AssemblyParseTransitionTable();
// see Stephen Jackson's rant regarding this fodder
protected AssemblyExtendedGrammar extendedGrammar;
protected final AssemblyFirstFollow extff;
protected Map<MergeKey, MergeValue> mergers;
// the LALR(1) Action/Goto table
protected AssemblyParseActionGotoTable actions;
/** A convenience to specify no labels in {@link #parse(String, Map)} */
public static final Map<String, Long> EMPTY_LABELS =
Collections.unmodifiableMap(new HashMap<String, Long>());
protected static final DbgTimer dbg = DbgTimer.INACTIVE;
protected static final boolean dbg_detail = false;
/**
 * Construct a LALR(1) parser from the given grammar
 *
 * NOTE: The given grammar is modified. A new start symbol, whose only production is the old
 * start symbol followed by end-of-input, is added and made the start of the grammar. The
 * parser is then built in stages: first/follow sets, the LR(0) machine, the extended grammar,
 * its first/follow sets, and finally the Action/Goto table.
 * @param grammar the grammar
 */
public AssemblyParser(AssemblyGrammar grammar) {
    this.grammar = grammar;

    // Rather than checking whether the existing start production already has the required
    // form, a new start symbol is always synthesized. Prefix '$' until the name is unused.
    String startName = "$S";
    while (grammar.contains(startName)) {
        startName = "$" + startName;
    }
    AssemblyNonTerminal synthStart = new AssemblyNonTerminal(startName);
    grammar.addProduction(synthStart,
        new AssemblySentential<>(grammar.getStart(), AssemblyEOI.EOI));
    grammar.setStart(synthStart);

    try (DbgCtx ctx = dbg.start("Computing First/Follow for General Grammar")) {
        this.ff = new AssemblyFirstFollow(grammar);
        if (dbg_detail) {
            printGeneralFF(dbg);
        }
    }

    try (DbgCtx ctx = dbg.start("Computing LR0 States and Transition Table")) {
        buildLR0Machine();
        if (dbg_detail) {
            printLR0States(dbg);
            printLR0TransitionTable(dbg);
        }
    }

    try (DbgCtx ctx = dbg.start("Computing Extended Grammar")) {
        buildExtendedGrammar();
        if (dbg_detail) {
            printExtendedGrammar(dbg);
        }
    }

    try (DbgCtx ctx = dbg.start("Computing First/Follow for Extended Grammar")) {
        this.extff = new AssemblyFirstFollow(extendedGrammar);
        if (dbg_detail) {
            printExtendedFF(dbg);
        }
    }

    try (DbgCtx ctx = dbg.start("Computing Parse Table")) {
        buildActionGotoTable();
        if (dbg_detail) {
            printParseTable(dbg);
        }
    }
}
/**
 * Build the LR(0) states and the transition table among them
 *
 * The first state is seeded with the first production of the grammar's start symbol, at
 * position 0. Each state is then processed in turn: every item in its closure that has a next
 * symbol is advanced past that symbol, and the advanced items are grouped by the symbol read to
 * form the kernels of the destination states.
 */
protected void buildLR0Machine() {
    AssemblyProduction startProd = grammar.productionsOf(grammar.getStart()).iterator().next();
    states.add(new AssemblyParseState(grammar, new AssemblyParseStateItem(startProd, 0)));
    // Deliberately an index loop: states get appended while we iterate, to be processed later
    for (int cur = 0; cur < states.size(); cur++) {
        AssemblyParseState source = states.get(cur);
        // Destination kernels for this one source state, so keying on the symbol read suffices.
        // Insertion order is kept so that state numbering is deterministic.
        Map<AssemblySymbol, AssemblyParseState> kernels =
            LazyMap.lazyMap(new LinkedHashMap<AssemblySymbol, AssemblyParseState>(),
                () -> new AssemblyParseState(grammar));
        // NOTE: Items must be advanced from the closure, not just the kernel
        for (AssemblyParseStateItem item : source.getClosure()) {
            AssemblySymbol read = item.getNext();
            if (read == null) {
                continue; // no next symbol, so nothing to read
            }
            AssemblyParseStateItem advanced = item.read();
            kernels.get(read).add(advanced);
        }
        // Number each destination (reusing the number of an equal state) and record the edge
        for (Map.Entry<AssemblySymbol, AssemblyParseState> dest : kernels.entrySet()) {
            int destNum = addLR0State(dest.getValue());
            table.put(cur, dest.getKey(), destNum);
        }
    }
}
/**
 * Add a newly-constructed LR0 state, and return its assigned number
 *
 * If an equal state is already present, nothing is added, and the number previously assigned
 * to that state is returned instead.
 * @param state the newly-constructed state
 * @return the assigned number
 */
protected int addLR0State(AssemblyParseState state) {
    int existing = states.indexOf(state);
    if (existing == -1) {
        // Not seen before: it takes the next free number, i.e., the current size
        int assigned = states.size();
        states.add(state);
        return assigned;
    }
    return existing;
}
/**
 * Build the extended grammar from the LR(0) machine
 *
 * For every state, each item in its closure at position 0 has its production extended starting
 * from that state, and the result is added to the extended grammar. The extended grammar takes
 * the same start name as the general grammar.
 */
protected void buildExtendedGrammar() {
    extendedGrammar = new AssemblyExtendedGrammar();
    extendedGrammar.setStartName(grammar.getStartName());
    for (int stateNum = 0; stateNum < states.size(); stateNum++) {
        for (AssemblyParseStateItem item : states.get(stateNum).getClosure()) {
            if (item.getPos() != 0) {
                continue; // only items that have not read anything yet
            }
            extendedGrammar.addProduction(extend(item.getProduction(), stateNum));
        }
    }
}
/**
 * Extend a production, using the given LR0 start state
 *
 * The production's right-hand side is walked through the transition table beginning at
 * {@code start}. Terminals are copied as is; each non-terminal is replaced by an extended
 * non-terminal recording the state before and after reading it. The left-hand side is extended
 * with {@code start} and the state reached from {@code start} by reading it, or -1 if it is the
 * grammar's start symbol.
 * @param prod the production to extend
 * @param start the starting LR0 state
 * @return the extended production
 */
protected AssemblyExtendedProduction extend(AssemblyProduction prod, int start) {
    AssemblySentential<AssemblyExtendedNonTerminal> rhs = new AssemblySentential<>();
    int state = start;
    for (AssemblySymbol sym : prod) {
        // The transition is looked up (and unboxed) before the symbol is classified
        int dest = table.get(state, sym);
        if (sym instanceof AssemblyTerminal) {
            rhs.add(sym);
        }
        else if (sym instanceof AssemblyNonTerminal) {
            AssemblyNonTerminal nt = (AssemblyNonTerminal) sym;
            rhs.add(new AssemblyExtendedNonTerminal(state, nt, dest));
        }
        else {
            throw new RuntimeException(
                "Internal error: all AssemblySymbols must be either terminal or non-terminal");
        }
        state = dest;
    }
    AssemblyNonTerminal lhs = prod.getLHS();
    // The start symbol is never read, so it has no destination state
    int lhsDest = lhs.equals(grammar.getStart()) ? -1 : table.get(start, lhs);
    AssemblyExtendedNonTerminal extLhs = new AssemblyExtendedNonTerminal(start, lhs, lhsDest);
    return new AssemblyExtendedProduction(extLhs, rhs, state, prod);
}
/**
 * Build the LALR(1) Action/Goto table from the LR(0) machine and the extended grammar
 *
 * Transitions on non-terminals are copied as GOTOs, and transitions on terminals as SHIFTs.
 * Extended productions sharing both an ancestor production and a final state are merged, along
 * with their follow sets, and each terminal in a merged follow set yields a REDUCE (except for
 * productions of the start symbol). Finally, every state whose kernel holds a completed item
 * of the start symbol is marked ACCEPT.
 */
protected void buildActionGotoTable() {
    actions = new AssemblyParseActionGotoTable();
    // Copy the transition table's NT columns as GOTOs
    // Also, copy the T columns as SHIFTs
    table.forEach(new Consumer<TableEntry<Integer>>() {
        @Override
        public void accept(TableEntry<Integer> ent) {
            if (ent.getSym() instanceof AssemblyNonTerminal) {
                AssemblyNonTerminal nt = (AssemblyNonTerminal) ent.getSym();
                actions.putGoto(ent.getState(), nt, ent.getValue());
            }
            else if (ent.getSym() instanceof AssemblyTerminal) {
                AssemblyTerminal t = (AssemblyTerminal) ent.getSym();
                actions.putShift(ent.getState(), t, ent.getValue());
            }
            else {
                throw new AssertionError("INTERNAL: symbols must be T or NT");
            }
        }
    });
    // Merge rules from same general rule, ending in same state
    mergers =
        LazyMap.lazyMap(new LinkedHashMap<MergeKey, MergeValue>(), () -> new MergeValue());
    int i = -1;
    for (AssemblyExtendedProduction prod : extendedGrammar) {
        i++;
        MergeValue entry = mergers.get(new MergeKey(prod.getFinalState(), prod.getAncestor()));
        entry.merge(i, extff.getFollow(prod.getLHS()));
    }
    // Write merged stuff to table as REDUCEs
    for (Map.Entry<MergeKey, MergeValue> ent : mergers.entrySet()) {
        AssemblyProduction prod = ent.getKey().prod;
        if (prod.getLHS().equals(grammar.getStart())) {
            continue; // completing the start production is an ACCEPT, not a REDUCE
        }
        for (AssemblyTerminal t : ent.getValue().follow) {
            actions.putReduce(ent.getKey().finalState, t, prod);
        }
    }
    // Make $ accept on any state with a completed start item.
    // The start symbol is compared directly, not by the literal name "$S", because the
    // constructor picks a different name ("$$S", ...) when "$S" is already in the grammar.
    nextState: for (i = 0; i < states.size(); i++) {
        AssemblyParseState state = states.get(i);
        for (AssemblyParseStateItem item : state) {
            if (item.completed() &&
                item.getProduction().getLHS().equals(grammar.getStart())) {
                actions.putAccept(i);
                continue nextState;
            }
        }
    }
}
/**
 * A map key used to identify merges for Step 4 in Stephen Jackson's rant
 *
 * Two keys are equal when they have the same final state and equal productions. Ordering is by
 * final state first, then by production.
 */
protected static class MergeKey implements Comparable<MergeKey> {
    int finalState;
    AssemblyProduction prod;

    /**
     * Construct a key
     * @param finalState the final LR0 state of the extended production
     * @param prod the general production from which the extended production derives
     */
    protected MergeKey(int finalState, AssemblyProduction prod) {
        this.finalState = finalState;
        this.prod = prod;
    }

    @Override
    public int hashCode() {
        int result = 0;
        result += finalState;
        result *= 31;
        result += prod.hashCode();
        return result;
    }

    @Override
    public boolean equals(Object that) {
        if (!(that instanceof MergeKey)) {
            return false;
        }
        MergeKey mk = (MergeKey) that;
        return this.finalState == mk.finalState && this.prod.equals(mk.prod);
    }

    @Override
    public int compareTo(MergeKey that) {
        // Integer.compare rather than subtraction, which can overflow and flip the sign
        int result = Integer.compare(this.finalState, that.finalState);
        if (result != 0) {
            return result;
        }
        return this.prod.compareTo(that.prod);
    }
}
/**
* The map value keyed by {@link MergeKey}
*
* It accumulates the numbers of the extended productions merged under one key, together with
* the union of the follow sets given for them. Both sets are sorted.
*/
protected static class MergeValue {
// The numbers of the extended productions merged so far
Set<Integer> extProds = new TreeSet<>();
// The union of the terminals given with each merged production
Set<AssemblyTerminal> follow = new TreeSet<>();
/**
* Merge one more extended production into this value
* @param extProdNum the number of the extended production
* @param more the terminals to add to the merged follow set
*/
protected void merge(int extProdNum, Collection<AssemblyTerminal> more) {
extProds.add(extProdNum);
this.follow.addAll(more);
}
}
/**
* Parse the given sentence, with no labels defined
*
* This delegates to {@link #parse(String, Map)} with {@link #EMPTY_LABELS}.
* @param input the sentence to parse
* @return all possible parse trees (and possible errors)
*/
public Iterable<AssemblyParseResult> parse(final String input) {
return parse(input, EMPTY_LABELS);
}
/**
 * Parse the given sentence with the given defined labels
 *
 * The tokenizer for numeric terminals also accepts any key in {@code labels}. In such cases,
 * the resulting token is assigned the value of the label.
 *
 * A parse machine is started at the beginning of the input and run to exhaustion. Each
 * resulting machine contributes either its parse tree, if it accepted, or an error carrying
 * what it got along with suggestions gathered from the terminals it expected.
 * @param input the sentence to parse
 * @param labels a map of label to number substitutions
 * @return all possible parse results (trees and errors)
 */
public Collection<AssemblyParseResult> parse(final String input, Map<String, Long> labels) {
    AssemblyParseMachine seed = new AssemblyParseMachine(this, input, 0, null, labels);
    Set<AssemblyParseMachine> finished = seed.exhaust();
    Set<AssemblyParseResult> parsed = new LinkedHashSet<>();
    for (AssemblyParseMachine machine : finished) {
        if (machine.accepted) {
            parsed.add(AssemblyParseResult.accept(machine.getTree()));
            continue;
        }
        if (machine.error == 0) {
            // Neither accepted nor in error: it should not have been returned
            throw new AssertionError("INTERNAL: Unfinished machine was returned");
        }
        Set<String> suggestions = new TreeSet<>();
        for (AssemblyTerminal expected : machine.expected) {
            suggestions.addAll(expected.getSuggestions(machine.got, labels));
        }
        parsed.add(AssemblyParseResult.error(machine.got, suggestions));
    }
    return parsed;
}
/**
* For debugging: print the general grammar, preceded by a header line
* @param out the stream to print to
*/
public void printGrammar(PrintStream out) {
out.println("\nGeneral Grammar:");
grammar.print(out);
}
/**
 * For debugging: print the LR0 states
 *
 * Each state is headed by "I" and its number. Its kernel items are printed with the prefix
 * "K: ", followed by the remaining items of its closure with the prefix "C: ".
 * @param out the stream to print to
 */
public void printLR0States(PrintStream out) {
    out.println("\nLR0 States:");
    for (int num = 0; num < states.size(); num++) {
        AssemblyParseState cur = states.get(num);
        out.println("I" + num);
        for (AssemblyParseStateItem kernelItem : cur) {
            out.println("K: " + kernelItem);
        }
        for (AssemblyParseStateItem closureItem : cur.getClosure()) {
            if (cur.contains(closureItem)) {
                continue; // already printed as part of the kernel
            }
            out.println("C: " + closureItem);
        }
    }
}
/**
 * For debugging: print the LR0 transition table, tab-separated
 *
 * The header row lists the grammar's terminals and then its non-terminals. Each following row
 * gives a state number and, per symbol, the destination state, or nothing if there is none.
 * @param out the stream to print to
 */
public void printLR0TransitionTable(PrintStream out) {
    out.println("\nLR0 Transition Table:");
    // Header row
    out.print("State\t");
    for (AssemblyTerminal term : grammar.terminals()) {
        out.print(term + "\t");
    }
    for (AssemblyNonTerminal nonTerm : grammar.nonTerminals()) {
        out.print(nonTerm + "\t");
    }
    out.println();
    // One row per state
    for (int row = 0; row < states.size(); row++) {
        out.print(row + "\t");
        for (AssemblyTerminal term : grammar.terminals()) {
            Integer dest = table.get(row, term);
            if (dest != null) {
                out.print(dest);
            }
            out.print("\t");
        }
        for (AssemblyNonTerminal nonTerm : grammar.nonTerminals()) {
            Integer dest = table.get(row, nonTerm);
            if (dest != null) {
                out.print(dest);
            }
            out.print("\t");
        }
        out.println();
    }
}
/**
* For debugging: print the extended grammar, preceded by a header line
* @param out the stream to print to
*/
public void printExtendedGrammar(PrintStream out) {
out.println("\nExtended Grammar:");
extendedGrammar.print(out);
}
/**
* For debugging: print the first/follow sets of the general grammar, preceded by a header line
* @param out the stream to print to
*/
public void printGeneralFF(PrintStream out) {
out.println("\nGeneral FF:");
ff.print(out);
}
/**
* For debugging: print the first/follow sets of the extended grammar, preceded by a header line
* @param out the stream to print to
*/
public void printExtendedFF(PrintStream out) {
out.println("\nExtended FF:");
extff.print(out);
}
/**
 * For debugging: print the merged extended productions
 *
 * One line is printed per merge, tab-separated: the final state, the general production, the
 * numbers of the merged extended productions, and the merged follow set.
 * @param out the stream to print to
 */
public void printMergers(PrintStream out) {
    out.println("\nMergers:");
    for (Map.Entry<MergeKey, MergeValue> merger : mergers.entrySet()) {
        MergeKey key = merger.getKey();
        out.print(key.finalState + "\t");
        out.print(key.prod + "\t");
        MergeValue merged = merger.getValue();
        out.print(merged.extProds + "\t");
        out.print(merged.follow + "\n");
    }
}
/**
 * For debugging: print the Action/Goto table, tab-separated
 *
 * The header row lists the grammar's terminals and then its non-terminals. Each following row
 * gives a state number and, per symbol, the actions for that cell joined by "/".
 * @param out the stream to print to
 */
public void printParseTable(PrintStream out) {
    out.println("\nParse Table:");
    // Header row
    out.print("State\t");
    for (AssemblyTerminal term : grammar.terminals()) {
        out.print(term + "\t");
    }
    for (AssemblyNonTerminal nonTerm : grammar.nonTerminals()) {
        out.print(nonTerm + "\t");
    }
    out.println();
    // One row per state
    for (int row = 0; row < states.size(); row++) {
        out.print(row + "\t");
        for (AssemblyTerminal term : grammar.terminals()) {
            String cell = StringUtils.join(actions.get(row, term), "/");
            out.print(cell);
            out.print("\t");
        }
        for (AssemblyNonTerminal nonTerm : grammar.nonTerminals()) {
            String cell = StringUtils.join(actions.get(row, nonTerm), "/");
            out.print(cell);
            out.print("\t");
        }
        out.println();
    }
}
/**
* For debugging: print everything, in the order it is computed
*
* This prints the general grammar, its first/follow sets, the LR0 states and transition table,
* the extended grammar, its first/follow sets, the mergers, and finally the parse table.
* @param out the stream to print to
*/
public void printStuff(PrintStream out) {
printGrammar(out);
printGeneralFF(out);
printLR0States(out);
printLR0TransitionTable(out);
printExtendedGrammar(out);
printExtendedFF(out);
printMergers(out);
printParseTable(out);
}
/**
* Get the grammar used to construct this parser
*
* NOTE: This is the same object given at construction, not a copy.
* @return the grammar
*/
public AssemblyGrammar getGrammar() {
return grammar;
}
}

View file

@ -0,0 +1,330 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.sem;
import java.util.*;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import ghidra.app.plugin.assembler.sleigh.expr.MaskedLong;
import ghidra.app.plugin.assembler.sleigh.expr.RecursiveDescentSolver;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyProduction;
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
import ghidra.app.plugin.languages.sleigh.SleighLanguages;
import ghidra.app.plugin.languages.sleigh.SubtableEntryVisitor;
import ghidra.app.plugin.processors.sleigh.*;
import ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern;
import ghidra.app.plugin.processors.sleigh.symbol.SubtableSymbol;
/**
* Describes a SLEIGH constructor semantic
*
* These are collected and associated with productions in the grammar based on the given
* constructor's print pieces.
*/
public class AssemblyConstructorSemantic implements Comparable<AssemblyConstructorSemantic> {
protected static final RecursiveDescentSolver solver = RecursiveDescentSolver.getSolver();
// Shares the debug sink of the tree resolver
protected static final DbgTimer dbg = AssemblyTreeResolver.dbg;
// The patterns as added, i.e., without forbidden patterns computed
protected final Set<AssemblyResolvedConstructor> patterns = new HashSet<>();
protected final Constructor cons;
// Operand indices in print piece order
protected final ImmutableList<Integer> indices;
// A set initialized on first access with forbidden patterns added
protected ImmutableSet<AssemblyResolvedConstructor> upatterns;
/**
* Build a new SLEIGH constructor semantic
*
* No encoding patterns are given here. They are added afterward using
* {@link #addPattern(DisjointPattern)} or {@link #addPattern(AssemblyResolvedConstructor)}.
* @param cons the SLEIGH constructor
* @param indices the indices of RHS non-terminals in the associated production that represent an
* operand in the SLEIGH constructor; the list is copied
*/
public AssemblyConstructorSemantic(Constructor cons, List<Integer> indices) {
this.cons = cons;
this.indices = ImmutableList.copyOf(indices);
}
/**
* Add an encoding pattern, given as a SLEIGH disjoint pattern
*
* The pattern is converted to a resolution using the constructor's minimum length, and
* described by the constructor's string form.
* @param pat the pattern to add
*/
public void addPattern(DisjointPattern pat) {
addPattern(AssemblyResolution.fromPattern(pat, cons.getMinimumLength(), cons.toString()));
}
/**
* Add an encoding pattern
* @param pat the pattern to add
* @throws IllegalStateException if the patterns with forbids have already been computed, i.e.,
* after a call to {@link #getPatterns()}
*/
public void addPattern(AssemblyResolvedConstructor pat) {
if (upatterns != null) {
throw new IllegalStateException("Cannot add patterns after a call to getPatterns()");
}
this.patterns.add(pat);
}
// The constructor's string form, a colon, then the patterns as added (without forbids).
// NOTE: compareTo orders semantics by this string.
@Override
public String toString() {
return cons.toString() + ":" + patterns.toString();
}
/**
* Get the SLEIGH constructor
* @return the constructor given at construction
*/
public Constructor getConstructor() {
return cons;
}
/**
* Get the associated encoding patterns for the constructor
*
* The first call computes the forbidden patterns for every pattern added so far. After that,
* no more patterns can be added, and the same immutable set is returned every time.
* @return the patterns, with forbidden patterns added
*/
public Collection<AssemblyResolvedConstructor> getPatterns() {
if (upatterns == null) {
computeAllForbids();
}
return upatterns;
}
/**
* Convert the index of a print piece to its associated operand index
* @param printpos position excluding whitespace and string tokens.
* @return the operand index, i.e., the element of the index list at {@code printpos}
*/
public int getOperandIndex(int printpos) {
return indices.get(printpos);
}
/**
* Get the list of operand indices in print piece order
* @return the (immutable) list
*/
public ImmutableList<Integer> getOperandIndices() {
return indices;
}
/**
 * Get an iterator over the operand indices
 *
 * If this iterator is advanced for each non-terminal, while simultaneously iterating over the
 * RHS of the associated production, then this will identify the corresponding operand index
 * for each non-terminal
 * @return the iterator, which does not support removal
 */
public Iterator<Integer> getOperandIndexIterator() {
    // The list is already immutable, so its own iterator is read-only; no wrapper is needed
    return indices.iterator();
}
/**
 * Initialize upatterns with an unmodifiable copy of patterns, with forbidden patterns added
 *
 * @throws IllegalStateException if upatterns has already been initialized
 */
protected void computeAllForbids() {
    if (upatterns != null) {
        throw new IllegalStateException(
            "Already computed all forbidden patterns for this constructor");
    }
    Set<AssemblyResolvedConstructor> withForbids = new HashSet<>();
    for (AssemblyResolvedConstructor original : patterns) {
        withForbids.add(withComputedForbids(original));
    }
    upatterns = ImmutableSet.copyOf(withForbids);
}
/**
* Add the list of forbidden patterns to one of the constructor's patterns
*
* SLEIGH disambiguates multiple matching patterns by two rules. First, if one is more specific
* than ("specializes") another, i.e., it matches on more bits than another pattern, the more
* specific pattern is chosen. Second, if the two are equally special, then the one that occurs
* first in the SLEIGH specification is taken. So, during resolution, if a less-special or
* later-occurring constructor is chosen, we must prevent continued resolution from matching
* the more-special or earlier-occurring pattern(s).
*
* Essentially, this states, "you may choose any value matching my pattern, except those that
* match these forbidden patterns."
*
* This takes a given pattern, and visits the patterns of every other constructor reachable from
* this constructor's parent subtable, looking for any that would take precedence, and combines
* them as forbidden patterns with the given pattern.
*
* @param pat the given pattern
* @return the same pattern with forbidden records added
*/
protected AssemblyResolvedConstructor withComputedForbids(AssemblyResolvedConstructor pat) {
// Forbid anything more specific (or otherwise takes precedence) over me.
Set<AssemblyResolvedConstructor> forbids = new HashSet<>();
SubtableSymbol parent = cons.getParent();
SleighLanguages.traverseConstructors(parent, new SubtableEntryVisitor() {
@Override
public int visit(DisjointPattern sibDP, Constructor sibcons) {
// Do not forbid myself.
if (sibcons == cons) {
return CONTINUE;
}
/*
* I had misunderstood the precedence rules originally.
* 1. If one pattern defines a subset of the other pattern, then the more-specific
* one is preferred.
* 2. Otherwise, preference is by line number
*
* Thus, I need to check if there is any overlap at all. If not, then I don't
* need to worry about forbidding anything.
* Then, I'll check if it defines a strict subset, and forbid it if so.
* Then, I'll check if it defines a strict overset, and skip the line check if so.
* Then, I'll check if its line number *precedes* mine, and forbid it if so.
*
* (I originally thought the pattern with the most bits won, no matter whether or
* not those bits overlapped.)
*/
// If the two patterns cannot be combined, then they are disjoint.
AssemblyResolvedConstructor sibpat = AssemblyResolution.fromPattern(sibDP,
sibcons.getMinimumLength(), "For specialization check");
AssemblyResolvedConstructor comb = pat.combine(sibpat);
if (null == comb) {
return CONTINUE;
}
// OK, they overlap. Let's see if it's a strict subset
// The combination having the sibling's bits means the sibling is the more specific
if (comb.bitsEqual(sibpat)) {
forbids.add(sibpat.withDescription(
cons + " forbids " + sibcons + " by pattern specificity"));
return CONTINUE;
}
else if (comb.bitsEqual(pat)) {
// I'm a strict subset, so I will win no matter the line number
return CONTINUE;
}
// Finally, check the line number, using the constructor id as the rule position
if (sibcons.getId() < cons.getId()) {
forbids.add(
sibpat.withDescription(cons + " forbids " + sibcons + " by rule position"));
return CONTINUE;
}
// I guess, I have the more-specific pattern, or I appear higher...
return CONTINUE;
}
});
return pat.withForbids(forbids);
}
/**
 * Solve this constructor's context changes
 *
 * Each value in {@code opvals} must either be a numeric value, e.g., an index from a varnode
 * list, or another {@link AssemblyResolvedConstructor} for a subconstructor operand.
 *
 * It's helpful to think of the SLEIGH disassembly process here. Normally, once the appropriate
 * constructor has been identified (by matching patterns), its context changes are applied, and
 * then its operands parsed (possibly parsing subconstructor operands). Thus, {@code res} can
 * be thought of as the intermediate result between applying context changes and parsing
 * operands, except in reverse. The output of this method corresponds to the state before
 * context changes were applied, i.e., immediately after selecting the constructor. Thus, in
 * reverse, the context is solved immediately before applying the selected constructor
 * patterns.
 *
 * @param res the combined resolution requirements derived from the subconstructors
 * @param vals any defined symbols (usually {@code inst_start}, and {@code inst_next})
 * @param opvals a map from operand index to operand value
 * @return the resolution with context changes applied in reverse, or an error
 * @see AssemblyTreeResolver#resolveSelectedChildren(AssemblyProduction, List, List, Collection)
 */
public AssemblyResolution solveContextChanges(AssemblyResolvedConstructor res,
        Map<String, Long> vals, Map<Integer, Object> opvals) {
    // Visit the changes last-to-first, since assembly runs opposite to disassembly
    List<ContextChange> backward = new ArrayList<>(cons.getContextChanges());
    Collections.reverse(backward);
    for (ContextChange chg : backward) {
        if (!(chg instanceof ContextOp)) {
            continue; // only context operations are solved here
        }
        dbg.println("Current: " + res.lineToString());
        // This seems backwards. That's because we're going backwards.
        // This is the "write" location for disassembly.
        ContextOp cop = (ContextOp) chg;
        dbg.println("Handling context change: " + cop);
        // TODO: Is this res or subres?
        MaskedLong required = res.readContextOp(cop);
        if (required.equals(MaskedLong.UNKS)) {
            dbg.println("Doesn't affect a current requirement");
            continue; // this context change does not satisfy any requirement
        }
        dbg.println("'read' " + required);
        // Remove the requirement that we just read before trying to solve
        res = res.maskOut(cop);
        dbg.println("Masked out: " + res.lineToString());
        // Now, solve
        AssemblyResolution sol = AssemblyTreeResolver.solveOrBackfill(
            cop.getPatternExpression(), required, vals, opvals, res, "Solution to " + cop);
        dbg.println("Solution: " + sol.lineToString());
        if (sol.isError()) {
            String message = ((AssemblyResolvedError) sol).getError();
            return AssemblyResolution.error(message, res);
        }
        // Now, forward the new requirements to my parents.
        if (sol instanceof AssemblyResolvedConstructor) {
            AssemblyResolvedConstructor combined =
                res.combine((AssemblyResolvedConstructor) sol);
            if (combined == null) {
                return AssemblyResolution.error(
                    "A context change caused a conflict: " + sol, res);
            }
            res = combined;
        }
        else {
            res = res.combine((AssemblyResolvedBackfill) sol);
        }
        dbg.println("Combined: " + res.lineToString());
    }
    return res;
}
/**
 * Apply just context transformations in the forward (disassembly) direction
 *
 * Unlike the usual disassembly process, this method does not take into account any information
 * from the instruction encoding. Any context bits that depend on it are set to unknown
 * ({@code x}) in the output. This method is used to pre-compute a context transition graph in
 * order to quickly resolve purely-recursive semantics on the root constructor table.
 *
 * @param outer the state before context changes
 * @return the state after context changes
 */
public AssemblyResolvedConstructor applyForward(AssemblyResolvedConstructor outer) {
    AssemblyResolvedConstructor state = outer;
    // TODO: Figure out semantics of ContextCommit. Not sure it matters here.
    for (ContextChange chg : cons.getContextChanges()) {
        if (!(chg instanceof ContextOp)) {
            continue;
        }
        ContextOp cop = (ContextOp) chg;
        // Evaluate the expression against the state so far, then write it into that state
        MaskedLong written = solver.valueForResolution(cop.getPatternExpression(), state);
        state = state.writeContextOp(cop, written);
    }
    return state;
}
// Orders semantics by their string form, i.e., by constructor and then the added patterns.
// NOTE(review): equals and hashCode are not overridden in this class, so two distinct
// instances may compare as 0 without being equal.
@Override
public int compareTo(AssemblyConstructorSemantic that) {
// TODO: This could be better
return this.toString().compareTo(that.toString());
}
}

View file

@ -0,0 +1,408 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.sem;
import java.util.*;
import java.util.Map.Entry;
import org.apache.commons.collections4.map.LazyMap;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyProduction;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyContextGraph.Edge;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyContextGraph.Vertex;
import ghidra.app.plugin.processors.sleigh.Constructor;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.plugin.processors.sleigh.symbol.*;
import ghidra.graph.*;
import ghidra.graph.algo.DijkstraShortestPathsAlgorithm;
/**
* A graph of possible context changes via the application of various constructors
*
* This is used primarily to find optimal paths for the application of recursive rules, i.e., those
* of the form I => I. These cannot be resolved without some form of semantic analysis. The most
* notable disadvantage to all of this is that you no longer get all of the possible assemblies,
* but only those with the fewest rule applications.
*
* Conceivably, this may also be used to prune some possibilities during semantic resolution of a
* parse tree. Even better, it may be possible to derive a grammar which accounts for the context
* changes already; however, it's unclear how many rules this will generate, and consequently, how
* much larger its LALR(1) parser would become.
*/
public class AssemblyContextGraph implements GImplicitDirectedGraph<Vertex, Edge> {
// The semantics usable as transitions, by (sub)table name. A missing name yields a new empty set.
protected final Map<String, Set<AssemblyConstructorSemantic>> semantics =
LazyMap.lazyMap(new HashMap<>(), () -> new HashSet<>());
protected final AssemblyGrammar grammar;
protected final SleighLanguage lang;
// The path finder over this (implicit) graph, using unit edge weights
protected final DijkstraShortestPathsAlgorithm<Vertex, Edge> dijkstra;
// The vertices whose out-edges have been computed
protected final Set<Vertex> cachedVertices = new HashSet<>();
protected final Set<Edge> cachedEdges = new HashSet<>();
// The out-edges by vertex, computed on first access using computeOutEdges
protected final Map<Vertex, Set<Edge>> cachedOutEdges =
LazyMap.lazyMap(new HashMap<>(), (Vertex v) -> computeOutEdges(v));
/**
 * Build the context change graph for a given language and grammar
 *
 * The grammar must have been constructed from the given language. The language is used just to
 * obtain the most common default context.
 *
 * At the moment, this graph only expands the recursive rules at the root constructor table,
 * i.e., "instruction". Thus, the assembler will not be able to process any language that has
 * <i>purely</i>-recursive rules at subconstructors.
 * @param lang the language
 * @param grammar the grammar derived from the given language
 */
public AssemblyContextGraph(SleighLanguage lang, AssemblyGrammar grammar) {
    this.grammar = grammar;
    this.lang = lang;
    // The semantics must be gathered first, since their count caps the search below
    gatherSemantics();
    AssemblyPatternBlock defaultContext = new AssemblyDefaultContext(lang).getDefault().fillMask();
    String startName = grammar.getStartName();
    Vertex source = new Vertex(defaultContext, startName);
    // Because this graph is potentially infinite, we must cap the distance.
    // Since we'd like to apply each constructor once, we can cap by the number of semantics.
    // Certainly this doesn't strictly enforce the apply once rule, but we do get an overset.
    int maxDistance = semantics.get(startName).size();
    dijkstra = new DijkstraShortestPathsAlgorithm<>(this, maxDistance,
        GEdgeWeightMetric.unitMetric());
    // Pre-compute for the source we know we will always use
    dijkstra.getDistancesFromSource(source);
}
/**
 * Compute the optimal, i.e., fewest, sequences of applications to resolve a given context to
 * the language's default context.
 *
 * NOTE: For assembly, the sequences will need to be applied right-to-left.
 *
 * @param src presumably, the language's default context
 * @param srcTable the name of the SLEIGH constructor table, presumably "instruction"
 * @param dst the context block being resolved
 * @param dstTable the name of the SLEIGH constructor table being resolved
 * @return a collection of sequences of constructor applications from {@code src} to
 *         {@code dst}
 */
public Collection<Deque<AssemblyConstructorSemantic>> computeOptimalApplications(
        AssemblyPatternBlock src, String srcTable, AssemblyPatternBlock dst, String dstTable) {
    Vertex source = new Vertex(src, srcTable);
    Vertex wanted = new Vertex(dst, dstTable);
    // Because we're working with masks, there may be many vertices that match dst
    // Keep only the one(s) at the least distance from the source
    Set<Vertex> nearest = new HashSet<>();
    Double nearestDist = null;
    for (Entry<Vertex, Double> reached : dijkstra.getDistancesFromSource(source).entrySet()) {
        Vertex candidate = reached.getKey();
        if (!candidate.matches(wanted)) {
            continue;
        }
        if (nearestDist == null || reached.getValue() < nearestDist) {
            // Strictly closer: discard whatever was collected so far
            nearest.clear();
            nearest.add(candidate);
            nearestDist = reached.getValue();
        }
        else if (nearestDist.equals(reached.getValue())) {
            nearest.add(candidate);
        }
    }
    // Now collect all the shortest paths to those closest destinations
    Set<Deque<AssemblyConstructorSemantic>> sequences = new HashSet<>();
    for (Vertex dest : nearest) {
        for (Deque<Edge> path : dijkstra.computeOptimalPaths(source, dest)) {
            // A LinkedList, so that equal sequences collapse in the result set
            Deque<AssemblyConstructorSemantic> applied = new LinkedList<>();
            for (Edge step : path) {
                applied.add(step.sem);
            }
            sequences.add(applied);
        }
    }
    return sequences;
}
/**
 * Gather all the semantics that can be used as state transitions
 *
 * Currently, only semantics from {@code :^instruction} constructors are taken, i.e., those of
 * the pure recursion on the grammar's start non-terminal. If the grammar has no such
 * production, nothing is gathered.
 */
protected void gatherSemantics() {
    String startName = grammar.getStartName();
    AssemblyProduction recursion =
        grammar.getPureRecursion(grammar.getNonTerminal(startName));
    if (recursion != null) {
        for (AssemblyConstructorSemantic sem : grammar.getSemantics(recursion)) {
            semantics.get(startName).add(sem);
        }
    }
}
/**
 * A vertex in a context transition graph
 *
 * Each vertex consists of a context block and a (sub)table name. Equality, hashing, and
 * ordering all consider the context first and then the subtable name.
 */
protected static class Vertex implements Comparable<Vertex> {
    protected final AssemblyPatternBlock context;
    protected final String subtable;

    /**
     * Construct a new vertex with the given block and subtable name
     * @param context the context
     * @param subtable the name
     */
    protected Vertex(AssemblyPatternBlock context, String subtable) {
        this.context = context;
        this.subtable = subtable;
    }

    /**
     * Check if this and another vertex "agree"
     *
     * This doesn't mean they're equal, but that they share a subtable, and the defined bits of
     * their context blocks agree, i.e., the two blocks can be combined.
     * @param that the other vertex
     * @return true iff they share subtables and defined bits
     */
    public boolean matches(Vertex that) {
        return this.subtable.equals(that.subtable) &&
            this.context.combine(that.context) != null;
    }

    @Override
    public int hashCode() {
        return context.hashCode() * 31 + subtable.hashCode();
    }

    @Override
    public String toString() {
        return "ctx:" + context + " at " + subtable;
    }

    @Override
    public boolean equals(Object o) {
        if (!(o instanceof Vertex)) {
            return false;
        }
        Vertex that = (Vertex) o;
        return this.context.equals(that.context) && this.subtable.equals(that.subtable);
    }

    @Override
    public int compareTo(Vertex that) {
        int byContext = this.context.compareTo(that.context);
        if (byContext != 0) {
            return byContext;
        }
        return this.subtable.compareTo(that.subtable);
    }
}
/**
 * A transition in a context transition graph
 *
 * A transition consists of the constructor whose context changes were applied. The operand
 * index is included for reference and debugging. If we ever need to process rules with
 * multiple subconstructors, the operand index explains the subtable name of the destination
 * vertex.
 *
 * Equality, hashing, and ordering consider the semantic, the operand index, the start vertex,
 * and the end vertex, in that order.
 */
protected static class Edge implements GEdge<Vertex>, Comparable<Edge> {
    protected final AssemblyConstructorSemantic sem;
    protected final int op;
    protected final Vertex start;
    protected final Vertex end;

    /**
     * Construct a new transition associated with the given constructor and operand index
     * @param sem the constructor semantic
     * @param op the operand index
     * @param start the vertex the transition leaves
     * @param end the vertex the transition enters
     */
    public Edge(AssemblyConstructorSemantic sem, int op, Vertex start, Vertex end) {
        this.sem = sem;
        this.op = op;
        this.start = start;
        this.end = end;
    }

    @Override
    public int hashCode() {
        int result = sem.hashCode();
        result *= 31;
        result += Integer.hashCode(op);
        result *= 31;
        result += start.hashCode();
        result *= 31;
        result += end.hashCode();
        return result;
    }

    @Override
    public boolean equals(Object o) {
        if (!(o instanceof Edge)) {
            return false;
        }
        Edge that = (Edge) o;
        if (!this.sem.equals(that.sem)) {
            return false;
        }
        if (this.op != that.op) {
            return false;
        }
        if (!this.start.equals(that.start)) {
            return false;
        }
        if (!this.end.equals(that.end)) {
            return false;
        }
        return true;
    }

    @Override
    public int compareTo(Edge that) {
        int result;
        result = this.sem.compareTo(that.sem);
        if (result != 0) {
            return result;
        }
        // Integer.compare rather than subtraction, which can overflow and flip the sign
        result = Integer.compare(this.op, that.op);
        if (result != 0) {
            return result;
        }
        result = this.start.compareTo(that.start);
        if (result != 0) {
            return result;
        }
        return this.end.compareTo(that.end);
    }

    @Override
    public String toString() {
        return start + " --[" + sem + " op " + op + "]-> " + end;
    }

    @Override
    public Vertex getStart() {
        return start;
    }

    @Override
    public Vertex getEnd() {
        return end;
    }
}
/**
 * Compute the transitions leaving the given vertex
 *
 * Every vertex and edge discovered along the way is also added to {@code cachedVertices} and
 * {@code cachedEdges}.
 * @param from the vertex to expand
 * @return the edges leaving {@code from}
 */
protected Set<Edge> computeOutEdges(Vertex from) {
    cachedVertices.add(from);
    Set<Edge> outgoing = new HashSet<>();
    for (AssemblyConstructorSemantic sem : semantics.get(from.subtable)) {
        for (AssemblyResolvedConstructor rc : sem.patterns) {
            // Skip patterns whose context disagrees with this vertex, and constructors
            // without operands, which cannot lead anywhere
            AssemblyPatternBlock outer = from.context.combine(rc.ctx);
            if (outer == null || sem.getConstructor().getNumOperands() == 0) {
                continue;
            }
            // NOTE(review): applyForward presumably applies the constructor's context
            // changes; its result supplies the context of the destination vertex
            AssemblyResolvedConstructor before =
                AssemblyResolution.contextOnly(outer, "For context transition", null);
            AssemblyResolvedConstructor after = sem.applyForward(before);
            AssemblyPatternBlock inner = after.getContext();
            Constructor ct = sem.getConstructor();
            for (int i = 0; i < ct.getNumOperands(); i++) {
                TripleSymbol def = ct.getOperand(i).getDefiningSymbol();
                if (!(def instanceof SubtableSymbol)) {
                    continue;
                }
                String destName = ((SubtableSymbol) def).getName();
                // TODO: Remove this check, eventually
                // NOTE: If pure recursion appears anywhere other than "instruction", this
                // check will prevent it from being handled.
                if (!from.subtable.equals(destName)) {
                    continue;
                }
                Vertex dest = new Vertex(inner, destName);
                cachedVertices.add(dest);
                Edge transition = new Edge(sem, i, from, dest);
                cachedEdges.add(transition);
                outgoing.add(transition);
            }
        }
    }
    return outgoing;
}
/**
 * This operation is not supported.
 *
 * The cached edges could be used to answer it, but such an answer may not be what a path
 * computation algorithm actually requires. Instead, the assumption is that the algorithm only
 * explores the graph in the direction of its edges. The exception makes any violation of that
 * assumption show up quickly.
 * @throws UnsupportedOperationException always
 */
@Override
public Collection<Edge> getInEdges(Vertex v) {
    final String reason = "Does not support backward traversal";
    throw new UnsupportedOperationException(reason);
}
/**
 * Get the edges leaving the given vertex, as recorded in {@code cachedOutEdges}
 *
 * NOTE(review): presumably that map computes missing entries on demand -- confirm where it is
 * declared.
 */
@Override
public Collection<Edge> getOutEdges(Vertex v) {
    Collection<Edge> outgoing = cachedOutEdges.get(v);
    return outgoing;
}
/**
 * Build a new graph holding every vertex and edge cached so far
 * @return a copy of the graph explored so far
 */
@Override
public GDirectedGraph<Vertex, Edge> copy() {
    GDirectedGraph<Vertex, Edge> snapshot = GraphFactory.createDirectedGraph();
    // Vertices go in before edges, as in any explicit graph construction
    cachedVertices.forEach(v -> snapshot.addVertex(v));
    cachedEdges.forEach(e -> snapshot.addEdge(e));
    return snapshot;
}
}

View file

@ -0,0 +1,181 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.sem;
import java.math.BigInteger;
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.program.model.address.Address;
import ghidra.program.model.lang.*;
import ghidra.program.model.listing.ContextChangeException;
import ghidra.program.model.listing.DefaultProgramContext;
/**
 * Computes the default context for a language, and doubles as a pseudo context
 *
 * Having one object play both roles helps keep context consistent between assembly and
 * disassembly.
 */
public class AssemblyDefaultContext implements DisassemblerContext, DefaultProgramContext {
    protected final SleighLanguage lang;
    protected final Address at;
    protected AssemblyPatternBlock curctx; // the pseudo context value
    protected AssemblyPatternBlock defctx; // the computed default
    protected final static DbgTimer dbg = DbgTimer.INACTIVE;

    /**
     * Compute the default context at most addresses for the given language
     * @param lang the language
     */
    public AssemblyDefaultContext(SleighLanguage lang) {
        this(lang, null);
    }

    /**
     * Compute the default context at the given address for the given language
     * @param lang the language
     * @param at the address, or null to accept defaults regardless of their address range
     */
    protected AssemblyDefaultContext(SleighLanguage lang, Address at) {
        this.lang = lang;
        this.at = at;
        Register base = lang.getContextBaseRegister();
        if (base != null) {
            // Start with every bit of the context register unknown
            int numBytes = base.getMinimumByteSize();
            this.defctx = AssemblyPatternBlock.fromLength(numBytes);
            this.curctx = AssemblyPatternBlock.fromLength(numBytes);
        }
        else {
            // No context register: both blocks are empty
            this.defctx = AssemblyPatternBlock.nop();
            this.curctx = AssemblyPatternBlock.nop();
        }
        // NOTE(review): presumably the language calls back into setDefaultValue -- confirm
        lang.applyContextSettings(this);
    }

    /**
     * Set the value of the pseudo context register
     *
     * If the provided value has length less than the register, it will be left aligned, and the
     * remaining bytes will be set to unknown (masked out).
     * @param val the value of the register
     */
    public void setContextRegister(byte[] val) {
        this.curctx = AssemblyPatternBlock.fromBytes(0, val);
    }

    /**
     * Get the default value of the context register
     * @return the value as a pattern block for assembly
     */
    public AssemblyPatternBlock getDefault() {
        return this.defctx;
    }

    /**
     * Compute the default value of the context register at the given address
     *
     * A fresh instance is built for the address; this object is not modified.
     * @param addr the address
     * @return the value as a pattern block for assembly
     */
    public AssemblyPatternBlock getDefaultAt(Address addr) {
        AssemblyDefaultContext atAddr = new AssemblyDefaultContext(lang, addr);
        return atAddr.getDefault();
    }

    // The setters below only log; they do not change the pseudo context.

    @Override
    public void setValue(Register register, BigInteger value) throws ContextChangeException {
        dbg.println("Set " + register + " to " + value);
    }

    @Override
    public void setRegisterValue(RegisterValue value) throws ContextChangeException {
        dbg.println("Set " + value);
    }

    @Override
    public void clearRegister(Register register) throws ContextChangeException {
        dbg.println("Clear " + register);
    }

    // Register lookups are delegated to the language.

    @Override
    public Register getBaseContextRegister() {
        return lang.getContextBaseRegister();
    }

    @Override
    public Register[] getRegisters() {
        return lang.getRegisters();
    }

    @Override
    public Register getRegister(String name) {
        return lang.getRegister(name);
    }

    /**
     * Read the pseudo context value as an unsigned integer
     *
     * Only unsigned reads of the base context register are supported.
     * @return the value, or null if the register is not a processor context register
     * @throws UnsupportedOperationException for a signed read, or for a context register other
     *             than the base register
     */
    @Override
    public BigInteger getValue(Register register, boolean signed) {
        if (signed) {
            throw new UnsupportedOperationException();
        }
        if (!register.isProcessorContext()) {
            return null;
        }
        BigInteger res = curctx.toBigInteger(register.getMinimumByteSize());
        if (!register.isBaseRegister()) {
            throw new UnsupportedOperationException();
        }
        return res;
    }

    @Override
    public RegisterValue getRegisterValue(Register register) {
        BigInteger unsignedValue = getValue(register, false);
        return new RegisterValue(register, unsignedValue);
    }

    @Override
    public boolean hasValue(Register register) {
        return register.isProcessorContext();
    }

    @Override
    public void setFutureRegisterValue(Address address, RegisterValue value) {
        dbg.println("Set " + value + " at " + address);
    }

    @Override
    public void setFutureRegisterValue(Address fromAddr, Address toAddr, RegisterValue value) {
        dbg.println("Set " + value + " for [" + fromAddr + ":" + toAddr + "]");
    }

    /**
     * Fold a default register value into the computed default context
     *
     * Values for registers that are not processor context are ignored. When this object was
     * built for a specific address, values whose range excludes that address are ignored too.
     */
    @Override
    public void setDefaultValue(RegisterValue registerValue, Address start, Address end) {
        if (!registerValue.getRegister().isProcessorContext()) {
            return;
        }
        boolean outsideRange =
            at != null && (start.compareTo(at) > 0 || at.compareTo(end) > 0);
        if (outsideRange) {
            return;
        }
        AssemblyPatternBlock addition = AssemblyPatternBlock.fromRegisterValue(registerValue);
        defctx = defctx.combine(addition);
        dbg.println("Combining " + registerValue);
        dbg.println(" " + defctx);
    }

    @Override
    public RegisterValue getDefaultValue(Register register, Address address) {
        throw new UnsupportedOperationException();
    }
}

View file

@ -0,0 +1,841 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.sem;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Iterator;
import java.util.concurrent.atomic.AtomicLong;
import ghidra.app.plugin.assembler.sleigh.expr.MaskedLong;
import ghidra.app.plugin.assembler.sleigh.expr.SolverException;
import ghidra.app.plugin.assembler.sleigh.util.SleighUtil;
import ghidra.app.plugin.processors.sleigh.ContextOp;
import ghidra.app.plugin.processors.sleigh.expression.ContextField;
import ghidra.app.plugin.processors.sleigh.expression.TokenField;
import ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern;
import ghidra.app.plugin.processors.sleigh.pattern.PatternBlock;
import ghidra.program.model.lang.RegisterValue;
import ghidra.util.NumericUtilities;
import ghidra.util.StringUtilities;
/**
* The analog of {@link PatternBlock}, designed for use by the assembler
*
* It is suitable for the assembler because it is represented byte-by-byte, and it offers a number
* of useful conversions and operations.
*
* @TODO A lot of this could probably be factored into the {@link PatternBlock} class, but it was
* best to experiment in another class altogether to avoid breaking things.
*/
public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
protected static final String SHIFT_STR = "SS:";
protected static final String SHIFT_STR_END = "SS";
private final int offset; // offset relative to the start of the instruction
private final byte[] mask;
private final byte[] vals;
/**
 * Create a pattern block from an offset and the given mask and value arrays
 *
 * The arrays are stored as given; they are not copied.
 * @param offset an offset (0-up, left-to-right) where the pattern actually starts
 * @param mask a mask: only {@code 1} bits are included in the pattern
 * @param vals the value, excluding corresponding {@code 0} bits in the mask
 */
protected AssemblyPatternBlock(int offset, byte[] mask, byte[] vals) {
    // The two arrays run in parallel, so they must have the same length
    assert mask.length == vals.length;
    this.vals = vals;
    this.mask = mask;
    this.offset = offset;
}
/**
 * Create a fully-undefined pattern block at the given offset with the given capacity
 * @param offset an offset (0-up, left-to-right) where the pattern will start
 * @param capacity the number of bytes to allocate for each of the mask and values
 */
protected AssemblyPatternBlock(int offset, int capacity) {
    // Freshly allocated arrays are all zero: no bit is defined
    this(offset, new byte[capacity], new byte[capacity]);
}
/**
 * Get an empty pattern block: offset 0, with no mask or value bytes
 * @return the pattern block
 */
public static AssemblyPatternBlock nop() {
    return new AssemblyPatternBlock(0, new byte[0], new byte[0]);
}
/**
 * Get a pattern block in which every bit of the given values is defined
 *
 * The values array is used directly; it is not copied.
 * @param offset the offset (0-up, left-to-right)
 * @param vals the values
 * @return a pattern block (having a full mask)
 */
public static AssemblyPatternBlock fromBytes(int offset, byte[] vals) {
    byte[] fullMask = new byte[vals.length];
    Arrays.fill(fullMask, (byte) -1);
    return new AssemblyPatternBlock(offset, fullMask, vals);
}
/**
 * Convert the given long to a pattern block (having offset 0 and a full mask)
 * @note The result will be 8 bytes in length
 * @param value the value to convert
 * @return the pattern block containing the big-endian representation of the value
 */
public static AssemblyPatternBlock fromLong(long value) {
    byte[] mask = new byte[8];
    byte[] vals = new byte[8];
    // Fill from the last (least-significant) byte backward. The index must start at
    // length - 1: starting at length reads one element past the end of the arrays.
    for (int i = vals.length - 1; i >= 0; i--) {
        mask[i] = -1;
        vals[i] = (byte) (value & 0xff);
        value >>= 8;
    }
    return new AssemblyPatternBlock(0, mask, vals);
}
/**
 * Convert the given masked long to a pattern block (having offset 0)
 * @note The result will be 8 bytes in length
 * @param ml the masked long, whose values and mask to convert
 * @return the pattern block containing the big-endian representation of the value
 */
public static AssemblyPatternBlock fromMaskedLong(MaskedLong ml) {
    byte[] mask = new byte[8];
    byte[] vals = new byte[8];
    long lmask = ml.getMask();
    long value = ml.longValue();
    // Fill from the last (least-significant) byte backward. The index must start at
    // length - 1: starting at length reads one element past the end of the arrays.
    for (int i = vals.length - 1; i >= 0; i--) {
        mask[i] = (byte) (lmask & 0xff);
        vals[i] = (byte) (value & 0xff);
        lmask >>= 8;
        value >>= 8;
    }
    return new AssemblyPatternBlock(0, mask, vals);
}
/**
 * Convert a string representation to a pattern block
 *
 * The accepted format is the one produced by {@link #toString()}: {@code []} for the empty
 * block at offset 0; otherwise one {@code SS} per byte of offset followed by the masked hex
 * bytes, all separated by colons.
 * @see NumericUtilities#convertHexStringToMaskedValue(AtomicLong, AtomicLong, String, int, int, String)
 * @param str the string to convert
 * @return the resulting pattern block
 */
public static AssemblyPatternBlock fromString(String str) {
    if ("[]".equals(str)) {
        return new AssemblyPatternBlock(0, new byte[0], new byte[0]);
    }
    int pos = 0;
    int offset = 0;
    // Compute the offset, by consuming SS:
    while (str.regionMatches(pos, SHIFT_STR, 0, SHIFT_STR.length())) {
        pos += SHIFT_STR.length();
        offset++;
    }
    if (str.regionMatches(pos, SHIFT_STR_END, 0, SHIFT_STR_END.length())) {
        // A final "SS" without a colon is itself one more byte of offset; toString()
        // writes an empty block at offset n as n colon-separated "SS" tokens
        return new AssemblyPatternBlock(offset + 1, new byte[0], new byte[0]);
    }
    // Compute the length by counting the colons
    int length = 1;
    for (int p = pos; p < str.length();) {
        int newpos = str.indexOf(':', p);
        if (newpos == -1) {
            break;
        }
        length++;
        p = newpos + 1;
    }
    // Convert the bytes
    // TODO: Optimize this some
    byte[] mask = new byte[length];
    byte[] vals = new byte[length];
    AtomicLong msk = new AtomicLong();
    AtomicLong val = new AtomicLong();
    int i = 0;
    // Only the text after the shift prefix holds hex bytes. Splitting the whole string
    // would pass the "SS" tokens to the hex parser and overrun the arrays.
    for (String hex : str.substring(pos).split(":")) {
        NumericUtilities.convertHexStringToMaskedValue(msk, val, hex, 2, 0, null);
        mask[i] = (byte) msk.get();
        vals[i] = (byte) val.get();
        i++;
    }
    return new AssemblyPatternBlock(offset, mask, vals);
}
/**
 * Convert one block of a disjoint pattern into an assembly pattern block
 * @param pat the pattern to convert
 * @param minLen the minimum length, in bytes, of the result (counted from byte 0)
 * @param context true to select the context block, false to select the instruction block
 * @return the converted pattern block, or null if the selected block can never match
 */
public static AssemblyPatternBlock fromPattern(DisjointPattern pat, int minLen,
        boolean context) {
    PatternBlock block = pat.getBlock(context);
    if (block == null || block.alwaysTrue()) {
        // Nothing is constrained: all bits unknown
        return new AssemblyPatternBlock(0, minLen);
    }
    if (block.alwaysFalse()) {
        return null;
    }
    int offset = block.getOffset();
    int nzlen = Math.max(block.getLength(), minLen) - offset;

    // The source block stores its mask and values as 32-bit words. Each vector is staged
    // in a byte buffer (big-endian by default), then as much as fits is copied out.
    int[] words = block.getMaskVector();
    ByteBuffer staging = ByteBuffer.allocate(words.length * 4);
    int datlen = Math.min(nzlen, staging.capacity());
    for (int w = 0; w < words.length; w++) {
        staging.putInt(w * 4, words[w]);
    }
    byte[] mask = new byte[nzlen];
    System.arraycopy(staging.array(), 0, mask, 0, datlen);

    words = block.getValueVector();
    for (int w = 0; w < words.length; w++) {
        staging.putInt(w * 4, words[w]);
    }
    byte[] vals = new byte[nzlen];
    System.arraycopy(staging.array(), 0, vals, 0, datlen);

    return new AssemblyPatternBlock(offset, mask, vals);
}
/**
 * Encode a masked value into a pattern block at the location given by a token field
 * @param tf the token field specifying the location of the value to encode
 * @param val the value to encode
 * @return the pattern block with the encoded value
 */
public static AssemblyPatternBlock fromTokenField(TokenField tf, MaskedLong val) {
    int size = tf.getByteEnd() - tf.getByteStart() + 1;
    MaskedLong encoded = val.mask(tf.maxValue());
    try {
        // Undo the right shift the field applies when it is read
        encoded = encoded.invShiftRightLogical(tf.getShift());
    }
    catch (SolverException e) {
        throw new AssertionError(e);
    }
    if (!tf.isBigEndian()) {
        encoded = encoded.byteSwap(size);
    }
    long maskBits = encoded.getMask();
    long valBits = encoded.longValue();
    byte[] mask = new byte[size];
    byte[] vals = new byte[size];
    // Peel bytes off the low end, filling the arrays from last to first
    int i = size;
    while (--i >= 0) {
        mask[i] = (byte) (maskBits & 0xff);
        vals[i] = (byte) (valBits & 0xff);
        maskBits >>= 8;
        valBits >>= 8;
    }
    return new AssemblyPatternBlock(tf.getByteStart(), mask, vals);
}
/**
 * Encode a masked value into a pattern block at the location given by a context field
 * @param cf the context field specifying the location of the value to encode
 * @param val the value to encode
 * @return the pattern block with the encoded value
 */
public static AssemblyPatternBlock fromContextField(ContextField cf, MaskedLong val) {
    int size = cf.getByteEnd() - cf.getByteStart() + 1;
    MaskedLong encoded = val.mask(cf.maxValue());
    try {
        // Undo the right shift the field applies when it is read
        encoded = encoded.invShiftRightLogical(cf.getShift());
    }
    catch (SolverException e) {
        throw new AssertionError(e);
    }
    // Context does not have variable endianness
    long maskBits = encoded.getMask();
    long valBits = encoded.longValue();
    byte[] mask = new byte[size];
    byte[] vals = new byte[size];
    // Peel bytes off the low end, filling the arrays from last to first
    int i = size;
    while (--i >= 0) {
        mask[i] = (byte) (maskBits & 0xff);
        vals[i] = (byte) (valBits & 0xff);
        maskBits >>= 8;
        valBits >>= 8;
    }
    return new AssemblyPatternBlock(cf.getByteStart(), mask, vals);
}
/**
 * Convert a register value into a pattern block
 *
 * This is used primarily to compute default context register values, and pass them into an
 * assembler.
 * @param rv the register value
 * @return the pattern block
 */
public static AssemblyPatternBlock fromRegisterValue(RegisterValue rv) {
    // The first half of the byte form is taken as the mask, the second half as the value
    byte[] packed = rv.toBytes();
    int half = packed.length / 2;
    byte[] mask = Arrays.copyOfRange(packed, 0, half);
    byte[] vals = Arrays.copyOfRange(packed, half, half + half);
    return new AssemblyPatternBlock(0, mask, vals);
}
/**
 * Allocate a pattern block of the given length, at offset 0, in which no bit is defined
 * @param length the length in bytes
 * @return the block of all unknown bits
 */
public static AssemblyPatternBlock fromLength(int length) {
    return new AssemblyPatternBlock(0, length);
}
/**
 * Duplicate this pattern block, including its mask and value arrays
 * @return the duplicate
 */
public AssemblyPatternBlock copy() {
    byte[] maskCopy = mask.clone();
    byte[] valsCopy = vals.clone();
    return new AssemblyPatternBlock(offset, maskCopy, valsCopy);
}
/**
 * Get the total length of this pattern block: the offset plus the number of mask bytes
 * @return the total length
 */
public int length() {
    return mask.length + offset;
}
/**
 * Shift, i.e., increase the offset of, this pattern block
 *
 * The result shares this block's mask and value arrays. A shift of 0 returns this block.
 * @param amt the amount to shift right
 * @return the shifted pattern block
 */
public AssemblyPatternBlock shift(int amt) {
    return amt == 0 ? this : new AssemblyPatternBlock(offset + amt, mask, vals);
}
/**
 * Truncate (unshift) this pattern block by removing bytes from the left
 *
 * The offset absorbs the truncation first; only what remains is cut from the arrays.
 * @param amt the amount to truncate or shift left
 * @return the truncated pattern block
 */
public AssemblyPatternBlock truncate(int amt) {
    if (amt == 0) {
        return this;
    }
    if (offset >= amt) {
        // The offset alone covers it: the arrays are shared, not copied
        return new AssemblyPatternBlock(offset - amt, mask, vals);
    }
    int toCut = amt - offset;
    if (toCut < mask.length) {
        return new AssemblyPatternBlock(0, Arrays.copyOfRange(mask, toCut, mask.length),
            Arrays.copyOfRange(vals, toCut, vals.length));
    }
    // This circumstance seems unsettling, but I think it's correct
    return AssemblyPatternBlock.nop();
}
/**
 * Combine this pattern block with another given block
 *
 * Two blocks can be combined if their corresponding defined bits agree. When blocks are
 * combined, their bytes are aligned according to their offsets, and the defined bits are
 * taken from either block. If neither block defines a bit (i.e., the mask bit at that
 * position is {@code 0} for both input blocks), then the output has an undefined bit in the
 * corresponding position. If both blocks define the bit, but with opposite values, then the
 * blocks cannot be combined.
 * @param that the other block
 * @return the new combined block, or null if the blocks disagree for any bit
 */
public AssemblyPatternBlock combine(AssemblyPatternBlock that) {
    int newOffset = Math.min(this.offset, that.offset);
    int buflen = Math.max(this.length(), that.length()) - newOffset;
    int thisAt = this.offset - newOffset;
    int thatAt = that.offset - newOffset;

    // Lay down this block first...
    byte[] newMask = new byte[buflen];
    byte[] newVals = new byte[buflen];
    System.arraycopy(this.mask, 0, newMask, thisAt, this.mask.length);
    System.arraycopy(this.vals, 0, newVals, thisAt, this.vals.length);

    // ...then merge the other one in, byte by byte. Each position is visited once, so at
    // the time of the check, the output still holds only this block's bits there.
    for (int i = 0; i < that.mask.length; i++) {
        int pos = thatAt + i;
        int bothDefined = newMask[pos] & that.mask[i];
        if (((newVals[pos] ^ that.vals[i]) & bothDefined) != 0) {
            return null; // a bit defined by both blocks has opposite values
        }
        newMask[pos] |= that.mask[i];
        newVals[pos] |= that.vals[i];
    }
    return new AssemblyPatternBlock(newOffset, newMask, newVals);
}
/**
 * Render one {@code SS} per byte of offset, then the masked hex bytes, separated by colons
 *
 * The empty block at offset 0 renders as {@code []}.
 */
@Override
public String toString() {
    StringBuilder sb = new StringBuilder();
    for (int shifted = 0; shifted < offset; shifted++) {
        sb.append(SHIFT_STR);
    }
    if (mask.length == 0) {
        // With no bytes to follow, drop the trailing colon of the last shift
        return sb.length() == 0 ? "[]" : sb.substring(0, sb.length() - 1);
    }
    String separator = "";
    for (int i = 0; i < mask.length; i++) {
        sb.append(separator);
        sb.append(NumericUtilities.convertMaskedValueToHexString(mask[i], vals[i], 2, false, 0,
            null));
        separator = ":";
    }
    return sb.toString();
}
/**
 * Compute a hash code consistent with {@link #equals(Object)}
 *
 * {@code equals} compares the blocks position by position, reading bytes outside either
 * block as zero. Two blocks can therefore be equal while differing in offset or array
 * length. The hash considers only bytes with a non-zero mask or value, keyed by absolute
 * position, so that all such representations hash alike.
 */
@Override
public int hashCode() {
    int result = 0;
    for (int i = 0; i < mask.length; i++) {
        if (mask[i] == 0 && vals[i] == 0) {
            continue; // indistinguishable from a byte that is not there
        }
        result = 31 * result + (offset + i);
        result = 31 * result + mask[i];
        result = 31 * result + vals[i];
    }
    return result;
}
/**
 * Compare two blocks position by position
 *
 * Bytes outside either block's arrays are read as zero, so blocks with different offsets or
 * lengths may still be equal.
 */
@Override
public boolean equals(Object obj) {
    if (!(obj instanceof AssemblyPatternBlock)) {
        return false;
    }
    AssemblyPatternBlock that = (AssemblyPatternBlock) obj;
    int first = Math.min(this.offset, that.offset);
    int end = Math.max(this.length(), that.length());
    for (int pos = first; pos < end; pos++) {
        int thisIdx = pos - this.offset;
        int thatIdx = pos - that.offset;
        if (checkRead(this.mask, thisIdx, 0) != checkRead(that.mask, thatIdx, 0) ||
            checkRead(this.vals, thisIdx, 0) != checkRead(that.vals, thatIdx, 0)) {
            return false;
        }
    }
    return true;
}
/**
 * Order blocks by offset, then by mask array, then by values array
 *
 * NOTE(review): this compares the representation as stored, whereas {@link #equals(Object)}
 * treats missing bytes as zero, so a non-zero result does not imply the blocks are unequal.
 * @param that the block to compare to
 * @return a negative, zero, or positive value per the {@link Comparable} contract
 */
@Override
public int compareTo(AssemblyPatternBlock that) {
    // Integer.compare cannot overflow, unlike subtracting the two offsets
    int result = Integer.compare(this.offset, that.offset);
    if (result != 0) {
        return result;
    }
    result = SleighUtil.compareArrays(this.mask, that.mask);
    if (result != 0) {
        return result;
    }
    return SleighUtil.compareArrays(this.vals, that.vals);
}
/**
 * Read a byte of an array as an unsigned value, or a default if the index is out of bounds
 *
 * The default is reduced to its low 8 bits, too.
 * @param arr the array to read
 * @param idx the index
 * @param def the default value
 * @return the read value, in the range 0 to 255
 */
protected static int checkRead(byte[] arr, int idx, int def) {
    // When there's an offset, idx can be < 0
    boolean inBounds = idx < arr.length && idx >= 0;
    return 0xff & (inBounds ? arr[idx] : def);
}
/**
 * Encode the given value into a copy of this pattern block as specified by a context operation
 *
 * @note this method is given as a special operation, instead of a conversion factory method,
 * because this is a write operation, not a combine operation. As such, the bits (including
 * undefined bits) replace the bits in the existing pattern block. Were this a conversion
 * method, we would lose the distinction between unknown bits being written, and bits whose
 * values are simply not included in the write.
 *
 * @param cop the context operation specifying the location of the value to encode
 * @param val the value to encode
 * @return the new copy with the encoded value
 */
public AssemblyPatternBlock writeContextOp(ContextOp cop, MaskedLong val) {
    // Do not consider compatibility (like in combine). Just overwrite.
    // This includes overwriting knowns with unknowns.
    // Take out of object early to reduce garbage.
    long vval = val.longValue();
    long vmsk = val.getMask();
    long cmsk = cop.getMask() & 0xffffffffL;
    vval <<= cop.getShift();
    vmsk <<= cop.getShift();
    vval &= cmsk;
    vmsk &= cmsk;
    int idx = cop.getWordIndex();
    // The result must cover both this block and the 4-byte word being written
    int newOffset = Math.min(idx * 4, this.offset);
    int length = Math.max(idx * 4 + 4, this.length());
    byte[] newMask = new byte[length - newOffset];
    byte[] newVals = new byte[length - newOffset];
    System.arraycopy(this.mask, 0, newMask, this.offset - newOffset, this.mask.length);
    System.arraycopy(this.vals, 0, newVals, this.offset - newOffset, this.vals.length);
    // The new arrays begin at newOffset, so the word's absolute byte position must be made
    // relative to it. Without this, a non-zero newOffset writes to the wrong bytes, or past
    // the end of the arrays.
    int base = idx * 4 - newOffset;
    // Walk the word from its last (least-significant) byte to its first
    for (int i = 3; i >= 0; i--) {
        newMask[base + i] &= ~cmsk & 0xff;
        newMask[base + i] |= vmsk & 0xff;
        newVals[base + i] &= ~cmsk & 0xff;
        newVals[base + i] |= vval & 0xff;
        vval >>= 8;
        vmsk >>= 8;
        cmsk >>= 8;
    }
    return new AssemblyPatternBlock(newOffset, newMask, newVals);
}
/**
 * Read the input of a context operation from this pattern block
 *
 * The operation selects a 4-byte word by index. Bytes of that word falling outside this block
 * are read as undefined.
 * @param cop the context operation
 * @return the decoded input, as a masked value
 */
public MaskedLong readContextOp(ContextOp cop) {
    // Pull defined bits from the pattern block that also fall within the "mask" of the op.
    // It bothers me, this int => long conversion. Can a context operation not read more
    // than 32 bits?
    int first = cop.getWordIndex() * 4 - offset; // index of the word's first byte
    long opMask = cop.getMask() & 0xffffffffL;
    long defined = 0;
    for (int i = 0; i < 4; i++) {
        defined = (defined << 8) | checkRead(mask, first + i, 0);
    }
    long resMask = defined & opMask;
    if (resMask == 0) {
        // None of the bits the operation reads are defined
        return MaskedLong.UNKS;
    }
    long resVal = 0;
    for (int i = 0; i < 4; i++) {
        resVal = (resVal << 8) | checkRead(vals, first + i, 0);
    }
    // Shift the two separately to spare an object instantiation.
    int shift = cop.getShift();
    return MaskedLong.fromMaskAndValue(resMask >>> shift, resVal >>> shift);
}
/**
 * Set all bits read by a given context operation to unknown
 *
 * This is used during resolution to remove a context requirement passed upward by a child.
 * When a parent constructor writes the required value to the context register, that
 * requirement need not be passed further upward, since the write satisfies the requirement.
 * @param cop the context operation
 * @return the result
 */
public AssemblyPatternBlock maskOut(ContextOp cop) {
    byte[] newMask = mask.clone();
    byte[] newVals = vals.clone();
    int first = cop.getWordIndex() * 4 - offset; // index of the word's first byte
    int opMask = cop.getMask();
    // Walk the word from its last (least-significant) byte to its first
    for (int i = 3; i >= 0; i--, opMask >>= 8) {
        int index = first + i;
        // feels a little hacky, but if non-existent is assumed zero,
        // this should be fine
        if (index < 0 || index >= newMask.length) {
            continue;
        }
        byte keep = (byte) ~(opMask & 0xff); // Inverse: Getting ready to unset
        newMask[index] &= keep;
        newVals[index] &= keep; // for good measure
    }
    return new AssemblyPatternBlock(offset, newMask, newVals);
}
/**
 * Get the values array
 *
 * The array returned is the one held by this block, not a copy.
 * @return the array
 */
public byte[] getVals() {
    return this.vals;
}
/**
 * Get the mask array
 *
 * The array returned is the one held by this block, not a copy.
 * @return the array
 */
public byte[] getMask() {
    return this.mask;
}
/**
 * Get the offset, i.e., the number of undefined bytes preceding the mask and values arrays
 * @return the offset
 */
public int getOffset() {
    return this.offset;
}
/**
 * Decode {@code len} value bytes in big-endian format, beginning at {@code start}
 *
 * Positions outside the values array are read as zero.
 * @param start the first byte to decode
 * @param len the number of bytes to decode
 * @return the decoded long
 */
public long readValBytes(int start, int len) {
    long decoded = 0;
    for (int i = 0; i < len; i++) {
        decoded = (decoded << 8) | checkRead(vals, start + i - offset, 0);
    }
    return decoded;
}
/**
 * Decode {@code len} mask bytes in big-endian format, beginning at {@code start}
 *
 * Positions outside the mask array are read as zero, i.e., undefined.
 * @param start the first byte to decode
 * @param len the number of bytes to decode
 * @return the decoded long
 */
public long readMaskBytes(int start, int len) {
    long decoded = 0;
    for (int i = 0; i < len; i++) {
        decoded = (decoded << 8) | checkRead(mask, start + i - offset, 0);
    }
    return decoded;
}
/**
 * Decode {@code len} bytes (values and mask) in big-endian format, beginning at {@code start}
 * @param start the first byte to decode
 * @param len the number of bytes to decode
 * @return the decoded masked long
 */
public MaskedLong readBytes(int start, int len) {
    long maskBits = readMaskBytes(start, len);
    long valBits = readValBytes(start, len);
    return MaskedLong.fromMaskAndValue(maskBits, valBits);
}
/**
 * Mark every bit of this block as defined
 *
 * The values array is reused as is, so a previously unknown bit takes whatever value that
 * array holds for it.
 * NOTE(review): presumably that is always {@code 0} -- confirm that no producer leaves value
 * bits set under a clear mask bit.
 * @return the result, at the same offset, having a full mask
 */
public AssemblyPatternBlock fillMask() {
    byte[] fullMask = new byte[mask.length];
    Arrays.fill(fullMask, (byte) 0xff);
    return new AssemblyPatternBlock(offset, fullMask, vals);
}
/**
 * Check if there are any unknown bits
 *
 * A block with a non-zero offset is never considered full.
 * @return true if no unknown bits are present, false otherwise
 */
public boolean isFullMask() {
    if (offset != 0) {
        return false;
    }
    for (int i = 0; i < mask.length; i++) {
        if (mask[i] != -1) { // -1 is 0xff as a byte: all eight bits defined
            return false;
        }
    }
    return true;
}
/**
 * Check if all bits are defined and {@code 0}
 * @return true if the mask is full and every value byte is {@code 0}, false otherwise
 */
public boolean isZero() {
    if (!isFullMask()) {
        return false;
    }
    for (int i = 0; i < vals.length; i++) {
        if (vals[i] != 0) {
            return false;
        }
    }
    return true;
}
/**
 * Decode the values array into a {@link BigInteger} of length {@code n} bytes
 *
 * The array is either truncated or zero-extended <em>on the right</em> to match the requested
 * number of bytes, then decoded in big-endian format as an unsigned value.
 * @param n the number of bytes (left-to-right) to decode
 * @return the decoded big integer
 */
public BigInteger toBigInteger(int n) {
    BigInteger decoded = new BigInteger(1, vals);
    int total = length();
    if (n < total) {
        // Too long: drop bytes from the right
        return decoded.shiftRight((total - n) * 8);
    }
    // Too short (or exact): append zero bytes on the right
    return decoded.shiftLeft((n - total) * 8);
}
/**
 * Counts the total number of known bits in the pattern
 *
 * At a slightly lower level, counts the number of 1-bits in the mask.
 * @return the count
 */
public int getSpecificity() {
    int known = 0;
    for (int i = 0; i < mask.length; i++) {
        known += Integer.bitCount(mask[i] & 0xff);
    }
    return known;
}
/**
 * Count the values this pattern can take: 2 to the power of the number of unknown bits
 *
 * NOTE(review): the result is computed as an {@code int} shift, so it is not meaningful when
 * 31 or more bits are unknown.
 * @return the count
 */
public int countPossibleVals() {
    int unknown = 0;
    for (byte element : mask) {
        // Every mask bit that is not set is an unknown bit
        unknown += Byte.SIZE - Integer.bitCount(element & 0xff);
    }
    return 1 << unknown;
}
/**
 * Get an iterable over all the possible fillings of the value, given a partial mask
 *
 * This is meant to be used idiomatically, as in an enhanced for loop:
 *
 * <pre>
 * {@code
 * for (byte[] val : pattern.possibleVals()) {
 * 	System.out.println(format(val));
 * }
 * }
 * </pre>
 *
 * @note A single byte array is instantiated with the call to {@link Iterable#iterator()}. Each
 * call to {@link Iterator#next()} modifies the one byte array and returns it. As such, if you
 * intend to preserve the value in the array for later use, you <em>must</em> make a copy.
 * @return the iterable.
 */
public Iterable<byte[]> possibleVals() {
    return () -> {
        byte[] cur = new byte[vals.length];
        System.arraycopy(vals, 0, cur, 0, vals.length);
        final int max = countPossibleVals();
        return new Iterator<byte[]>() {
            // Counts the fillings produced so far; its bits become the unknown bits
            int c = 0;

            @Override
            public boolean hasNext() {
                return c < max;
            }

            @Override
            public byte[] next() {
                // Honor the Iterator contract instead of yielding repeats when exhausted
                if (!hasNext()) {
                    throw new java.util.NoSuchElementException();
                }
                // cm selects the bit of c for the next unknown bit, most-significant first
                int cm = max >> 1;
                for (int i = 0; i < mask.length; i++) {
                    byte m = mask[i];
                    for (int j = 0; j < 8; j++) {
                        if ((m & 0x80) == 0) {
                            byte b = (byte) (0x80 >> j);
                            if ((c & cm) == 0) {
                                cur[i] &= ~b;
                            }
                            else {
                                cur[i] |= b;
                            }
                            cm >>= 1;
                        }
                        m <<= 1;
                    }
                }
                c++;
                return cur;
            }
        };
    };
}
}

View file

@ -0,0 +1,261 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.sem;
import java.util.List;
import java.util.Map;
import com.google.common.collect.ImmutableList;
import ghidra.app.plugin.assembler.sleigh.expr.MaskedLong;
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
import ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern;
/**
 * The (often intermediate) result of assembly
 *
 * These may represent a successful construction ({@link AssemblyResolvedConstructor}), a future
 * field ({@link AssemblyResolvedBackfill}), or an error ({@link AssemblyResolvedError}).
 *
 * This class also provides the static factory methods for constructing any of its subclasses.
 */
public abstract class AssemblyResolution implements Comparable<AssemblyResolution> {
    protected final String description;
    protected final ImmutableList<? extends AssemblyResolution> children;

    // The hash is computed on the first call to hashCode() and cached afterwards
    private boolean hashed = false;
    private int hash;

    @Override
    public int hashCode() {
        if (!hashed) {
            hash = computeHash();
            hashed = true;
        }
        return hash;
    }

    /**
     * Compute the hash of this record
     *
     * This is called by {@link #hashCode()} on first use; the result is then cached.
     * @return the hash
     */
    protected abstract int computeHash();

    /**
     * Construct a resolution
     * @param description a textual description used as part of {@link #toString()}
     * @param children for record keeping, any children used in constructing this resolution;
     *            null is stored as an empty list
     */
    AssemblyResolution(String description, ImmutableList<? extends AssemblyResolution> children) {
        this.description = description;
        this.children = children == null ? ImmutableList.of() : children;
    }

    /* ********************************************************************************************
     * Static factory methods
     */

    /**
     * Build the result of successfully resolving a SLEIGH constructor
     *
     * @note This is not used strictly for resolved SLEIGH constructors. It may also be used to
     *       store intermediates, e.g., encoded operands, during constructor resolution.
     * @param ins the instruction pattern block
     * @param ctx the context pattern block
     * @param description a description of the resolution
     * @param sel the children selected to resolve this constructor, or null
     * @return the new resolution
     */
    public static AssemblyResolvedConstructor resolved(AssemblyPatternBlock ins,
            AssemblyPatternBlock ctx, String description,
            ImmutableList<? extends AssemblyResolution> sel) {
        return new AssemblyResolvedConstructor(description, sel, ins, ctx, null, null);
    }

    /**
     * Build an instruction-only successful resolution result
     * @see #resolved(AssemblyPatternBlock, AssemblyPatternBlock, String, ImmutableList)
     * @param ins the instruction pattern block
     * @param description a description of the resolution
     * @param children the children selected to resolve this constructor, or null
     * @return the new resolution
     */
    public static AssemblyResolvedConstructor instrOnly(AssemblyPatternBlock ins,
            String description, ImmutableList<AssemblyResolution> children) {
        return resolved(ins, AssemblyPatternBlock.nop(), description, children);
    }

    /**
     * Build a context-only successful resolution result
     * @see #resolved(AssemblyPatternBlock, AssemblyPatternBlock, String, ImmutableList)
     * @param ctx the context pattern block
     * @param description a description of the resolution
     * @param children the children selected to resolve this constructor, or null
     * @return the new resolution
     */
    public static AssemblyResolvedConstructor contextOnly(AssemblyPatternBlock ctx,
            String description, ImmutableList<AssemblyResolution> children) {
        return resolved(AssemblyPatternBlock.nop(), ctx, description, children);
    }

    /**
     * Build a successful resolution result from a SLEIGH constructor's patterns
     *
     * The instruction block and the context block are both derived from the same pattern.
     * @param pat the constructor's pattern
     * @param minLen the minimum length handed to {@code AssemblyPatternBlock.fromPattern} when
     *            building the instruction block (the context block is built with 0)
     * @param description a description of the resolution
     * @return the new resolution
     */
    public static AssemblyResolvedConstructor fromPattern(DisjointPattern pat, int minLen,
            String description) {
        AssemblyPatternBlock ins = AssemblyPatternBlock.fromPattern(pat, minLen, false);
        AssemblyPatternBlock ctx = AssemblyPatternBlock.fromPattern(pat, 0, true);
        return resolved(ins, ctx, description, null);
    }

    /**
     * Build a backfill record to attach to a successful resolution result
     *
     * The new record starts at offset 0.
     * @param exp the expression depending on a missing symbol
     * @param goal the desired value of the expression
     * @param res the resolution result for child constructors
     * @param inslen the length of instruction portion expected in the future solution
     * @param description a description of the backfill record
     * @return the new record
     */
    public static AssemblyResolvedBackfill backfill(PatternExpression exp, MaskedLong goal,
            Map<Integer, Object> res, int inslen, String description) {
        return new AssemblyResolvedBackfill(description, exp, goal, res, inslen, 0);
    }

    /**
     * Obtain a new "blank" resolved SLEIGH constructor record
     * @param description a description of the resolution
     * @param sel any children that will be involved in populating this record
     * @return the new resolution
     */
    public static AssemblyResolvedConstructor nop(String description,
            ImmutableList<? extends AssemblyResolution> sel) {
        return resolved(AssemblyPatternBlock.nop(), AssemblyPatternBlock.nop(), description, sel);
    }

    /**
     * Build an error resolution record
     * @param error a description of the error
     * @param description a description of what the resolver was doing when the error occurred
     * @param children any children involved in generating the error
     * @return the new resolution
     */
    public static AssemblyResolvedError error(String error, String description,
            ImmutableList<? extends AssemblyResolution> children) {
        return new AssemblyResolvedError(description, children, error);
    }

    /**
     * Build an error resolution record, based on an intermediate SLEIGH constructor record
     *
     * The description and children of the given record are carried over to the error.
     * @param error a description of the error
     * @param res the constructor record that was being populated when the error occurred
     * @return the new error resolution
     */
    public static AssemblyResolution error(String error, AssemblyResolvedConstructor res) {
        return error(error, res.description, res.children);
    }

    /* ********************************************************************************************
     * Abstract methods
     */

    /**
     * Check if this record describes an error
     * @return true if the record is an error
     */
    public abstract boolean isError();

    /**
     * Check if this record describes a backfill
     * @return true if the record is a backfill
     */
    public abstract boolean isBackfill();

    /**
     * Display the resolution result in one line (omitting child details)
     * @return the display description
     */
    protected abstract String lineToString();

    /* ********************************************************************************************
     * Misc
     */

    /**
     * Get the child portion of {@link #toString()}
     *
     * If a subclass has another, possibly additional, notion of children that it would like to
     * include in {@link #toString()}, it must override this method.
     * @see #hasChildren()
     * @param indent the current indentation
     * @return the indented description for each child on its own line, without a trailing
     *         newline; the empty string if there are no children
     */
    protected String childrenToString(String indent) {
        if (children.isEmpty()) {
            // Nothing to strip a trailing newline from
            return "";
        }
        StringBuilder sb = new StringBuilder();
        for (AssemblyResolution child : children) {
            sb.append(child.toString(indent)).append('\n');
        }
        // Drop the newline following the last child
        sb.setLength(sb.length() - 1);
        return sb.toString();
    }

    /**
     * Used only by parents: get a multi-line description of this record, indented
     * @param indent the current indentation
     * @return the indented description
     */
    public String toString(String indent) {
        StringBuilder sb = new StringBuilder();
        sb.append(indent);
        sb.append(lineToString());
        if (hasChildren()) {
            sb.append(":\n");
            String newIndent = indent + " ";
            sb.append(childrenToString(newIndent));
        }
        return sb.toString();
    }

    /**
     * Describe this record including indented children, grandchildren, etc., each on its own line
     */
    @Override
    public String toString() {
        return toString("");
    }

    /**
     * Order records by comparing their full {@link #toString()} descriptions
     */
    @Override
    public int compareTo(AssemblyResolution that) {
        return this.toString().compareTo(that.toString()); // LAZY
    }

    /**
     * Check if this record has children
     *
     * If a subclass has another, possibly additional, notion of children that it would like to
     * include in {@link #toString()}, it must override this method to return true when such
     * children are present.
     * @see #childrenToString(String)
     * @return true if this record has children
     */
    public boolean hasChildren() {
        // The constructor never leaves children null
        return !children.isEmpty();
    }
}

View file

@ -0,0 +1,91 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.sem;
import java.util.*;
import org.apache.commons.collections4.set.AbstractSetDecorator;
/**
 * A set of possible assembly resolutions for a single SLEIGH constructor
 *
 * Since the assembler works from the leaves up, it is unclear in what context a given token
 * appears. Thus, every possible encoding is collected and passed upward. As resolution
 * continues, many of the possible encodings are pruned out. When the resolver reaches the root,
 * we end up with every possible encoding (less some prefixes) of an instruction. This object
 * stores the possible encodings, including error records describing the pruned intermediate
 * results.
 */
public class AssemblyResolutionResults extends AbstractSetDecorator<AssemblyResolution> {
    protected final Set<AssemblyResolution> resolutions;

    /**
     * Construct a new (mutable) empty set of resolutions
     *
     * The backing set is a {@link LinkedHashSet}.
     */
    public AssemblyResolutionResults() {
        this(new LinkedHashSet<>());
    }

    /**
     * Wrap the given set directly, without copying it
     * @param resolutions the backing set
     */
    private AssemblyResolutionResults(Set<AssemblyResolution> resolutions) {
        this.resolutions = resolutions;
    }

    /**
     * Construct an immutable single-entry set consisting of the one given resolution
     *
     * @param rc the single resolution entry
     * @return the new resolution set
     */
    public static AssemblyResolutionResults singleton(AssemblyResolvedConstructor rc) {
        Set<AssemblyResolution> only = Collections.singleton(rc);
        return new AssemblyResolutionResults(only);
    }

    @Override
    public boolean add(AssemblyResolution ar) {
        // Write to the backing set; decorated() only exposes a read-only view
        return this.resolutions.add(ar);
    }

    /**
     * A synonym for {@link #addAll(Collection)} that accepts only another resolution set
     *
     * @param that the other set
     */
    public void absorb(AssemblyResolutionResults that) {
        resolutions.addAll(that.resolutions);
    }

    @Override
    public boolean addAll(Collection<? extends AssemblyResolution> c) {
        // Write to the backing set; decorated() only exposes a read-only view
        return resolutions.addAll(c);
    }

    /**
     * Get an unmodifiable reference to this set
     *
     * @return the set
     */
    public Set<AssemblyResolution> getResolutions() {
        return Collections.unmodifiableSet(this.resolutions);
    }

    /**
     * Supply the set used by the decorator's inherited methods: the unmodifiable view
     */
    @Override
    protected Set<AssemblyResolution> decorated() {
        return getResolutions();
    }

    /**
     * Remove the given resolution from the backing set
     *
     * @param ar the resolution to remove
     * @return true if the set contained the resolution
     */
    public boolean remove(AssemblyResolution ar) {
        return resolutions.remove(ar);
    }
}

View file

@ -0,0 +1,142 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.sem;
import java.util.Map;
import ghidra.app.plugin.assembler.sleigh.expr.*;
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
/**
* A {@link AssemblyResolution} indicating the need to solve an expression in the future
*
* Such records are collected within a {@link AssemblyResolvedConstructor} and then solved just
* before the final result(s) are assembled. This is typically required by instructions that refer
* to the {@code inst_next} symbol.
*
* @note These are used internally. The user ought never to see these from the assembly API.
*/
public class AssemblyResolvedBackfill extends AssemblyResolution {
protected final PatternExpression exp;
protected final MaskedLong goal;
protected final Map<Integer, Object> res;
protected final int inslen;
protected final int offset;
@Override
protected int computeHash() {
int result = 0;
result += exp.hashCode();
result *= 31;
result += goal.hashCode();
result *= 31;
result += inslen;
result *= 31;
result += offset;
return result;
}
/**
* @see {@link AssemblyResolution#backfill(PatternExpression, MaskedLong, Map, int, String)}
*/
AssemblyResolvedBackfill(String description, PatternExpression exp, MaskedLong goal,
Map<Integer, Object> res, int inslen, int offset) {
super(description, null);
this.exp = exp;
this.goal = goal;
this.res = res;
this.inslen = inslen;
this.offset = offset;
}
/**
* Duplicate this record
* @return the duplicate
*/
AssemblyResolvedBackfill copy() {
AssemblyResolvedBackfill cp =
new AssemblyResolvedBackfill(description, exp, goal, res, inslen, offset);
return cp;
}
/**
* Get the expected length of the instruction portion of the future encoding
*
* This is used to make sure that operands following a to-be-determined encoding are placed
* properly. Even though the actual encoding cannot yet be determined, its length can.
* @return the total expected length (including the offset)
*/
public int getInstructionLength() {
return offset + inslen;
}
@Override
public boolean isError() {
return false;
}
@Override
public boolean isBackfill() {
return true;
}
@Override
protected String lineToString() {
return "Backfill (len:" + inslen + ",off:" + offset + ") " + goal + " := " + exp + " (" +
description + ")";
}
/**
* Shift the back-fill record's "instruction" pattern to the right.
* @param amt the number of bytes to shift the result when solved.
* @return the result
*/
public AssemblyResolvedBackfill shift(int amt) {
return new AssemblyResolvedBackfill(description, exp, goal, res, inslen, offset + amt);
}
/**
* Attempt (again) to solve the expression that generated this backfill record
*
* This will attempt to solve the same expression and goal again, using the same parameters as
* were given to the original attempt, except with additional defined symbols. Typically, the
* symbol that required backfill is {@code inst_next}. This method will not throw
* {@link NeedsBackfillException}, since that would imply the missing symbol(s) from the
* original attempt are still missing. Instead, the method returns an instance of
* {@link AssemblyResolvedError}.
* @param solver a solver, usually the same as the one from the original attempt.
* @param vals the defined symbols, usually the same, but with the missing symbol(s).
* @return the solution result
*/
public AssemblyResolution solve(RecursiveDescentSolver solver, Map<String, Long> vals,
AssemblyResolvedConstructor cur) {
try {
AssemblyResolution ar =
solver.solve(exp, goal, vals, res, cur.truncate(offset), description);
if (ar.isError()) {
return ar;
}
AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) ar;
return rc.shift(offset);
}
catch (NeedsBackfillException e) {
return AssemblyResolution.error("Solution still requires backfill", description, null);
}
catch (UnsupportedOperationException e) {
return AssemblyResolution.error("Unsupported: " + e.getMessage(), description, null);
}
}
}

View file

@ -0,0 +1,668 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.sem;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.collections4.IteratorUtils;
import org.apache.commons.collections4.Predicate;
import org.apache.commons.lang3.StringUtils;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import ghidra.app.plugin.assembler.AssemblySelector;
import ghidra.app.plugin.assembler.sleigh.expr.MaskedLong;
import ghidra.app.plugin.assembler.sleigh.expr.RecursiveDescentSolver;
import ghidra.app.plugin.processors.sleigh.ConstructState;
import ghidra.app.plugin.processors.sleigh.ContextOp;
import ghidra.util.StringUtilities;
/**
* A {@link AssemblyResolution} indicating successful application of a constructor
*
* This is almost analogous to {@link DisjointPattern}, in that it joins an instruction
* {@link AssemblyPatternBlock} with a corresponding context {@link AssemblyPatternBlock}. However,
* this object is mutable, and it collects backfill records, as well as forbidden patterns.
*
* When the applied constructor is from the "instruction" subtable, this represents a fully-
* constructed instruction with required context. All backfill records ought to be resolved and
* applied before the final result is given to the user, i.e., passed into the
* {@link AssemblySelector}. If at any time during the resolution or backfill process, the result
* becomes confined to one of the forbidden patterns, it must be dropped, since the encoding will
* actually invoke a more specific SLEIGH constructor.
*/
public class AssemblyResolvedConstructor extends AssemblyResolution {
protected static final String INS = "ins:";
protected static final String CTX = "ctx:";
protected static final String SEP = ",";
protected final AssemblyPatternBlock ins;
protected final AssemblyPatternBlock ctx;
protected final ImmutableSet<AssemblyResolvedBackfill> backfills;
protected final ImmutableSet<AssemblyResolvedConstructor> forbids;
/**
 * Hash the instruction block, context block, backfills, and forbidden patterns, in that order
 */
@Override
protected int computeHash() {
    int h = ins.hashCode();
    h = 31 * h + ctx.hashCode();
    h = 31 * h + backfills.hashCode();
    h = 31 * h + forbids.hashCode();
    return h;
}
/**
 * Compare the instruction block, context block, backfills, and forbidden patterns
 *
 * The description and children are not part of the comparison.
 */
@Override
public boolean equals(Object obj) {
    if (!(obj instanceof AssemblyResolvedConstructor)) {
        return false;
    }
    AssemblyResolvedConstructor other = (AssemblyResolvedConstructor) obj;
    return this.ins.equals(other.ins) //
        && this.ctx.equals(other.ctx) //
        && this.backfills.equals(other.backfills) //
        && this.forbids.equals(other.forbids);
}
/**
 * Construct a resolved constructor record
 *
 * A null set of backfills or forbidden patterns is stored as an empty set.
 * @see AssemblyResolution#resolved(AssemblyPatternBlock, AssemblyPatternBlock, String, ImmutableList)
 */
AssemblyResolvedConstructor(String description,
        ImmutableList<? extends AssemblyResolution> children, AssemblyPatternBlock ins,
        AssemblyPatternBlock ctx, ImmutableSet<AssemblyResolvedBackfill> backfills,
        ImmutableSet<AssemblyResolvedConstructor> forbids) {
    super(description, children);
    this.ins = ins;
    this.ctx = ctx;
    if (backfills != null) {
        this.backfills = backfills;
    }
    else {
        this.backfills = ImmutableSet.of();
    }
    if (forbids != null) {
        this.forbids = forbids;
    }
    else {
        this.forbids = ImmutableSet.of();
    }
}
/**
 * Build a new successful SLEIGH constructor resolution from a string representation
 *
 * This was used primarily in testing, to specify expected results. Either part may be omitted;
 * an omitted part becomes a "nop" pattern block.
 * @param str the string representation: "{@code ins:[pattern],ctx:[pattern]}"
 * @see StringUtilities#convertHexStringToMaskedValue(AtomicLong, AtomicLong, String, int, int, String)
 * @param description a description of the resolution
 * @param children any children involved in the resolution
 * @return the decoded resolution
 * @throws IllegalArgumentException if text remains after the recognized parts are consumed
 */
public static AssemblyResolvedConstructor fromString(String str, String description,
        ImmutableList<AssemblyResolution> children) {
    String rest = str;

    // Optional instruction part, running up to the separator or the end of the string
    AssemblyPatternBlock insBlock = null;
    if (rest.startsWith(INS)) {
        int sepAt = rest.indexOf(SEP);
        int end = sepAt == -1 ? rest.length() : sepAt;
        insBlock = AssemblyPatternBlock.fromString(rest.substring(INS.length(), end));
        rest = rest.substring(end);
        if (rest.startsWith(SEP)) {
            rest = rest.substring(1);
        }
    }

    // Optional context part, which consumes everything that is left
    AssemblyPatternBlock ctxBlock = null;
    if (rest.startsWith(CTX)) {
        ctxBlock = AssemblyPatternBlock.fromString(rest.substring(CTX.length()));
        rest = "";
    }

    if (!rest.isEmpty()) {
        throw new IllegalArgumentException(rest);
    }

    AssemblyPatternBlock insOrNop = insBlock == null ? AssemblyPatternBlock.nop() : insBlock;
    AssemblyPatternBlock ctxOrNop = ctxBlock == null ? AssemblyPatternBlock.nop() : ctxBlock;
    return AssemblyResolution.resolved(insOrNop, ctxOrNop, description, children);
}
/**
 * Shift the resolved instruction pattern to the right
 *
 * This also shifts any backfill and forbidden pattern records. The context block is carried
 * over as is.
 * @param amt the number of bytes to shift.
 * @return the result, or this same object if the amount is 0
 */
public AssemblyResolvedConstructor shift(int amt) {
    if (amt == 0) {
        return this;
    }
    AssemblyPatternBlock shiftedIns = this.ins.shift(amt);

    // The attached records must move along with the instruction block
    Set<AssemblyResolvedBackfill> shiftedBackfills = new HashSet<>();
    this.backfills.forEach(bf -> shiftedBackfills.add(bf.shift(amt)));

    Set<AssemblyResolvedConstructor> shiftedForbids = new HashSet<>();
    this.forbids.forEach(f -> shiftedForbids.add(f.shift(amt)));

    return new AssemblyResolvedConstructor(description, children, shiftedIns, ctx,
        ImmutableSet.copyOf(shiftedBackfills), ImmutableSet.copyOf(shiftedForbids));
}
/**
 * Truncate (unshift) the resolved instruction pattern from the left
 *
 * @note This drops all children, backfill and forbidden pattern records, since this method is
 *       typically used to read token fields rather than passed around for resolution.
 * @param amt the number of bytes to remove from the left
 * @return the result, or this same object if the amount is 0
 */
public AssemblyResolvedConstructor truncate(int amt) {
    if (amt != 0) {
        AssemblyPatternBlock truncatedIns = this.ins.truncate(amt);
        return new AssemblyResolvedConstructor("Truncated: " + description, null,
            truncatedIns, ctx, null, null);
    }
    return this;
}
/**
 * Check if the current encoding is forbidden by one of the attached patterns
 *
 * Each forbidden pattern is combined with this encoding. If they cannot be combined, the
 * forbidden pattern is dropped from the result. If combining adds nothing to this encoding's
 * instruction and context blocks, i.e., this encoding's known bits already cover those of the
 * forbidden pattern, then the encoding is forbidden.
 * @return an error record if the encoding is forbidden; otherwise, a copy of this resolution
 *         listing only the forbidden patterns that could still be combined with it
 */
public AssemblyResolution checkNotForbidden() {
    Set<AssemblyResolvedConstructor> stillRelevant = new HashSet<>();
    for (AssemblyResolvedConstructor forbidden : this.forbids) {
        AssemblyResolvedConstructor merged = this.combine(forbidden);
        if (merged != null) {
            stillRelevant.add(forbidden);
            if (merged.bitsEqual(this)) {
                // The result would be disassembled by a more-specific constructor.
                return AssemblyResolution.error("The result is forbidden by " + forbidden,
                    this);
            }
        }
    }
    return new AssemblyResolvedConstructor(description, children, ins, ctx, backfills,
        ImmutableSet.copyOf(stillRelevant));
}
/**
 * Check if this and another resolution have equal encodings
 *
 * This is like {@link #equals(Object)}, but it compares only the instruction and context
 * blocks, ignoring backfill records and forbidden patterns.
 * @param that the other resolution
 * @return true if both have equal encodings
 */
protected boolean bitsEqual(AssemblyResolvedConstructor that) {
    if (!this.ins.equals(that.ins)) {
        return false;
    }
    return this.ctx.equals(that.ctx);
}
/**
 * Combine the encodings and backfills of the given resolution into this one
 *
 * This combines corresponding pattern blocks (assuming they agree), collects backfill
 * records, and collects forbidden patterns. It delegates to
 * {@link #combineLessBackfill(AssemblyResolvedConstructor, AssemblyResolvedBackfill)} with no
 * backfill record to remove.
 * @param that the other resolution
 * @return the result if successful, or null
 */
public AssemblyResolvedConstructor combine(AssemblyResolvedConstructor that) {
// Not really a backfill, but I would like to re-use code
return combineLessBackfill(that, null);
}
/**
 * Combine a backfill result
 *
 * When a backfill is successful, the result should be combined with the owning resolution. In
 * addition, for bookkeeping's sake, the resolved record should be removed from the list of
 * backfills. The description and children of this resolution are kept.
 * @param that the result from backfilling
 * @param bf the resolved backfilled record, or null if there is none to remove
 * @return the result if successful, or null if either pair of pattern blocks fails to combine
 */
protected AssemblyResolvedConstructor combineLessBackfill(AssemblyResolvedConstructor that,
        AssemblyResolvedBackfill bf) {
    AssemblyPatternBlock mergedIns = this.ins.combine(that.ins);
    if (mergedIns == null) {
        return null;
    }
    AssemblyPatternBlock mergedCtx = this.ctx.combine(that.ctx);
    if (mergedCtx == null) {
        return null;
    }

    // Union of the forbidden patterns of both sides
    Set<AssemblyResolvedConstructor> mergedForbids = new HashSet<>(this.forbids);
    mergedForbids.addAll(that.forbids);

    // Union of the pending backfills of both sides, less the one just resolved
    Set<AssemblyResolvedBackfill> mergedBackfills = new HashSet<>(this.backfills);
    mergedBackfills.addAll(that.backfills);
    if (bf != null) {
        mergedBackfills.remove(bf);
    }

    return new AssemblyResolvedConstructor(description, children, mergedIns, mergedCtx,
        ImmutableSet.copyOf(mergedBackfills), ImmutableSet.copyOf(mergedForbids));
}
/**
 * Combine the given backfill record into this resolution
 *
 * The record is added to the pending backfills; everything else is carried over unchanged.
 * @param bf the backfill record
 * @return the result
 */
public AssemblyResolvedConstructor combine(AssemblyResolvedBackfill bf) {
    Set<AssemblyResolvedBackfill> pending = new HashSet<>(this.backfills);
    pending.add(bf);
    ImmutableSet<AssemblyResolvedBackfill> allBackfills = ImmutableSet.copyOf(pending);
    return new AssemblyResolvedConstructor(description, children, ins, ctx, allBackfills,
        forbids);
}
/**
 * Create a new resolution from this one with the given forbidden patterns recorded
 *
 * The forbidden patterns already recorded on this resolution are kept; the given ones are
 * added to them.
 * @param more the additional forbidden patterns to record
 * @return the new resolution
 */
public AssemblyResolvedConstructor withForbids(Set<AssemblyResolvedConstructor> more) {
    Set<AssemblyResolvedConstructor> combForbids = new HashSet<>(this.forbids);
    combForbids.addAll(more);
    // Record the union, not just the additions, so existing forbidden patterns are not lost
    return new AssemblyResolvedConstructor(description, children, ins, ctx, backfills,
        ImmutableSet.copyOf(combForbids));
}
/**
 * Create a copy of this resolution with a new description
 *
 * The children, pattern blocks, backfills, and forbidden patterns are carried over unchanged.
 * @param desc the new description
 * @return the copy
 */
public AssemblyResolvedConstructor withDescription(String desc) {
return new AssemblyResolvedConstructor(desc, children, ins, ctx, backfills, forbids);
}
/**
 * Encode the given value into the context block as specified by an operation
 *
 * This is the forward (as in disassembly) direction of applying context operations. The
 * pattern expression is evaluated, and the result is written as specified. Only the context
 * block of the result differs from this resolution.
 * @param cop the context operation specifying the location of the value to encode
 * @param val the masked value to encode
 * @return the result
 */
public AssemblyResolvedConstructor writeContextOp(ContextOp cop, MaskedLong val) {
    return new AssemblyResolvedConstructor(description, children, ins,
        this.ctx.writeContextOp(cop, val), backfills, forbids);
}
/**
 * Decode the value from the context located where the given context operation would write
 *
 * This is used to read the value from the left-hand-side "variable" of a context operation.
 * It seems backward, because it is. When assembling, the right-hand-side expression of a
 * context operation must be solved. This means the "variable" is known from the context(s) of
 * the resolved children constructors. The value read is then used as the goal in solving the
 * expression.
 * @param cop the context operation whose "variable" to read.
 * @return the masked result, as read from this resolution's context block.
 */
public MaskedLong readContextOp(ContextOp cop) {
return ctx.readContextOp(cop);
}
/**
 * Duplicate this resolution, with additional description text appended
 *
 * The instruction and context blocks are copied; the children, backfills, and forbidden
 * patterns are shared with this resolution.
 * @param append the text to append
 * @return the duplicate
 * @note An additional separator {@code ": "} is inserted
 */
public AssemblyResolvedConstructor copyAppendDescription(String append) {
    String extended = description + ": " + append;
    return new AssemblyResolvedConstructor(extended, children, ins.copy(), ctx.copy(),
        backfills, forbids);
}
/**
 * Set all bits read by a given context operation to unknown
 *
 * Only the context block of the result differs from this resolution.
 * @param cop the context operation
 * @return the result
 * @see AssemblyPatternBlock#maskOut(ContextOp)
 */
public AssemblyResolvedConstructor maskOut(ContextOp cop) {
    return new AssemblyResolvedConstructor(description, children, ins, this.ctx.maskOut(cop),
        backfills, forbids);
}
/**
 * Apply as many backfill records as possible
 *
 * Each backfill record is resolved in turn. If the record cannot be resolved, it remains
 * listed. If the record can be resolved, but it conflicts, an error record is returned. Each
 * time a record is resolved and combined successfully, all remaining records are tried again.
 * The result is the combined resolved backfills, with only the unresolved backfill records
 * listed.
 * @param solver the solver, usually the same as the original attempt to solve.
 * @param vals the values.
 * @return the result, or an error.
 */
public AssemblyResolution backfill(RecursiveDescentSolver solver, Map<String, Long> vals) {
    if (!hasBackfills()) {
        return this;
    }

    AssemblyResolvedConstructor res = this;
    boolean progressed = true;
    while (progressed) {
        progressed = false;
        for (AssemblyResolvedBackfill bf : res.backfills) {
            AssemblyResolution ar = bf.solve(solver, vals, this);
            if (ar.isError()) {
                // Still unsolvable; leave the record listed and try the next one
                continue;
            }
            AssemblyResolvedConstructor check =
                res.combineLessBackfill((AssemblyResolvedConstructor) ar, bf);
            if (check == null) {
                return AssemblyResolution.error("Conflict: Backfill " + bf.description, res);
            }
            res = check;
            // The set of pending records changed, so start over on the new set
            progressed = true;
            break;
        }
    }
    return res;
}
/**
 * Describe this record in one line: the constructor tree, the instruction and context blocks,
 * and the description
 */
@Override
public String lineToString() {
    String tree = dumpConstructorTree();
    String blocks = INS + ins + SEP + CTX + ctx;
    return tree + ":" + blocks + " (" + description + ")";
}
/**
 * Check if this resolution has pending backfills to apply
 * @return true if at least one backfill record is attached
 */
public boolean hasBackfills() {
return !backfills.isEmpty();
}
/**
 * Check if this resolution includes forbidden patterns
 * @return true if at least one forbidden pattern is attached
 */
private boolean hasForbids() {
return !forbids.isEmpty();
}
/**
 * Solve and apply context changes in reverse to forbidden patterns
 *
 * To avoid circumstances where a context change during disassembly would invoke a more
 * specific subconstructor than was used to assemble the instruction, we must solve the
 * forbidden patterns in tandem with the overall resolution. If the context of any forbidden
 * pattern cannot be solved, we simply drop the forbidden pattern -- the lack of a solution
 * implies there is no way the context change could produce the forbidden pattern.
 * @param sem the constructor whose context changes to solve
 * @param vals any defined symbols
 * @param opvals the operand values
 * @return the result, or this same object if there are no forbidden patterns
 * @see AssemblyConstructorSemantic#solveContextChanges(AssemblyResolvedConstructor, Map, Map)
 */
public AssemblyResolvedConstructor solveContextChangesForForbids(
        AssemblyConstructorSemantic sem, Map<String, Long> vals, Map<Integer, Object> opvals) {
    if (forbids.isEmpty()) {
        return this;
    }
    Set<AssemblyResolvedConstructor> solvedForbids = new HashSet<>();
    for (AssemblyResolvedConstructor forbidden : this.forbids) {
        AssemblyResolution solved = sem.solveContextChanges(forbidden, vals, opvals);
        // Anything other than a resolved constructor can't be solved, so it is dropped
        if (solved instanceof AssemblyResolvedConstructor) {
            solvedForbids.add((AssemblyResolvedConstructor) solved);
        }
    }
    return new AssemblyResolvedConstructor(description, children, ins, ctx, backfills,
        ImmutableSet.copyOf(solvedForbids));
}
/**
 * Get the length of the instruction encoding
 *
 * This is used to ensure each operand is encoded at the correct offset. It is the larger of
 * the instruction block's length and the longest length expected by any pending backfill.
 * @return the length of the instruction block
 *
 * @note this DOES include the offset
 * @note this DOES include pending backfills
 */
public int getInstructionLength() {
    int longest = ins.length();
    for (AssemblyResolvedBackfill bf : backfills) {
        int expected = bf.getInstructionLength();
        if (expected > longest) {
            longest = expected;
        }
    }
    return longest;
}
/**
 * Get the length of the instruction encoding, excluding trailing undefined bytes
 *
 * Trailing bytes whose mask is entirely zero are not counted.
 * @return the length of the defined bytes in the instruction block
 *
 * @note this DOES include the offset
 * @note this DOES NOT include pending backfills
 */
public int getDefinedInstructionLength() {
    byte[] imsk = ins.getMask();
    // Walk back from the end to the last byte having any mask bit set
    int last = imsk.length - 1;
    while (last >= 0 && imsk[last] == 0) {
        last--;
    }
    return ins.getOffset() + last + 1;
}
/**
 * Get the instruction block
 * @return the instruction pattern block held by this resolution (not a copy)
 */
public AssemblyPatternBlock getInstruction() {
return ins;
}
/**
 * Access the context pattern block of this resolution
 * @return the context block
 */
public AssemblyPatternBlock getContext() {
    return this.ctx;
}
/**
 * Read a run of bytes out of the instruction block
 * @param start the first byte to decode
 * @param len the number of bytes to decode
 * @return the read masked value
 * @see AssemblyPatternBlock#readBytes(int, int)
 */
public MaskedLong readInstruction(int start, int len) {
    final AssemblyPatternBlock block = ins;
    return block.readBytes(start, len);
}
/**
 * Read a run of bytes out of the context block
 * @param start the first byte to decode
 * @param len the number of bytes to decode
 * @return the read masked value
 * @see AssemblyPatternBlock#readBytes(int, int)
 */
public MaskedLong readContext(int start, int len) {
    final AssemblyPatternBlock block = ctx;
    return block.readBytes(start, len);
}
/**
 * A resolved constructor is never an error
 * @return always {@code false}
 */
@Override
public boolean isError() {
    return false;
}
/**
 * A resolved constructor is never itself a backfill record
 * @return always {@code false}
 */
@Override
public boolean isBackfill() {
    return false;
}
/**
 * Check whether there is anything to print beneath this resolution
 *
 * Pending backfills and forbidden patterns count as children, in addition to whatever the
 * superclass considers children.
 * @return true if any of the three is present
 */
@Override
public boolean hasChildren() {
    if (super.hasChildren()) {
        return true;
    }
    if (hasBackfills()) {
        return true;
    }
    return hasForbids();
}
/**
 * Render the children of this resolution, one per line
 *
 * The superclass's children come first (when it has any), followed by one
 * {@code backfill:} line per pending backfill and one {@code forbidden:} line per forbidden
 * pattern, each prefixed by the given indent.
 * @param indent the prefix to put before each backfill and forbidden line
 * @return the lines joined by newlines, without a trailing newline; the empty string when
 *         there is nothing to render
 */
@Override
protected String childrenToString(String indent) {
    StringBuilder sb = new StringBuilder();
    if (super.hasChildren()) {
        sb.append(super.childrenToString(indent)).append('\n');
    }
    for (AssemblyResolvedBackfill bf : backfills) {
        sb.append(indent).append("backfill: ").append(bf).append('\n');
    }
    for (AssemblyResolvedConstructor f : forbids) {
        sb.append(indent).append("forbidden: ").append(f).append('\n');
    }
    // Guard the empty case: trimming the trailing newline from an empty builder would throw
    if (sb.length() == 0) {
        return "";
    }
    sb.setLength(sb.length() - 1);
    return sb.toString();
}
/** Finds the first "line" followed by (possibly zero) digits within a description */
protected static final Pattern pat = Pattern.compile("line(\\d*)");

/**
 * Used for testing and diagnostics: list the constructor line numbers used to resolve this
 * encoding
 *
 * The line number is scraped from this resolution's description. The dumps of any children
 * that are themselves resolved constructors (and that yield a non-null dump) follow in
 * brackets, separated by commas, which describes the tree structure.
 * @see ConstructState#dumpConstructorTree()
 * @return the constructor tree, or {@code null} if the description contains no line marker
 */
public String dumpConstructorTree() {
    // TODO: HACK, but diagnostic
    Matcher lineMatch = pat.matcher(description);
    if (!lineMatch.find()) {
        return null;
    }
    String lineNo = lineMatch.group(1);
    if (children == null) {
        return lineNo;
    }
    List<String> subtrees = new ArrayList<>();
    for (AssemblyResolution kid : children) {
        if (!(kid instanceof AssemblyResolvedConstructor)) {
            continue;
        }
        String dumped = ((AssemblyResolvedConstructor) kid).dumpConstructorTree();
        if (dumped != null) {
            subtrees.add(dumped);
        }
    }
    if (subtrees.isEmpty()) {
        return lineNo;
    }
    return lineNo + '[' + String.join(",", subtrees) + ']';
}
/**
 * Total the number of bits specified by the resolution's patterns
 *
 * This is the sum of the specificities of the instruction and context pattern blocks.
 * @return the number of bits in the resulting patterns
 * @see AssemblyPatternBlock#getSpecificity()
 */
public int getSpecificity() {
    int insBits = ins.getSpecificity();
    int ctxBits = ctx.getSpecificity();
    return insBits + ctxBits;
}
/**
 * Get an iterable over every filling of the instruction pattern that is permitted in a given
 * context
 *
 * Intended for use in an enhanced for loop:
 *
 * <pre>
 * {@code
 * for (byte[] ins : rcon.possibleInsVals(ctx)) {
 *     System.out.println(format(ins));
 * }
 * }
 * </pre>
 *
 * This resembles {@link #getInstruction()}.{@link AssemblyPatternBlock#possibleVals()},
 * <em>but</em> values matching a forbidden pattern are filtered out. The context is needed so
 * that only forbidden patterns compatible with that context are applied. Prefer this method to
 * the sequence above, since {@link AssemblyPatternBlock#possibleVals()} on its own may yield
 * bytes that do not produce the desired instruction.
 *
 * @note The implementation is based on {@link AssemblyPatternBlock#possibleVals()}, so be
 * aware that a single array is reused for each iterate. You should not retain a pointer to the
 * array, but rather make a copy.
 *
 * @param forCtx the context at the assembly address
 * @return the iterable
 */
public Iterable<byte[]> possibleInsVals(AssemblyPatternBlock forCtx) {
    Predicate<byte[]> removeForbidden = (byte[] val) -> {
        for (AssemblyResolvedConstructor forbidden : forbids) {
            // A forbidden pattern longer than the candidate can be ignored
            if (forbidden.getDefinedInstructionLength() > val.length) {
                continue;
            }
            // A forbidden pattern whose context cannot combine with ours does not apply
            if (forbidden.getContext().combine(forCtx) == null) {
                continue;
            }
            // The context matches, so the candidate is rejected if its bytes combine too
            AssemblyPatternBlock forbiddenIns = forbidden.getInstruction();
            AssemblyPatternBlock candidate =
                AssemblyPatternBlock.fromBytes(ins.length() - val.length, val);
            if (forbiddenIns.combine(candidate) != null) {
                return false;
            }
        }
        return true;
    };
    // A new filtered iterator is built each time iteration begins
    return () -> IteratorUtils.filteredIterator(ins.possibleVals().iterator(),
        removeForbidden);
}
}

View file

@ -0,0 +1,81 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.sem;
import java.util.List;
import com.google.common.collect.ImmutableList;
/**
* A {@link AssemblyResolution} indicating the occurrence of a (usually semantic) error
*
* The description should indicate where the error occurred. The error message should explain the
* actual error. To help the user diagnose the nature of the error, errors in sub-constructors
* should be placed as children of an error given by the parent constructor.
*/
public class AssemblyResolvedError extends AssemblyResolution {
    /** The message explaining the actual error */
    protected final String error;

    /**
     * Build an error resolution; the message is also printed to the tree resolver's debug
     * timer
     * @see AssemblyResolution#error(String, String, List)
     */
    AssemblyResolvedError(String description, ImmutableList<? extends AssemblyResolution> children,
            String error) {
        super(description, children);
        AssemblyTreeResolver.dbg.println(error);
        this.error = error;
    }

    /**
     * Hash on the error message only, matching {@link #equals(Object)}
     */
    @Override
    protected int computeHash() {
        return error.hashCode();
    }

    /**
     * Two error resolutions are equal when their error messages are equal
     */
    @Override
    public boolean equals(Object obj) {
        if (obj instanceof AssemblyResolvedError) {
            return this.error.equals(((AssemblyResolvedError) obj).error);
        }
        return false;
    }

    @Override
    public boolean isError() {
        return true;
    }

    @Override
    public boolean isBackfill() {
        return false;
    }

    /**
     * Get the message explaining the error
     * @return the error message
     */
    public String getError() {
        return error;
    }

    /**
     * Render as the error message followed by the description in parentheses
     */
    @Override
    public String lineToString() {
        return error + " (" + description + ")";
    }
}

View file

@ -0,0 +1,605 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.sem;
import java.util.*;
import org.apache.commons.collections4.IteratorUtils;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Sets;
import ghidra.app.plugin.assembler.sleigh.SleighAssemblerBuilder;
import ghidra.app.plugin.assembler.sleigh.expr.*;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyProduction;
import ghidra.app.plugin.assembler.sleigh.symbol.*;
import ghidra.app.plugin.assembler.sleigh.tree.*;
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer;
import ghidra.app.plugin.assembler.sleigh.util.DbgTimer.DbgCtx;
import ghidra.app.plugin.processors.sleigh.Constructor;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.plugin.processors.sleigh.expression.PatternExpression;
import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol;
/**
* The workhorse of semantic resolution for the assembler
*
* This class takes a parse tree and some additional information (start address, context, etc.) and
* attempts to determine possible encodings using the semantics associated with each branch of the
* given parse tree. Details of this process are described in {@link SleighAssemblerBuilder}.
*
* @see SleighAssemblerBuilder
*/
public class AssemblyTreeResolver {
/** The expression solver shared by all resolvers */
protected static final RecursiveDescentSolver solver = RecursiveDescentSolver.getSolver();
/** Debug output sink; inactive by default */
protected static final DbgTimer dbg = DbgTimer.INACTIVE;
/** The language whose default address space is used to compute word offsets */
protected final SleighLanguage lang;
/** The byte offset where the instruction will start */
protected final long instStart;
/** Defined symbol values; holds {@link #INST_START}, and {@link #INST_NEXT} during resolve() */
protected final Map<String, Long> vals = new HashMap<>();
/** The root of the parse tree being resolved */
protected final AssemblyParseBranch tree;
/** The grammar of the parse tree */
protected final AssemblyGrammar grammar;
/** The context given at construction, after {@code fillMask()} */
protected final AssemblyPatternBlock context;
/** The context transition graph used for purely-recursive productions */
protected final AssemblyContextGraph ctxGraph;
/** Key in {@link #vals} for the instruction's start */
public static final String INST_START = "inst_start";
/** Key in {@link #vals} for the start of the next instruction */
public static final String INST_NEXT = "inst_next";
/**
* Construct a resolver for the given parse tree
*
* @param lang
* @param instStart the byte offset where the instruction will start
* @param tree the parse tree
* @param context the context expected at {@code instStart}
* @param ctxGraph the context transition graph used to resolve purely-recursive productions
*/
public AssemblyTreeResolver(SleighLanguage lang, long instStart, AssemblyParseBranch tree,
AssemblyPatternBlock context, AssemblyContextGraph ctxGraph) {
this.lang = lang;
this.instStart = instStart;
this.vals.put(INST_START, lang.getDefaultSpace().getAddressableWordOffset(instStart));
this.tree = tree;
this.grammar = tree.getGrammar();
this.context = context.fillMask();
this.ctxGraph = ctxGraph;
}
/**
 * Resolve the whole parse tree into final results
 *
 * Each non-error result for the root branch is finished in turn: {@link #INST_NEXT} is
 * defined from the result's instruction length, remaining backfills are applied, the result
 * is combined with the resolver's context, and finally it is checked against its forbidden
 * patterns. A failure at any step is reported as an error in the returned set.
 *
 * @return a set of resolutions (encodings and errors)
 */
public AssemblyResolutionResults resolve() {
    AssemblyResolutionResults finished = new AssemblyResolutionResults();
    for (AssemblyResolution candidate : resolveBranch(tree)) {
        assert !(candidate instanceof AssemblyResolvedBackfill);
        if (candidate.isError()) {
            finished.add(candidate);
            continue;
        }
        AssemblyResolvedConstructor partial = (AssemblyResolvedConstructor) candidate;
        // The next instruction's address depends on this candidate's length
        long nextByte = instStart + partial.getInstructionLength();
        vals.put(INST_NEXT, lang.getDefaultSpace().getAddressableWordOffset(nextByte));
        if (partial.hasBackfills()) {
            dbg.println("Backfilling: " + partial);
        }
        AssemblyResolution filled = partial.backfill(solver, vals);
        dbg.println("Backfilled final: " + filled);
        if (filled.isError()) {
            finished.add(filled);
            continue;
        }
        AssemblyResolvedConstructor complete = (AssemblyResolvedConstructor) filled;
        if (complete.hasBackfills()) {
            // Backfills that survive the final attempt can never be solved
            finished.add(AssemblyResolution.error("Solution is incomplete", "failed backfill",
                ImmutableList.of(complete)));
            continue;
        }
        AssemblyResolvedConstructor selector =
            AssemblyResolution.contextOnly(context, "Selecting context", null);
        AssemblyResolvedConstructor inContext = complete.combine(selector);
        if (inContext == null) {
            finished.add(AssemblyResolution.error("Incompatible context", "resolving",
                ImmutableList.of(complete)));
            continue;
        }
        AssemblyResolution vetted = inContext.checkNotForbidden();
        if (vetted.isError()) {
            finished.add(vetted);
            continue;
        }
        finished.add((AssemblyResolvedConstructor) vetted);
    }
    return finished;
}
/**
 * Resolve one branch of the parse tree
 *
 * The recursive strategy is used only when the grammar has a purely-recursive production for
 * the branch's LHS and the branch has no parent; otherwise the non-recursive one is used.
 *
 * @param branch the branch
 * @return the intermediate results
 */
protected AssemblyResolutionResults resolveBranch(AssemblyParseBranch branch) {
    AssemblyNonTerminal lhs = branch.getProduction().getLHS();
    AssemblyProduction pureRecursion = grammar.getPureRecursion(lhs);
    // Currently, the assembler only allows recursion at the root.
    // Otherwise, the input context cannot be known.
    boolean recurseHere = pureRecursion != null && branch.getParent() == null;
    return recurseHere ? resolveBranchRecursive(branch, pureRecursion)
            : resolveBranchNonRecursive(branch);
}
/**
 * Apply constructors as indicated by a path returned by the context resolution graph
 *
 * Please note: The path given will be emptied during processing.
 *
 * @param path the path to apply; consumed from its last element to its first
 * @param branch the branch corresponding to the production whose LHS has a purely-recursive
 *            definition.
 * @param rec the purely-recursive production
 * @param child the intermediate result to apply the constructors to
 * @return the errors encountered at every step, plus the successful resolutions of the final
 *         step (or just {@code child} if the path was empty)
 */
protected AssemblyResolutionResults applyRecursionPath(Deque<AssemblyConstructorSemantic> path,
        AssemblyParseBranch branch, AssemblyProduction rec, AssemblyResolvedConstructor child) {
    /*
     * A constructor may have multiple patterns, so I cannot assume I will get at most one
     * output at each constructor in the path. Start (1) collecting all the results, then (2)
     * filter out and report the errors, then (3) feed successful resolutions into the next
     * constructor in the path (or finish).
     */
    AssemblyResolutionResults result = new AssemblyResolutionResults();
    Set<AssemblyResolvedConstructor> intoNext = new LinkedHashSet<>();
    intoNext.add(child);
    while (!path.isEmpty()) {
        AssemblyConstructorSemantic sem = path.pollLast();
        ImmutableList<AssemblyParseTreeNode> substs =
            ImmutableList.of((AssemblyParseTreeNode) branch);
        // 1
        // Start each step with an empty collection. Reusing one collection across steps
        // would re-feed earlier steps' resolutions into later steps and leak partially
        // applied resolutions into the final result.
        AssemblyResolutionResults collected = new AssemblyResolutionResults();
        for (final AssemblyResolvedConstructor res : intoNext) {
            ImmutableList<AssemblyResolvedConstructor> sel = ImmutableList.of(res);
            collected.absorb(resolveSelectedChildren(rec, substs, sel, ImmutableList.of(sem)));
        }
        intoNext.clear();
        // 2
        for (AssemblyResolution res : collected) {
            if (res.isError()) {
                result.add(res);
            }
            else { // 3
                intoNext.add((AssemblyResolvedConstructor) res);
            }
        }
    }
    // Whatever survived the last step is a successful application of the whole path
    result.addAll(intoNext);
    return result;
}
/**
 * Resolve a branch whose production's LHS has a purely-recursive definition
 *
 * The branch is first resolved non-recursively. For each successful result, the context
 * graph is asked for paths from the resolver's context to that result's context, and each
 * path is applied to the result.
 *
 * @param branch the branch
 * @param rec the purely-recursive definition
 * @return the results
 */
protected AssemblyResolutionResults resolveBranchRecursive(AssemblyParseBranch branch,
        AssemblyProduction rec) {
    // TODO: There's probably a clever trick regarding since-constructor productions
    // And short-circuiting once a compatible recursive rule is found.
    try (DbgCtx dc = dbg.start("Resolving (recursive) branch: " + branch.getProduction())) {
        AssemblyResolutionResults out = new AssemblyResolutionResults();
        for (AssemblyResolution base : resolveBranchNonRecursive(branch)) {
            if (base.isError()) {
                out.add(base);
            }
            else {
                AssemblyResolvedConstructor resolved = (AssemblyResolvedConstructor) base;
                AssemblyPatternBlock target = resolved.getContext();
                // TODO: The desired context may need to be passed in. For now, just take start.
                AssemblyPatternBlock origin = context; // TODO: This is only correct for "instruction"
                String table = branch.getProduction().getName();
                dbg.println("Finding paths from " + context + " to " + base.lineToString());
                Collection<Deque<AssemblyConstructorSemantic>> found =
                    ctxGraph.computeOptimalApplications(origin, table, target, table);
                dbg.println("Found " + found.size());
                for (Deque<AssemblyConstructorSemantic> route : found) {
                    dbg.println(" " + route);
                    out.absorb(applyRecursionPath(route, branch, rec, resolved));
                }
            }
        }
        return out;
    }
}
/**
 * Resolve the given branch, having selected a particular combination of subconstructor results
 *
 * The contexts of the selected children are combined first; if they conflict, a single error
 * is returned. Otherwise each candidate constructor is tried in turn: operand values are
 * gathered, each operand is written into the result at its computed shift (numeric operands
 * are solved or recorded as backfills, subtable operands are combined directly), the
 * constructor's context changes are solved, and finally each of the constructor's patterns is
 * combined in and checked against the forbidden patterns. Each failure is recorded as an
 * error result. Before returning, an attempt is made to resolve any pending backfills.
 *
 * @param prod the production
 * @param substs the branches and tokens corresponding to the symbols of the production's RHS
 * @param sel the selected subconstructor results
 * @param semantics the collection of possible constructors for this production
 * @return the results
 */
protected AssemblyResolutionResults resolveSelectedChildren(AssemblyProduction prod,
List<AssemblyParseTreeNode> substs, ImmutableList<AssemblyResolvedConstructor> sel,
Collection<AssemblyConstructorSemantic> semantics) {
try (DbgCtx dc = dbg.start("Selecting: " + IteratorUtils.toString(sel.iterator(),
(AssemblyResolvedConstructor rc) -> rc.lineToString()))) {
AssemblyResolutionResults results = new AssemblyResolutionResults();
// Pre-check the combined contexts
AssemblyPatternBlock combCtx = AssemblyPatternBlock.nop();
for (AssemblyResolvedConstructor child : sel) {
AssemblyPatternBlock check = combCtx.combine(child.getContext());
if (null == check) {
// The children disagree on context, so no constructor can succeed
results.add(AssemblyResolution.error(
"Incompatible context requirements among selected children",
"Resolving " + prod, sel));
return results;
}
combCtx = check;
}
dbg.println("Combined context: " + combCtx);
// The starting point shared by every constructor attempt below
AssemblyResolvedConstructor res = AssemblyResolution.nop("Resolving " + prod, sel);
// OK, now that we have a requirement, seek constructors that are compatible.
nextSem: for (AssemblyConstructorSemantic sem : semantics) {
try (DbgCtx dc2 = dbg.start("Trying: " + sem)) {
Constructor cons = sem.getConstructor();
// Gather the operand values (from non-constructor semantics)
AssemblyResolvedConstructor subres =
res.copyAppendDescription("Applying constructor: " + sem);
Map<Integer, Object> opvals = new HashMap<>();
Iterator<Integer> opidxit = sem.getOperandIndexIterator();
Iterator<AssemblyResolvedConstructor> selit = sel.iterator();
// First pass: record each operand's value, keyed by operand index
for (int i = 0; i < prod.size(); i++) {
AssemblyParseTreeNode child = substs.get(i);
AssemblySymbol sym = prod.get(i);
if (sym.takesOperandIndex()) {
int opidx = opidxit.next();
if (child.isNumeric()) {
AssemblyParseNumericToken num = (AssemblyParseNumericToken) child;
opvals.put(opidx, num.getNumericValue());
}
else if (child.isConstructor()) {
opvals.put(opidx, selit.next());
}
}
}
// Now, work out how to write the operand values in
// Second pass: restart both iterators and walk the same symbols again
opidxit = sem.getOperandIndexIterator();
Iterator<AssemblyResolvedConstructor> subit = sel.iterator();
for (int i = 0; i < prod.size(); i++) {
AssemblyParseTreeNode child = substs.get(i);
AssemblySymbol sym = prod.get(i);
if (!sym.takesOperandIndex()) {
continue;
}
dbg.println("Current: " + subres.lineToString());
int opidx = opidxit.next();
OperandSymbol subsym = cons.getOperand(opidx);
int shift = computeOffset(subsym, cons, opvals);
String symname = subsym.getName();
dbg.println("Processing symbol: " + symname);
if (child.isNumeric()) {
// A bit size of 0 makes solveOrBackfill treat the goal as fully defined
int bitsize = 0;
if (sym instanceof AssemblyNumericTerminal) {
AssemblyNumericTerminal numeric = (AssemblyNumericTerminal) sym;
bitsize = numeric.getBitSize();
}
Long opval = (Long) opvals.get(opidx); // delay unboxing until solving
PatternExpression symexp = subsym.getDefiningExpression();
if (symexp == null) {
// Fall back to the pattern expression of the operand's defining symbol
symexp = subsym.getDefiningSymbol().getPatternExpression();
}
String desc =
"Solution to " + sym + " := " + Long.toHexString(opval) + " = " +
symexp + " (immediate op:" + opidx + ",shift:" + shift + ")";
dbg.println("Writing: " + desc);
AssemblyResolution sol =
solveOrBackfill(symexp, opval, bitsize, vals, opvals, null, desc);
dbg.println("Solution: " + sol);
if (null == sol) {
throw new AssertionError("Who returned a null solution!? " +
"Throw an exception or return an error result, please!");
}
if (sol.isError()) {
// Re-report the solver's error against the result so far; try the next constructor
AssemblyResolvedError err = (AssemblyResolvedError) sol;
results.add(AssemblyResolution.error(err.getError(), subres));
continue nextSem;
}
if (sol instanceof AssemblyResolvedConstructor) {
AssemblyResolvedConstructor solcon =
(AssemblyResolvedConstructor) sol;
AssemblyResolvedConstructor check =
subres.combine(solcon.shift(shift));
if (null == check) {
results.add(AssemblyResolution.error(
"Conflict: Immediate operand (token " + i + ") " + sol,
subres));
continue nextSem;
}
subres = check;
}
else {
// Neither an error nor a constructor: treated as a backfill record to solve later
AssemblyResolvedBackfill solbf = (AssemblyResolvedBackfill) sol;
subres = subres.combine(solbf.shift(shift));
}
}
else if (child.isConstructor()) {
// Write the instruction pattern in, shifted
AssemblyResolvedConstructor childrc = subit.next();
dbg.println("Writing subtable(opidx:" + opidx + "): " + symname + ": " +
childrc.lineToString() + " (shift:" + shift + ")");
// I've already combined the contexts
AssemblyResolvedConstructor check =
subres.combine(childrc.shift(shift));
if (null == check) {
results.add(AssemblyResolution.error(
"Conflict: Subtable operand (token " + i + ")", subres));
continue nextSem;
}
subres = check;
}
else {
dbg.println("Probably encountered a varnode production: " + child);
}
}
// Now, write out the proper requirements based on context mutations
AssemblyResolution backctx = sem.solveContextChanges(subres, vals, opvals);
if (!(backctx instanceof AssemblyResolvedConstructor)) {
// Not a resolved constructor: record it as-is and try the next constructor
results.add(backctx);
continue;
}
subres = (AssemblyResolvedConstructor) backctx;
subres = subres.solveContextChangesForForbids(sem, vals, opvals);
// Now, write the actual instruction and context requirements from the constructor
// patterns
dbg.println("Writing patterns:");
for (AssemblyResolvedConstructor pat : sem.getPatterns()) { // use the accessor
// Every pattern starts from the same subres, yielding at most one result each
AssemblyResolvedConstructor temp = subres;
dbg.println(" Pattern: " + pat.lineToString());
dbg.println(" Current: " + temp.lineToString());
AssemblyResolvedConstructor check = temp.combine(pat);
if (null == check) {
results.add(
AssemblyResolution.error("The patterns conflict " + subres, temp));
continue;
}
temp = check;
dbg.println(" Final: " + temp.lineToString());
AssemblyResolution fcheck = temp.checkNotForbidden();
if (fcheck.isError()) {
results.add(fcheck);
continue;
}
temp = (AssemblyResolvedConstructor) fcheck;
results.add(temp);
}
}
catch (Exception e) {
// Note which constructor was being processed, then let the exception propagate
dbg.println("While processing: " + sem);
throw e;
}
}
// Give pending backfills a chance to resolve now that operands are written in
results = tryResolveBackfills(results);
return results;
}
}
/**
 * Repeatedly apply backfills to each non-error result until it finishes or stops improving
 *
 * Errors pass through unchanged. For every other result, backfilling is attempted in a loop
 * that ends when no backfills remain, when backfilling yields an error or a backfill record,
 * or when an attempt returns a result equal to its input.
 *
 * @param results the results to process
 * @return a new collection holding the outcome for each input result
 */
protected AssemblyResolutionResults tryResolveBackfills(AssemblyResolutionResults results) {
    AssemblyResolutionResults out = new AssemblyResolutionResults();
    for (AssemblyResolution candidate : results) {
        if (candidate.isError()) {
            out.add(candidate);
            continue;
        }
        AssemblyResolution current = candidate;
        for (;;) {
            AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) current;
            if (!rc.hasBackfills()) {
                // finish: The complete solution is known
                out.add(rc);
                break;
            }
            current = rc.backfill(solver, vals);
            // fail: either it is now known that the solution doesn't exist, or the solution
            // is /still/ not known and we made no progress
            if (current.isError() || current.isBackfill() || current.equals(rc)) {
                out.add(current);
                break;
            }
            // Some progress was made, continue trying until we finish or fail
        }
    }
    return out;
}
/**
 * Resolve a branch, ignoring any purely-recursive production
 *
 * This is used either when the LHS has no purely-recursive definition, or before
 * considering the purely-recursive definition when it is present. Sub-branches are resolved
 * first, then every combination of their successful results is tried against the
 * production's constructors. Errors from the sub-branches are bundled into one additional
 * error result.
 *
 * @param branch the branch
 * @return the results
 */
protected AssemblyResolutionResults resolveBranchNonRecursive(AssemblyParseBranch branch) {
    try (DbgCtx dc = dbg.start("Resolving (non-recursive) branch: " + branch.getProduction())) {
        AssemblyResolutionResults out = new AssemblyResolutionResults();
        AssemblyProduction prod = branch.getProduction();
        List<AssemblyParseTreeNode> substs = branch.getSubstitutions();
        assert prod.size() == substs.size();
        // Successes are kept per child, in order, for the cross product below;
        // failures are pooled
        List<HashSet<AssemblyResolvedConstructor>> perChild = new ArrayList<>();
        List<AssemblyResolvedError> failures = new ArrayList<>();
        for (int i = 0; i < prod.size(); i++) {
            if (!prod.get(i).takesOperandIndex()) {
                continue;
            }
            AssemblyParseTreeNode node = substs.get(i);
            if (!node.isConstructor()) {
                continue;
            }
            HashSet<AssemblyResolvedConstructor> successes = new HashSet<>();
            for (AssemblyResolution r : resolveBranch((AssemblyParseBranch) node)) {
                if (r.isError()) {
                    failures.add((AssemblyResolvedError) r);
                }
                else {
                    successes.add((AssemblyResolvedConstructor) r);
                }
            }
            perChild.add(successes);
        }
        // Try each selection of child results against the production's constructors.
        // This is also where the shifting will happen.
        Collection<AssemblyConstructorSemantic> semantics = grammar.getSemantics(prod);
        for (List<AssemblyResolvedConstructor> selection : Sets.cartesianProduct(perChild)) {
            ImmutableList<AssemblyResolvedConstructor> sel = ImmutableList.copyOf(selection);
            out.absorb(resolveSelectedChildren(prod, substs, sel, semantics));
        }
        if (!failures.isEmpty()) {
            out.add(AssemblyResolution.error("Child errors", "Resolving " + prod,
                ImmutableList.copyOf(failures)));
        }
        return out;
    }
}
/**
 * Compute the offset of an operand encoded in the instruction block
 *
 * An operand with no offset base (base index -1) sits at its own relative offset. Otherwise
 * the length of the base operand and the base operand's own offset are added. The base's
 * length is the instruction length of its selected encoding when one is present in
 * {@code res}, or the base operand's minimum length otherwise.
 *
 * @param opsym the operand symbol
 * @param cons the constructor containing the operand
 * @param res the selected subconstructor encodings
 * @return the offset (right shift) to apply to the encoded operand
 */
public static int computeOffset(OperandSymbol opsym, Constructor cons,
        Map<Integer, Object> res) {
    int ownOffset = opsym.getRelativeOffset();
    int baseIndex = opsym.getOffsetBase();
    if (baseIndex == -1) {
        return ownOffset;
    }
    OperandSymbol base = cons.getOperand(baseIndex);
    Object encoded = res.get(baseIndex);
    int baseLength = (encoded instanceof AssemblyResolvedConstructor)
            ? ((AssemblyResolvedConstructor) encoded).getInstructionLength()
            : base.getMinimumLength();
    return ownOffset + baseLength + computeOffset(base, cons, res);
}
/**
 * Try to solve an expression, falling back to a backfill record
 *
 * If the solver signals that a backfill is needed, a backfill record is created using the
 * instruction length the solver reports for the expression.
 *
 * @param exp the expression to solve
 * @param goal the desired value of the expression
 * @param vals any defined symbols
 * @param res the selected subconstructor encodings
 * @param cur the resolved constructor so far
 * @param description a description of the result
 * @return the encoded solution, or a backfill record
 */
protected static AssemblyResolution solveOrBackfill(PatternExpression exp, MaskedLong goal,
        Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
        String description) {
    AssemblyResolution outcome;
    try {
        outcome = solver.solve(exp, goal, vals, res, cur, description);
    }
    catch (NeedsBackfillException needsBackfill) {
        int fieldLength = solver.getInstructionLength(exp, res);
        outcome = AssemblyResolution.backfill(exp, goal, res, fieldLength, description);
    }
    return outcome;
}
/**
 * Try to solve an expression for a plain {@code long} goal
 *
 * The goal is first wrapped via {@link MaskedLong#fromLong(long)}, then solved as before.
 *
 * @see #solveOrBackfill(PatternExpression, MaskedLong, Map, Map, AssemblyResolvedConstructor,
 *      String)
 */
protected static AssemblyResolution solveOrBackfill(PatternExpression exp, long goal,
        Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
        String description) {
    MaskedLong wrapped = MaskedLong.fromLong(goal);
    return solveOrBackfill(exp, wrapped, vals, res, cur, description);
}
/**
 * Try to solve an expression for a goal of limited bit width
 *
 * The goal is paired with a mask of the {@code bits} least-significant bits, then solved as
 * before. As a special case, if {@code bits == 0}, the goal is considered fully-defined (as
 * if {@code bits == 64}); the same applies to any {@code bits >= 64}.
 *
 * @see #solveOrBackfill(PatternExpression, MaskedLong, Map, Map, AssemblyResolvedConstructor,
 *      String)
 */
protected static AssemblyResolution solveOrBackfill(PatternExpression exp, long goal, int bits,
        Map<String, Long> vals, Map<Integer, Object> res, AssemblyResolvedConstructor cur,
        String description) {
    boolean fullyDefined = bits == 0 || bits >= 64;
    long mask = fullyDefined ? -1L : ~(-1L << bits);
    MaskedLong masked = MaskedLong.fromMaskAndValue(mask, goal);
    return solveOrBackfill(exp, masked, vals, res, cur, description);
}
}

View file

@ -0,0 +1,52 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.symbol;
import java.util.*;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseToken;
/**
* A terminal that accepts the end of input
*/
public class AssemblyEOI extends AssemblyTerminal {
    /** The end-of-input terminal */
    public static final AssemblyEOI EOI = new AssemblyEOI();

    /** Only the shared {@link #EOI} instance is ever created */
    private AssemblyEOI() {
        super("$");
    }

    @Override
    public String toString() {
        return "$";
    }

    /**
     * Match only when the position is exactly at the end of the buffer
     * @return a single empty-string token at end of input, or an empty set otherwise
     */
    @Override
    public Collection<AssemblyParseToken> match(String buffer, int pos, AssemblyGrammar grammar,
            Map<String, Long> labels) {
        boolean atEnd = pos == buffer.length();
        if (!atEnd) {
            return Collections.emptySet();
        }
        AssemblyParseToken endToken = new AssemblyParseToken(grammar, this, "");
        return Collections.singleton(endToken);
    }

    /**
     * The only suggestion is the empty string
     */
    @Override
    public Collection<String> getSuggestions(String got, Map<String, Long> labels) {
        return Collections.singleton("");
    }
}

View file

@ -0,0 +1,57 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.symbol;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyExtendedGrammar;
/**
* The type of non-terminal for an "extended grammar"
* @see AssemblyExtendedGrammar
*/
public class AssemblyExtendedNonTerminal extends AssemblyNonTerminal {
    // The start state is not stored; it only contributes to the generated name
    /** The non-terminal this one is derived from */
    private final AssemblyNonTerminal nt;
    /** The end state; -1 makes this symbol present itself as the underlying non-terminal */
    private final int end;

    /**
     * Construct a new extended non terminal, derived from the given non-terminal
     *
     * The name is the start state, the underlying name in brackets, then the end state.
     * @param start the start state for the extended non-terminal
     * @param nt the non-terminal from which the extended non-terminal is derived
     * @param end the end state for the extended non-terminal
     */
    public AssemblyExtendedNonTerminal(int start, AssemblyNonTerminal nt, int end) {
        super(start + "[" + nt.name + "]" + end);
        this.nt = nt;
        this.end = end;
    }

    /**
     * @return the underlying non-terminal's name when the end state is -1, else the extended
     *         name
     */
    @Override
    public String getName() {
        return end == -1 ? nt.getName() : name;
    }

    /**
     * @return the underlying non-terminal's string form when the end state is -1, else the
     *         extended name
     */
    @Override
    public String toString() {
        return end == -1 ? nt.toString() : name;
    }
}

View file

@ -0,0 +1,66 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.symbol;
import java.util.*;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseNumericToken;
/**
* A terminal that accepts only a particular numeric value
*
* This is different from a fixed string, because it will accept any encoding of the given numeric
* value.
*/
public class AssemblyFixedNumericTerminal extends AssemblyNumericTerminal {
    /** The only value this terminal accepts */
    private final long val;

    /**
     * Construct a terminal that accepts only the given numeric value
     * @param val the value to accept
     */
    public AssemblyFixedNumericTerminal(long val) {
        super("" + val, 0);
        this.val = val;
    }

    @Override
    public String toString() {
        return "" + val;
    }

    /**
     * The only suggestion is the decimal rendering of the fixed value
     */
    @Override
    public Collection<String> getSuggestions(String got, Map<String, Long> labels) {
        return Collections.singleton("" + val);
    }

    /**
     * Match as the superclass does, given an empty label map, then keep only the tokens whose
     * numeric value equals the fixed value
     */
    @Override
    public Collection<AssemblyParseNumericToken> match(String buffer, int pos,
            AssemblyGrammar grammar, Map<String, Long> labels) {
        // TODO: Allow label substitution here? For now, no.
        Map<String, Long> noLabels = new HashMap<String, Long>();
        Collection<AssemblyParseNumericToken> accepted =
            new HashSet<>(super.match(buffer, pos, grammar, noLabels));
        accepted.removeIf(tok -> tok.getNumericValue() != val);
        return accepted;
    }
}

View file

@ -0,0 +1,37 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.symbol;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
/**
* The type of non-terminal for an assembly grammar
* @see AssemblyGrammar
*/
public class AssemblyNonTerminal extends AssemblySymbol {
    /**
     * Create a non-terminal symbol
     * @param name the name of the non-terminal
     */
    public AssemblyNonTerminal(String name) {
        super(name);
    }

    /**
     * Render the name surrounded by square brackets
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append('[').append(name).append(']');
        return sb.toString();
    }
}

View file

@ -0,0 +1,69 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.symbol;
import java.util.*;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseNumericToken;
import ghidra.app.plugin.processors.sleigh.symbol.ValueMapSymbol;
/**
* A terminal that accepts only a particular set of numeric values, mapping each to another value
*
* This is often used for non-conventional numeric encodings.
* @see ValueMapSymbol
*/
public class AssemblyNumericMapTerminal extends AssemblyNumericTerminal {
    /** Maps each accepted (displayed) value to the value carried by the resulting token */
    protected final Map<Long, Integer> map;

    /**
     * Construct a terminal with the given name, accepting only the keys of a given map
     * @param name the name
     * @param map the map from display value to token value
     */
    public AssemblyNumericMapTerminal(String name, Map<Long, Integer> map) {
        super(name, 0);
        this.map = map;
    }

    /**
     * Match a numeric literal whose value is a key of the map
     *
     * The literal is parsed by the general numeric matcher, without label substitution. If its
     * value is a key of the map, a token is produced carrying the same text, but the mapped
     * value. Otherwise, nothing is matched.
     * @return a, possibly empty, collection of tokens carrying mapped values
     */
    @Override
    public Collection<AssemblyParseNumericToken> match(String buffer, int pos,
            AssemblyGrammar grammar, Map<String, Long> labels) {
        // NOTE: No label substitution
        Collection<AssemblyParseNumericToken> toks =
            new HashSet<>(super.match(buffer, pos, grammar, new HashMap<String, Long>()));
        Collection<AssemblyParseNumericToken> results = new LinkedHashSet<>();
        for (AssemblyParseNumericToken tok : toks) {
            Integer mapped = map.get(tok.getNumericValue());
            if (mapped == null) {
                // The literal parsed, but its value is not one of the accepted keys
                continue;
            }
            results.add(new AssemblyParseNumericToken(grammar, this, tok.getString(), mapped));
        }
        return results;
    }

    /**
     * Suggest each accepted key, in decimal and in hexadecimal
     *
     * The hexadecimal form carries the {@code 0x} prefix (and a leading {@code -} for negative
     * keys), since the matcher only treats a literal as hexadecimal when it is so prefixed.
     * Without the prefix, a suggestion such as {@code 10} for the key 16 would be parsed as
     * decimal ten.
     */
    @Override
    public Collection<String> getSuggestions(String got, Map<String, Long> labels) {
        Set<String> result = new HashSet<>();
        for (long k : map.keySet()) {
            result.add(Long.toString(k));
            if (k >= 0) {
                result.add(PREFIX_HEX + Long.toHexString(k));
            }
            else if (k != Long.MIN_VALUE) {
                // Long.MIN_VALUE is excluded: its magnitude cannot be parsed as a long
                result.add("-" + PREFIX_HEX + Long.toHexString(-k));
            }
        }
        return result;
    }
}

View file

@ -0,0 +1,285 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.symbol;
import java.util.*;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParser;
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseNumericToken;
/**
* A terminal that accepts any numeric value or program label
*
* The literal may take any form accepted by UNIX strtol() with base=0. By default, the literal is
* interpreted in base 10, but it may be prefixed such that it's interpreted in an alternative
* base. With the prefix '0x', it is interpreted in hexadecimal. With the prefix '0', it is
* interpreted in octal.
*/
public class AssemblyNumericTerminal extends AssemblyTerminal {
    public static final String PREFIX_HEX = "0x";
    public static final String PREFIX_OCT = "0";

    /** Some suggestions, other than labels, to provide */
    protected static final Collection<String> suggestions =
        Arrays.asList("0", "1", "0x0", "+0x0", "-0x0", "01");

    /** The maximum number of labels to suggest */
    protected static final int MAX_LABEL_SUGGESTIONS = 10;

    protected final int bitsize;
    // TODO: Not all numeric literals can be substituted for a label

    /**
     * Construct a terminal with the given name, accepting any numeric value or program label
     * @param name the name
     * @param bitsize the width, in bits, that a matched literal must fit in. A value of 0 (or
     *            anything at least as wide as a {@code long}) disables the range check.
     */
    public AssemblyNumericTerminal(String name, int bitsize) {
        super(name);
        this.bitsize = bitsize;
    }

    @Override
    public String toString() {
        if (bitsize == 0) {
            return "[num:" + name + "]";
        }
        return "[num" + bitsize + ":" + name + "]";
    }

    /**
     * This is only a convenience for testing
     *
     * Please use {@link #match(String, int, AssemblyGrammar, Map)}.
     * @param buffer the input buffer
     * @return the parsed token, or null if nothing matched
     * @throws AssertionError if more than one token matched
     */
    public AssemblyParseNumericToken match(String buffer) {
        Collection<AssemblyParseNumericToken> col =
            match(buffer, 0, null, AssemblyParser.EMPTY_LABELS);
        if (col.isEmpty()) {
            return null;
        }
        else if (col.size() == 1) {
            return col.iterator().next();
        }
        else {
            throw new AssertionError("Multiple results for a numeric terminal?: " + col);
        }
    }

    /**
     * Match an optionally-signed numeric literal, or an unsigned program label
     *
     * A leading {@code +} or {@code -} must be followed by a literal; labels cannot be signed.
     * @return a, possibly empty, collection of matched tokens
     */
    @Override
    public Collection<AssemblyParseNumericToken> match(String buffer, int pos,
            AssemblyGrammar grammar, Map<String, Long> labels) {
        if (pos >= buffer.length()) {
            return Collections.emptySet();
        }
        char first = buffer.charAt(pos);
        if (first == '+') {
            return matchLiteral(pos + 1, buffer, pos, false, grammar);
        }
        if (first == '-') {
            return matchLiteral(pos + 1, buffer, pos, true, grammar);
        }
        return match(pos, buffer, grammar, labels);
    }

    /**
     * Try to match a sign-less numeric literal, or a program label
     * @param s the buffer cursor where the literal or label is expected
     * @param buffer the input buffer
     * @param grammar the grammar containing this terminal
     * @param labels the program labels, mapped to their values
     * @return a collection holding the parsed token, or an empty collection
     */
    protected Collection<AssemblyParseNumericToken> match(int s, String buffer,
            AssemblyGrammar grammar, Map<String, Long> labels) {
        if (s >= buffer.length()) {
            return Collections.emptySet();
        }
        // Try a literal number first
        if (Character.isDigit(buffer.charAt(s))) {
            return matchLiteral(s, buffer, s, false, grammar);
        }
        // Now, try a label: consume the longest run of identifier characters
        int b = s;
        while (b < buffer.length() && Character.isJavaIdentifierPart(buffer.charAt(b))) {
            b++;
        }
        String lab = buffer.substring(s, b);
        Long val = labels.get(lab);
        if (val == null) {
            return Collections.emptySet();
        }
        return Collections.singleton(new AssemblyParseNumericToken(grammar, this, lab, val));
    }

    /**
     * Try to match a numeric literal, after the optional sign, encoded in hex, decimal, or octal
     * @param s buffer cursor where the literal is expected
     * @param buffer the input buffer
     * @param pos the start offset of the token parsed so far
     * @param neg true if the optional {@code -} is present
     * @param grammar the grammar containing this terminal
     * @return a collection holding the parsed token, or an empty collection
     */
    protected Collection<AssemblyParseNumericToken> matchLiteral(int s, String buffer, int pos,
            boolean neg, AssemblyGrammar grammar) {
        // The hex prefix must be tested first, since it begins with the octal prefix
        if (buffer.regionMatches(s, PREFIX_HEX, 0, PREFIX_HEX.length())) {
            return matchHex(s + PREFIX_HEX.length(), buffer, pos, neg, grammar);
        }
        else if (buffer.regionMatches(s, PREFIX_OCT, 0, PREFIX_OCT.length())) {
            return matchOct(s + PREFIX_OCT.length(), buffer, pos, neg, grammar);
        }
        else {
            return matchDec(s, buffer, pos, neg, grammar);
        }
    }

    /**
     * Construct a numeric token
     * @param str the string value of the token taken verbatim from the buffer
     * @param num portion of the token following the optional sign and prefix
     * @param radix the radix of {@code num}
     * @param neg true if the optional {@code -} is present
     * @param grammar the grammar containing this terminal
     * @return a collection holding the parsed token, or an empty collection if {@code num} is
     *         empty, cannot be parsed as a {@code long}, or does not fit the terminal's bit size
     */
    protected Collection<AssemblyParseNumericToken> makeToken(String str, String num, int radix,
            boolean neg, AssemblyGrammar grammar) {
        if (num.isEmpty()) {
            return Collections.emptySet();
        }
        long val;
        try {
            val = Long.parseLong(num, radix);
        }
        catch (NumberFormatException e) {
            // Not parseable in the given radix, or too large in magnitude for a long
            return Collections.emptySet();
        }
        if (neg) {
            val = -val;
        }
        if (!fitsBitsize(val)) {
            return Collections.emptySet();
        }
        return Collections.singleton(new AssemblyParseNumericToken(grammar, this, str, val));
    }

    /**
     * Check that a value fits the terminal's bit size, as either a signed or an unsigned quantity
     *
     * TODO: I'd really like to know whether or not the printpiece can take a signed value. Until
     * then, the accepted range is the union of the signed and unsigned ranges.
     * @param val the value to check
     * @return true if the terminal is unsized, or the value is within range
     */
    private boolean fitsBitsize(long val) {
        if (bitsize <= 0 || bitsize >= Long.SIZE) {
            // Unsized, or at least as wide as a long: every representable value fits.
            // This also keeps the shift distances below within 0..63, where they are exact.
            return true;
        }
        if (val < ((-1L) << (bitsize - 1))) {
            return false;
        }
        if (bitsize == Long.SIZE - 1) {
            // The bound 2^63 is not representable as a long; 1L << 63 would be negative and
            // reject everything. No long can reach it, so there is nothing to check.
            return true;
        }
        return val < (1L << bitsize);
    }

    /** Check for an ASCII hexadecimal digit */
    private static boolean isHexDigit(char c) {
        return ('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f');
    }

    /** Check for an ASCII decimal digit */
    private static boolean isDecDigit(char c) {
        return '0' <= c && c <= '9';
    }

    /** Check for an ASCII octal digit */
    private static boolean isOctDigit(char c) {
        return '0' <= c && c <= '7';
    }

    /**
     * Try to match a hexadecimal literal, following the optional sign and prefix
     * @param s the buffer cursor where the hex portion starts
     * @param buffer the input buffer
     * @param pos the start offset of the token parsed so far
     * @param neg true if the optional {@code -} is present
     * @param grammar the grammar containing this terminal
     * @return a collection holding the parsed token, or an empty collection
     */
    protected Collection<AssemblyParseNumericToken> matchHex(int s, String buffer, int pos,
            boolean neg, AssemblyGrammar grammar) {
        int b = s;
        while (b < buffer.length() && isHexDigit(buffer.charAt(b))) {
            b++;
        }
        return makeToken(buffer.substring(pos, b), buffer.substring(s, b), 16, neg, grammar);
    }

    /**
     * Try to match a decimal literal, following the optional sign
     * @param s the buffer cursor where the decimal portion starts
     * @param buffer the input buffer
     * @param pos the start offset of the token parsed so far
     * @param neg true if the optional {@code -} is present
     * @param grammar the grammar containing this terminal
     * @return a collection holding the parsed token, or an empty collection
     */
    protected Collection<AssemblyParseNumericToken> matchDec(int s, String buffer, int pos,
            boolean neg, AssemblyGrammar grammar) {
        int b = s;
        while (b < buffer.length() && isDecDigit(buffer.charAt(b))) {
            b++;
        }
        return makeToken(buffer.substring(pos, b), buffer.substring(s, b), 10, neg, grammar);
    }

    /**
     * Try to match an octal literal, following the optional sign and prefix
     * @param s the buffer cursor where the octal portion starts
     * @param buffer the input buffer
     * @param pos the start offset of the token parsed so far
     * @param neg true if the optional {@code -} is present
     * @param grammar the grammar containing this terminal
     * @return a collection holding the parsed token, or an empty collection
     */
    protected Collection<AssemblyParseNumericToken> matchOct(int s, String buffer, int pos,
            boolean neg, AssemblyGrammar grammar) {
        int b = s;
        while (b < buffer.length() && isOctDigit(buffer.charAt(b))) {
            b++;
        }
        if (b == s) {
            // Then the entire token is just 0
            return makeToken(buffer.substring(pos, b), "0", 8, neg, grammar);
        }
        return makeToken(buffer.substring(pos, b), buffer.substring(s, b), 8, neg, grammar);
    }

    /**
     * Suggest a few example literals, plus up to {@link #MAX_LABEL_SUGGESTIONS} labels that
     * start with the text given so far
     */
    @Override
    public Collection<String> getSuggestions(String got, Map<String, Long> labels) {
        Set<String> s = new TreeSet<>(suggestions);
        int labelcount = 0;
        for (String label : labels.keySet()) {
            if (labelcount >= MAX_LABEL_SUGGESTIONS) {
                break;
            }
            if (label.startsWith(got)) {
                s.add(label);
                labelcount++;
            }
        }
        return s;
    }

    /**
     * Get the bit size given at construction
     * @return the size in bits, or 0 if unsized
     */
    public int getBitSize() {
        return bitsize;
    }
}

View file

@ -0,0 +1,68 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.symbol;
import java.util.*;
import java.util.Map.Entry;
import org.apache.commons.collections4.MultiValuedMap;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseNumericToken;
import ghidra.app.plugin.processors.sleigh.symbol.VarnodeListSymbol;
/**
* A terminal that accepts only a particular set of strings, mapping each to a numeric value
*
* @see NameSymbol
* @see VarnodeListSymbol
*/
public class AssemblyStringMapTerminal extends AssemblyTerminal {
    /** Maps each accepted text to the value(s) a matching token may carry */
    protected final MultiValuedMap<String, Integer> map;

    /**
     * Create a terminal which accepts exactly the keys of the given map
     * @param name the name of the terminal
     * @param map the accepted texts, each mapped to its token value(s)
     */
    public AssemblyStringMapTerminal(String name, MultiValuedMap<String, Integer> map) {
        super(name);
        this.map = map;
    }

    /**
     * Produce one token for every map entry whose key appears in the buffer at the cursor
     */
    @Override
    public Collection<AssemblyParseNumericToken> match(String buffer, int pos,
            AssemblyGrammar grammar, Map<String, Long> labels) {
        Collection<AssemblyParseNumericToken> tokens = new LinkedHashSet<>();
        for (Entry<String, Integer> entry : map.entries()) {
            String text = entry.getKey();
            if (!buffer.regionMatches(pos, text, 0, text.length())) {
                continue;
            }
            tokens.add(new AssemblyParseNumericToken(grammar, this, text, entry.getValue()));
        }
        return tokens;
    }

    /**
     * Suggest every key of the map, regardless of the text given so far
     */
    @Override
    public Collection<String> getSuggestions(String string, Map<String, Long> labels) {
        return map.keySet();
    }

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder("[list:");
        sb.append(name).append("]");
        return sb.toString();
    }
}

View file

@ -0,0 +1,61 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.symbol;
import java.util.*;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseToken;
/**
* A terminal that accepts only a particular string
*/
public class AssemblyStringTerminal extends AssemblyTerminal {
    /** The exact text this terminal accepts */
    protected final String str;

    /**
     * Create a terminal which matches one exact string
     *
     * The symbol's name is the string wrapped in double quotes.
     * @param str the text to accept
     */
    public AssemblyStringTerminal(String str) {
        super('"' + str + '"');
        this.str = str;
    }

    @Override
    public String toString() {
        return '"' + str + '"';
    }

    /**
     * Match the string if, and only if, it appears in the buffer at the cursor
     */
    @Override
    public Collection<AssemblyParseToken> match(String buffer, int pos, AssemblyGrammar grammar,
            Map<String, Long> labels) {
        boolean present = buffer.regionMatches(pos, str, 0, str.length());
        if (!present) {
            return Collections.emptySet();
        }
        return Collections.singleton(new AssemblyParseToken(grammar, this, str));
    }

    @Override
    public Collection<String> getSuggestions(String got, Map<String, Long> labels) {
        return Collections.singleton(str);
    }

    /**
     * A fixed string does not consume an operand index
     */
    @Override
    public boolean takesOperandIndex() {
        return false;
    }
}

View file

@ -0,0 +1,80 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.symbol;
import ghidra.app.plugin.assembler.sleigh.grammars.AbstractAssemblyGrammar;
/**
* A symbol in a context-free grammar
*
* Symbols can be either terminals or non-terminals. Non-terminals must have a defining production,
* i.e., each must appear as the left-hand side of some production in the grammar.
*
* Traditionally, when displayed, non-terminals should be immediately distinguishable from
* terminals. In classic CS literature, this usually means non-terminals are in CAPS, and terminals
* are in lower-case. Because the assembler doesn't control the names provided by SLEIGH, we
* surround non-terminals in [brackets].
*
* @see AbstractAssemblyGrammar
*/
public abstract class AssemblySymbol implements Comparable<AssemblySymbol> {
protected final String name;
/**
* Construct a new symbol with the given name
* @param name the name
*/
public AssemblySymbol(String name) {
this.name = name;
}
@Override
public abstract String toString();
/**
* Get the name of this symbol
* @return the name
*/
public String getName() {
return name;
}
@Override
public int compareTo(AssemblySymbol that) {
return this.toString().compareTo(that.toString()); // LAZY
}
@Override
public boolean equals(Object that) {
if (!(that instanceof AssemblySymbol)) {
return false;
}
return this.toString().equals(that.toString()); // LAZY
}
@Override
public int hashCode() {
return toString().hashCode(); // LAZY
}
/**
* Check if this symbol consumes an operand index of its constructor
* @return true if the symbol represents an operand
*/
public boolean takesOperandIndex() {
return true;
}
}

View file

@ -0,0 +1,58 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.symbol;
import java.util.Collection;
import java.util.Map;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseToken;
/**
* The type of terminal for an assembly grammar
*
* Unlike classical parsing, each terminal provides its own tokenizer. If multiple tokenizers yield
* a token, the parser branches, possibly creating multiple, ambiguous trees.
* @see AssemblyGrammar
*/
public abstract class AssemblyTerminal extends AssemblySymbol {
/**
* Construct a terminal having the given name
* @param name the name of the terminal, passed on to the symbol superclass
*/
public AssemblyTerminal(String name) {
super(name);
}
/**
* Attempt to match a token from the input buffer starting at a given position
*
* The result is a collection, since a terminal may yield more than one token. Implementations
* such as AssemblyStringTerminal report "no match" with an empty collection rather than null.
* NOTE(review): confirm that every implementation follows that convention.
* @param buffer the input buffer
* @param pos the cursor position in the buffer
* @param grammar the grammar containing this terminal
* @param labels the program labels, if applicable
* @return the matched tokens
*/
public abstract Collection<? extends AssemblyParseToken> match(String buffer, int pos,
AssemblyGrammar grammar, Map<String, Long> labels);
/**
* Provide a collection of strings that this terminal would have accepted
* @param got the remaining contents of the input buffer
* @param labels the program labels, if applicable
* @return a, possibly empty, collection of suggestions
*/
public abstract Collection<String> getSuggestions(String got, Map<String, Long> labels);
}

View file

@ -0,0 +1,173 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.tree;
import java.io.PrintStream;
import java.util.*;
import org.apache.commons.lang3.StringUtils;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyProduction;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyConstructorSemantic;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNonTerminal;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblySymbol;
/**
* A branch in a parse tree, corresponding to the application of a production
*/
public class AssemblyParseBranch extends AssemblyParseTreeNode
        implements Iterable<AssemblyParseTreeNode> {
    // The substitutions, each corresponding to a symbol from the production's RHS
    private final List<AssemblyParseTreeNode> substs = new ArrayList<>();
    // The production applied to create this branch
    private final AssemblyProduction prod;

    /**
     * Construct a branch from the given grammar and production
     * @param grammar the grammar containing the production
     * @param prod the production applied to create this branch
     */
    public AssemblyParseBranch(AssemblyGrammar grammar, AssemblyProduction prod) {
        super(grammar);
        this.prod = prod;
    }

    /**
     * Hash on the production and, in order, each child
     */
    @Override
    public int hashCode() {
        int result = prod.hashCode();
        for (AssemblyParseTreeNode n : substs) {
            result *= 31;
            result += n.hashCode();
        }
        return result;
    }

    /**
     * Two branches are equal when they apply equal productions to equal children
     *
     * The production takes part in the comparison because it takes part in
     * {@link #hashCode()}; equal objects must produce equal hash codes.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || this.getClass() != obj.getClass()) {
            return false;
        }
        AssemblyParseBranch that = (AssemblyParseBranch) obj;
        if (!this.prod.equals(that.prod)) {
            return false;
        }
        return this.substs.equals(that.substs);
    }

    /**
     * Prepend a child to this branch
     *
     * Because LR parsers produce rightmost derivations, they necessarily populate the branches
     * right to left. During reduction, each child is popped from the stack, traversing them in
     * reverse order. This method prepends children so that when reduction is complete, the
     * children are aligned to the corresponding symbols from the RHS of the production.
     * @param child the child
     */
    public void addChild(AssemblyParseTreeNode child) {
        // Guard on completeness first: expects() returns null once the branch is full
        assert !isComplete() && expects().equals(child.getSym());
        this.substs.add(0, child);
        child.setParent(this);
    }

    /**
     * See what symbol is expected next
     *
     * The child added next must be associated with the token expected next.
     * @return the symbol, or null if the branch is already complete
     */
    protected AssemblySymbol expects() {
        if (!isComplete()) {
            // Children are added right to left, so count back from the end of the RHS
            return prod.get(prod.size() - substs.size() - 1);
        }
        return null;
    }

    /**
     * Check if the branch is full
     * @return true if every symbol on the RHS has a corresponding child
     */
    protected boolean isComplete() {
        return prod.size() == substs.size();
    }

    @Override
    public AssemblyNonTerminal getSym() {
        return prod.getLHS();
    }

    @Override
    protected void print(PrintStream out, String indent) {
        out.print(indent + getSym() + " := " + prod);
        Collection<AssemblyConstructorSemantic> sems = grammar.getSemantics(prod);
        if (!sems.isEmpty()) {
            out.print(" (" + StringUtils.join(sems, ", ") + ")");
        }
        out.println();
        // Children are printed one level deeper
        for (AssemblyParseTreeNode s : substs) {
            s.print(out, " " + indent);
        }
    }

    @Override
    public String toString() {
        return this.prod.getLHS().toString();
    }

    /**
     * Get the production applied to create this branch
     * @return the production
     */
    public AssemblyProduction getProduction() {
        return prod;
    }

    /**
     * Get the list of children, indexed by corresponding symbol from the RHS
     * @return an unmodifiable view of the children
     */
    public List<AssemblyParseTreeNode> getSubstitutions() {
        return Collections.unmodifiableList(substs);
    }

    @Override
    public Iterator<AssemblyParseTreeNode> iterator() {
        return getSubstitutions().iterator();
    }

    /**
     * Get the <em>i</em>th child, corresponding to the <em>i</em>th symbol from the RHS
     * @param i the position
     * @return the child
     */
    public AssemblyParseTreeNode getSubstitution(int i) {
        return substs.get(i);
    }

    @Override
    public boolean isConstructor() {
        return true;
    }

    /**
     * Concatenate, in order, the strings generated by the children
     */
    @Override
    public String generateString() {
        StringBuilder sb = new StringBuilder();
        for (AssemblyParseTreeNode node : substs) {
            sb.append(node.generateString());
        }
        return sb.toString();
    }
}

View file

@ -0,0 +1,90 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.tree;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.symbol.*;
/**
* A token having a numeric value
*
* @see AssemblyFixedNumericTerminal
* @see AssemblyNumericMapTerminal
* @see AssemblyNumericTerminal
* @see AssemblyStringMapTerminal
*/
public class AssemblyParseNumericToken extends AssemblyParseToken {
    /** The numeric value represented by this token */
    protected final long val;

    /**
     * Construct a numeric token having the given string and numeric values
     * @param grammar the grammar containing the terminal
     * @param term the terminal that matched this token
     * @param str the portion of the input comprising this token
     * @param val the numeric value represented by this token
     */
    public AssemblyParseNumericToken(AssemblyGrammar grammar, AssemblyTerminal term, String str,
            long val) {
        super(grammar, term, str);
        this.val = val;
    }

    /**
     * Hash on the terminal, the text, and the numeric value
     */
    @Override
    public int hashCode() {
        int result = term.hashCode();
        result *= 31;
        result += str.hashCode();
        result *= 31;
        result += Long.hashCode(val);
        return result;
    }

    /**
     * Two numeric tokens are equal when they are of the same class, and have equal terminals,
     * text, and numeric values
     */
    @Override
    public boolean equals(Object obj) {
        // equals(null) must answer false, not throw
        if (obj == null || this.getClass() != obj.getClass()) {
            return false;
        }
        AssemblyParseNumericToken that = (AssemblyParseNumericToken) obj;
        if (!this.term.equals(that.term)) {
            return false;
        }
        if (!this.str.equals(that.str)) {
            return false;
        }
        return this.val == that.val;
    }

    @Override
    public String toString() {
        return "'" + str + "'=>" + val;
    }

    /**
     * Get the numeric value of the token
     * @return the value
     */
    public long getNumericValue() {
        return val;
    }

    @Override
    public boolean isNumeric() {
        return true;
    }
}

View file

@ -0,0 +1,95 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.tree;
import java.io.PrintStream;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyStringTerminal;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyTerminal;
/**
* A string token
*
* @see AssemblyStringTerminal
*/
public class AssemblyParseToken extends AssemblyParseTreeNode {
    /** The terminal that matched this token */
    protected final AssemblyTerminal term;
    /** The portion of the input comprising this token */
    protected final String str;

    /**
     * Construct a new token having the given string value
     * @param grammar the grammar containing the terminal
     * @param term the terminal that matched this token
     * @param str the portion of the input comprising this token
     */
    public AssemblyParseToken(AssemblyGrammar grammar, AssemblyTerminal term, String str) {
        super(grammar);
        this.term = term;
        this.str = str;
    }

    /**
     * Hash on the terminal and the text
     */
    @Override
    public int hashCode() {
        int result = term.hashCode();
        result *= 31;
        result += str.hashCode();
        return result;
    }

    /**
     * Two tokens are equal when they are of the same class, and have equal terminals and text
     */
    @Override
    public boolean equals(Object obj) {
        // equals(null) must answer false, not throw
        if (obj == null || this.getClass() != obj.getClass()) {
            return false;
        }
        AssemblyParseToken that = (AssemblyParseToken) obj;
        if (!this.term.equals(that.term)) {
            return false;
        }
        return this.str.equals(that.str);
    }

    /**
     * Get the portion of the input comprising the token
     * @return the string value
     */
    public String getString() {
        return str;
    }

    @Override
    public AssemblyTerminal getSym() {
        return term;
    }

    @Override
    protected void print(PrintStream out, String indent) {
        out.println(indent + term + " := " + toString());
    }

    @Override
    public String toString() {
        return "'" + str + "'";
    }

    /**
     * A token generates exactly the text it matched
     */
    @Override
    public String generateString() {
        return str;
    }
}

View file

@ -0,0 +1,108 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.tree;
import java.io.PrintStream;
import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblySymbol;
/**
* A node in a parse tree
*/
public abstract class AssemblyParseTreeNode {
    /** The grammar used to parse the tree */
    protected final AssemblyGrammar grammar;
    /** The containing branch; null until one is assigned */
    protected AssemblyParseBranch parent;

    /**
     * Create a node belonging to a tree parsed by the given grammar
     * @param grammar the grammar
     */
    public AssemblyParseTreeNode(AssemblyGrammar grammar) {
        this.grammar = grammar;
    }

    /**
     * Get the symbol for which this node is substituted
     *
     * For a branch, this is the LHS of the corresponding production. For a token, this is the
     * terminal whose tokenizer matched it.
     * @return the symbol
     */
    public abstract AssemblySymbol getSym();

    /**
     * Get the branch which contains this node
     * @return the containing branch, or null if none has been assigned
     */
    public AssemblyParseBranch getParent() {
        return parent;
    }

    /**
     * Set the branch which contains this node
     * @param parent the containing branch
     */
    protected void setParent(AssemblyParseBranch parent) {
        // NOTE: Cannot assert, since the LR parser may backtrack and reassign.
        this.parent = parent;
    }

    /**
     * For debugging: Display this parse tree via the given stream
     * @param out the stream
     */
    public void print(PrintStream out) {
        final String noIndent = "";
        print(out, noIndent);
    }

    /**
     * For debugging: Display the tree with the given indent
     * @param out the stream
     * @param indent the indent
     */
    protected abstract void print(PrintStream out, String indent);

    /**
     * Check if this node yields a subconstructor resolution
     * @return true if this node yields a subconstructor resolution; false by default
     */
    public boolean isConstructor() {
        return false;
    }

    /**
     * Check if this node yields a numeric value
     * @return true if this node yields a numeric value; false by default
     */
    public boolean isNumeric() {
        return false;
    }

    /**
     * Get the grammar used to parse the tree
     * @return the grammar
     */
    public AssemblyGrammar getGrammar() {
        return grammar;
    }

    /**
     * Generate the string that this node parsed
     * @return the string
     */
    public abstract String generateString();
}

View file

@ -0,0 +1,252 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.util;
import java.io.*;
import java.util.Stack;
/**
* A debugging, timing, and diagnostic tool
*
* TODO: I should probably remove this and rely on the Msg.trace() method, or at the very least,
* refactor this to use that.
*/
public class DbgTimer extends PrintStream {
    // Start times (in milliseconds) of the tasks in progress; the innermost task is on top
    Stack<Long> timeStack = new Stack<>();

    /**
     * Create a new debugging timer that writes, indented, to the given stream
     * @param out the stream to receive the output
     */
    public DbgTimer(OutputStream out) {
        super(new TabbingOutputStream(out));
        // Field initializers run only after super(...) returns, so the stack could not be
        // given to the tabbing stream when it was constructed; hand it over now.
        ((TabbingOutputStream) this.out).setTimeStack(timeStack);
    }

    /**
     * Create a new debugging timer that writes to standard out
     */
    public DbgTimer() {
        this(System.out);
    }

    /**
     * A (rather slow) output stream that indents every line of its output
     *
     * Each line is prefixed with two spaces per entry currently on the time stack.
     */
    public static class TabbingOutputStream extends OutputStream {
        protected static final int STATE_NOLINE = 0;
        protected static final int STATE_LINE = 1;

        protected OutputStream out;
        protected int state = STATE_NOLINE;
        protected Stack<Long> timeStack;

        /**
         * Create a new stream wrapping another
         * @param out the stream to wrap
         */
        private TabbingOutputStream(OutputStream out) {
            this.out = out;
        }

        /**
         * Start a new (indented) line of output by writing the indentation
         * @throws IOException if the wrapped stream fails to accept the indentation
         */
        protected void startln() throws IOException {
            // One level (two spaces) for every task still on the stack
            for (int depth = timeStack.size(); depth > 0; depth--) {
                out.write(' ');
                out.write(' ');
            }
        }

        /**
         * Workaround: Set the time stack reference
         * @param timeStack the stack whose depth determines the indentation
         */
        protected void setTimeStack(Stack<Long> timeStack) {
            this.timeStack = timeStack;
        }

        /**
         * {@inheritDoc}
         *
         * Watches for line terminators and writes the indentation ahead of the first byte of
         * each line.
         */
        @Override
        public void write(int b) throws IOException {
            if (b == '\r' || b == '\n') {
                // A terminator is passed through as is; the next byte begins a new line
                out.write(b);
                state = STATE_NOLINE;
                return;
            }
            boolean atLineStart = (state == STATE_NOLINE);
            if (atLineStart) {
                startln();
            }
            out.write(b);
            if (atLineStart) {
                state = STATE_LINE;
            }
        }

        @Override
        public void close() throws IOException {
            if (out != System.out && out != System.err) {
                // Flush, then close the wrapped stream
                try (OutputStream wrapped = out) {
                    wrapped.flush();
                }
                return;
            }
            // The standard streams are left open; flushing them is harmless
            out.flush();
        }

        @Override
        public void flush() throws IOException {
            out.flush();
        }
    }

    /** An instance that prints to standard out */
    public static final DbgTimer ACTIVE = new DbgTimer();

    /** An instance that discards everything printed to it */
    public static final DbgTimer INACTIVE = new DbgTimer(new OutputStream() {
        @Override
        public void write(int b) throws IOException {
            // Reaching this point means a print call was not squelched at its source, which is
            // the inefficient way to drop debug messages. The print methods are overridden
            // below to do nothing; if one was missed, override it too. Also see the TODO in the
            // class documentation.
            throw new AssertionError("INTERNAL: Should not be here.");
        }
    }) {
        @Override
        public void print(String msg) {
            // Squelched
        }

        @Override
        public void println(String msg) {
            // Squelched
        }

        @Override
        public void println() {
            // Squelched
        }

        @Override
        public void print(Object msg) {
            // Squelched
        }

        @Override
        public void println(Object msg) {
            // Squelched
        }

        @Override
        public DbgCtx start(Object message) {
            // No task is recorded and there is no context to close
            return null;
        }

        @Override
        public void stop() {
            // No task was recorded
        }
    };

    /**
     * Start a new, possibly long-running, task
     *
     * The message is printed and flushed, and then the current time is pushed onto the stack, so
     * everything printed until the matching {@link #stop()} is indented one level deeper.
     *
     * This is meant to be used idiomatically, as in a try-with-resources block:
     * <pre>
     * {@code
     * try (DbgCtx dc = dbg.start("Twiddling the frobs:")) {
     *     // do some classy twiddling
     * } // this will automatically print done and the time elapsed within the try block
     * }
     * </pre>
     *
     * This idiom is preferred because the task will be stopped even if an error occurs, if the
     * method returns from within the block, etc.
     * @param message the message to print when the task begins
     * @return a context to close when the task ends
     */
    public DbgCtx start(Object message) {
        println(message);
        flush();
        long startedAt = System.currentTimeMillis();
        timeStack.push(startedAt);
        return new DbgCtx(this);
    }

    /**
     * Stop the current task
     *
     * This will print done and the elapsed time since the start of the task. The "current task" is
     * determined from the stack. Its entry is removed before printing, so the message appears at
     * the same indentation as the message that started the task.
     */
    public void stop() {
        long now = System.currentTimeMillis();
        long elapsed = now - timeStack.pop();
        flush();
        println("Done after " + elapsed + "ms");
    }

    /**
     * Replace the wrapped output stream (usually temporarily)
     * @see #resetOutputStream(TabbingOutputStream)
     * @param s the replacement stream
     * @return the original stream, wrapped in a tabbing stream
     */
    public TabbingOutputStream setOutputStream(OutputStream s) {
        flush();
        TabbingOutputStream previous = (TabbingOutputStream) this.out;
        TabbingOutputStream replacement = new TabbingOutputStream(s);
        replacement.setTimeStack(timeStack);
        this.out = replacement;
        return previous;
    }

    /**
     * Put the original tabbing stream back
     * @see #setOutputStream(OutputStream)
     * @param s the original wrapped stream
     * @return the replacement stream, wrapped in a tabbing stream
     */
    public TabbingOutputStream resetOutputStream(TabbingOutputStream s) {
        flush();
        TabbingOutputStream replaced = (TabbingOutputStream) this.out;
        this.out = s;
        return replaced;
    }

    /**
     * A context for idiomatic use of the {@link DbgTimer} in a try-with-resources block
     */
    public static class DbgCtx implements AutoCloseable {
        private final DbgTimer dbg;

        private DbgCtx(DbgTimer dbg) {
            this.dbg = dbg;
        }

        /**
         * Stop the task that this context was created for
         */
        @Override
        public void close() {
            dbg.stop();
        }
    }
}

View file

@ -0,0 +1,85 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.util;
import ghidra.program.model.listing.Program;
/**
* A convenience context for transaction IDs on a Ghidra program database
*
* This is meant to be used idiomatically, as in a try-with-resources block:
*
* <pre>
* {@code
* try (GhidraDBTransaction t = new GhidraDBTransaction(program, "Demo")) {
* program.getMemory().....
* t.commit();
* }
* }
* </pre>
*
* This idiom is very useful if there is complex logic in your transaction. It is very easy to
* forget to close the transaction, especially if an error occurs, leaving the database in an open
* transaction indefinitely. The try-with-resources block will ensure that the transaction is
* closed in all circumstances. Note, however, that in order for the transaction to be committed,
* you must call {@link #commit()}.
*
* Any exceptions within the block will cause {@code t.commit()} to be skipped, thus aborting the
* transaction.
*/
public class GhidraDBTransaction implements AutoCloseable {
    protected Program program;
    protected int tid;
    protected boolean open;

    /**
     * Start a transaction on the given program with the given description
     * @param program the program to modify
     * @param description a description of the transaction
     */
    public GhidraDBTransaction(Program program, String description) {
        this.program = program;
        this.tid = program.startTransaction(description);
        this.open = true;
    }

    /**
     * Finish the transaction
     *
     * If this is called before {@link #commit()}, then the transaction is aborted. If the
     * transaction has already been finished, this does nothing. This is called automatically at
     * the close of a try-with-resources block.
     */
    @Override
    public void close() {
        finish(false);
    }

    /**
     * Finish the transaction, and commit
     *
     * This MUST be called in order to commit the transaction. The transaction is immediately
     * closed, and any further modifications to the database will likely result in an error. If
     * the transaction has already been finished, this does nothing.
     */
    public void commit() {
        finish(true);
    }

    /**
     * End the transaction if it is still open, and mark it finished
     * @param commit the flag to pass to {@code endTransaction}: true from {@link #commit()},
     *            false from {@link #close()}
     */
    private void finish(boolean commit) {
        if (!open) {
            return;
        }
        program.endTransaction(tid, commit);
        open = false;
    }
}

View file

@ -0,0 +1,77 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.util;
import java.util.Collection;
import java.util.Iterator;
/**
* Utilities for {@link Collection}s
*/
public class SleighUtil {
    /**
     * Compare two collections, first by size, then element by element in iteration order
     *
     * If the collections have differing sizes, the elements are not examined: the smaller
     * collection precedes the larger, and the result is the difference of the sizes. Otherwise,
     * corresponding elements are compared in iteration order, and the result of the first
     * comparison that is not zero decides the ordering.
     * @param a the first collection
     * @param b the second collection
     * @return a comparison result as in {@link Comparable#compareTo(Object)}
     */
    public static <T extends Comparable<T>> int compareInOrder(Collection<T> a, Collection<T> b) {
        int sizeDifference = a.size() - b.size();
        if (sizeDifference != 0) {
            return sizeDifference;
        }
        // Same size: walk b alongside a
        Iterator<T> others = b.iterator();
        for (T element : a) {
            int comparison = element.compareTo(others.next());
            if (comparison != 0) {
                return comparison;
            }
        }
        return 0;
    }

    /**
     * Compare two byte arrays, first by length, then entry by entry
     *
     * If the two arrays have differing lengths, the shorter precedes the longer, and the result
     * is the difference of the lengths. Otherwise, they are compared as in C's {@code memcmp},
     * except that Java {@code byte}s are signed. The result is then the difference of the first
     * pair of unequal entries.
     * @param a the first array
     * @param b the second array
     * @return a comparison result as in {@link Comparable#compareTo(Object)}
     */
    public static int compareArrays(byte[] a, byte[] b) {
        int lengthDifference = a.length - b.length;
        if (lengthDifference != 0) {
            return lengthDifference;
        }
        int index = 0;
        while (index < a.length) {
            int difference = a[index] - b[index];
            if (difference != 0) {
                return difference;
            }
            index++;
        }
        return 0;
    }
}

View file

@ -0,0 +1,50 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.util;
import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseActionGotoTable;
import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseTransitionTable;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblySymbol;
/**
* An entry in a (sparse) LR(0) transition table or LALR(1) action/goto table
*
* @see AssemblyParseTransitionTable
* @see AssemblyParseActionGotoTable
* @param <T> the type of each entry in a table cell
*/
public class TableEntry<T> extends TableEntryKey {
    private final T value;

    /**
     * Create a new table entry holding the given value at the given state and symbol
     *
     * The state and symbol are passed on to the {@link TableEntryKey} superclass.
     * @param state the row
     * @param sym the column
     * @param value the value stored in the cell
     */
    public TableEntry(int state, AssemblySymbol sym, T value) {
        super(state, sym);
        this.value = value;
    }

    /**
     * Get the value of the entry
     * @return the value given at construction
     */
    public T getValue() {
        return this.value;
    }
}

View file

@ -0,0 +1,95 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.assembler.sleigh.util;
import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseActionGotoTable;
import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseTransitionTable;
import ghidra.app.plugin.assembler.sleigh.symbol.AssemblySymbol;
/**
* A key in a (sparse) LR(0) transition table or LALR(1) action/goto table
*
* @see AssemblyParseTransitionTable
* @see AssemblyParseActionGotoTable
*/
public class TableEntryKey implements Comparable<TableEntryKey> {
    private final int state;
    private final AssemblySymbol sym;

    /**
     * Create a new key for the given state and symbol
     * @param state the row
     * @param sym the column
     */
    public TableEntryKey(int state, AssemblySymbol sym) {
        this.state = state;
        this.sym = sym;
    }

    /**
     * Combine the state and the symbol's hash
     * @return {@code 31 * state + sym.hashCode()}
     */
    @Override
    public int hashCode() {
        return 31 * state + sym.hashCode();
    }

    /**
     * Check for equality by state and symbol
     * @param that the object to compare with
     * @return true if {@code that} is a {@link TableEntryKey} with the same state and an equal
     *         symbol
     */
    @Override
    public boolean equals(Object that) {
        if (!(that instanceof TableEntryKey)) {
            return false;
        }
        TableEntryKey ek = (TableEntryKey) that;
        return this.state == ek.state && this.sym.equals(ek.sym);
    }

    /**
     * Order keys by state, then by symbol
     * @param that the key to compare with
     * @return a negative, zero, or positive value as this key precedes, equals, or follows
     *         {@code that}
     */
    @Override
    public int compareTo(TableEntryKey that) {
        // Integer.compare rather than subtraction: the difference of two ints can overflow and
        // flip sign when the operands have opposite signs and large magnitudes.
        int result = Integer.compare(this.state, that.state);
        if (result != 0) {
            return result;
        }
        return this.sym.compareTo(that.sym);
    }

    /**
     * Get the state (row) of the key in the table
     * @return the state
     */
    public int getState() {
        return state;
    }

    /**
     * Get the symbol (column) of the entry in the table
     * @return the symbol
     */
    public AssemblySymbol getSym() {
        return sym;
    }
}

View file

@ -0,0 +1,59 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.analysis;
import ghidra.program.model.address.Address;
/**
 * An immutable pair of addresses: the source and the destination of a reference
 *
 * A {@code null} address given at construction is stored as {@link Address#NO_ADDRESS}, so
 * neither accessor returns {@code null}. Two pairs are equal when both their sources and their
 * destinations are equal.
 */
public class ReferenceAddressPair {
    private final Address source;
    private final Address destination;

    /**
     * Create a pair from the given addresses
     * @param source the source address; {@code null} is replaced by {@link Address#NO_ADDRESS}
     * @param destination the destination address; {@code null} is replaced by
     *            {@link Address#NO_ADDRESS}
     */
    public ReferenceAddressPair(Address source, Address destination) {
        // Substituting NO_ADDRESS keeps hashCode() and equals() from dereferencing null
        this.source = (source != null) ? source : Address.NO_ADDRESS;
        this.destination = (destination != null) ? destination : Address.NO_ADDRESS;
    }

    /**
     * Get the source address
     * @return the source, never {@code null}
     */
    public Address getSource() {
        return source;
    }

    /**
     * Get the destination address
     * @return the destination, never {@code null}
     */
    public Address getDestination() {
        return destination;
    }

    /**
     * Combine the hashes of the two addresses
     * @return the exclusive-or of the source's and the destination's hash codes
     */
    @Override
    public int hashCode() {
        return source.hashCode() ^ destination.hashCode();
    }

    /**
     * Check for equality by source and destination
     * @param obj the object to compare with
     * @return true if {@code obj} is a {@link ReferenceAddressPair} with an equal source and an
     *         equal destination
     */
    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof ReferenceAddressPair)) {
            return false;
        }
        ReferenceAddressPair otherPair = (ReferenceAddressPair) obj;
        // Short-circuit: the destinations need not be compared once the sources differ
        return source.equals(otherPair.source) && destination.equals(otherPair.destination);
    }
}

View file

@ -0,0 +1,82 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.data;
import java.util.ArrayList;
import ghidra.program.model.address.Address;
import ghidra.program.model.data.*;
import ghidra.program.model.lang.DataTypeProviderContext;
import ghidra.program.model.listing.*;
import ghidra.program.model.symbol.Symbol;
/**
 * A {@link DataTypeProviderContext} that answers from the data in a program's listing, with
 * offsets taken relative to a fixed base address
 */
public class ProgramProviderContext implements DataTypeProviderContext {
    Program program;
    Address addr;

    /**
     * Create a context over the given program
     * @param program the program whose listing is consulted
     * @param addr the base address to which offsets are added
     */
    public ProgramProviderContext(Program program, Address addr) {
        this.program = program;
        this.addr = addr;
    }

    /**
     * Collect consecutive components beginning at the start offset
     *
     * Each component found advances the offset by the component's length. Collection ends once
     * the offset passes {@code end}, or as soon as no component is found at the current offset.
     * @param start the offset of the first component
     * @param end the last offset at which a component may begin
     * @return the components found, in order; empty if there is none at {@code start}
     */
    @Override
    public DataTypeComponent[] getDataTypeComponents(int start, int end) {
        ArrayList<DataTypeComponent> found = new ArrayList<>();
        int offset = start;
        while (offset <= end) {
            DataTypeComponent dtc = getDataTypeComponent(offset);
            if (dtc == null) {
                break;
            }
            found.add(dtc);
            offset += dtc.getLength();
        }
        return found.toArray(new DataTypeComponent[found.size()]);
    }

    /**
     * Describe the data at the given offset as a component
     *
     * The component takes its data type, length and EOL comment from the data. Its label is the
     * name of the data's primary symbol, unless there is no such symbol or the symbol is dynamic,
     * in which case the label is {@code null}.
     * @param offset the offset from the base address
     * @return the component, or {@code null} if the listing has no data at that address
     */
    @Override
    public DataTypeComponent getDataTypeComponent(int offset) {
        Data data = getData(offset);
        if (data == null) {
            return null;
        }
        DataType dataType = data.getDataType();
        int dataLength = data.getLength();
        Symbol primary = data.getPrimarySymbol();
        String label = (primary != null && !primary.isDynamic()) ? primary.getName() : null;
        String eolComment = data.getComment(CodeUnit.EOL_COMMENT);
        return new DataTypeComponentImpl(dataType, null, dataLength, 0, offset, label,
            eolComment);
    }

    /**
     * Look up the data at the given offset from the base address, using wrapping addition
     * @param offset the offset from the base address
     * @return whatever the listing returns for that address
     */
    private Data getData(int offset) {
        Address target = addr.addWrap(offset);
        return program.getListing().getDataAt(target);
    }

    /**
     * Ask the program's data type manager for a unique name in the root category
     * @param baseName the name to start from
     * @return the name returned by the data type manager
     */
    @Override
    public String getUniqueName(String baseName) {
        DataTypeManager manager = program.getListing().getDataTypeManager();
        return manager.getUniqueName(CategoryPath.ROOT, baseName);
    }
}

View file

@ -0,0 +1,95 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.core.data;
import ghidra.program.model.address.Address;
import ghidra.program.model.data.*;
import ghidra.program.model.lang.DataTypeProviderContext;
import ghidra.program.model.listing.Data;
import ghidra.program.model.listing.Program;
import ghidra.program.util.ProgramLocation;
import java.util.ArrayList;
/**
 * A {@link DataTypeProviderContext} that answers from the components of a structure, with
 * offsets taken relative to a fixed offset within that structure
 */
public class ProgramStructureProviderContext implements DataTypeProviderContext {
    Program program;
    Address addr;
    Structure struct = null;
    int myoffset;

    /**
     * Create a context for the structure component at the given program location
     *
     * The component is found by following the location's component path from the defined data
     * containing the location's address. The context's structure is the data type of that
     * component's parent.
     * @param program the program containing the data
     * @param loc the location of a component within structure data
     */
    public ProgramStructureProviderContext(Program program, ProgramLocation loc) {
        this.program = program;
        int[] componentPath = loc.getComponentPath();
        Data containing = program.getListing().getDefinedDataContaining(loc.getAddress());
        Data component = containing.getComponent(componentPath);
        this.addr = component.getMinAddress();
        this.myoffset = component.getParentOffset();
        Data parentData = component.getParent();
        this.struct = (Structure) parentData.getDataType();
    }

    /**
     * Create a context from explicitly given parts
     * @param program the program
     * @param addr the address recorded for this context
     * @param struct the structure whose components are provided
     * @param myOffset the offset within the structure that corresponds to offset 0 of this context
     */
    public ProgramStructureProviderContext(Program program, Address addr, Structure struct,
            int myOffset) {
        this.program = program;
        this.addr = addr;
        this.struct = struct;
        this.myoffset = myOffset;
    }

    /**
     * Get the structure component at the given offset from this context's own offset
     * @param offset the offset relative to this context
     * @return the component the structure reports there, or {@code null} if the resulting
     *         offset is negative or not less than the structure's length
     */
    @Override
    public DataTypeComponent getDataTypeComponent(int offset) {
        int structOffset = myoffset + offset;
        boolean withinStruct = structOffset >= 0 && structOffset < struct.getLength();
        return withinStruct ? struct.getComponentAt(structOffset) : null;
    }

    /**
     * Collect consecutive components beginning at the start offset
     *
     * Each component found advances the offset by the component's length. Collection ends once
     * the offset passes {@code end}, or as soon as no component is found at the current offset.
     * Nothing before {@code start} is returned, while a component that begins at or before
     * {@code end} and extends past it IS returned.
     *
     * @param start start offset
     * @param end end offset
     *
     * @return the components found, in order; empty if there is none at {@code start}
     */
    @Override
    public DataTypeComponent[] getDataTypeComponents(int start, int end) {
        ArrayList<DataTypeComponent> found = new ArrayList<>();
        int offset = start;
        while (offset <= end) {
            DataTypeComponent dtc = getDataTypeComponent(offset);
            if (dtc == null) {
                break;
            }
            found.add(dtc);
            offset += dtc.getLength();
        }
        return found.toArray(new DataTypeComponent[found.size()]);
    }

    /**
     * Ask the program's data type manager for a unique name in the root category
     * @param baseName the name to start from
     * @return the name returned by the data type manager
     */
    @Override
    public String getUniqueName(String baseName) {
        DataTypeManager manager = program.getListing().getDataTypeManager();
        return manager.getUniqueName(CategoryPath.ROOT, baseName);
    }
}

Some files were not shown because too many files have changed in this diff Show more