mirror of
https://github.com/NationalSecurityAgency/ghidra.git
synced 2025-10-05 19:42:36 +02:00
1538 lines
37 KiB
Text
1538 lines
37 KiB
Text
/* ###
|
|
* IP: GHIDRA
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
%{
// Prologue: copied verbatim to the top of the generated parser source
#include "grammar.hh"

extern int yydebug;			// Declared here; not otherwise referenced in this file
extern int yylex(void);			// Token source called by yyparse(); defined at the end of this file
extern int yyerror(const char *str);	// Error callback called by yyparse(); defined at the end of this file
static CParse *parse;			// Parser object the grammar actions act on; assigned in CParse::runParse()
%}
|
|
|
|
// Semantic value attached to grammar symbols.  The <tag> used in the
// %token / %type declarations below selects which member a symbol uses.
%union {
  uint4 flags;				// <flags>   e.g. type_qualifier_list
  TypeDeclarator *dec;			// <dec>     a single declarator
  vector<TypeDeclarator *> *declist;	// <declist> a list of declarators
  TypeSpecifiers *spec;			// <spec>    accumulated declaration specifiers
  vector<uint4> *ptrspec;		// <ptrspec> one entry of qualifier flags per '*'
  Datatype *type;			// <type>    a resolved data-type
  Enumerator *enumer;			// <enumer>  a single enumerator
  vector<Enumerator *> *vecenum;	// <vecenum> a list of enumerators
  string *str;				// <str>     identifier / keyword text
  uintb *i;				// <i>       integer constant
}
|
|
|
|
// Grammar taken from ISO/IEC 9899
|
|
|
|
// Tokens without a semantic value
%token DOTDOTDOT BADTOKEN STRUCT UNION ENUM DECLARATION_RESULT PARAM_RESULT
// Tokens carrying a value (member of %union given in angle brackets)
%token <i> NUMBER
%token <str> IDENTIFIER
%token <str> STORAGE_CLASS_SPECIFIER TYPE_QUALIFIER FUNCTION_SPECIFIER
%token <type> TYPE_NAME

// Value types of the non-terminals
%type <declist> declaration init_declarator_list parameter_list parameter_type_list
%type <declist> struct_declaration_list struct_declaration struct_declarator_list
%type <dec> declarator init_declarator direct_declarator parameter_declaration
%type <dec> abstract_declarator direct_abstract_declarator struct_declarator
%type <spec> declaration_specifiers specifier_qualifier_list
%type <flags> type_qualifier_list
%type <ptrspec> pointer
%type <i> assignment_expression
%type <type> type_specifier struct_or_union_specifier enum_specifier
%type <enumer> enumerator
%type <vecenum> enumerator_list
|
|
%%
|
|
|
|
// Start symbol.  The leading marker token (injected by CParse::runParse via
// CParse::lex) selects whether a declaration or a single parameter is parsed.
document:
    DECLARATION_RESULT declaration
      { parse->setResultDeclarations($2); }
  | PARAM_RESULT parameter_declaration
      {
	vector<TypeDeclarator *> *res = parse->newVecDeclarator();
	res->push_back($2);
	parse->setResultDeclarations(res);
      }
  ;
|
|
|
|
// Specifiers followed by an optional list of declarators and a ';'
declaration:
    declaration_specifiers ';'
      { $$ = parse->mergeSpecDecVec($1); }
  | declaration_specifiers init_declarator_list ';'
      { $$ = parse->mergeSpecDecVec($1,$2); }
  ;
|
|
|
|
// Right-recursive list: the last specifier creates the TypeSpecifiers object,
// every earlier specifier is folded into the object built for the tail.
declaration_specifiers:
    STORAGE_CLASS_SPECIFIER
      { $$ = parse->newSpecifier(); parse->addSpecifier($$,$1); }
  | type_specifier
      { $$ = parse->newSpecifier(); parse->addTypeSpecifier($$,$1); }
  | TYPE_QUALIFIER
      { $$ = parse->newSpecifier(); parse->addSpecifier($$,$1); }
  | FUNCTION_SPECIFIER
      { $$ = parse->newSpecifier(); parse->addFuncSpecifier($$,$1); }
  | STORAGE_CLASS_SPECIFIER declaration_specifiers
      { $$ = parse->addSpecifier($2,$1); }
  | type_specifier declaration_specifiers
      { $$ = parse->addTypeSpecifier($2,$1); }
  | TYPE_QUALIFIER declaration_specifiers
      { $$ = parse->addSpecifier($2,$1); }
  | FUNCTION_SPECIFIER declaration_specifiers
      { $$ = parse->addFuncSpecifier($2,$1); }
  ;
|
|
|
|
// Comma separated declarators, collected in source order
init_declarator_list:
    init_declarator
      { $$ = parse->newVecDeclarator(); $$->push_back($1); }
  | init_declarator_list ',' init_declarator
      { $$ = $1; $$->push_back($3); }
  ;
|
|
|
|
// Only the bare declarator form is accepted; initializers are not part of this grammar
init_declarator:
    declarator
      { $$ = $1; }
// declarator = initializer
  ;
|
|
|
|
// A known type name, or a struct/union/enum specifier; the Datatype is passed straight through
type_specifier:
    TYPE_NAME
      { $$ = $1; }
  | struct_or_union_specifier
      { $$ = $1; }
  | enum_specifier
      { $$ = $1; }
  ;
|
|
|
|
// Definition (with a body) or reference (tag only) of a struct or union.
// An anonymous definition is passed to the parser with an empty name.
struct_or_union_specifier:
    STRUCT '{' struct_declaration_list '}'
      { $$ = parse->newStruct("",$3); }
  | STRUCT IDENTIFIER '{' struct_declaration_list '}'
      { $$ = parse->newStruct(*$2,$4); }
  | STRUCT IDENTIFIER
      { $$ = parse->oldStruct(*$2); }
  | UNION '{' struct_declaration_list '}'
      { $$ = parse->newUnion("",$3); }
  | UNION IDENTIFIER '{' struct_declaration_list '}'
      { $$ = parse->newUnion(*$2,$4); }
  | UNION IDENTIFIER
      { $$ = parse->oldUnion(*$2); }
  ;
|
|
|
|
// Field declarations of a struct/union body, flattened into a single list
struct_declaration_list:
    struct_declaration
      { $$ = $1; }
  | struct_declaration_list struct_declaration
      {
	$$ = $1;
	$$->insert($$->end(),$2->begin(),$2->end());	// Append the new fields to the running list
      }
  ;
|
|
|
|
// One line of a struct body: shared specifiers applied to each declarator
struct_declaration:
    specifier_qualifier_list struct_declarator_list ';'
      { $$ = parse->mergeSpecDecVec($1,$2); }
  ;
|
|
|
|
// Type specifiers and qualifiers (no storage class) in front of struct fields.
// Right-recursive: the last element allocates the TypeSpecifiers object.
specifier_qualifier_list:
    type_specifier
      { $$ = parse->newSpecifier(); parse->addTypeSpecifier($$,$1); }
  | type_specifier specifier_qualifier_list
      { $$ = parse->addTypeSpecifier($2,$1); }
  | TYPE_QUALIFIER
      { $$ = parse->newSpecifier(); parse->addSpecifier($$,$1); }
  | TYPE_QUALIFIER specifier_qualifier_list
      { $$ = parse->addSpecifier($2,$1); }
  ;
|
|
|
|
// Comma separated field declarators, collected in source order
struct_declarator_list:
    struct_declarator
      { $$ = parse->newVecDeclarator(); $$->push_back($1); }
  | struct_declarator_list ',' struct_declarator
      { $$ = $1; $$->push_back($3); }
  ;
|
|
|
|
// Only plain declarators are accepted; the bit-field form is not part of this grammar
struct_declarator:
    declarator
      { $$ = $1; }
// declarator ':' NUMBER
  ;
|
|
|
|
// Definition (optionally with a trailing comma) or reference of an enum.
// An anonymous definition is passed to the parser with an empty name.
enum_specifier:
    ENUM IDENTIFIER '{' enumerator_list '}'
      { $$ = parse->newEnum(*$2,$4); }
  | ENUM '{' enumerator_list '}'
      { $$ = parse->newEnum("",$3); }
  | ENUM IDENTIFIER '{' enumerator_list ',' '}'
      { $$ = parse->newEnum(*$2,$4); }
  | ENUM '{' enumerator_list ',' '}'
      { $$ = parse->newEnum("",$3); }
  | ENUM IDENTIFIER
      { $$ = parse->oldEnum(*$2); }
  ;
|
|
|
|
// Comma separated enumerators, collected in source order
enumerator_list:
    enumerator
      { $$ = parse->newVecEnumerator(); $$->push_back($1); }
  | enumerator_list ',' enumerator
      { $$ = $1; $$->push_back($3); }
  ;
|
|
|
|
// A named constant, optionally with an explicit numeric value
enumerator:
    IDENTIFIER
      { $$ = parse->newEnumerator(*$1); }
  | IDENTIFIER '=' NUMBER
      { $$ = parse->newEnumerator(*$1,*$3); }
  ;
|
|
|
|
// A direct declarator, optionally preceded by one or more '*'
declarator:
    direct_declarator
      { $$ = $1; }
  | pointer direct_declarator
      { $$ = parse->mergePointer($1,$2); }
  ;
|
|
|
|
// Identifier, parenthesized declarator, sized array or function with a parameter list.
// Unsized arrays and K&R identifier lists are not part of this grammar.
direct_declarator:
    IDENTIFIER
      { $$ = parse->newDeclarator($1); }
  | '(' declarator ')'
      { $$ = $2; }
  | direct_declarator '[' type_qualifier_list assignment_expression ']'
      { $$ = parse->newArray($1,$3,$4); }
  | direct_declarator '[' assignment_expression ']'
      { $$ = parse->newArray($1,0,$3); }
// direct_declarator '[' ']'
  | direct_declarator '(' parameter_type_list ')'
      { $$ = parse->newFunc($1,$3); }
// direct_declarator ( identifier_list )
  ;
|
|
|
|
// One qualifier-flag word is recorded per '*'.  Because the rule is right-recursive,
// the flags of the rightmost '*' are stored first in the vector.
pointer:
    '*'
      { $$ = parse->newPointer(); $$->push_back(0); }
  | '*' type_qualifier_list
      { $$ = parse->newPointer(); $$->push_back($2); }
  | '*' pointer
      { $$ = $2; $$->push_back(0); }
  | '*' type_qualifier_list pointer
      { $$ = $3; $$->push_back($2); }
  ;
|
|
|
|
// One or more qualifiers OR-ed together into a single flag word
type_qualifier_list:
    TYPE_QUALIFIER
      { $$ = parse->convertFlag($1); }
  | type_qualifier_list TYPE_QUALIFIER
      { $$ = $1; $$ |= parse->convertFlag($2); }
  ;
|
|
|
|
// Parameter list, optionally ending in ", ...".
// Varargs is signalled by appending a null declarator (consumed by CParse::newFunc).
parameter_type_list:
    parameter_list
      { $$ = $1; }
  | parameter_list ',' DOTDOTDOT
      { $$ = $1; $$->push_back((TypeDeclarator *)0); }
  ;
|
|
|
|
// Comma separated parameter declarations, collected in source order
parameter_list:
    parameter_declaration
      { $$ = parse->newVecDeclarator(); $$->push_back($1); }
  | parameter_list ',' parameter_declaration
      { $$ = $1; $$->push_back($3); }
  ;
|
|
|
|
// A parameter: specifiers with a named declarator, no declarator, or an abstract declarator
parameter_declaration:
    declaration_specifiers declarator
      { $$ = parse->mergeSpecDec($1,$2); }
  | declaration_specifiers
      { $$ = parse->mergeSpecDec($1); }
  | declaration_specifiers abstract_declarator
      { $$ = parse->mergeSpecDec($1,$2); }
  ;
|
|
|
|
// Declarator without an identifier
abstract_declarator:
    pointer
      {
	$$ = parse->newDeclarator();	// Nameless declarator to hang the pointer modifiers on
	parse->mergePointer($1,$$);
      }
  | direct_abstract_declarator
      { $$ = $1; }
  | pointer direct_abstract_declarator
      { $$ = parse->mergePointer($1,$2); }
  ;
|
|
|
|
// Parenthesized abstract declarator, optionally followed by array or function suffixes.
// The forms that start directly with '[' or '(' parameter list are not part of this grammar.
direct_abstract_declarator:
    '(' abstract_declarator ')'
      { $$ = $2; }
// '[' assignment_expression ']'
  | direct_abstract_declarator '[' assignment_expression ']'
      { $$ = parse->newArray($1,0,$3); }
// '(' parameter_type_list ')'
  | direct_abstract_declarator '(' parameter_type_list ')'
      { $$ = parse->newFunc($1,$3); }
  ;
|
|
|
|
// Expressions are restricted to a single numeric constant
assignment_expression:
    NUMBER
      { $$ = $1; }
  ;
|
|
|
|
%%
|
|
|
|
// Make this a token that is described by its type code alone.
//   tp - the token type
void GrammarToken::set(uint4 tp)

{
  this->type = tp;
}
|
|
|
|
// Make this a token whose value is derived from the characters it was lexed from.
//   tp  - the token type: integer, identifier, stringval or charconstant
//   ptr - first character of the token text
//   len - number of characters making up the token text
// For identifier/stringval a new string is allocated and stored in the token.
// Throws LowlevelError for any other token type.
void GrammarToken::set(uint4 tp,char *ptr,int4 len)

{
  type = tp;
  switch(tp) {
  case integer:
    {
      string charstring(ptr,len);
      istringstream s(charstring);
      s.unsetf(ios::dec | ios::hex | ios::oct);	// No fixed base: let the stream honor a 0x / 0 prefix
      intb val = 0;		// Well defined result even if nothing can be extracted (e.g. a lone "-")
      s >> val;
      value.integer = (uintb)val;
    }
    break;
  case identifier:
  case stringval:
    value.stringval = new string(ptr,len);
    break;
  case charconstant:
    if (len==1)
      value.integer = (uintb)*ptr;	// Plain character
    else {			// Backslash: the character after it selects the value
      switch(ptr[1]) {
      case 'n':
	value.integer = 10;
	break;
      case '0':
	value.integer = 0;
	break;
      case 'a':
	value.integer = 7;
	break;
      case 'b':
	value.integer = 8;
	break;
      case 't':
	value.integer = 9;
	break;
      case 'v':
	value.integer = 11;
	break;
      case 'f':
	value.integer = 12;
	break;
      case 'r':
	value.integer = 13;
	break;
      default:
	value.integer = (uintb)ptr[1];	// Any other escaped character stands for itself
	break;
      }
    }
    break;
  default:
    throw LowlevelError("Bad internal grammar token set");
  }
}
|
|
|
|
// Construct a token with type 0 and a zero integer value
GrammarToken::GrammarToken(void)

{
  value.integer = 0;
  type = 0;
}
|
|
|
|
// Construct a lexer with no input stream attached.
//   maxbuffer - size in characters of the internal buffer
GrammarLexer::GrammarLexer(int4 maxbuffer)

{
  // Buffer
  buffer = new char[ maxbuffer ];
  buffersize = maxbuffer;
  bufstart = 0;
  bufend = 0;

  // Scanner state
  state = start;
  curlineno = 0;

  // No stream yet, so report end-of-file until pushFile() is called
  in = (istream *)0;
  endoffile = true;
}
|
|
|
|
// Release the buffer allocated by the constructor
GrammarLexer::~GrammarLexer(void)

{
  delete [] buffer;
}
|
|
|
|
void GrammarLexer::bumpLine(void)
|
|
|
|
{ // Keep track of a newline
|
|
curlineno += 1;
|
|
bufstart = 0;
|
|
bufend = 0;
|
|
}
|
|
|
|
uint4 GrammarLexer::moveState(char lookahead)
|
|
|
|
{ // Change finite state machine based on lookahead
|
|
uint4 res;
|
|
bool newline = false;
|
|
|
|
if (lookahead<32) {
|
|
if ((lookahead == 9)||(lookahead==11)||(lookahead==12)||
|
|
(lookahead==13))
|
|
lookahead = ' ';
|
|
else if (lookahead == '\n') {
|
|
newline = true;
|
|
lookahead = ' ';
|
|
}
|
|
else {
|
|
setError("Illegal character");
|
|
return GrammarToken::badtoken;
|
|
}
|
|
}
|
|
else if (lookahead >= 127) {
|
|
setError("Illegal character");
|
|
return GrammarToken::badtoken;
|
|
}
|
|
|
|
res = 0;
|
|
bool syntaxerror = false;
|
|
switch(state) {
|
|
case start:
|
|
switch(lookahead) {
|
|
case '/':
|
|
state = slash;
|
|
break;
|
|
case '.':
|
|
state = dot1;
|
|
break;
|
|
case '*':
|
|
case ',':
|
|
case '(':
|
|
case ')':
|
|
case '[':
|
|
case ']':
|
|
case '{':
|
|
case '}':
|
|
case ';':
|
|
case '=':
|
|
state = punctuation;
|
|
bufstart = bufend-1;
|
|
break;
|
|
case '-':
|
|
case '0':
|
|
case '1':
|
|
case '2':
|
|
case '3':
|
|
case '4':
|
|
case '5':
|
|
case '6':
|
|
case '7':
|
|
case '8':
|
|
case '9':
|
|
state = number;
|
|
bufstart = bufend-1;
|
|
break;
|
|
case ' ':
|
|
break; // Ignore since we are already open
|
|
case '\"':
|
|
state = doublequote;
|
|
bufstart = bufend-1;
|
|
break;
|
|
case '\'':
|
|
state = singlequote;
|
|
break;
|
|
case 'a':
|
|
case 'b':
|
|
case 'c':
|
|
case 'd':
|
|
case 'e':
|
|
case 'f':
|
|
case 'g':
|
|
case 'h':
|
|
case 'i':
|
|
case 'j':
|
|
case 'k':
|
|
case 'l':
|
|
case 'm':
|
|
case 'n':
|
|
case 'o':
|
|
case 'p':
|
|
case 'q':
|
|
case 'r':
|
|
case 's':
|
|
case 't':
|
|
case 'u':
|
|
case 'v':
|
|
case 'w':
|
|
case 'x':
|
|
case 'y':
|
|
case 'z':
|
|
case 'A':
|
|
case 'B':
|
|
case 'C':
|
|
case 'D':
|
|
case 'E':
|
|
case 'F':
|
|
case 'G':
|
|
case 'H':
|
|
case 'I':
|
|
case 'J':
|
|
case 'K':
|
|
case 'L':
|
|
case 'M':
|
|
case 'N':
|
|
case 'O':
|
|
case 'P':
|
|
case 'Q':
|
|
case 'R':
|
|
case 'S':
|
|
case 'T':
|
|
case 'U':
|
|
case 'V':
|
|
case 'W':
|
|
case 'X':
|
|
case 'Y':
|
|
case 'Z':
|
|
case '_':
|
|
state = identifier;
|
|
bufstart = bufend-1;
|
|
break;
|
|
default:
|
|
setError("Illegal character");
|
|
return GrammarToken::badtoken;
|
|
}
|
|
break;
|
|
case slash:
|
|
if (lookahead=='*')
|
|
state = c_comment;
|
|
else if (lookahead == '/')
|
|
state = endofline_comment;
|
|
else
|
|
syntaxerror = true;
|
|
break;
|
|
case dot1:
|
|
if (lookahead=='.')
|
|
state = dot2;
|
|
else
|
|
syntaxerror = true;
|
|
break;
|
|
case dot2:
|
|
if (lookahead=='.')
|
|
state = dot3;
|
|
else
|
|
syntaxerror = true;
|
|
break;
|
|
case dot3:
|
|
state = start;
|
|
res = GrammarToken::dotdotdot;
|
|
break;
|
|
case punctuation:
|
|
state = start;
|
|
res = (uint4)buffer[bufstart];
|
|
break;
|
|
case endofline_comment:
|
|
if (newline)
|
|
state = start;
|
|
break; // Anything else is part of comment
|
|
case c_comment:
|
|
if (lookahead == '/') {
|
|
if ((bufend >1)&&(buffer[bufend-2]=='*'))
|
|
state = start;
|
|
}
|
|
break; // Anything else is part of comment
|
|
case doublequote:
|
|
if (lookahead == '\"')
|
|
state = doublequoteend;
|
|
break; // Anything else is part of string
|
|
case doublequoteend:
|
|
state = start;
|
|
res = GrammarToken::stringval;
|
|
break;
|
|
case singlequote:
|
|
if (lookahead == '\\')
|
|
state = singlebackslash;
|
|
else if (lookahead == '\'')
|
|
state = singlequoteend;
|
|
break; // Anything else is part of string
|
|
case singlequoteend:
|
|
state = start;
|
|
res = GrammarToken::charconstant;
|
|
break;
|
|
case singlebackslash: // Seen backslash in a single quoted string
|
|
state = singlequote;
|
|
break;
|
|
case number:
|
|
if (lookahead=='x') {
|
|
if (((bufend-bufstart)!=2)||(buffer[bufstart]!='0'))
|
|
syntaxerror = true; // x only allowed as 0x hex indicator
|
|
}
|
|
else if ((lookahead>='0')&&(lookahead<='9')) {
|
|
}
|
|
else if ((lookahead>='A')&&(lookahead<='Z')) {
|
|
}
|
|
else if ((lookahead>='a')&&(lookahead<='z')) {
|
|
}
|
|
else if (lookahead == '_') {
|
|
}
|
|
else {
|
|
state = start;
|
|
res = GrammarToken::integer;
|
|
}
|
|
break;
|
|
case identifier:
|
|
if ((lookahead>='0')&&(lookahead<='9')) {
|
|
}
|
|
else if ((lookahead>='A')&&(lookahead<='Z')) {
|
|
}
|
|
else if ((lookahead>='a')&&(lookahead<='z')) {
|
|
}
|
|
else if (lookahead == '_') {
|
|
}
|
|
else {
|
|
state = start;
|
|
res = GrammarToken::identifier;
|
|
}
|
|
break;
|
|
}
|
|
if (syntaxerror) {
|
|
setError("Syntax error");
|
|
return GrammarToken::badtoken;
|
|
}
|
|
if (newline) bumpLine();
|
|
return res;
|
|
}
|
|
|
|
void GrammarLexer::establishToken(GrammarToken &token,uint4 val)
|
|
|
|
{
|
|
if (val < GrammarToken::integer)
|
|
token.set(val);
|
|
else {
|
|
token.set(val,buffer+bufstart,(bufend-bufstart)-1);
|
|
}
|
|
token.setPosition(filestack.back(),curlineno,bufstart);
|
|
}
|
|
|
|
void GrammarLexer::clear(void)
|
|
|
|
{ // Clear lexer for a brand new parse
|
|
filenamemap.clear();
|
|
streammap.clear();
|
|
filestack.clear();
|
|
bufstart = 0;
|
|
bufend = 0;
|
|
curlineno = 0;
|
|
state = start;
|
|
in = (istream *)0;
|
|
endoffile = true;
|
|
error.clear();
|
|
}
|
|
|
|
// Append " at line <line> in <filename>" to a stream.
//   s       - the output stream
//   line    - line number to report (written in decimal)
//   filenum - index of the file, as assigned by pushFile()
void GrammarLexer::writeLocation(ostream &s,int4 line,int4 filenum)

{
  s << " at line " << dec << line << " in " << filenamemap[filenum];
}
|
|
|
|
// Echo the buffered part of the current line and, on the next line, a marker
// under a given column.  Nothing is written unless the requested line is the
// one currently held in the buffer.
//   s     - the output stream
//   line  - line number the caller wants to show
//   colno - column (offset into the buffer) to point at
void GrammarLexer::writeTokenLocation(ostream &s,int4 line,int4 colno)

{
  if (curlineno != line)
    return;			// Requested line is not the one in the buffer

  int4 pos = 0;
  while(pos < bufend) {
    s << buffer[pos];
    ++pos;
  }
  s << '\n';
  for(int4 pad=colno;pad>0;--pad)
    s << ' ';
  s << "^--\n";
}
|
|
|
|
void GrammarLexer::pushFile(const string &filename,istream *i)
|
|
|
|
{
|
|
int4 filenum = filenamemap.size();
|
|
filenamemap[filenum] = filename;
|
|
streammap[filenum] = i;
|
|
filestack.push_back(filenum);
|
|
in = i;
|
|
endoffile = false;
|
|
}
|
|
|
|
void GrammarLexer::popFile(void)
|
|
|
|
{
|
|
filestack.pop_back();
|
|
if (filestack.empty()) {
|
|
endoffile = true;
|
|
return;
|
|
}
|
|
int4 filenum = filestack.back();
|
|
in = streammap[filenum]; // Get previous stream
|
|
}
|
|
|
|
// Read characters until the state machine reports a token and store it.
//   token - receives the token.  Its type is GrammarToken::endoffile once the
//           input is exhausted and GrammarToken::badtoken on a lexing error.
// The character that terminated the previous token is still the last character
// in the buffer, so the first pass scans it again instead of reading a new one.
void GrammarLexer::getNextToken(GrammarToken &token)

{
  if (endoffile) {
    token.set(GrammarToken::endoffile);
    return;
  }

  uint4 tok = GrammarToken::badtoken;
  bool reuselookahead = (bufend != 0);	// Only possible on the very first pass
  for(;;) {
    char nextchar;
    if (reuselookahead)
      nextchar = buffer[bufend-1];	// Get old lookahead token
    else {
      if (bufend >= buffersize) {
	setError("Line too long");
	tok = GrammarToken::badtoken;
	break;
      }
      in->get(nextchar);
      if (!(*in)) {
	endoffile = true;
	break;
      }
      buffer[bufend++] = nextchar;
    }
    reuselookahead = false;
    tok = moveState(nextchar);
    if (tok != 0)
      break;
  }

  if (endoffile) {
    buffer[bufend++] = ' ';	// Simulate a space
    tok = moveState(' ');	// to let the final token resolve
    bool idle = (state == start)||(state == endofline_comment);
    if ((tok==0)&&(!idle)) {
      setError("Incomplete token");
      tok = GrammarToken::badtoken;
    }
  }
  establishToken(token,tok);
}
|
|
|
|
// Build the pointer type pointing to a given type.
//   base - the pointed-to type
//   decl - the declarator this modifier belongs to (unused here)
//   glb  - the architecture: supplies the type factory and the default data space,
//          whose address size and word size are used for the pointer
// Returns the pointer data-type.
Datatype *PointerModifier::modType(Datatype *base,const TypeDeclarator *decl,Architecture *glb) const

{
  int4 addrsize = glb->getDefaultDataSpace()->getAddrSize();
  return glb->types->getTypePointer(addrsize,base,glb->getDefaultDataSpace()->getWordSize());
}
|
|
|
|
// Build the array type with this modifier's element count.
//   base - the element type
//   decl - the declarator this modifier belongs to (unused here)
//   glb  - the architecture supplying the type factory
// Returns the array data-type.
Datatype *ArrayModifier::modType(Datatype *base,const TypeDeclarator *decl,Architecture *glb) const

{
  return glb->types->getTypeArray(arraysize,base);
}
|
|
|
|
// Construct a function modifier from a parsed parameter list.
//   p      - the parameter declarators (the vector is copied)
//   dtdtdt - true if the parameter list ended in "..."
// A single parameter whose type is plain void (no modifiers) is treated as
// an empty parameter list.
FunctionModifier::FunctionModifier(const vector<TypeDeclarator *> *p,bool dtdtdt)

{
  dotdotdot = dtdtdt;
  paramlist = *p;
  if (paramlist.size() != 1)
    return;
  TypeDeclarator *only = paramlist.front();
  if (only->numModifiers() != 0)
    return;			// Something like "void *" is a real parameter
  Datatype *ct = only->getBaseType();
  if ((ct != (Datatype *)0)&&(ct->getMetatype()==TYPE_VOID))
    paramlist.clear();		// Check for void as an inputtype
}
|
|
|
|
// Build the data-type of every parameter, in order.
//   intypes - the types are appended to this vector
//   glb     - the architecture passed on to TypeDeclarator::buildType
void FunctionModifier::getInTypes(vector<Datatype *> &intypes,Architecture *glb) const

{
  vector<TypeDeclarator *>::const_iterator iter;
  for(iter=paramlist.begin();iter!=paramlist.end();++iter)
    intypes.push_back( (*iter)->buildType(glb) );
}
|
|
|
|
// Collect the identifier of every parameter, in order.
//   innames - the names are appended to this vector
void FunctionModifier::getInNames(vector<string> &innames) const

{
  vector<TypeDeclarator *>::const_iterator iter;
  for(iter=paramlist.begin();iter!=paramlist.end();++iter)
    innames.push_back((*iter)->getIdentifier());
}
|
|
|
|
bool FunctionModifier::isValid(void) const
|
|
|
|
{
|
|
for(uint4 i=0;i<paramlist.size();++i) {
|
|
TypeDeclarator *decl = paramlist[i];
|
|
if (!decl->isValid()) return false;
|
|
if (decl->numModifiers()==0) {
|
|
Datatype *ct = decl->getBaseType();
|
|
if ((ct != (Datatype *)0)&&(ct->getMetatype()==TYPE_VOID))
|
|
return false; // Extra void type
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Build the function (code) type for this parameter list.
//   base - the return type
//   decl - the declarator this modifier belongs to; supplies the prototype model
//   glb  - the architecture supplying the type factory
// Returns the code data-type.
// The varargs property is taken from the flag stored by the constructor.  The
// parser marks "..." with a trailing null declarator, but CParse::newFunc strips
// that marker before this object is constructed, so paramlist itself never
// carries it.
Datatype *FunctionModifier::modType(Datatype *base,const TypeDeclarator *decl,Architecture *glb) const

{
  vector<Datatype *> intypes;
  getInTypes(intypes,glb);

  ProtoModel *protomodel = decl->getModel(glb);
  return glb->types->getTypeCode(protomodel,base,intypes,dotdotdot);
}
|
|
|
|
// The declarator owns its modifiers: free each of them
TypeDeclarator::~TypeDeclarator(void)

{
  vector<TypeModifier *>::iterator iter;
  for(iter=mods.begin();iter!=mods.end();++iter)
    delete *iter;
}
|
|
|
|
// Build the complete data-type described by this declarator.
//   glb - the architecture passed on to each modifier
// Starting from the base type, the modifiers are applied from the last one in
// the list to the first (the reverse of the order they were bound in).
// Returns the resulting data-type.
Datatype *TypeDeclarator::buildType(Architecture *glb) const

{
  Datatype *restype = basetype;
  vector<TypeModifier *>::const_reverse_iterator riter;
  for(riter=mods.rbegin();riter!=mods.rend();++riter)
    restype = (*riter)->modType(restype,this,glb);
  return restype;
}
|
|
|
|
// Get the prototype model for this declarator.
//   glb - the architecture that is asked for the model
// Returns the model named in the declaration, or the architecture's default
// model if none was named or the lookup yields null.
ProtoModel *TypeDeclarator::getModel(Architecture *glb) const

{
  ProtoModel *protomodel = model.empty() ? (ProtoModel *)0 : glb->getModel(model);
  if (protomodel != (ProtoModel *)0)
    return protomodel;
  return glb->defaultfp;
}
|
|
|
|
// Break a function declarator into the pieces of its prototype.
//   pieces - receives model, name, input types, input names, varargs flag and output type
//   glb    - the architecture used to build the types
// Returns false, leaving pieces untouched, unless the first modifier in the
// list is a function modifier.
bool TypeDeclarator::getPrototype(PrototypePieces &pieces,Architecture *glb) const

{
  if (mods.empty())
    return false;
  TypeModifier *mod = mods[0];
  if ((mod == (TypeModifier *)0)||(mod->getType()!=TypeModifier::function_mod))
    return false;
  FunctionModifier *fmod = (FunctionModifier *)mod;

  pieces.model = getModel(glb);
  pieces.name = ident;
  pieces.intypes.clear();
  fmod->getInTypes(pieces.intypes,glb);
  pieces.innames.clear();
  fmod->getInNames(pieces.innames);
  pieces.dotdotdot = fmod->isDotdotdot();

  // Construct the output type: apply every modifier except the function
  // modifier at index 0, walking from the end of the list
  pieces.outtype = basetype;
  for(int4 i=(int4)mods.size()-1;i>0;--i)
    pieces.outtype = mods[i]->modType(pieces.outtype,this,glb);
  return true;
}
|
|
|
|
bool TypeDeclarator::isValid(void) const
|
|
|
|
{
|
|
if (basetype == (Datatype *)0)
|
|
return false; // No basetype
|
|
|
|
int4 count=0;
|
|
if ((flags & CParse::f_typedef)!=0)
|
|
count += 1;
|
|
if ((flags & CParse::f_extern)!=0)
|
|
count += 1;
|
|
if ((flags & CParse::f_static)!=0)
|
|
count += 1;
|
|
if ((flags & CParse::f_auto)!=0)
|
|
count += 1;
|
|
if ((flags & CParse::f_register)!=0)
|
|
count += 1;
|
|
if (count > 1)
|
|
throw ParseError("Multiple storage specifiers");
|
|
|
|
count = 0;
|
|
if ((flags & CParse::f_const)!=0)
|
|
count += 1;
|
|
if ((flags & CParse::f_restrict)!=0)
|
|
count += 1;
|
|
if ((flags & CParse::f_volatile)!=0)
|
|
count += 1;
|
|
if (count > 1)
|
|
throw ParseError("Multiple type qualifiers");
|
|
|
|
for(uint4 i=0;i<mods.size();++i) {
|
|
if (!mods[i]->isValid())
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Construct a parser.
//   g      - the architecture whose types and prototype models are consulted
//   maxbuf - buffer size handed to the lexer
// The keyword table maps each reserved word to its flag.
CParse::CParse(Architecture *g,int4 maxbuf)
  : lexer(maxbuf)
{
  static const struct {
    const char *name;
    uint4 flag;
  } reserved[] = {
    { "typedef", f_typedef },
    { "extern", f_extern },
    { "static", f_static },
    { "auto", f_auto },
    { "register", f_register },
    { "const", f_const },
    { "restrict", f_restrict },
    { "volatile", f_volatile },
    { "inline", f_inline },
    { "struct", f_struct },
    { "union", f_union },
    { "enum", f_enum }
  };

  glb = g;
  firsttoken = -1;
  lastdecls = (vector<TypeDeclarator *> *)0;
  for(uint4 i=0;i<sizeof(reserved)/sizeof(reserved[0]);++i)
    keywords[reserved[i].name] = reserved[i].flag;
}
|
|
|
|
// Free every object allocated during parsing
CParse::~CParse(void)

{
  clearAllocation();
}
|
|
|
|
void CParse::clear(void)
|
|
|
|
{
|
|
clearAllocation();
|
|
lasterror.clear();
|
|
lastdecls = (vector<TypeDeclarator *> *)0;
|
|
lexer.clear();
|
|
firsttoken = -1;
|
|
}
|
|
|
|
// Apply declaration specifiers to a declarator.
//   spec - the specifiers: base type, function specifier and flags
//   dec  - the declarator to update; the flags are OR-ed into its existing flags
// Returns dec.
TypeDeclarator *CParse::mergeSpecDec(TypeSpecifiers *spec,TypeDeclarator *dec)

{
  dec->flags |= spec->flags;
  dec->model = spec->function_specifier;
  dec->basetype = spec->type_specifier;
  return dec;
}
|
|
|
|
// Create a declarator for specifiers that came without one.
//   spec - the specifiers to apply
// Returns a new, parser-owned declarator carrying the specifiers.
TypeDeclarator *CParse::mergeSpecDec(TypeSpecifiers *spec)

{
  return mergeSpecDec(spec,newDeclarator());
}
|
|
|
|
// Apply the same declaration specifiers to every declarator in a list.
//   spec    - the specifiers to apply
//   declist - the declarators to update
// Returns declist.
vector<TypeDeclarator *> *CParse::mergeSpecDecVec(TypeSpecifiers *spec,vector<TypeDeclarator *> *declist)

{
  vector<TypeDeclarator *>::iterator iter;
  for(iter=declist->begin();iter!=declist->end();++iter)
    mergeSpecDec(spec,*iter);
  return declist;
}
|
|
|
|
// Create a one element declarator list for specifiers that came without any declarator.
//   spec - the specifiers to apply
// Returns a new, parser-owned list holding a single new declarator.
vector<TypeDeclarator *> *CParse::mergeSpecDecVec(TypeSpecifiers *spec)

{
  vector<TypeDeclarator *> *declist = newVecDeclarator();
  declist->push_back( newDeclarator() );
  return mergeSpecDecVec(spec,declist);
}
|
|
|
|
// Translate a keyword into its flag.
//   str - the keyword text
// Returns the flag from the keyword table.  If the text is not in the table,
// an error is recorded and 0 is returned.
uint4 CParse::convertFlag(string *str)

{
  map<string,uint4>::const_iterator iter = keywords.find(*str);
  if (iter == keywords.end()) {
    setError("Unknown qualifier");
    return 0;
  }
  return (*iter).second;
}
|
|
|
|
// Add a storage class specifier or type qualifier to a set of specifiers.
//   spec - the specifiers to update
//   str  - the keyword text
// Returns spec.
TypeSpecifiers *CParse::addSpecifier(TypeSpecifiers *spec,string *str)

{
  spec->flags |= convertFlag(str);
  return spec;
}
|
|
|
|
// Set the base type of a set of specifiers.
//   spec - the specifiers to update
//   tp   - the data-type
// An error is recorded if a type had already been set; the new type is stored regardless.
// Returns spec.
TypeSpecifiers *CParse::addTypeSpecifier(TypeSpecifiers *spec,Datatype *tp)

{
  bool alreadyset = (spec->type_specifier!=(Datatype *)0);
  if (alreadyset)
    setError("Multiple type specifiers");
  spec->type_specifier = tp;
  return spec;
}
|
|
|
|
// Add a function specifier to a set of specifiers.
//   spec - the specifiers to update
//   str  - the specifier text
// A word from the keyword table sets its flag.  Any other text is stored as the
// function specifier name; an error is recorded if one was already stored.
// Returns spec.
TypeSpecifiers *CParse::addFuncSpecifier(TypeSpecifiers *spec,string *str)

{
  map<string,uint4>::const_iterator iter = keywords.find(*str);
  if (iter != keywords.end()) {
    spec->flags |= (*iter).second;	// A reserved specifier
    return spec;
  }
  if (!spec->function_specifier.empty())
    setError("Multiple parameter models");
  spec->function_specifier = *str;
  return spec;
}
|
|
|
|
// Append one pointer modifier per entry of a pointer specification.
//   ptr - qualifier flags, one entry per '*'
//   dec - the declarator receiving the modifiers (which it then owns)
// Returns dec.
TypeDeclarator *CParse::mergePointer(vector<uint4> *ptr,TypeDeclarator *dec)

{
  vector<uint4>::const_iterator iter;
  for(iter=ptr->begin();iter!=ptr->end();++iter)
    dec->mods.push_back(new PointerModifier(*iter));
  return dec;
}
|
|
|
|
// Allocate a named declarator.
//   str - the identifier
// Returns the new declarator; it is recorded so that clearAllocation() frees it.
TypeDeclarator *CParse::newDeclarator(string *str)

{
  typedec_alloc.push_back(new TypeDeclarator(*str));
  return typedec_alloc.back();
}
|
|
|
|
// Allocate a declarator without an identifier.
// Returns the new declarator; it is recorded so that clearAllocation() frees it.
TypeDeclarator *CParse::newDeclarator(void)

{
  typedec_alloc.push_back(new TypeDeclarator());
  return typedec_alloc.back();
}
|
|
|
|
// Allocate an empty set of declaration specifiers.
// Returns the new object; it is recorded so that clearAllocation() frees it.
TypeSpecifiers *CParse::newSpecifier(void)

{
  typespec_alloc.push_back(new TypeSpecifiers());
  return typespec_alloc.back();
}
|
|
|
|
// Allocate an empty list of declarators.
// Returns the new list; it is recorded so that clearAllocation() frees it.
vector<TypeDeclarator *> *CParse::newVecDeclarator(void)

{
  vecdec_alloc.push_back(new vector<TypeDeclarator *>());
  return vecdec_alloc.back();
}
|
|
|
|
// Allocate an empty pointer specification (list of qualifier flags).
// Returns the new list; it is recorded so that clearAllocation() frees it.
vector<uint4> *CParse::newPointer(void)

{
  vecuint4_alloc.push_back(new vector<uint4>());
  return vecuint4_alloc.back();
}
|
|
|
|
// Append an array modifier to a declarator.
//   dec   - the declarator receiving the modifier (which it then owns)
//   flags - qualifier flags given inside the brackets
//   num   - the number of elements (truncated to int4)
// Returns dec.
TypeDeclarator *CParse::newArray(TypeDeclarator *dec,uint4 flags,uintb *num)

{
  int4 numelements = (int4)*num;
  dec->mods.push_back(new ArrayModifier(flags,numelements));
  return dec;
}
|
|
|
|
// Append a function modifier to a declarator.
//   dec     - the declarator receiving the modifier (which it then owns)
//   declist - the parameter declarators.  A trailing null entry marks "..."; it
//             is removed from the list here and turned into the varargs flag.
// Returns dec.
TypeDeclarator *CParse::newFunc(TypeDeclarator *dec,vector<TypeDeclarator *> *declist)

{
  bool dotdotdot = (!declist->empty())&&(declist->back() == (TypeDeclarator *)0);
  if (dotdotdot)
    declist->pop_back();
  dec->mods.push_back(new FunctionModifier(declist,dotdotdot));
  return dec;
}
|
|
|
|
// Build a new structure data-type.
//   ident   - name of the structure (empty for an anonymous one)
//   declist - the field declarators
// Returns the structure.  If a field declarator is invalid or the type factory
// rejects the fields, an error is recorded, the partially built type is
// destroyed and null is returned.
Datatype *CParse::newStruct(const string &ident,vector<TypeDeclarator *> *declist)

{
  TypeStruct *res = glb->types->getTypeStruct(ident); // Create stub (for recursion)
  vector<TypeField> sublist;

  vector<TypeDeclarator *>::const_iterator iter;
  for(iter=declist->begin();iter!=declist->end();++iter) {
    TypeDeclarator *decl = *iter;
    if (!decl->isValid()) {
      setError("Invalid structure declarator");
      glb->types->destroyType(res);
      return (Datatype *)0;
    }
    sublist.push_back(TypeField());
    TypeField &field( sublist.back() );
    field.type = decl->buildType(glb);
    field.name = decl->getIdentifier();
    field.offset = -1;		// Let typegrp figure out offset
  }

  if (glb->types->setFields(sublist,res,-1,0))
    return res;

  setError("Bad structure definition");
  glb->types->destroyType(res);
  return (Datatype *)0;
}
|
|
|
|
// Look up a previously defined structure by name.
//   ident - the structure name
// Returns whatever the type factory finds under that name (possibly null).
// An error is recorded if nothing is found or the type is not a structure.
Datatype *CParse::oldStruct(const string &ident)

{
  Datatype *res = glb->types->findByName(ident);
  bool isstruct = (res!=(Datatype *)0)&&(res->getMetatype() == TYPE_STRUCT);
  if (!isstruct)
    setError("Identifier does not represent a struct as required");
  return res;
}
|
|
|
|
// Union definitions are not implemented: record an error and return null.
//   ident   - name of the union (ignored)
//   declist - the field declarators (ignored)
Datatype *CParse::newUnion(const string &ident,vector<TypeDeclarator *> *declist)

{
  Datatype *res = (Datatype *)0;
  setError("Unions are currently unsupported");
  return res;
}
|
|
|
|
// Union references are not implemented: record an error and return null.
//   ident - name of the union (ignored)
Datatype *CParse::oldUnion(const string &ident)

{
  Datatype *res = (Datatype *)0;
  setError("Unions are currently unsupported");
  return res;
}
|
|
|
|
// Allocate an enumerator without an explicit value.
//   ident - name of the enumeration constant
// Returns the new enumerator; it is recorded so that clearAllocation() frees it.
Enumerator *CParse::newEnumerator(const string &ident)

{
  enum_alloc.push_back(new Enumerator(ident));
  return enum_alloc.back();
}
|
|
|
|
// Allocate an enumerator with an explicit value.
//   ident - name of the enumeration constant
//   val   - the assigned value
// Returns the new enumerator; it is recorded so that clearAllocation() frees it.
Enumerator *CParse::newEnumerator(const string &ident,uintb val)

{
  enum_alloc.push_back(new Enumerator(ident,val));
  return enum_alloc.back();
}
|
|
|
|
// Allocate an empty list of enumerators.
// Returns the new list; it is recorded so that clearAllocation() frees it.
vector<Enumerator *> *CParse::newVecEnumerator(void)

{
  vecenum_alloc.push_back(new vector<Enumerator *>());
  return vecenum_alloc.back();
}
|
|
|
|
// Build a new enumeration data-type.
//   ident   - name of the enumeration (empty for an anonymous one)
//   vecenum - the enumerators, in declaration order
// Returns the enumeration.  If the type factory rejects the values, an error is
// recorded, the partially built type is destroyed and null is returned.
Datatype *CParse::newEnum(const string &ident,vector<Enumerator *> *vecenum)

{
  TypeEnum *res = glb->types->getTypeEnum(ident);

  // Split the enumerators into the three parallel lists the type factory expects
  vector<string> namelist;
  vector<uintb> vallist;
  vector<bool> assignlist;
  vector<Enumerator *>::const_iterator iter;
  for(iter=vecenum->begin();iter!=vecenum->end();++iter) {
    const Enumerator *enumer = *iter;
    namelist.push_back(enumer->enumconstant);
    vallist.push_back(enumer->value);
    assignlist.push_back(enumer->constantassigned);
  }

  if (glb->types->setEnumValues(namelist,vallist,assignlist,res))
    return res;

  setError("Bad enumeration values");
  glb->types->destroyType(res);
  return (Datatype *)0;
}
|
|
|
|
// Look up a previously defined enumeration by name.
//   ident - the enumeration name
// Returns whatever the type factory finds under that name (possibly null).
// An error is recorded if nothing is found or the type is not an enumeration.
Datatype *CParse::oldEnum(const string &ident)

{
  Datatype *res = glb->types->findByName(ident);
  bool isenum = (res!=(Datatype *)0)&&(res->isEnumType());
  if (!isenum)
    setError("Identifier does not represent an enum as required");
  return res;
}
|
|
|
|
void CParse::clearAllocation(void)
|
|
|
|
{
|
|
list<TypeDeclarator *>::iterator iter1;
|
|
|
|
for(iter1=typedec_alloc.begin();iter1!=typedec_alloc.end();++iter1)
|
|
delete *iter1;
|
|
typedec_alloc.clear();
|
|
|
|
list<TypeSpecifiers *>::iterator iter2;
|
|
for(iter2=typespec_alloc.begin();iter2!=typespec_alloc.end();++iter2)
|
|
delete *iter2;
|
|
typespec_alloc.clear();
|
|
|
|
list<vector<uint4> *>::iterator iter3;
|
|
for(iter3=vecuint4_alloc.begin();iter3!=vecuint4_alloc.end();++iter3)
|
|
delete *iter3;
|
|
vecuint4_alloc.clear();
|
|
|
|
list<vector<TypeDeclarator *> *>::iterator iter4;
|
|
for(iter4=vecdec_alloc.begin();iter4!=vecdec_alloc.end();++iter4)
|
|
delete *iter4;
|
|
vecdec_alloc.clear();
|
|
|
|
list<string *>::iterator iter5;
|
|
for(iter5=string_alloc.begin();iter5!=string_alloc.end();++iter5)
|
|
delete *iter5;
|
|
string_alloc.clear();
|
|
|
|
list<uintb *>::iterator iter6;
|
|
for(iter6=num_alloc.begin();iter6!=num_alloc.end();++iter6)
|
|
delete *iter6;
|
|
num_alloc.clear();
|
|
|
|
list<Enumerator *>::iterator iter7;
|
|
for(iter7=enum_alloc.begin();iter7!=enum_alloc.end();++iter7)
|
|
delete *iter7;
|
|
enum_alloc.clear();
|
|
|
|
list<vector<Enumerator *> *>::iterator iter8;
|
|
for(iter8=vecenum_alloc.begin();iter8!=vecenum_alloc.end();++iter8)
|
|
delete *iter8;
|
|
vecenum_alloc.clear();
|
|
}
|
|
|
|
// Classify an identifier for the grammar.
//   nm - the identifier text
// Returns the grammar token code:
//   - the keyword's token, if nm is in the keyword table
//   - TYPE_NAME, if the type factory knows nm (yylval.type is set to the type)
//   - FUNCTION_SPECIFIER, if the architecture has a prototype model called nm
//   - IDENTIFIER otherwise
int4 CParse::lookupIdentifier(const string &nm)

{
  map<string,uint4>::const_iterator iter = keywords.find(nm);
  if (iter != keywords.end()) {
    uint4 flag = (*iter).second;
    switch( flag ) {
    case f_typedef:
    case f_extern:
    case f_static:
    case f_auto:
    case f_register:
      return STORAGE_CLASS_SPECIFIER;
    case f_const:
    case f_restrict:
    case f_volatile:
      return TYPE_QUALIFIER;
    case f_inline:
      return FUNCTION_SPECIFIER;
    case f_struct:
      return STRUCT;
    case f_union:
      return UNION;
    case f_enum:
      return ENUM;
    default:
      break;			// Fall out to the non-keyword lookups
    }
  }

  Datatype *tp = glb->types->findByName(nm);
  if (tp != (Datatype *)0) {
    yylval.type = tp;
    return TYPE_NAME;
  }
  return glb->hasModel(nm) ? FUNCTION_SPECIFIER : IDENTIFIER;	// IDENTIFIER = unknown identifier
}
|
|
|
|
// Supply the next token to the generated parser.
// Returns the grammar token code and, where the token has a value, stores it
// in yylval.  The marker token set up by runParse() is delivered first.  Once
// an error has been recorded, BADTOKEN is returned without reading further.
// Numbers and identifier strings handed to the grammar are recorded so that
// clearAllocation() frees them.
int4 CParse::lex(void)

{
  if (firsttoken != -1) {	// Deliver the pending marker token exactly once
    int4 retval = firsttoken;
    firsttoken = -1;
    return retval;
  }
  if (!lasterror.empty())
    return BADTOKEN;

  GrammarToken tok;
  lexer.getNextToken(tok);
  // Remember the position for error reporting
  lineno = tok.getLineNo();
  colno = tok.getColNo();
  filenum = tok.getFileNum();

  switch(tok.getType()) {
  case GrammarToken::integer:
  case GrammarToken::charconstant:
    yylval.i = new uintb(tok.getInteger());
    num_alloc.push_back(yylval.i);
    return NUMBER;
  case GrammarToken::identifier:
    yylval.str = tok.getString();
    string_alloc.push_back(yylval.str);
    return lookupIdentifier(*yylval.str);
  case GrammarToken::stringval:
    delete tok.getString();	// Not passed on to the grammar, so free it here
    setError("Illegal string constant");
    return BADTOKEN;
  case GrammarToken::dotdotdot:
    return DOTDOTDOT;
  case GrammarToken::badtoken:
    setError(lexer.getError());	// Error from lexer
    return BADTOKEN;
  case GrammarToken::endoffile:
    return -1;			// No more tokens
  default:
    break;
  }
  return (int4)tok.getType();	// Remaining token types are passed through unchanged
}
|
|
|
|
void CParse::setError(const string &msg)
|
|
|
|
{
|
|
ostringstream s;
|
|
|
|
s << msg;
|
|
lexer.writeLocation(s,lineno,filenum);
|
|
s << '\n';
|
|
lexer.writeTokenLocation(s,lineno,colno);
|
|
lasterror = s.str();
|
|
}
|
|
|
|
/// \brief Run the generated parser on the stream already given to the lexer
///
/// The document type selects the synthetic first token handed to the grammar.
/// The file-static \b parse pointer is set to this object so that yylex() can
/// reach it.  If yyparse() fails without an error message having been recorded,
/// "Syntax error" is recorded.
/// \param doctype is doc_declaration or doc_parameter_declaration
/// \return \b true if yyparse() returned 0
/// \throw LowlevelError for any other document type
bool CParse::runParse(uint4 doctype)

{
  if (doctype == doc_declaration)
    firsttoken = DECLARATION_RESULT;
  else if (doctype == doc_parameter_declaration)
    firsttoken = PARAM_RESULT;
  else
    throw LowlevelError("Bad document type");

  parse = this;			// Setup global object for yyparse
  if (yyparse() == 0)
    return true;
  if (lasterror.empty())
    setError("Syntax error");	// The parser failed without a more specific message
  return false;
}
|
|
|
|
bool CParse::parseFile(const string &nm,uint4 doctype)
|
|
|
|
{ // Run the parser on a file, return true if no parse errors
|
|
clear(); // Clear out any old parsing
|
|
|
|
ifstream s(nm.c_str()); // open file
|
|
if (!s)
|
|
throw LowlevelError("Unable to open file for parsing: "+nm);
|
|
|
|
lexer.pushFile(nm,&s); // Inform lexer of filename and stream
|
|
bool res = runParse(doctype);
|
|
s.close();
|
|
return res;
|
|
}
|
|
|
|
bool CParse::parseStream(istream &s,uint4 doctype)
|
|
|
|
{
|
|
clear();
|
|
|
|
lexer.pushFile("stream",&s);
|
|
return runParse(doctype);
|
|
}
|
|
|
|
int yylex(void)
|
|
|
|
{
|
|
return parse->lex();
|
|
}
|
|
|
|
/// \brief Error callback required by the generated parser
///
/// The message is deliberately ignored here.
/// \param str is the parser's message (unused)
/// \return always 0
int yyerror(const char *str)

{
  (void)str;			// Message is not used
  return 0;
}
|
|
|
|
/// \brief Parse a single (optionally named) data-type declaration from a stream
///
/// The stream is parsed as a parameter declaration.  Exactly one valid declarator
/// must result; its identifier is passed back and its data-type is built.
/// \param s is the stream to parse
/// \param name is overwritten with the identifier of the parsed declarator
/// \param glb is the Architecture handed to the parser and to buildType()
/// \return the data-type built from the declarator
/// \throw ParseError if parsing fails or does not produce exactly one valid declarator
Datatype *parse_type(istream &s,string &name,Architecture *glb)

{
  CParse parser(glb,1000);

  const bool parsed = parser.parseStream(s,CParse::doc_parameter_declaration);
  if (!parsed)
    throw ParseError(parser.getError());

  vector<TypeDeclarator *> *decls = parser.getResultDeclarations();
  if (decls == (vector<TypeDeclarator *> *)0 || decls->empty())
    throw ParseError("Did not parse a datatype");
  if (decls->size() != 1)
    throw ParseError("Parsed multiple declarations");

  TypeDeclarator *decl = decls->front();
  if (!decl->isValid())
    throw ParseError("Parsed type is invalid");

  name = decl->getIdentifier();
  return decl->buildType(glb);
}
|
|
|
|
/// \brief Parse a function prototype from a stream into its raw pieces
///
/// The stream is parsed as a declaration.  Exactly one valid declarator must
/// result, and it must yield a prototype through getPrototype().
/// \param pieces is filled in by TypeDeclarator::getPrototype()
/// \param s is the stream to parse
/// \param glb is the Architecture handed to the parser and to getPrototype()
/// \throw ParseError if parsing fails, the declarator count is not one, the
///        declarator is invalid, or it is not a prototype
void parse_protopieces(PrototypePieces &pieces,
		       istream &s,Architecture *glb)
{
  CParse parser(glb,1000);

  const bool parsed = parser.parseStream(s,CParse::doc_declaration);
  if (!parsed)
    throw ParseError(parser.getError());

  vector<TypeDeclarator *> *decls = parser.getResultDeclarations();
  if (decls == (vector<TypeDeclarator *> *)0 || decls->empty())
    throw ParseError("Did not parse a datatype");
  if (decls->size() != 1)
    throw ParseError("Parsed multiple declarations");

  TypeDeclarator *decl = decls->front();
  if (!decl->isValid())
    throw ParseError("Parsed type is invalid");

  const bool isProto = decl->getPrototype(pieces,glb);
  if (!isProto)
    throw ParseError("Did not parse a prototype");
}
|
|
|
|
/// \brief Parse one C declaration and load the result straight into the Architecture
///
/// Exactly one valid declarator must result from the parse.  What happens next
/// depends on the declarator:
///   - \e extern : the prototype pieces are extracted and passed to glb->setPrototype()
///   - \e typedef : the data-type is built and named with the declared identifier
///   - a structure or enumeration base type : nothing further is done here
///
/// \param glb is the Architecture receiving the definitions
/// \param s is the stream to parse
/// \throw ParseError if parsing fails or the declaration is malformed
/// \throw LowlevelError if the declaration is none of the forms above
void parse_C(Architecture *glb,istream &s)

{
  CParse parser(glb,1000);

  const bool parsed = parser.parseStream(s,CParse::doc_declaration);
  if (!parsed)
    throw ParseError(parser.getError());

  vector<TypeDeclarator *> *decls = parser.getResultDeclarations();
  if (decls == (vector<TypeDeclarator *> *)0 || decls->empty())
    throw ParseError("Did not parse a datatype");
  if (decls->size() != 1)
    throw ParseError("Parsed multiple declarations");

  TypeDeclarator *decl = decls->front();
  if (!decl->isValid())
    throw ParseError("Parsed type is invalid");

  if (decl->hasProperty(CParse::f_extern)) {
    PrototypePieces pieces;
    if (!decl->getPrototype(pieces,glb))
      throw ParseError("Did not parse prototype as expected");
    glb->setPrototype(pieces);
    return;
  }
  if (decl->hasProperty(CParse::f_typedef)) {
    Datatype *ct = decl->buildType(glb);	// Built before the identifier is checked
    if (decl->getIdentifier().empty())
      throw ParseError("Missing identifier for typedef");
    glb->types->setName(ct,decl->getIdentifier());
    return;
  }
  // A bare struct or enum declaration is treated as a typedef and needs no further action here
  if (decl->getBaseType()->getMetatype()==TYPE_STRUCT)
    return;
  if (decl->getBaseType()->isEnumType())
    return;

  throw LowlevelError("Not sure what to do with this type");
}
|
|
|
|
/// \brief Read a run of identifier characters from a stream
///
/// Leading whitespace is skipped, then characters are consumed for as long as
/// they are alphanumeric or an underscore.  The first character that does not
/// qualify is left in the stream.
/// \param s is the stream to read from
/// \param name is cleared and then receives the characters that were read
void parse_toseparator(istream &s,string &name)

{
  name.erase();
  s >> ws;
  // Keep the peeked value as an int so end-of-stream is never confused with a
  // character, and classify through unsigned char because isalnum() is
  // undefined for negative arguments other than EOF
  int next = s.peek();
  while(next != istream::traits_type::eof()) {
    const unsigned char ch = (unsigned char)next;
    if (!isalnum(ch) && (ch != '_'))
      break;
    s.get();			// Consume the character just examined
    name += (char)ch;
    next = s.peek();
  }
}
|
|
|
|
/// \brief Scan a stream for the description of a specific varnode
///
/// The expected text is a machine address followed by a parenthesized qualifier:
/// an optional 'i' \e or an optional second address, then an optional
/// ':' followed by a hexadecimal unique id.
/// \param s is the stream to read from
/// \param size receives the size reported by parse_machaddr() for the main address
/// \param pc receives the address inside the parentheses, or an invalid Address if none is given
/// \param uq receives the unique id, or all 1 bits if none is given
/// \param typegrp is passed on to parse_machaddr()
/// \return the main address
/// \throw ParseError if either parenthesis is missing (or from parse_machaddr())
Address parse_varnode(istream &s,int4 &size,Address &pc,uintm &uq,const TypeFactory &typegrp)

{
  // A failed extraction leaves the character untouched, so start from a defined
  // value that cannot match any expected punctuation
  char tok = '\0';
  int4 discard;

  Address loc(parse_machaddr(s,size,typegrp));
  s >> ws >> tok;
  if (tok != '(')
    throw ParseError("Missing '('");
  s >> ws;
  tok = s.peek();
  pc = Address();		// pc starts out as invalid
  if (tok == 'i')
    s >> tok;			// Consume the 'i', leaving pc invalid
  else if (s.peek() != ':') {
    s.unsetf(ios::dec | ios::hex | ios::oct); // Let user specify base
    pc = parse_machaddr(s,discard,typegrp,true);	// Colon is not part of this address
  }
  s >> ws;
  if (s.peek() == ':') {	// Scan uniq
    s >> tok >> ws >> hex >> uq; // Assume uniq is in hex
  }
  else
    uq = ~((uintm)0);		// No unique id given
  s >> ws >> tok;
  if (tok != ')')
    throw ParseError("Missing ')'");
  return loc;
}
|
|
|
|
/// \brief Scan a stream for the description of a specific operation
///
/// The expected text is a machine address, a ':' and a hexadecimal unique id.
/// \param s is the stream to read from
/// \param uq receives the unique id
/// \param typegrp is passed on to parse_machaddr()
/// \return the parsed address
/// \throw ParseError if the ':' is missing (or from parse_machaddr())
Address parse_op(istream &s,uintm &uq,const TypeFactory &typegrp)

{
  int4 size;
  // A failed extraction leaves the character untouched, so start from a defined
  // value that cannot be mistaken for the ':' separator
  char tok = '\0';
  Address loc(parse_machaddr(s,size,typegrp,true));	// Colon is not part of the address
  s >> ws >> tok;
  if (tok != ':')
    throw ParseError("Missing ':'");
  s >> ws >> hex >> uq;		// Assume uniq is in hex
  return loc;
}
|
|
|
|
/// \brief Read an Address from an ASCII stream
///
/// The first non-whitespace character selects one of three forms:
///   - '[' : \e [space,offset] or \e [space,offset,size], with the address space given by name
///   - '{' : everything up to the closing '}' is read as an address in the join space
///   - otherwise : a leading '0' selects the default code space (the '0' stays part of
///     the offset text), any other character is looked up as a space shortcut and consumed.
///     The offset text is the following run of alphanumerics, '_' and '+',
///     plus ':' unless \b ignorecolon is set.
///
/// The collected text is decoded by Address::read().
/// \param s is the stream to read from
/// \param defaultsize receives the explicit size from the bracket form if present,
///        otherwise the size returned by Address::read()
/// \param typegrp provides the address space manager via getArch()
/// \param ignorecolon is \b true if a ':' terminates the offset text
/// \return the parsed Address
/// \throw ParseError if the text is malformed or the address space is unknown
Address parse_machaddr(istream &s,int4 &defaultsize,const TypeFactory &typegrp,bool ignorecolon)

{
  string token;
  AddrSpace *b;
  int4 size = -1;		// -1 indicates no explicit size was given
  int4 oversize;
  char tok;
  const AddrSpaceManager *manage = typegrp.getArch();

  s >> ws;
  tok = s.peek();
  if (tok == '[') {
    s >> tok;
    parse_toseparator(s,token); // scan base address token
    b = manage->getSpaceByName(token);
    if (b == (AddrSpace *)0)
      throw ParseError("Bad address base");
    s >> ws >> tok;
    if (tok != ',')
      throw ParseError("Missing ',' in address");
    parse_toseparator(s,token); // Get the offset portion of the address
    s >> ws >> tok;
    if (tok == ',') {		// Optional size specifier
      s.unsetf(ios::dec | ios::hex | ios::oct);
      s >> size;
      s >> ws >> tok;
    }
    if (tok != ']')
      throw ParseError("Missing ']' in address");
  }
  else if (tok == '{') {
    b = manage->getJoinSpace();
    s >> tok;			// Consume the opening curly brace
    // Scan to the matching curly brace.  A new character must be read on every
    // pass, and a failed read must end the scan, otherwise the loop never terminates.
    while((s >> tok) && (tok != '}'))
      token += tok;
    if (!s)
      throw ParseError("Missing '}' in address");
  }
  else {
    if (tok == '0') {
      b = manage->getDefaultCodeSpace();	// The '0' is left in the stream as part of the offset
    }
    else {
      b = manage->getSpaceByShortcut(tok);
      s >> tok;			// Consume the shortcut character
    }
    if (b==(AddrSpace *)0) {
      s >> token;
      string errmsg = "Bad address: ";
      errmsg += tok;
      errmsg += token;
      throw ParseError(errmsg);
    }
    token.erase();
    s >> ws;
    tok = s.peek();
    // Classify through unsigned char: isalnum() is undefined for negative arguments other than EOF
    while(isalnum((unsigned char)tok)||(tok=='_')||(tok=='+')||(!ignorecolon && (tok==':'))) {
      token += tok;
      s >> tok;			// Consume the character just appended
      tok = s.peek();
    }
  }

  Address res(b,0);
  oversize = res.read(token); // Read the address of this particular type
				// oversize is "standard size"
  if (oversize == -1)
    throw ParseError("Bad machine address");
  defaultsize = (size==-1) ? oversize : size; // If not overridden use standard
  return res;
}
|
|
|