ghidra/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y
2020-05-12 14:09:59 -04:00

1538 lines
37 KiB
Text

/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
%{
#include "grammar.hh"
extern int yylex(void);
extern int yyerror(const char *str);
static CParse *parse;
extern int yydebug;
%}
%union {
uint4 flags;
TypeDeclarator *dec;
vector<TypeDeclarator *> *declist;
TypeSpecifiers *spec;
vector<uint4> *ptrspec;
Datatype *type;
Enumerator *enumer;
vector<Enumerator *> *vecenum;
string *str;
uintb *i;
}
// Grammar taken from ISO/IEC 9899
%token DOTDOTDOT BADTOKEN STRUCT UNION ENUM DECLARATION_RESULT PARAM_RESULT
%token <i> NUMBER
%token <str> IDENTIFIER
%token <str> STORAGE_CLASS_SPECIFIER TYPE_QUALIFIER FUNCTION_SPECIFIER
%token <type> TYPE_NAME
%type <declist> declaration init_declarator_list parameter_list parameter_type_list
%type <declist> struct_declaration_list struct_declaration struct_declarator_list
%type <dec> declarator init_declarator direct_declarator parameter_declaration
%type <dec> abstract_declarator direct_abstract_declarator struct_declarator
%type <spec> declaration_specifiers specifier_qualifier_list
%type <flags> type_qualifier_list
%type <ptrspec> pointer
%type <i> assignment_expression
%type <type> type_specifier struct_or_union_specifier enum_specifier
%type <enumer> enumerator
%type <vecenum> enumerator_list
%%
document:
DECLARATION_RESULT declaration { parse->setResultDeclarations($2); }
| PARAM_RESULT parameter_declaration { vector<TypeDeclarator *> *res = parse->newVecDeclarator(); res->push_back($2); parse->setResultDeclarations(res); }
;
declaration:
declaration_specifiers ';' { $$ = parse->mergeSpecDecVec($1); }
| declaration_specifiers init_declarator_list ';' { $$ = parse->mergeSpecDecVec($1,$2); }
;
declaration_specifiers:
STORAGE_CLASS_SPECIFIER { $$ = parse->newSpecifier(); parse->addSpecifier($$,$1); }
| type_specifier { $$ = parse->newSpecifier(); parse->addTypeSpecifier($$,$1); }
| TYPE_QUALIFIER { $$ = parse->newSpecifier(); parse->addSpecifier($$,$1); }
| FUNCTION_SPECIFIER { $$ = parse->newSpecifier(); parse->addFuncSpecifier($$,$1); }
| STORAGE_CLASS_SPECIFIER declaration_specifiers { $$ = parse->addSpecifier($2,$1); }
| type_specifier declaration_specifiers { $$ = parse->addTypeSpecifier($2,$1); }
| TYPE_QUALIFIER declaration_specifiers { $$ = parse->addSpecifier($2,$1); }
| FUNCTION_SPECIFIER declaration_specifiers { $$ = parse->addFuncSpecifier($2,$1); }
;
init_declarator_list:
init_declarator { $$ = parse->newVecDeclarator(); $$->push_back($1); }
| init_declarator_list ',' init_declarator { $$ = $1; $$->push_back($3); }
;
init_declarator:
declarator { $$ = $1; }
//declarator = initializer
;
type_specifier:
TYPE_NAME { $$ = $1; }
| struct_or_union_specifier { $$ = $1; }
| enum_specifier { $$ = $1; }
;
struct_or_union_specifier:
STRUCT '{' struct_declaration_list '}' { $$ = parse->newStruct("",$3); }
| STRUCT IDENTIFIER '{' struct_declaration_list '}' { $$ = parse->newStruct(*$2,$4); }
| STRUCT IDENTIFIER { $$ = parse->oldStruct(*$2); }
| UNION '{' struct_declaration_list '}' { $$ = parse->newUnion("",$3); }
| UNION IDENTIFIER '{' struct_declaration_list '}' { $$ = parse->newUnion(*$2,$4); }
| UNION IDENTIFIER { $$ = parse->oldUnion(*$2); }
;
struct_declaration_list:
struct_declaration { $$ = $1; }
| struct_declaration_list struct_declaration { $$ = $1; $$->insert($$->end(),$2->begin(),$2->end()); }
;
struct_declaration:
specifier_qualifier_list struct_declarator_list ';' { $$ = parse->mergeSpecDecVec($1,$2); }
;
specifier_qualifier_list:
type_specifier { $$ = parse->newSpecifier(); parse->addTypeSpecifier($$,$1); }
| type_specifier specifier_qualifier_list { $$ = parse->addTypeSpecifier($2,$1); }
| TYPE_QUALIFIER { $$ = parse->newSpecifier(); parse->addSpecifier($$,$1); }
| TYPE_QUALIFIER specifier_qualifier_list { $$ = parse->addSpecifier($2,$1); }
;
struct_declarator_list:
struct_declarator { $$ = parse->newVecDeclarator(); $$->push_back($1); }
| struct_declarator_list ',' struct_declarator { $$ = $1; $$->push_back($3); }
;
struct_declarator:
declarator { $$ = $1; }
// declarator ':' NUMBER
;
enum_specifier:
ENUM IDENTIFIER '{' enumerator_list '}' { $$ = parse->newEnum(*$2,$4); }
| ENUM '{' enumerator_list '}' { $$ = parse->newEnum("",$3); }
| ENUM IDENTIFIER '{' enumerator_list ',' '}' { $$ = parse->newEnum(*$2,$4); }
| ENUM '{' enumerator_list ',' '}' { $$ = parse->newEnum("",$3); }
| ENUM IDENTIFIER { $$ = parse->oldEnum(*$2); }
;
enumerator_list:
enumerator { $$ = parse->newVecEnumerator(); $$->push_back($1); }
| enumerator_list ',' enumerator { $$ = $1; $$->push_back($3); }
;
enumerator:
IDENTIFIER { $$ = parse->newEnumerator(*$1); }
| IDENTIFIER '=' NUMBER { $$ = parse->newEnumerator(*$1,*$3); }
;
declarator:
direct_declarator { $$ = $1; }
| pointer direct_declarator { $$ = parse->mergePointer($1,$2); }
;
direct_declarator:
IDENTIFIER { $$ = parse->newDeclarator($1); }
| '(' declarator ')' { $$ = $2; }
| direct_declarator '[' type_qualifier_list assignment_expression ']' { $$ = parse->newArray($1,$3,$4); }
| direct_declarator '[' assignment_expression ']' { $$ = parse->newArray($1,0,$3); }
// direct_declarator '[' ']'
| direct_declarator '(' parameter_type_list ')' { $$ = parse->newFunc($1,$3); }
// direct_declarator ( identifier_list )
;
pointer:
'*' { $$ = parse->newPointer(); $$->push_back(0); }
| '*' type_qualifier_list { $$ = parse->newPointer(); $$->push_back($2); }
| '*' pointer { $$ = $2; $$->push_back(0); }
| '*' type_qualifier_list pointer { $$ = $3; $$->push_back($2); }
;
type_qualifier_list:
TYPE_QUALIFIER { $$ = parse->convertFlag($1); }
| type_qualifier_list TYPE_QUALIFIER { $$ = $1; $$ |= parse->convertFlag($2); }
;
parameter_type_list:
parameter_list { $$ = $1; }
| parameter_list ',' DOTDOTDOT { $$ = $1; $$->push_back((TypeDeclarator *)0); }
;
parameter_list:
parameter_declaration { $$ = parse->newVecDeclarator(); $$->push_back($1); }
| parameter_list ',' parameter_declaration { $$ = $1; $$->push_back($3); }
;
parameter_declaration:
declaration_specifiers declarator { $$ = parse->mergeSpecDec($1,$2); }
| declaration_specifiers { $$ = parse->mergeSpecDec($1); }
| declaration_specifiers abstract_declarator { $$ = parse->mergeSpecDec($1,$2); }
;
abstract_declarator:
pointer { $$ = parse->newDeclarator(); parse->mergePointer($1,$$); }
| direct_abstract_declarator { $$ = $1; }
| pointer direct_abstract_declarator { $$ = parse->mergePointer($1,$2); }
;
direct_abstract_declarator:
'(' abstract_declarator ')' { $$ = $2; }
// '[' assignment_expression ']'
| direct_abstract_declarator '[' assignment_expression ']' { $$ = parse->newArray($1,0,$3); }
// '(' parameter_type_list ')'
| direct_abstract_declarator '(' parameter_type_list ')' { $$ = parse->newFunc($1,$3); }
;
assignment_expression:
NUMBER { $$ = $1; }
;
%%
void GrammarToken::set(uint4 tp)
{
type = tp;
}
void GrammarToken::set(uint4 tp,char *ptr,int4 len)
{
type = tp;
switch(tp) {
case integer:
{
string charstring(ptr,len);
istringstream s(charstring);
s.unsetf(ios::dec | ios::hex | ios::oct);
intb val;
s >> val;
value.integer = (uintb)val;
}
break;
case identifier:
case stringval:
value.stringval = new string(ptr,len);
break;
case charconstant:
if (len==1)
value.integer = (uintb)*ptr;
else { // Backslash
switch(ptr[1]) {
case 'n':
value.integer = 10;
break;
case '0':
value.integer = 0;
break;
case 'a':
value.integer = 7;
break;
case 'b':
value.integer = 8;
break;
case 't':
value.integer = 9;
break;
case 'v':
value.integer = 11;
break;
case 'f':
value.integer = 12;
break;
case 'r':
value.integer = 13;
break;
default:
value.integer = (uintb)ptr[1];
break;
}
}
break;
default:
throw LowlevelError("Bad internal grammar token set");
}
}
GrammarToken::GrammarToken(void)
{
type = 0;
value.integer = 0;
}
GrammarLexer::GrammarLexer(int4 maxbuffer)
{
buffersize = maxbuffer;
buffer = new char[ maxbuffer ];
bufstart = 0;
bufend = 0;
curlineno = 0;
state = start;
in = (istream *)0;
endoffile = true;
}
GrammarLexer::~GrammarLexer(void)
{
delete [] buffer;
}
void GrammarLexer::bumpLine(void)
{ // Keep track of a newline
curlineno += 1;
bufstart = 0;
bufend = 0;
}
uint4 GrammarLexer::moveState(char lookahead)
{ // Change finite state machine based on lookahead
uint4 res;
bool newline = false;
if (lookahead<32) {
if ((lookahead == 9)||(lookahead==11)||(lookahead==12)||
(lookahead==13))
lookahead = ' ';
else if (lookahead == '\n') {
newline = true;
lookahead = ' ';
}
else {
setError("Illegal character");
return GrammarToken::badtoken;
}
}
else if (lookahead >= 127) {
setError("Illegal character");
return GrammarToken::badtoken;
}
res = 0;
bool syntaxerror = false;
switch(state) {
case start:
switch(lookahead) {
case '/':
state = slash;
break;
case '.':
state = dot1;
break;
case '*':
case ',':
case '(':
case ')':
case '[':
case ']':
case '{':
case '}':
case ';':
case '=':
state = punctuation;
bufstart = bufend-1;
break;
case '-':
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
state = number;
bufstart = bufend-1;
break;
case ' ':
break; // Ignore since we are already open
case '\"':
state = doublequote;
bufstart = bufend-1;
break;
case '\'':
state = singlequote;
break;
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
case 'u':
case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
case 'H':
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'O':
case 'P':
case 'Q':
case 'R':
case 'S':
case 'T':
case 'U':
case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
case '_':
state = identifier;
bufstart = bufend-1;
break;
default:
setError("Illegal character");
return GrammarToken::badtoken;
}
break;
case slash:
if (lookahead=='*')
state = c_comment;
else if (lookahead == '/')
state = endofline_comment;
else
syntaxerror = true;
break;
case dot1:
if (lookahead=='.')
state = dot2;
else
syntaxerror = true;
break;
case dot2:
if (lookahead=='.')
state = dot3;
else
syntaxerror = true;
break;
case dot3:
state = start;
res = GrammarToken::dotdotdot;
break;
case punctuation:
state = start;
res = (uint4)buffer[bufstart];
break;
case endofline_comment:
if (newline)
state = start;
break; // Anything else is part of comment
case c_comment:
if (lookahead == '/') {
if ((bufend >1)&&(buffer[bufend-2]=='*'))
state = start;
}
break; // Anything else is part of comment
case doublequote:
if (lookahead == '\"')
state = doublequoteend;
break; // Anything else is part of string
case doublequoteend:
state = start;
res = GrammarToken::stringval;
break;
case singlequote:
if (lookahead == '\\')
state = singlebackslash;
else if (lookahead == '\'')
state = singlequoteend;
break; // Anything else is part of string
case singlequoteend:
state = start;
res = GrammarToken::charconstant;
break;
case singlebackslash: // Seen backslash in a single quoted string
state = singlequote;
break;
case number:
if (lookahead=='x') {
if (((bufend-bufstart)!=2)||(buffer[bufstart]!='0'))
syntaxerror = true; // x only allowed as 0x hex indicator
}
else if ((lookahead>='0')&&(lookahead<='9')) {
}
else if ((lookahead>='A')&&(lookahead<='Z')) {
}
else if ((lookahead>='a')&&(lookahead<='z')) {
}
else if (lookahead == '_') {
}
else {
state = start;
res = GrammarToken::integer;
}
break;
case identifier:
if ((lookahead>='0')&&(lookahead<='9')) {
}
else if ((lookahead>='A')&&(lookahead<='Z')) {
}
else if ((lookahead>='a')&&(lookahead<='z')) {
}
else if (lookahead == '_') {
}
else {
state = start;
res = GrammarToken::identifier;
}
break;
}
if (syntaxerror) {
setError("Syntax error");
return GrammarToken::badtoken;
}
if (newline) bumpLine();
return res;
}
void GrammarLexer::establishToken(GrammarToken &token,uint4 val)
{
if (val < GrammarToken::integer)
token.set(val);
else {
token.set(val,buffer+bufstart,(bufend-bufstart)-1);
}
token.setPosition(filestack.back(),curlineno,bufstart);
}
void GrammarLexer::clear(void)
{ // Clear lexer for a brand new parse
filenamemap.clear();
streammap.clear();
filestack.clear();
bufstart = 0;
bufend = 0;
curlineno = 0;
state = start;
in = (istream *)0;
endoffile = true;
error.clear();
}
void GrammarLexer::writeLocation(ostream &s,int4 line,int4 filenum)
{
s << " at line " << dec << line;
s << " in " << filenamemap[filenum];
}
void GrammarLexer::writeTokenLocation(ostream &s,int4 line,int4 colno)
{
if (line!=curlineno) return; // Does line match current line in buffer
for(int4 i=0;i<bufend;++i)
s << buffer[i];
s << '\n';
for(int4 i=0;i<colno;++i)
s << ' ';
s << "^--\n";
}
void GrammarLexer::pushFile(const string &filename,istream *i)
{
int4 filenum = filenamemap.size();
filenamemap[filenum] = filename;
streammap[filenum] = i;
filestack.push_back(filenum);
in = i;
endoffile = false;
}
void GrammarLexer::popFile(void)
{
filestack.pop_back();
if (filestack.empty()) {
endoffile = true;
return;
}
int4 filenum = filestack.back();
in = streammap[filenum]; // Get previous stream
}
void GrammarLexer::getNextToken(GrammarToken &token)
{ // Read next token, return true if end of stream
char nextchar;
uint4 tok = GrammarToken::badtoken;
bool firsttimethru = true;
if (endoffile) {
token.set(GrammarToken::endoffile);
return;
}
do {
if ((!firsttimethru)||(bufend==0)) {
if (bufend >= buffersize) {
setError("Line too long");
tok = GrammarToken::badtoken;
break;
}
in->get(nextchar);
if (!(*in)) {
endoffile = true;
break;
}
buffer[bufend++] = nextchar;
}
else
nextchar = buffer[bufend-1]; // Get old lookahead token
tok = moveState(nextchar);
firsttimethru = false;
} while(tok == 0);
if (endoffile) {
buffer[bufend++] = ' '; // Simulate a space
tok = moveState(' '); // to let the final token resolve
if ((tok==0)&&(state != start)&&(state != endofline_comment)) {
setError("Incomplete token");
tok = GrammarToken::badtoken;
}
}
establishToken(token,tok);
}
Datatype *PointerModifier::modType(Datatype *base,const TypeDeclarator *decl,Architecture *glb) const
{
int4 addrsize = glb->getDefaultDataSpace()->getAddrSize();
Datatype *restype;
restype = glb->types->getTypePointer(addrsize,base,glb->getDefaultDataSpace()->getWordSize());
return restype;
}
Datatype *ArrayModifier::modType(Datatype *base,const TypeDeclarator *decl,Architecture *glb) const
{
Datatype *restype = glb->types->getTypeArray(arraysize,base);
return restype;
}
FunctionModifier::FunctionModifier(const vector<TypeDeclarator *> *p,bool dtdtdt)
{
paramlist = *p;
if (paramlist.size()==1) {
TypeDeclarator *decl = paramlist[0];
if (decl->numModifiers()==0) { // Check for void as an inputtype
Datatype *ct = decl->getBaseType();
if ((ct != (Datatype *)0)&&(ct->getMetatype()==TYPE_VOID))
paramlist.clear();
}
}
dotdotdot = dtdtdt;
}
void FunctionModifier::getInTypes(vector<Datatype *> &intypes,Architecture *glb) const
{
for(uint4 i=0;i<paramlist.size();++i) {
Datatype *ct = paramlist[i]->buildType(glb);
intypes.push_back( ct );
}
}
void FunctionModifier::getInNames(vector<string> &innames) const
{
for(uint4 i=0;i<paramlist.size();++i)
innames.push_back(paramlist[i]->getIdentifier());
}
bool FunctionModifier::isValid(void) const
{
for(uint4 i=0;i<paramlist.size();++i) {
TypeDeclarator *decl = paramlist[i];
if (!decl->isValid()) return false;
if (decl->numModifiers()==0) {
Datatype *ct = decl->getBaseType();
if ((ct != (Datatype *)0)&&(ct->getMetatype()==TYPE_VOID))
return false; // Extra void type
}
}
return true;
}
Datatype *FunctionModifier::modType(Datatype *base,const TypeDeclarator *decl,Architecture *glb) const
{
vector<Datatype *> intypes;
// Varargs is encoded as extra null pointer in paramlist
bool dotdotdot = false;
if ((!paramlist.empty())&&(paramlist.back() == (TypeDeclarator *)0)) {
dotdotdot = true;
}
getInTypes(intypes,glb);
ProtoModel *protomodel = decl->getModel(glb);
return glb->types->getTypeCode(protomodel,base,intypes,dotdotdot);
}
TypeDeclarator::~TypeDeclarator(void)
{
for(uint4 i=0;i<mods.size();++i)
delete mods[i];
}
Datatype *TypeDeclarator::buildType(Architecture *glb) const
{ // Apply modifications to the basetype, (in reverse order of binding)
Datatype *restype = basetype;
vector<TypeModifier *>::const_iterator iter;
iter = mods.end();
while(iter != mods.begin()) {
--iter;
restype = (*iter)->modType(restype,this,glb);
}
return restype;
}
ProtoModel *TypeDeclarator::getModel(Architecture *glb) const
{
// Get prototype model
ProtoModel *protomodel = (ProtoModel *)0;
if (model.size()!=0)
protomodel = glb->getModel(model);
if (protomodel == (ProtoModel *)0)
protomodel = glb->defaultfp;
return protomodel;
}
bool TypeDeclarator::getPrototype(PrototypePieces &pieces,Architecture *glb) const
{
TypeModifier *mod = (TypeModifier *)0;
if (mods.size() > 0)
mod = mods[0];
if ((mod == (TypeModifier *)0)||(mod->getType()!=TypeModifier::function_mod))
return false;
FunctionModifier *fmod = (FunctionModifier *)mod;
pieces.model = getModel(glb);
pieces.name = ident;
pieces.intypes.clear();
fmod->getInTypes(pieces.intypes,glb);
pieces.innames.clear();
fmod->getInNames(pieces.innames);
pieces.dotdotdot = fmod->isDotdotdot();
// Construct the output type
pieces.outtype = basetype;
vector<TypeModifier *>::const_iterator iter;
iter = mods.end();
--iter; // At least one modification
while(iter != mods.begin()) { // Do not apply function modifier
pieces.outtype = (*iter)->modType(pieces.outtype,this,glb);
--iter;
}
return true;
}
bool TypeDeclarator::isValid(void) const
{
if (basetype == (Datatype *)0)
return false; // No basetype
int4 count=0;
if ((flags & CParse::f_typedef)!=0)
count += 1;
if ((flags & CParse::f_extern)!=0)
count += 1;
if ((flags & CParse::f_static)!=0)
count += 1;
if ((flags & CParse::f_auto)!=0)
count += 1;
if ((flags & CParse::f_register)!=0)
count += 1;
if (count > 1)
throw ParseError("Multiple storage specifiers");
count = 0;
if ((flags & CParse::f_const)!=0)
count += 1;
if ((flags & CParse::f_restrict)!=0)
count += 1;
if ((flags & CParse::f_volatile)!=0)
count += 1;
if (count > 1)
throw ParseError("Multiple type qualifiers");
for(uint4 i=0;i<mods.size();++i) {
if (!mods[i]->isValid())
return false;
}
return true;
}
CParse::CParse(Architecture *g,int4 maxbuf)
: lexer(maxbuf)
{
glb = g;
firsttoken = -1;
lastdecls = (vector<TypeDeclarator *> *)0;
keywords["typedef"] = f_typedef;
keywords["extern"] = f_extern;
keywords["static"] = f_static;
keywords["auto"] = f_auto;
keywords["register"] = f_register;
keywords["const"] = f_const;
keywords["restrict"] = f_restrict;
keywords["volatile"] = f_volatile;
keywords["inline"] = f_inline;
keywords["struct"] = f_struct;
keywords["union"] = f_union;
keywords["enum"] = f_enum;
}
CParse::~CParse(void)
{
clearAllocation();
}
void CParse::clear(void)
{
clearAllocation();
lasterror.clear();
lastdecls = (vector<TypeDeclarator *> *)0;
lexer.clear();
firsttoken = -1;
}
TypeDeclarator *CParse::mergeSpecDec(TypeSpecifiers *spec,TypeDeclarator *dec)
{
dec->basetype = spec->type_specifier;
dec->model = spec->function_specifier;
dec->flags |= spec->flags;
return dec;
}
TypeDeclarator *CParse::mergeSpecDec(TypeSpecifiers *spec)
{
TypeDeclarator *dec = new TypeDeclarator();
typedec_alloc.push_back(dec);
return mergeSpecDec(spec,dec);
}
vector<TypeDeclarator *> *CParse::mergeSpecDecVec(TypeSpecifiers *spec,vector<TypeDeclarator *> *declist)
{
for(uint4 i=0;i<declist->size();++i)
mergeSpecDec(spec,(*declist)[i]);
return declist;
}
vector<TypeDeclarator *> *CParse::mergeSpecDecVec(TypeSpecifiers *spec)
{
vector<TypeDeclarator *> *declist;
declist = new vector<TypeDeclarator *>();
vecdec_alloc.push_back(declist);
TypeDeclarator *dec = new TypeDeclarator();
typedec_alloc.push_back(dec);
declist->push_back( dec );
return mergeSpecDecVec(spec,declist);
}
uint4 CParse::convertFlag(string *str)
{
map<string,uint4>::const_iterator iter;
iter = keywords.find(*str);
if (iter != keywords.end())
return (*iter).second;
setError("Unknown qualifier");
return 0;
}
TypeSpecifiers *CParse::addSpecifier(TypeSpecifiers *spec,string *str)
{
uint4 flag = convertFlag(str);
spec->flags |= flag;
return spec;
}
TypeSpecifiers *CParse::addTypeSpecifier(TypeSpecifiers *spec,Datatype *tp)
{
if (spec->type_specifier!=(Datatype *)0)
setError("Multiple type specifiers");
spec->type_specifier = tp;
return spec;
}
TypeSpecifiers *CParse::addFuncSpecifier(TypeSpecifiers *spec,string *str)
{
map<string,uint4>::const_iterator iter;
iter = keywords.find(*str);
if (iter != keywords.end())
spec->flags |= (*iter).second; // A reserved specifier
else {
if (spec->function_specifier.size()!=0)
setError("Multiple parameter models");
spec->function_specifier = *str;
}
return spec;
}
TypeDeclarator *CParse::mergePointer(vector<uint4> *ptr,TypeDeclarator *dec)
{
for(uint4 i=0;i<ptr->size();++i) {
PointerModifier *newmod = new PointerModifier((*ptr)[i]);
dec->mods.push_back(newmod);
}
return dec;
}
TypeDeclarator *CParse::newDeclarator(string *str)
{
TypeDeclarator *res = new TypeDeclarator(*str);
typedec_alloc.push_back(res);
return res;
}
TypeDeclarator *CParse::newDeclarator(void)
{
TypeDeclarator *res = new TypeDeclarator();
typedec_alloc.push_back(res);
return res;
}
TypeSpecifiers *CParse::newSpecifier(void)
{
TypeSpecifiers *spec = new TypeSpecifiers();
typespec_alloc.push_back(spec);
return spec;
}
vector<TypeDeclarator *> *CParse::newVecDeclarator(void)
{
vector<TypeDeclarator *> *res = new vector<TypeDeclarator *>();
vecdec_alloc.push_back(res);
return res;
}
vector<uint4> *CParse::newPointer(void)
{
vector<uint4> *res = new vector<uint4>();
vecuint4_alloc.push_back(res);
return res;
}
TypeDeclarator *CParse::newArray(TypeDeclarator *dec,uint4 flags,uintb *num)
{
ArrayModifier *newmod = new ArrayModifier(flags,(int4)*num);
dec->mods.push_back(newmod);
return dec;
}
TypeDeclarator *CParse::newFunc(TypeDeclarator *dec,vector<TypeDeclarator *> *declist)
{
bool dotdotdot = false;
if (!declist->empty()) {
if (declist->back() == (TypeDeclarator *)0) {
dotdotdot = true;
declist->pop_back();
}
}
FunctionModifier *newmod = new FunctionModifier(declist,dotdotdot);
dec->mods.push_back(newmod);
return dec;
}
Datatype *CParse::newStruct(const string &ident,vector<TypeDeclarator *> *declist)
{ // Build a new structure
TypeStruct *res = glb->types->getTypeStruct(ident); // Create stub (for recursion)
vector<TypeField> sublist;
for(uint4 i=0;i<declist->size();++i) {
TypeDeclarator *decl = (*declist)[i];
if (!decl->isValid()) {
setError("Invalid structure declarator");
glb->types->destroyType(res);
return (Datatype *)0;
}
sublist.push_back(TypeField());
sublist.back().type = decl->buildType(glb);
sublist.back().name = decl->getIdentifier();
sublist.back().offset = -1; // Let typegrp figure out offset
}
if (!glb->types->setFields(sublist,res,-1,0)) {
setError("Bad structure definition");
glb->types->destroyType(res);
return (Datatype *)0;
}
return res;
}
Datatype *CParse::oldStruct(const string &ident)
{
Datatype *res = glb->types->findByName(ident);
if ((res==(Datatype *)0)||(res->getMetatype() != TYPE_STRUCT))
setError("Identifier does not represent a struct as required");
return res;
}
Datatype *CParse::newUnion(const string &ident,vector<TypeDeclarator *> *declist)
{
setError("Unions are currently unsupported");
return (Datatype *)0;
}
Datatype *CParse::oldUnion(const string &ident)
{
setError("Unions are currently unsupported");
return (Datatype *)0;
}
Enumerator *CParse::newEnumerator(const string &ident)
{
Enumerator *res = new Enumerator(ident);
enum_alloc.push_back(res);
return res;
}
Enumerator *CParse::newEnumerator(const string &ident,uintb val)
{
Enumerator *res = new Enumerator(ident,val);
enum_alloc.push_back(res);
return res;
}
vector<Enumerator *> *CParse::newVecEnumerator(void)
{
vector<Enumerator *> *res = new vector<Enumerator *>();
vecenum_alloc.push_back(res);
return res;
}
Datatype *CParse::newEnum(const string &ident,vector<Enumerator *> *vecenum)
{
TypeEnum *res = glb->types->getTypeEnum(ident);
vector<string> namelist;
vector<uintb> vallist;
vector<bool> assignlist;
for(uint4 i=0;i<vecenum->size();++i) {
Enumerator *enumer = (*vecenum)[i];
namelist.push_back(enumer->enumconstant);
vallist.push_back(enumer->value);
assignlist.push_back(enumer->constantassigned);
}
if (!glb->types->setEnumValues(namelist,vallist,assignlist,res)) {
setError("Bad enumeration values");
glb->types->destroyType(res);
return (Datatype *)0;
}
return res;
}
Datatype *CParse::oldEnum(const string &ident)
{
Datatype *res = glb->types->findByName(ident);
if ((res==(Datatype *)0)||(!res->isEnumType()))
setError("Identifier does not represent an enum as required");
return res;
}
void CParse::clearAllocation(void)
{
list<TypeDeclarator *>::iterator iter1;
for(iter1=typedec_alloc.begin();iter1!=typedec_alloc.end();++iter1)
delete *iter1;
typedec_alloc.clear();
list<TypeSpecifiers *>::iterator iter2;
for(iter2=typespec_alloc.begin();iter2!=typespec_alloc.end();++iter2)
delete *iter2;
typespec_alloc.clear();
list<vector<uint4> *>::iterator iter3;
for(iter3=vecuint4_alloc.begin();iter3!=vecuint4_alloc.end();++iter3)
delete *iter3;
vecuint4_alloc.clear();
list<vector<TypeDeclarator *> *>::iterator iter4;
for(iter4=vecdec_alloc.begin();iter4!=vecdec_alloc.end();++iter4)
delete *iter4;
vecdec_alloc.clear();
list<string *>::iterator iter5;
for(iter5=string_alloc.begin();iter5!=string_alloc.end();++iter5)
delete *iter5;
string_alloc.clear();
list<uintb *>::iterator iter6;
for(iter6=num_alloc.begin();iter6!=num_alloc.end();++iter6)
delete *iter6;
num_alloc.clear();
list<Enumerator *>::iterator iter7;
for(iter7=enum_alloc.begin();iter7!=enum_alloc.end();++iter7)
delete *iter7;
enum_alloc.clear();
list<vector<Enumerator *> *>::iterator iter8;
for(iter8=vecenum_alloc.begin();iter8!=vecenum_alloc.end();++iter8)
delete *iter8;
vecenum_alloc.clear();
}
int4 CParse::lookupIdentifier(const string &nm)
{
map<string,uint4>::const_iterator iter = keywords.find(nm);
if (iter != keywords.end()) {
switch( (*iter).second ) {
case f_typedef:
case f_extern:
case f_static:
case f_auto:
case f_register:
return STORAGE_CLASS_SPECIFIER;
case f_const:
case f_restrict:
case f_volatile:
return TYPE_QUALIFIER;
case f_inline:
return FUNCTION_SPECIFIER;
case f_struct:
return STRUCT;
case f_union:
return UNION;
case f_enum:
return ENUM;
default:
break;
}
}
Datatype *tp = glb->types->findByName(nm);
if (tp != (Datatype *)0) {
yylval.type = tp;
return TYPE_NAME;
}
if (glb->hasModel(nm))
return FUNCTION_SPECIFIER;
return IDENTIFIER; // Unknown identifier
}
int4 CParse::lex(void)
{
GrammarToken tok;
if (firsttoken != -1) {
int4 retval = firsttoken;
firsttoken = -1;
return retval;
}
if (lasterror.size()!=0)
return BADTOKEN;
lexer.getNextToken(tok);
lineno = tok.getLineNo();
colno = tok.getColNo();
filenum = tok.getFileNum();
switch(tok.getType()) {
case GrammarToken::integer:
case GrammarToken::charconstant:
yylval.i = new uintb(tok.getInteger());
num_alloc.push_back(yylval.i);
return NUMBER;
case GrammarToken::identifier:
yylval.str = tok.getString();
string_alloc.push_back(yylval.str);
return lookupIdentifier(*yylval.str);
case GrammarToken::stringval:
delete tok.getString();
setError("Illegal string constant");
return BADTOKEN;
case GrammarToken::dotdotdot:
return DOTDOTDOT;
case GrammarToken::badtoken:
setError(lexer.getError()); // Error from lexer
return BADTOKEN;
case GrammarToken::endoffile:
return -1; // No more tokens
default:
return (int4)tok.getType();
}
}
void CParse::setError(const string &msg)
{
ostringstream s;
s << msg;
lexer.writeLocation(s,lineno,filenum);
s << '\n';
lexer.writeTokenLocation(s,lineno,colno);
lasterror = s.str();
}
bool CParse::runParse(uint4 doctype)
{ // Assuming the stream has been setup, parse it
switch(doctype) {
case doc_declaration:
firsttoken = DECLARATION_RESULT;
break;
case doc_parameter_declaration:
firsttoken = PARAM_RESULT;
break;
default:
throw LowlevelError("Bad document type");
}
parse = this; // Setup global object for yyparse
int4 res = yyparse();
if (res != 0) {
if (lasterror.size()==0)
setError("Syntax error");
return false;
}
return true;
}
bool CParse::parseFile(const string &nm,uint4 doctype)
{ // Run the parser on a file, return true if no parse errors
clear(); // Clear out any old parsing
ifstream s(nm.c_str()); // open file
if (!s)
throw LowlevelError("Unable to open file for parsing: "+nm);
lexer.pushFile(nm,&s); // Inform lexer of filename and stream
bool res = runParse(doctype);
s.close();
return res;
}
bool CParse::parseStream(istream &s,uint4 doctype)
{
clear();
lexer.pushFile("stream",&s);
return runParse(doctype);
}
int yylex(void)
{
return parse->lex();
}
int yyerror(const char *str)
{
return 0;
}
Datatype *parse_type(istream &s,string &name,Architecture *glb)
{
CParse parser(glb,1000);
if (!parser.parseStream(s,CParse::doc_parameter_declaration))
throw ParseError(parser.getError());
vector<TypeDeclarator *> *decls = parser.getResultDeclarations();
if ((decls == (vector<TypeDeclarator *> *)0)||(decls->size()==0))
throw ParseError("Did not parse a datatype");
if (decls->size() > 1)
throw ParseError("Parsed multiple declarations");
TypeDeclarator *decl = (*decls)[0];
if (!decl->isValid())
throw ParseError("Parsed type is invalid");
name = decl->getIdentifier();
return decl->buildType(glb);
}
void parse_protopieces(PrototypePieces &pieces,
istream &s,Architecture *glb)
{
CParse parser(glb,1000);
if (!parser.parseStream(s,CParse::doc_declaration))
throw ParseError(parser.getError());
vector<TypeDeclarator *> *decls = parser.getResultDeclarations();
if ((decls == (vector<TypeDeclarator *> *)0)||(decls->size()==0))
throw ParseError("Did not parse a datatype");
if (decls->size() > 1)
throw ParseError("Parsed multiple declarations");
TypeDeclarator *decl = (*decls)[0];
if (!decl->isValid())
throw ParseError("Parsed type is invalid");
if (!decl->getPrototype(pieces,glb))
throw ParseError("Did not parse a prototype");
}
void parse_C(Architecture *glb,istream &s)
{ // Load type data straight into datastructures
CParse parser(glb,1000);
if (!parser.parseStream(s,CParse::doc_declaration))
throw ParseError(parser.getError());
vector<TypeDeclarator *> *decls = parser.getResultDeclarations();
if ((decls == (vector<TypeDeclarator *> *)0)||(decls->size()==0))
throw ParseError("Did not parse a datatype");
if (decls->size() > 1)
throw ParseError("Parsed multiple declarations");
TypeDeclarator *decl = (*decls)[0];
if (!decl->isValid())
throw ParseError("Parsed type is invalid");
if (decl->hasProperty(CParse::f_extern)) {
PrototypePieces pieces;
if (!decl->getPrototype(pieces,glb))
throw ParseError("Did not parse prototype as expected");
glb->setPrototype(pieces);
}
else if (decl->hasProperty(CParse::f_typedef)) {
Datatype *ct = decl->buildType(glb);
if (decl->getIdentifier().size() == 0)
throw ParseError("Missing identifier for typedef");
glb->types->setName(ct,decl->getIdentifier());
}
else if (decl->getBaseType()->getMetatype()==TYPE_STRUCT) {
// We parsed a struct, treat as a typedef
}
else if (decl->getBaseType()->isEnumType()) {
// We parsed an enum, treat as a typedef
}
else
throw LowlevelError("Not sure what to do with this type");
}
void parse_toseparator(istream &s,string &name)
{ // parse to next (C) separator
char tok;
name.erase();
s >> ws;
tok = s.peek();
while((isalnum(tok))||(tok=='_')) {
s >> tok;
name += tok;
tok = s.peek();
}
}
Address parse_varnode(istream &s,int4 &size,Address &pc,uintm &uq,const TypeFactory &typegrp)
{ // Scan for a specific varnode
char tok;
int4 discard;
Address loc(parse_machaddr(s,size,typegrp));
s >> ws >> tok;
if (tok != '(')
throw ParseError("Missing '('");
s >> ws;
tok = s.peek();
pc = Address(); // pc starts out as invalid
if (tok == 'i')
s >> tok;
else if (s.peek() != ':') {
s.unsetf(ios::dec | ios::hex | ios::oct); // Let user specify base
pc = parse_machaddr(s,discard,typegrp,true);
}
s >> ws;
if (s.peek() == ':') { // Scan uniq
s >> tok >> ws >> hex >> uq; // Assume uniq is in hex
}
else
uq = ~((uintm)0);
s >> ws >> tok;
if (tok != ')')
throw ParseError("Missing ')'");
return loc;
}
Address parse_op(istream &s,uintm &uq,const TypeFactory &typegrp)
{
int4 size;
char tok;
Address loc(parse_machaddr(s,size,typegrp,true));
s >> ws >> tok;
if (tok != ':')
throw ParseError("Missing ':'");
s >> ws >> hex >> uq; // Assume uniq is in hex
return loc;
}
Address parse_machaddr(istream &s,int4 &defaultsize,const TypeFactory &typegrp,bool ignorecolon)
{ // Read Address from ASCII stream
string token;
AddrSpace *b;
int4 size = -1;
int4 oversize;
char tok;
const AddrSpaceManager *manage = typegrp.getArch();
s >> ws;
tok = s.peek();
if (tok == '[') {
s >> tok;
parse_toseparator(s,token); // scan base address token
b = manage->getSpaceByName(token);
if (b == (AddrSpace *)0)
throw ParseError("Bad address base");
s >> ws >> tok;
if (tok != ',')
throw ParseError("Missing ',' in address");
parse_toseparator(s,token); // Get the offset portion of the address
s >> ws >> tok;
if (tok == ',') { // Optional size specifier
s.unsetf(ios::dec | ios::hex | ios::oct);
s >> size;
s >> ws >> tok;
}
if (tok != ']')
throw ParseError("Missing ']' in address");
}
else if (tok == '{') {
b = manage->getJoinSpace();
s >> tok;
s >> tok;
while(tok != '}') // Scan to the matching curly brace
token += tok;
}
else {
if (tok == '0') {
b = manage->getDefaultCodeSpace();
}
else {
b = manage->getSpaceByShortcut(tok);
s >> tok;
}
if (b==(AddrSpace *)0) {
s >> token;
string errmsg = "Bad address: ";
errmsg += tok;
errmsg += token;
throw ParseError(errmsg);
}
token.erase();
s >> ws;
tok = s.peek();
if (ignorecolon) {
while((isalnum(tok))||(tok=='_')||(tok=='+')) {
token += tok;
s >> tok;
tok = s.peek();
}
}
else {
while((isalnum(tok))||(tok=='_')||(tok=='+')||(tok==':')) {
token += tok;
s >> tok;
tok = s.peek();
}
}
}
Address res(b,0);
oversize = res.read(token); // Read the address of this particular type
// oversize is "standard size"
if (oversize == -1)
throw ParseError("Bad machine address");
defaultsize = (size==-1) ? oversize : size; // If not overriden use standard
return res;
}