mirror of
https://github.com/NationalSecurityAgency/ghidra.git
synced 2025-10-04 18:29:37 +02:00
628 lines
16 KiB
Text
628 lines
16 KiB
Text
/* ###
|
|
* IP: GHIDRA
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
%{
|
|
#include "xml.hh"
|
|
// CharData mode look for '<' '&' or "]]>"
|
|
// Name mode look for non-name char
|
|
// CData mode looking for "]]>"
|
|
// Entity mode looking for ending ';'
|
|
// AttValue mode looking for endquote or '&'
|
|
// Comment mode looking for "--"
|
|
|
|
#include <iostream>
|
|
#include <string>
|
|
|
|
class XmlScan {
|
|
public:
|
|
enum mode { CharDataMode, CDataMode, AttValueSingleMode,
|
|
AttValueDoubleMode, CommentMode, CharRefMode,
|
|
NameMode, SNameMode, SingleMode };
|
|
enum token { CharDataToken = 258,
|
|
CDataToken = 259,
|
|
AttValueToken = 260,
|
|
CommentToken =261,
|
|
CharRefToken = 262,
|
|
NameToken = 263,
|
|
SNameToken = 264,
|
|
ElementBraceToken = 265,
|
|
CommandBraceToken = 266 };
|
|
private:
|
|
mode curmode;
|
|
istream &s;
|
|
string *lvalue; // Current string being built
|
|
int4 lookahead[4];
|
|
int4 pos;
|
|
bool endofstream; // Has end of stream been reached
|
|
void clearlvalue(void);
|
|
int4 getxmlchar(void) {
|
|
char c;
|
|
int4 ret=lookahead[pos];
|
|
if (!endofstream) {
|
|
s.get(c);
|
|
if (s.eof()||(c=='\0')) {
|
|
endofstream = true;
|
|
lookahead[pos] = '\n';
|
|
}
|
|
else
|
|
lookahead[pos] = c;
|
|
}
|
|
else
|
|
lookahead[pos] = -1;
|
|
pos = (pos+1)&3;
|
|
return ret;
|
|
}
|
|
int4 next(int4 i) { return lookahead[(pos+i)&3]; }
|
|
bool isLetter(int4 val) { return (((val>=0x41)&&(val<=0x5a))||((val>=0x61)&&(val<=0x7a))); }
|
|
bool isInitialNameChar(int4 val);
|
|
bool isNameChar(int4 val);
|
|
bool isChar(int4 val);
|
|
int4 scanSingle(void);
|
|
int4 scanCharData(void);
|
|
int4 scanCData(void);
|
|
int4 scanAttValue(int4 quote);
|
|
int4 scanCharRef(void);
|
|
int4 scanComment(void);
|
|
int4 scanName(void);
|
|
int4 scanSName(void);
|
|
public:
|
|
XmlScan(istream &t);
|
|
~XmlScan(void);
|
|
void setmode(mode m) { curmode = m; }
|
|
int4 nexttoken(void); // Interface for bison
|
|
string *lval(void) { string *ret = lvalue; lvalue = (string *)0; return ret; }
|
|
};
|
|
|
|
struct NameValue {
|
|
string *name;
|
|
string *value;
|
|
};
|
|
|
|
extern int yylex(void);
|
|
extern int yyerror(const char *str);
|
|
extern void print_content(const string &str);
|
|
extern int4 convertEntityRef(const string &ref);
|
|
extern int4 convertCharRef(const string &ref);
|
|
static XmlScan *global_scan;
|
|
static ContentHandler *handler;
|
|
extern int yydebug;
|
|
%}
|
|
|
|
%union {
|
|
int4 i;
|
|
string *str;
|
|
Attributes *attr;
|
|
NameValue *pair;
|
|
}
|
|
|
|
%expect 8
|
|
|
|
%token <str> CHARDATA CDATA ATTVALUE COMMENT CHARREF NAME SNAME ELEMBRACE COMMBRACE
|
|
%type <str> AttValue attsinglemid attdoublemid ETag CDSect CharRef EntityRef
|
|
%type <i> Reference
|
|
%type <attr> EmptyElemTag STag stagstart
|
|
%type <pair> SAttribute
|
|
%%
|
|
|
|
document: element Misc;
|
|
| prolog element Misc;
|
|
whitespace: ' '
|
|
| '\n'
|
|
| '\r'
|
|
| '\t';
|
|
S: whitespace
|
|
| S whitespace ;
|
|
|
|
attsinglemid: '\'' { $$ = new string; global_scan->setmode(XmlScan::AttValueSingleMode); }
|
|
| attsinglemid ATTVALUE { $$ = $1; *$$ += *$2; delete $2; global_scan->setmode(XmlScan::AttValueSingleMode); }
|
|
| attsinglemid Reference { $$ = $1; *$$ += $2; global_scan->setmode(XmlScan::AttValueSingleMode); };
|
|
attdoublemid: '"' { $$ = new string; global_scan->setmode(XmlScan::AttValueDoubleMode); }
|
|
| attdoublemid ATTVALUE { $$ = $1; *$$ += *$2; delete $2; global_scan->setmode(XmlScan::AttValueDoubleMode); }
|
|
| attdoublemid Reference { $$ = $1; *$$ += $2; global_scan->setmode(XmlScan::AttValueDoubleMode); };
|
|
AttValue: attsinglemid '\'' { $$ = $1; }
|
|
| attdoublemid '"' { $$ = $1; };
|
|
elemstart: ELEMBRACE { global_scan->setmode(XmlScan::NameMode); delete $1; };
|
|
commentstart: COMMBRACE '!' '-' '-' { global_scan->setmode(XmlScan::CommentMode); delete $1; } ;
|
|
Comment: commentstart COMMENT '-' '-' '>' { delete $2; } ;
|
|
PI: COMMBRACE '?' { delete $1; yyerror("Processing instructions are not supported"); YYERROR; };
|
|
CDSect: CDStart CDATA CDEnd { $$ = $2; } ;
|
|
CDStart: COMMBRACE '!' '[' 'C' 'D' 'A' 'T' 'A' '[' { global_scan->setmode(XmlScan::CDataMode); delete $1; } ;
|
|
CDEnd: ']' ']' '>' ;
|
|
|
|
doctypepro: doctypedecl
|
|
| doctypepro Misc;
|
|
prologpre: XMLDecl
|
|
| Misc
|
|
| prologpre Misc;
|
|
prolog: prologpre doctypepro
|
|
| prologpre ;
|
|
|
|
doctypedecl: COMMBRACE '!' 'D' 'O' 'C' 'T' 'Y' 'P' 'E' { delete $1; yyerror("DTD's not supported"); YYERROR; };
|
|
Eq: '='
|
|
| S '='
|
|
| Eq S ;
|
|
Misc: Comment
|
|
| PI
|
|
| S ;
|
|
|
|
VersionInfo: S 'v' 'e' 'r' 's' 'i' 'o' 'n' Eq AttValue { handler->setVersion(*$10); delete $10; };
|
|
EncodingDecl: S 'e' 'n' 'c' 'o' 'd' 'i' 'n' 'g' Eq AttValue { handler->setEncoding(*$11); delete $11; };
|
|
xmldeclstart: COMMBRACE '?' 'x' 'm' 'l' VersionInfo
|
|
XMLDecl: xmldeclstart '?' '>'
|
|
| xmldeclstart S '?' '>'
|
|
| xmldeclstart EncodingDecl '?' '>'
|
|
| xmldeclstart EncodingDecl S '?' '>' ;
|
|
|
|
element: EmptyElemTag { handler->endElement($1->getelemURI(),$1->getelemName(),$1->getelemName()); delete $1; }
|
|
| STag content ETag { handler->endElement($1->getelemURI(),$1->getelemName(),$1->getelemName()); delete $1; delete $3; } ;
|
|
|
|
STag: stagstart '>' { handler->startElement($1->getelemURI(),$1->getelemName(),$1->getelemName(),*$1); $$ = $1; }
|
|
| stagstart S '>' { handler->startElement($1->getelemURI(),$1->getelemName(),$1->getelemName(),*$1); $$ = $1; };
|
|
EmptyElemTag: stagstart '/' '>' { handler->startElement($1->getelemURI(),$1->getelemName(),$1->getelemName(),*$1); $$ = $1; }
|
|
| stagstart S '/' '>' { handler->startElement($1->getelemURI(),$1->getelemName(),$1->getelemName(),*$1); $$ = $1; };
|
|
|
|
stagstart: elemstart NAME { $$ = new Attributes($2); global_scan->setmode(XmlScan::SNameMode); }
|
|
| stagstart SAttribute { $$ = $1; $$->add_attribute( $2->name, $2->value); delete $2; global_scan->setmode(XmlScan::SNameMode); };
|
|
SAttribute: SNAME Eq AttValue { $$ = new NameValue; $$->name = $1; $$->value = $3; };
|
|
etagbrace: COMMBRACE '/' { global_scan->setmode(XmlScan::NameMode); delete $1; };
|
|
ETag: etagbrace NAME '>' { $$ = $2; }
|
|
| etagbrace NAME S '>' { $$ = $2; };
|
|
|
|
content: { global_scan->setmode(XmlScan::CharDataMode); }
|
|
| content CHARDATA { print_content( *$2 ); delete $2; global_scan->setmode(XmlScan::CharDataMode); }
|
|
| content element { global_scan->setmode(XmlScan::CharDataMode); }
|
|
| content Reference { string *tmp=new string(); *tmp += $2; print_content(*tmp); delete tmp; global_scan->setmode(XmlScan::CharDataMode); }
|
|
| content CDSect { print_content( *$2 ); delete $2; global_scan->setmode(XmlScan::CharDataMode); }
|
|
| content PI { global_scan->setmode(XmlScan::CharDataMode); }
|
|
| content Comment { global_scan->setmode(XmlScan::CharDataMode); };
|
|
|
|
Reference: EntityRef { $$ = convertEntityRef(*$1); delete $1; }
|
|
| CharRef { $$ = convertCharRef(*$1); delete $1; };
|
|
|
|
refstart: '&' { global_scan->setmode(XmlScan::NameMode); } ;
|
|
charrefstart: refstart '#' { global_scan->setmode(XmlScan::CharRefMode); };
|
|
CharRef: charrefstart CHARREF ';' { $$ = $2; };
|
|
EntityRef: refstart NAME ';' { $$ = $2; };
|
|
%%
|
|
|
|
XmlScan::XmlScan(istream &t) : s(t)
|
|
|
|
{
|
|
curmode = SingleMode;
|
|
lvalue = (string *)0;
|
|
pos = 0;
|
|
endofstream = false;
|
|
getxmlchar(); getxmlchar(); getxmlchar(); getxmlchar(); // Fill lookahead buffer
|
|
}
|
|
|
|
XmlScan::~XmlScan(void)
|
|
|
|
{
|
|
clearlvalue();
|
|
}
|
|
|
|
void XmlScan::clearlvalue(void)
|
|
|
|
{
|
|
if (lvalue != (string *)0)
|
|
delete lvalue;
|
|
}
|
|
|
|
int4 XmlScan::scanSingle(void)
|
|
|
|
{
|
|
int4 res = getxmlchar();
|
|
if (res == '<') {
|
|
if (isInitialNameChar(next(0))) return ElementBraceToken;
|
|
return CommandBraceToken;
|
|
}
|
|
return res;
|
|
}
|
|
|
|
int4 XmlScan::scanCharData(void)
|
|
|
|
{ // look for '<' '&' or ']]>'
|
|
clearlvalue();
|
|
lvalue = new string();
|
|
|
|
while(next(0) != -1) {
|
|
if (next(0) == '<') break;
|
|
if (next(0) == '&') break;
|
|
if (next(0) == ']')
|
|
if (next(1)== ']')
|
|
if (next(2)=='>')
|
|
break;
|
|
*lvalue += getxmlchar();
|
|
}
|
|
if (lvalue->size()==0)
|
|
return scanSingle();
|
|
return CharDataToken;
|
|
}
|
|
|
|
int4 XmlScan::scanCData(void)
|
|
|
|
{ // Look for "]]>" and non-Char
|
|
clearlvalue();
|
|
lvalue = new string();
|
|
|
|
while(next(0) != -1) {
|
|
if (next(0)==']')
|
|
if (next(1)==']')
|
|
if (next(2)=='>')
|
|
break;
|
|
if (!isChar(next(0))) break;
|
|
*lvalue += getxmlchar();
|
|
}
|
|
return CDataToken; // CData can be empty
|
|
}
|
|
|
|
int4 XmlScan::scanCharRef(void)
|
|
|
|
{
|
|
int4 v;
|
|
clearlvalue();
|
|
lvalue = new string();
|
|
if (next(0) == 'x') {
|
|
*lvalue += getxmlchar();
|
|
while(next(0) != -1) {
|
|
v = next(0);
|
|
if (v < '0') break;
|
|
if ((v>'9')&&(v<'A')) break;
|
|
if ((v>'F')&&(v<'a')) break;
|
|
if (v>'f') break;
|
|
*lvalue += getxmlchar();
|
|
}
|
|
if (lvalue->size()==1)
|
|
return 'x'; // Must be at least 1 hex digit
|
|
}
|
|
else {
|
|
while(next(0) != -1) {
|
|
v = next(0);
|
|
if (v<'0') break;
|
|
if (v>'9') break;
|
|
*lvalue += getxmlchar();
|
|
}
|
|
if (lvalue->size()==0)
|
|
return scanSingle();
|
|
}
|
|
return CharRefToken;
|
|
}
|
|
|
|
int4 XmlScan::scanAttValue(int4 quote)
|
|
|
|
{
|
|
clearlvalue();
|
|
lvalue = new string();
|
|
while(next(0) != -1) {
|
|
if (next(0) == quote) break;
|
|
if (next(0) == '<') break;
|
|
if (next(0) == '&') break;
|
|
*lvalue += getxmlchar();
|
|
}
|
|
if (lvalue->size() == 0)
|
|
return scanSingle();
|
|
return AttValueToken;
|
|
}
|
|
|
|
int4 XmlScan::scanComment(void)
|
|
|
|
{
|
|
clearlvalue();
|
|
lvalue = new string();
|
|
|
|
while(next(0) != -1) {
|
|
if (next(0)=='-')
|
|
if (next(1)=='-')
|
|
break;
|
|
if (!isChar(next(0))) break;
|
|
*lvalue += getxmlchar();
|
|
}
|
|
return CommentToken;
|
|
}
|
|
|
|
int4 XmlScan::scanName(void)
|
|
|
|
{ // Scan a Name or return single non-name character
|
|
clearlvalue();
|
|
lvalue = new string();
|
|
|
|
if (!isInitialNameChar(next(0)))
|
|
return scanSingle();
|
|
*lvalue += getxmlchar();
|
|
while(next(0) != -1) {
|
|
if (!isNameChar(next(0))) break;
|
|
*lvalue += getxmlchar();
|
|
}
|
|
return NameToken;
|
|
}
|
|
|
|
int4 XmlScan::scanSName(void)
|
|
|
|
{ // Scan Name, allow white space before
|
|
int4 whitecount = 0;
|
|
while((next(0)==' ')||(next(0)=='\n')||(next(0)=='\r')||(next(0)=='\t')) {
|
|
whitecount += 1;
|
|
getxmlchar();
|
|
}
|
|
clearlvalue();
|
|
lvalue = new string();
|
|
if (!isInitialNameChar(next(0))) { // First non-whitespace is not Name char
|
|
if (whitecount > 0)
|
|
return ' ';
|
|
return scanSingle();
|
|
}
|
|
*lvalue += getxmlchar();
|
|
while(next(0) != -1) {
|
|
if (!isNameChar(next(0))) break;
|
|
*lvalue += getxmlchar();
|
|
}
|
|
if (whitecount>0)
|
|
return SNameToken;
|
|
return NameToken;
|
|
}
|
|
|
|
bool XmlScan::isInitialNameChar(int4 val)
|
|
|
|
{
|
|
if (isLetter(val)) return true;
|
|
if ((val=='_')||(val==':')) return true;
|
|
return false;
|
|
}
|
|
|
|
bool XmlScan::isNameChar(int4 val)
|
|
|
|
{
|
|
if (isLetter(val)) return true;
|
|
if ((val>='0')&&(val<='9')) return true;
|
|
if ((val=='.')||(val=='-')||(val=='_')||(val==':')) return true;
|
|
return false;
|
|
}
|
|
|
|
bool XmlScan::isChar(int4 val)
|
|
|
|
{
|
|
if (val>=0x20) return true;
|
|
if ((val == 0xd)||(val==0xa)||(val==0x9)) return true;
|
|
return false;
|
|
}
|
|
|
|
int4 XmlScan::nexttoken(void)
|
|
|
|
{
|
|
mode mymode = curmode;
|
|
curmode = SingleMode;
|
|
switch(mymode) {
|
|
case CharDataMode:
|
|
return scanCharData();
|
|
case CDataMode:
|
|
return scanCData();
|
|
case AttValueSingleMode:
|
|
return scanAttValue('\'');
|
|
case AttValueDoubleMode:
|
|
return scanAttValue('"');
|
|
case CommentMode:
|
|
return scanComment();
|
|
case CharRefMode:
|
|
return scanCharRef();
|
|
case NameMode:
|
|
return scanName();
|
|
case SNameMode:
|
|
return scanSName();
|
|
case SingleMode:
|
|
return scanSingle();
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
void print_content(const string &str)
|
|
|
|
{
|
|
uint4 i;
|
|
for(i=0;i<str.size();++i) {
|
|
if (str[i]==' ') continue;
|
|
if (str[i]=='\n') continue;
|
|
if (str[i]=='\r') continue;
|
|
if (str[i]=='\t') continue;
|
|
break;
|
|
}
|
|
if (i==str.size())
|
|
handler->ignorableWhitespace(str.c_str(),0,str.size());
|
|
else
|
|
handler->characters(str.c_str(),0,str.size());
|
|
}
|
|
|
|
int4 convertEntityRef(const string &ref)
|
|
|
|
{
|
|
if (ref == "lt") return '<';
|
|
if (ref == "amp") return '&';
|
|
if (ref == "gt") return '>';
|
|
if (ref == "quot") return '"';
|
|
if (ref == "apos") return '\'';
|
|
return -1;
|
|
}
|
|
|
|
int4 convertCharRef(const string &ref)
|
|
|
|
{
|
|
uint4 i;
|
|
int4 mult,val,cur;
|
|
|
|
if (ref[0]=='x') {
|
|
i = 1;
|
|
mult = 16;
|
|
}
|
|
else {
|
|
i = 0;
|
|
mult = 10;
|
|
}
|
|
val = 0;
|
|
for(;i<ref.size();++i) {
|
|
if (ref[i]<='9') cur = ref[i]-'0';
|
|
else if (ref[i]<='F') cur = 10+ref[i]-'A';
|
|
else cur=10+ref[i]-'a';
|
|
val *= mult;
|
|
val += cur;
|
|
}
|
|
return val;
|
|
}
|
|
|
|
int yylex(void)
|
|
|
|
{
|
|
int res = global_scan->nexttoken();
|
|
if (res>255)
|
|
yylval.str = global_scan->lval();
|
|
return res;
|
|
}
|
|
|
|
int yyerror(const char *str)
|
|
|
|
{
|
|
handler->setError(str);
|
|
return 0;
|
|
}
|
|
|
|
int4 xml_parse(istream &i,ContentHandler *hand,int4 dbg)
|
|
|
|
{
|
|
#if YYDEBUG
|
|
yydebug = dbg;
|
|
#endif
|
|
global_scan = new XmlScan(i);
|
|
handler = hand;
|
|
handler->startDocument();
|
|
int4 res = yyparse();
|
|
if (res == 0)
|
|
handler->endDocument();
|
|
delete global_scan;
|
|
return res;
|
|
}
|
|
|
|
void TreeHandler::startElement(const string &namespaceURI,const string &localName,
|
|
const string &qualifiedName,const Attributes &atts)
|
|
{
|
|
Element *newel = new Element(cur);
|
|
cur->addChild(newel);
|
|
cur = newel;
|
|
newel->setName(localName);
|
|
for(int4 i=0;i<atts.getLength();++i)
|
|
newel->addAttribute(atts.getLocalName(i),atts.getValue(i));
|
|
}
|
|
|
|
void TreeHandler::endElement(const string &namespaceURI,const string &localName,
|
|
const string &qualifiedName)
|
|
{
|
|
cur = cur->getParent();
|
|
}
|
|
|
|
void TreeHandler::characters(const char *text,int4 start,int4 length)
|
|
|
|
{
|
|
cur->addContent(text,start,length);
|
|
}
|
|
|
|
Element::~Element(void)
|
|
|
|
{
|
|
List::iterator iter;
|
|
|
|
for(iter=children.begin();iter!=children.end();++iter)
|
|
delete *iter;
|
|
}
|
|
|
|
const string &Element::getAttributeValue(const string &nm) const
|
|
|
|
{
|
|
for(uint4 i=0;i<attr.size();++i)
|
|
if (attr[i] == nm)
|
|
return value[i];
|
|
throw XmlError("Unknown attribute: "+nm);
|
|
}
|
|
|
|
DocumentStorage::~DocumentStorage(void)
|
|
|
|
{
|
|
for(int4 i=0;i<doclist.size();++i) {
|
|
if (doclist[i] != (Document *)0)
|
|
delete doclist[i];
|
|
}
|
|
}
|
|
|
|
Document *DocumentStorage::parseDocument(istream &s)
|
|
|
|
{
|
|
doclist.push_back((Document *)0);
|
|
doclist.back() = xml_tree(s);
|
|
return doclist.back();
|
|
}
|
|
|
|
Document *DocumentStorage::openDocument(const string &filename)
|
|
|
|
{ // Open and parse an XML file, return Document object
|
|
ifstream s(filename.c_str());
|
|
if (!s)
|
|
throw XmlError("Unable to open xml document "+filename);
|
|
Document *res = parseDocument(s);
|
|
s.close();
|
|
return res;
|
|
}
|
|
|
|
void DocumentStorage::registerTag(const Element *el)
|
|
|
|
{ // Register a tag under its name
|
|
tagmap[el->getName()] = el;
|
|
}
|
|
|
|
const Element *DocumentStorage::getTag(const string &nm) const
|
|
|
|
{ // Retrieve a registered tag by name
|
|
map<string,const Element *>::const_iterator iter;
|
|
|
|
iter = tagmap.find(nm);
|
|
if (iter != tagmap.end())
|
|
return (*iter).second;
|
|
return (const Element *)0;
|
|
}
|
|
|
|
Document *xml_tree(istream &i)
|
|
|
|
{
|
|
Document *doc = new Document();
|
|
TreeHandler handle(doc);
|
|
if (0!=xml_parse(i,&handle)) {
|
|
delete doc;
|
|
throw XmlError(handle.getError());
|
|
}
|
|
return doc;
|
|
}
|
|
|
|
void xml_escape(ostream &s,const char *str)
|
|
|
|
{ // Escape xml tag indicators
|
|
while(*str!='\0') {
|
|
if (*str < '?') {
|
|
if (*str=='<') s << "<";
|
|
else if (*str=='>') s << ">";
|
|
else if (*str=='&') s << "&";
|
|
else if (*str=='"') s << """;
|
|
else if (*str=='\'') s << "'";
|
|
else s << *str;
|
|
}
|
|
else
|
|
s << *str;
|
|
str++;
|
|
}
|
|
}
|