Doxygen for xml.hh

This commit is contained in:
caheckman 2020-02-03 18:05:28 -05:00
parent 5277e7acf7
commit cf4a4e825e
3 changed files with 296 additions and 99 deletions

View file

@ -457,7 +457,7 @@ RECURSIVE = NO
# excluded from the INPUT source files. This way you can easily exclude a
# subdirectory from a directory tree whose root is specified with the INPUT tag.
EXCLUDE = unify.hh unify.cc rulecompile.hh rulecompile.cc slgh_compile.hh slgh_compile.hh slghpattern.hh slghpattern.cc slghpatexpress.hh slghpatexpress.cc slghsymbol.hh slghsymbol.cc ifacedecomp.hh ifacedecomp.cc ifaceterm.hh ifaceterm.cc codedata.hh codedata.cc semantics.hh semantics.cc grammar.hh grammar.cc callgraph.hh callgraph.cc filemanage.hh filemanage.cc graph.hh graph.cc interface.hh interface.cc loadimage_bfd.hh loadimage_bfd.cc pcodecompile.cc pcodecompile.hh pcodeparse.hh pcodeparse.cc inject_sleigh.hh inject_sleigh.cc context.hh context.cc consolemain.cc sleighexample.cc
EXCLUDE = unify.hh unify.cc rulecompile.hh rulecompile.cc slgh_compile.hh slgh_compile.hh slghpattern.hh slghpattern.cc slghpatexpress.hh slghpatexpress.cc slghsymbol.hh slghsymbol.cc ifacedecomp.hh ifacedecomp.cc ifaceterm.hh ifaceterm.cc codedata.hh codedata.cc semantics.hh semantics.cc grammar.hh grammar.cc callgraph.hh callgraph.cc filemanage.hh filemanage.cc graph.hh graph.cc interface.hh interface.cc loadimage_bfd.hh loadimage_bfd.cc pcodecompile.cc pcodecompile.hh pcodeparse.hh pcodeparse.cc inject_sleigh.hh inject_sleigh.cc context.hh context.cc consolemain.cc sleighexample.cc xml.cc
# The EXCLUDE_SYMLINKS tag can be used select whether or not files or
# directories that are symbolic links (a Unix filesystem feature) are excluded

View file

@ -13,6 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// \file xml.hh
/// \brief Lightweight (and incomplete) XML parser for marshaling data to and from the decompiler
#ifndef __CPUI_XML__
#define __CPUI_XML__
@ -25,33 +27,40 @@
using namespace std;
/// \brief The \e attributes for a single XML element
///
/// A container for name/value pairs (of strings) for the formal attributes, as collected during parsing.
/// This object is used to initialize the Element object but is not part of the final, in memory, DOM model.
/// This also holds other properties of the element that are unused in this implementation,
/// including the \e namespace URI.
class Attributes {
string *elementname;
string bogus_uri;
string prefix;
vector<string *> name;
vector<string *> value;
static string bogus_uri; ///< A placeholder for the namespace URI that should be attached to the element
// static string prefix;
string *elementname; ///< The name of the XML element
vector<string *> name; ///< List of names for each formal XML attribute
vector<string *> value; ///< List of values for each formal XML attribute
public:
Attributes(string *el) { elementname = el; }
Attributes(string *el) { elementname = el; } ///< Construct from element name string
~Attributes(void) {
for(uint4 i=0;i<name.size();++i) { delete name[i]; delete value[i]; }
delete elementname;
}
const string &getelemURI(void) const { return bogus_uri; }
const string &getelemName(void) const { return *elementname; }
void add_attribute(string *nm,string *vl) { name.push_back(nm); value.push_back(vl); }
} ///< Destructor
const string &getelemURI(void) const { return bogus_uri; } ///< Get the namespace URI associated with this element
const string &getelemName(void) const { return *elementname; } ///< Get the name of this element
void add_attribute(string *nm,string *vl) { name.push_back(nm); value.push_back(vl); } ///< Add a formal attribute
// The official SAX interface
int4 getLength(void) const { return name.size(); }
const string &getURI(int4 index) const { return bogus_uri; }
const string &getLocalName(int4 index) const { return *name[index]; }
const string &getQName(int4 index) const { return *name[index]; }
int4 getLength(void) const { return name.size(); } ///< Get the number of attributes associated with the element
const string &getURI(int4 i) const { return bogus_uri; } ///< Get the namespace URI associated with the i-th attribute
const string &getLocalName(int4 i) const { return *name[i]; } ///< Get the local name of the i-th attribute
const string &getQName(int4 i) const { return *name[i]; } ///< Get the qualified name of the i-th attribute
// int4 getIndex(const string &uri,const string &localName) const;
// int4 getIndex(const string &qualifiedName) const;
// const string &getType(int4 index) const;
// const string &getType(const string &uri,const string &localName) const;
// const string &getType(const string &qualifiedName) const;
const string &getValue(int4 index) const { return *value[index]; }
const string &getValue(int4 i) const { return *value[i]; } ///< Get the value of the i-th attribute
//const string &getValue(const string &uri,const string &localName) const;
/// \brief Get the value of the attribute with the given qualified name
const string &getValue(const string &qualifiedName) const {
for(uint4 i=0;i<name.size();++i)
if (*name[i] == qualifiedName) return *value[i];
@ -59,72 +68,158 @@ public:
}
};
typedef void *Locator;
typedef void *Locator; ///< Placeholder for a document locator object
/// \brief The SAX interface for parsing XML documents
///
/// This is the formal interface for handling the low-level string pieces of an XML document as
/// they are scanned by the parser.
class ContentHandler {
public:
virtual ~ContentHandler(void) {}
virtual void setDocumentLocator(Locator locator)=0;
virtual void startDocument(void)=0;
virtual void endDocument(void)=0;
virtual void startPrefixMapping(const string &prefix,const string &uri)=0;
virtual void endPrefixMapping(const string &prefix)=0;
virtual ~ContentHandler(void) {} ///< Destructor
virtual void setDocumentLocator(Locator locator)=0; ///< Set the Locator object for documents
virtual void startDocument(void)=0; ///< Start processing a new XML document
virtual void endDocument(void)=0; ///< End processing for the current XML document
virtual void startPrefixMapping(const string &prefix,const string &uri)=0; ///< Start a new prefix to namespace URI mapping
virtual void endPrefixMapping(const string &prefix)=0; ///< Finish the current prefix
/// \brief Callback indicating a new XML element has started.
///
/// \param namespaceURI is the namespace to which the new element belongs
/// \param localName is the local name of the new element
/// \param qualifiedName is the fully qualified name of the new element
/// \param atts is the set of (previously parsed) attributes to attach to the new element
virtual void startElement(const string &namespaceURI,const string &localName,
const string &qualifiedName,const Attributes &atts)=0;
/// \brief Callback indicating parsing of the current XML element is finished.
///
/// \param namespaceURI is the namespace to which the element belongs
/// \param localName is the local name of the new element
/// \param qualifiedName is the fully qualified name of the element.
virtual void endElement(const string &namespaceURI,const string &localName,
const string &qualifiedName)=0;
/// \brief Callback with raw characters to be inserted in the current XML element
///
/// \param text is an array of character data being inserted.
/// \param start is the first character within the array to insert.
/// \param length is the number of characters to insert.
virtual void characters(const char *text,int4 start,int4 length)=0;
/// \brief Callback with whitespace character data for the current XML element
///
/// \param text is an array of character data that can be inserted.
/// \param start is the first character within the array to insert.
/// \param length is the number of characters to insert.
virtual void ignorableWhitespace(const char *text,int4 start,int4 length)=0;
/// \brief Set the XML version as specified by the current document
///
/// \param version is the parsed version string
virtual void setVersion(const string &version)=0;
/// \brief Set the character encoding as specified by the current document
///
/// \param encoding is the parsed encoding string
virtual void setEncoding(const string &encoding)=0;
/// \brief Callback for a formal \e processing \e instruction seen in the current document
///
/// \param target is the target instruction to process
/// \param data is (optional) character data for the instruction
virtual void processingInstruction(const string &target,const string &data)=0;
/// \brief Callback for an XML entity skipped by the parser
///
/// \param name is the name of the entity being skipped
virtual void skippedEntity(const string &name)=0;
/// \brief Callback for handling an error condition during XML parsing
///
/// \param errmsg is a message describing the error condition
virtual void setError(const string &errmsg)=0;
};
class Element;
typedef vector<Element *> List;
typedef vector<Element *> List; ///< A list of XML elements
/// \brief An XML element. A node in the DOM tree.
///
/// This is the main node for the in-memory representation of the XML (DOM) tree.
class Element {
string name;
string content;
vector<string> attr;
vector<string> value;
string name; ///< The (local) name of the element
string content; ///< Character content of the element
vector<string> attr; ///< A list of attribute names for \b this element
vector<string> value; ///< a (corresponding) list of attribute values for \b this element
protected:
Element *parent;
List children;
Element *parent; ///< The parent Element (or null)
List children; ///< A list of child Element objects
public:
Element(Element *par) { parent = par; }
~Element(void);
void setName(const string &nm) { name = nm; }
Element(Element *par) { parent = par; } ///< Constructor given a parent Element
~Element(void); ///< Destructor
void setName(const string &nm) { name = nm; } ///< Set the local name of the element
/// \brief Append new character content to \b this element
///
/// \param str is an array of character data
/// \param start is the index of the first character to append
/// \param length is the number of characters to append
void addContent(const char *str,int4 start,int4 length) {
// for(int4 i=0;i<length;++i) content += str[start+i]; }
content.append(str+start,length); }
/// \brief Add a new child Element to the model, with \b this as the parent
///
/// \param child is the new child Element
void addChild(Element *child) { children.push_back(child); }
/// \brief Add a new name/value attribute pair to \b this element
///
/// \param nm is the name of the attribute
/// \param vl is the value of the attribute
void addAttribute(const string &nm,const string &vl) {
attr.push_back(nm); value.push_back(vl); }
Element *getParent(void) const { return parent; }
const string &getName(void) const { return name; }
const List &getChildren(void) const { return children; }
const string &getContent(void) const { return content; }
Element *getParent(void) const { return parent; } ///< Get the parent Element
const string &getName(void) const { return name; } ///< Get the local name of \b this element
const List &getChildren(void) const { return children; } ///< Get the list of child elements
const string &getContent(void) const { return content; } ///< Get the character content of \b this element
/// \brief Get an attribute value by name
///
/// Look up the value for the given attribute name and return it. An exception is
/// thrown if the attribute does not exist.
/// \param nm is the name of the attribute
/// \return the corresponding attribute value
const string &getAttributeValue(const string &nm) const;
int4 getNumAttributes(void) const { return attr.size(); }
const string &getAttributeName(int4 i) const { return attr[i]; }
const string &getAttributeValue(int4 i) const { return value[i]; }
int4 getNumAttributes(void) const { return attr.size(); } ///< Get the number of attributes for \b this element
const string &getAttributeName(int4 i) const { return attr[i]; } ///< Get the name of the i-th attribute
const string &getAttributeValue(int4 i) const { return value[i]; } ///< Get the value of the i-th attribute
};
/// \brief A complete in-memory XML document.
///
/// This is actually just an Element object itself, with the document's \e root element
/// as its only child, which owns all the child documents below it in DOM the hierarchy.
class Document : public Element {
public:
Document(void) : Element((Element *)0) {}
Element *getRoot(void) const { return *children.begin(); }
Document(void) : Element((Element *)0) {} ///< Construct an (empty) document
Element *getRoot(void) const { return *children.begin(); } ///< Get the root Element of the document
};
/// \brief A SAX interface implementation for constructing an in-memory DOM model.
///
/// This implementation builds a DOM model of the XML stream being parsed, creating an
/// Element object for each XML element tag in the stream. This handler is initialized with
/// a root Element object, which after parsing is complete will own all parsed elements.
class TreeHandler : public ContentHandler {
Element *root;
Element *cur;
string error;
Element *root; ///< The root XML element being processed by \b this handler
Element *cur; ///< The \e current XML element being processed by \b this handler
string error; ///< The last error condition returned by the parser (if not empty)
public:
TreeHandler(Element *rt) { root = rt; cur = root; }
TreeHandler(Element *rt) { root = rt; cur = root; } ///< Constructor given root Element
virtual ~TreeHandler(void) {}
virtual void setDocumentLocator(Locator locator) {}
virtual void startDocument(void) {}
@ -142,31 +237,96 @@ public:
virtual void setEncoding(const string &val) {}
virtual void skippedEntity(const string &name) {}
virtual void setError(const string &errmsg) { error = errmsg; }
const string &getError(void) const { return error; }
const string &getError(void) const { return error; } ///< Get the current error message
};
// Class for managing xml documents during initialization
/// \brief A container for parsed XML documents
///
/// This holds multiple XML documents that have already been parsed. Documents
/// can be put in this container, either by handing it a stream via parseDocument()
/// or a filename via openDocument(). If they are explicitly registered, specific
/// XML Elements can be looked up by name via getTag().
class DocumentStorage {
vector<Document *> doclist;
map<string,const Element *> tagmap;
vector<Document *> doclist; ///< The list of documents held by this container
map<string,const Element *> tagmap; ///< The map from name to registered XML elements
public:
~DocumentStorage(void);
~DocumentStorage(void); ///< Destructor
/// \brief Parse an XML document from the given stream
///
/// Parsing starts immediately on the stream, attempting to make an in-memory DOM tree.
/// An XmlException is thrown for any parsing error.
/// \param s is the given stream to parse
/// \return the in-memory DOM tree
Document *parseDocument(istream &s);
/// \brief Open and parse an XML file
///
/// The given filename is opened on the local filesystem and an attempt is made to parse
/// its contents into an in-memory DOM tree. An XmlException is thrown for any parsing error.
/// \param filename is the name of the XML document file
/// \return the in-memory DOM tree
Document *openDocument(const string &filename);
/// \brief Register the given XML Element object under its tag name
///
/// Only one Element can be stored on \b this object per tag name.
/// \param el is the given XML element
void registerTag(const Element *el);
/// \brief Retrieve a registered XML Element by name
///
/// \param nm is the XML tag name
/// \return the matching registered Element or null
const Element *getTag(const string &nm) const;
};
/// \brief An exception thrown by the XML parser
///
/// This object holds the error message as passed to the SAX interface callback
/// and is thrown as a formal exception.
struct XmlError {
string explain; // Explanatory string
XmlError(const string &s) { explain = s; }
string explain; ///< Explanatory string
XmlError(const string &s) { explain = s; } ///< Constructor
};
/// \brief Start-up the XML parser given a stream and a handler
///
/// This runs the low-level XML parser.
/// \param i is the given stream to get character data from
/// \param hand is the ContentHandler that stores or processes the XML content events
/// \param dbg is non-zero if the parser should output debug information during its parse
/// \return 0 if there is no error during parsing or a (non-zero) error condition
extern int4 xml_parse(istream &i,ContentHandler *hand,int4 dbg=0);
/// \brief Parse the given XML stream into an in-memory document
///
/// The stream is parsed using the standard ContentHandler for producing an in-memory
/// DOM representation of the XML document.
/// \param i is the given stream
/// \return the in-memory XML document
extern Document *xml_tree(istream &i);
/// \brief Send the given character array to a stream, escaping characters with special XML meaning
///
/// This makes the following character substitutions:
/// - '<' => "&lt;"
/// - '>' => "&gt;"
/// - '&' => "&amp;"
/// - '"' => "&quot;"
/// - '\'' => "&apos;"
///
/// \param s is the stream to write to
/// \param str is the given character array to escape
extern void xml_escape(ostream &s,const char *str);
// Some helper functions for producing XML
// Some helper functions for writing XML documents directly to a stream
/// \brief Output an XML attribute name/value pair to stream
///
/// \param s is the output stream
/// \param attr is the name of the attribute
/// \param val is the attribute value
inline void a_v(ostream &s,const string &attr,const string &val)
{
@ -175,18 +335,33 @@ inline void a_v(ostream &s,const string &attr,const string &val)
s << "\"";
}
/// \brief Output the given signed integer as an XML attribute value
///
/// \param s is the output stream
/// \param attr is the name of the attribute
/// \param val is the given integer value
inline void a_v_i(ostream &s,const string &attr,intb val)
{
s << ' ' << attr << "=\"" << dec << val << "\"";
}
/// \brief Output the given unsigned integer as an XML attribute value
///
/// \param s is the output stream
/// \param attr is the name of the attribute
/// \param val is the given unsigned integer value
inline void a_v_u(ostream &s,const string &attr,uintb val)
{
s << ' ' << attr << "=\"0x" << hex << val << "\"";
}
/// \brief Output the given boolean value as an XML attribute
///
/// \param s is the output stream
/// \param attr is the name of the attribute
/// \param val is the given boolean value
inline void a_v_b(ostream &s,const string &attr,bool val)
{
@ -198,6 +373,12 @@ inline void a_v_b(ostream &s,const string &attr,bool val)
s << "\"";
}
/// \brief Read an XML attribute value as a boolean
///
/// This method is intended to recognize the strings, "true", "yes", and "1"
/// as a \b true value. Anything else is returned as \b false.
/// \param attr is the given XML attribute value (as a string)
/// \return either \b true or \b false
inline bool xml_readbool(const string &attr)
{

View file

@ -25,11 +25,20 @@
#include <iostream>
#include <string>
string Attributes::bogus_uri("http://unused.uri");
/// \brief The XML character scanner
///
/// Tokenize a byte stream suitably for the main XML parser. The scanner expects an ASCII or UTF-8
/// encoding. Characters is XML tag and attribute names are restricted to ASCII "letters", but
/// extended UTF-8 characters can be used in any other character data: attribute values, content, comments.
class XmlScan {
public:
/// \brief Modes of the scanner
enum mode { CharDataMode, CDataMode, AttValueSingleMode,
AttValueDoubleMode, CommentMode, CharRefMode,
NameMode, SNameMode, SingleMode };
/// \brief Additional tokens returned by the scanner, in addition to byte values 00-ff
enum token { CharDataToken = 258,
CDataToken = 259,
AttValueToken = 260,
@ -40,13 +49,19 @@ public:
ElementBraceToken = 265,
CommandBraceToken = 266 };
private:
mode curmode;
istream &s;
string *lvalue; // Current string being built
int4 lookahead[4];
int4 pos;
bool endofstream; // Has end of stream been reached
void clearlvalue(void);
mode curmode; ///< The current scanning mode
istream &s; ///< The stream being scanned
string *lvalue; ///< Current string being built
int4 lookahead[4]; ///< Lookahead into the byte stream
int4 pos; ///< Current position in the lookahead buffer
bool endofstream; ///< Has end of stream been reached
void clearlvalue(void); ///< Clear the current token string
/// \brief Get the next byte in the stream
///
/// Maintain a lookahead of 4 bytes at all times so that we can check for special
/// XML character sequences without consuming.
/// \return the next byte value as an integer
int4 getxmlchar(void) {
char c;
int4 ret=lookahead[pos];
@ -64,40 +79,41 @@ private:
pos = (pos+1)&3;
return ret;
}
int4 next(int4 i) { return lookahead[(pos+i)&3]; }
bool isLetter(int4 val) { return (((val>=0x41)&&(val<=0x5a))||((val>=0x61)&&(val<=0x7a))); }
bool isInitialNameChar(int4 val);
bool isNameChar(int4 val);
bool isChar(int4 val);
int4 scanSingle(void);
int4 scanCharData(void);
int4 scanCData(void);
int4 scanAttValue(int4 quote);
int4 scanCharRef(void);
int4 scanComment(void);
int4 scanName(void);
int4 scanSName(void);
int4 next(int4 i) { return lookahead[(pos+i)&3]; } ///< Peek at the next (i-th) byte without consuming
bool isLetter(int4 val) { return (((val>=0x41)&&(val<=0x5a))||((val>=0x61)&&(val<=0x7a))); } ///< Is the given byte a \e letter
bool isInitialNameChar(int4 val); ///< Is the given byte/character the valid start of an XML name
bool isNameChar(int4 val); ///< Is the given byte/character valid for an XML name
bool isChar(int4 val); ///< Is the given byte/character valid as an XML character
int4 scanSingle(void); ///< Scan for the next token in Single Character mode
int4 scanCharData(void); ///< Scan for the next token is Character Data mode
int4 scanCData(void); ///< Scan for the next token in CDATA mode
int4 scanAttValue(int4 quote); ///< Scan for the next token in Attribute Value mode
int4 scanCharRef(void); ///< Scan for the next token in Character Reference mode
int4 scanComment(void); ///< Scan for the next token in Comment mode
int4 scanName(void); ///< Scan a Name or return single non-name character
int4 scanSName(void); ///< Scan Name, allow white space before
public:
XmlScan(istream &t);
~XmlScan(void);
void setmode(mode m) { curmode = m; }
int4 nexttoken(void); // Interface for bison
string *lval(void) { string *ret = lvalue; lvalue = (string *)0; return ret; }
XmlScan(istream &t); ///< Construct scanner given a stream
~XmlScan(void); ///< Destructor
void setmode(mode m) { curmode = m; } ///< Set the scanning mode
int4 nexttoken(void); ///< Get the next token
string *lval(void) { string *ret = lvalue; lvalue = (string *)0; return ret; } ///< Return the last \e lvalue string
};
/// \brief A parsed name/value pair
struct NameValue {
string *name;
string *value;
string *name; ///< The name
string *value; ///< The value
};
extern int yylex(void);
extern int yyerror(const char *str);
extern void print_content(const string &str);
extern int4 convertEntityRef(const string &ref);
extern int4 convertCharRef(const string &ref);
static XmlScan *global_scan;
static ContentHandler *handler;
extern int yydebug;
extern int yylex(void); ///< Interface to the scanner
extern int yyerror(const char *str); ///< Interface for registering an error in parsing
extern void print_content(const string &str); ///< Send character data to the ContentHandler
extern int4 convertEntityRef(const string &ref); ///< Convert an XML entity to its equivalent character
extern int4 convertCharRef(const string &ref); ///< Convert an XML character reference to its equivalent character
static XmlScan *global_scan; ///< Global reference to the scanner
static ContentHandler *handler; ///< Global reference to the content handler
extern int yydebug; ///< Debug mode
%}
%union {
@ -233,11 +249,11 @@ int4 XmlScan::scanSingle(void)
int4 XmlScan::scanCharData(void)
{ // look for '<' '&' or ']]>'
{
clearlvalue();
lvalue = new string();
while(next(0) != -1) {
while(next(0) != -1) { // look for '<' '&' or ']]>'
if (next(0) == '<') break;
if (next(0) == '&') break;
if (next(0) == ']')
@ -253,11 +269,11 @@ int4 XmlScan::scanCharData(void)
int4 XmlScan::scanCData(void)
{ // Look for "]]>" and non-Char
{
clearlvalue();
lvalue = new string();
while(next(0) != -1) {
while(next(0) != -1) { // Look for "]]>" and non-Char
if (next(0)==']')
if (next(1)==']')
if (next(2)=='>')
@ -334,7 +350,7 @@ int4 XmlScan::scanComment(void)
int4 XmlScan::scanName(void)
{ // Scan a Name or return single non-name character
{
clearlvalue();
lvalue = new string();
@ -350,7 +366,7 @@ int4 XmlScan::scanName(void)
int4 XmlScan::scanSName(void)
{ // Scan Name, allow white space before
{
int4 whitecount = 0;
while((next(0)==' ')||(next(0)=='\n')||(next(0)=='\r')||(next(0)=='\t')) {
whitecount += 1;
@ -571,7 +587,7 @@ Document *DocumentStorage::parseDocument(istream &s)
Document *DocumentStorage::openDocument(const string &filename)
{ // Open and parse an XML file, return Document object
{
ifstream s(filename.c_str());
if (!s)
throw XmlError("Unable to open xml document "+filename);
@ -582,13 +598,13 @@ Document *DocumentStorage::openDocument(const string &filename)
void DocumentStorage::registerTag(const Element *el)
{ // Register a tag under its name
{
tagmap[el->getName()] = el;
}
const Element *DocumentStorage::getTag(const string &nm) const
{ // Retrieve a registered tag by name
{
map<string,const Element *>::const_iterator iter;
iter = tagmap.find(nm);
@ -611,7 +627,7 @@ Document *xml_tree(istream &i)
void xml_escape(ostream &s,const char *str)
{ // Escape xml tag indicators
{
while(*str!='\0') {
if (*str < '?') {
if (*str=='<') s << "&lt;";