mirror of
https://github.com/NationalSecurityAgency/ghidra.git
synced 2025-10-03 09:49:23 +02:00
753 lines
40 KiB
C++
753 lines
40 KiB
C++
/* ###
|
|
* IP: GHIDRA
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
#ifndef __MARSHAL_HH__
|
|
#define __MARSHAL_HH__
|
|
|
|
#include "xml.hh"
|
|
#include "opcodes.hh"
|
|
#include <list>
|
|
#include <unordered_map>
|
|
|
|
namespace ghidra {
|
|
|
|
using std::list;
|
|
using std::unordered_map;
|
|
|
|
/// \brief An annotation for a data element being transferred to/from a stream
|
|
///
|
|
/// This class parallels the XML concept of an \b attribute on an element. An AttributeId describes
|
|
/// a particular piece of data associated with an ElementId. The defining characteristic of the AttributeId is
|
|
/// its name. Internally this name is associated with an integer id. The name (and id) uniquely determine
|
|
/// the data being labeled, within the context of a specific ElementId. Within this context, an AttributeId labels either
|
|
/// - An unsigned integer
|
|
/// - A signed integer
|
|
/// - A boolean value
|
|
/// - A string
|
|
///
|
|
/// The same AttributeId can be used to label a different type of data when associated with a different ElementId.
|
|
class AttributeId {
|
|
static unordered_map<string,uint4> lookupAttributeId; ///< A map of AttributeId names to their associated id
|
|
static vector<AttributeId *> &getList(void); ///< Retrieve the list of static AttributeId
|
|
string name; ///< The name of the attribute
|
|
uint4 id; ///< The (internal) id of the attribute
|
|
public:
|
|
AttributeId(const string &nm,uint4 i,int4 scope=0); ///< Construct given a name and id
|
|
const string &getName(void) const { return name; } ///< Get the attribute's name
|
|
uint4 getId(void) const { return id; } ///< Get the attribute's id
|
|
bool operator==(const AttributeId &op2) const { return (id == op2.id); } ///< Test equality with another AttributeId
|
|
static uint4 find(const string &nm,int4 scope); ///< Find the id associated with a specific attribute name
|
|
static void initialize(void); ///< Populate a hashtable with all AttributeId objects
|
|
friend bool operator==(uint4 id,const AttributeId &op2) { return (id == op2.id); } ///< Test equality of a raw integer id with an AttributeId
|
|
friend bool operator==(const AttributeId &op1,uint4 id) { return (op1.id == id); } ///< Test equality of an AttributeId with a raw integer id
|
|
};
|
|
|
|
/// \brief An annotation for a specific collection of hierarchical data
|
|
///
|
|
/// This class parallels the XML concept of an \b element. An ElementId describes a collection of data, where each
|
|
/// piece is annotated by a specific AttributeId. In addition, each ElementId can contain zero or more \e child
|
|
/// ElementId objects, forming a hierarchy of annotated data. Each ElementId has a name, which is unique at least
|
|
/// within the context of its parent ElementId. Internally this name is associated with an integer id. A special
|
|
/// AttributeId ATTRIB_CONTENT is used to label the XML element's text content, which is traditionally not labeled
|
|
/// as an attribute.
|
|
class ElementId {
|
|
static unordered_map<string,uint4> lookupElementId; ///< A map of ElementId names to their associated id
|
|
static vector<ElementId *> &getList(void); ///< Retrieve the list of static ElementId
|
|
string name; ///< The name of the element
|
|
uint4 id; ///< The (internal) id of the attribute
|
|
public:
|
|
ElementId(const string &nm,uint4 i,int4 scope=0); ///< Construct given a name and id
|
|
const string &getName(void) const { return name; } ///< Get the element's name
|
|
uint4 getId(void) const { return id; } ///< Get the element's id
|
|
bool operator==(const ElementId &op2) const { return (id == op2.id); } ///< Test equality with another ElementId
|
|
static uint4 find(const string &nm,int4 scope); ///< Find the id associated with a specific element name
|
|
static void initialize(void); ///< Populate a hashtable with all ElementId objects
|
|
friend bool operator==(uint4 id,const ElementId &op2) { return (id == op2.id); } ///< Test equality of a raw integer id with an ElementId
|
|
friend bool operator==(const ElementId &op1,uint4 id) { return (op1.id == id); } ///< Test equality of an ElementId with a raw integer id
|
|
friend bool operator!=(uint4 id,const ElementId &op2) { return (id != op2.id); } ///< Test inequality of a raw integer id with an ElementId
|
|
friend bool operator!=(const ElementId &op1,uint4 id) { return (op1.id != id); } ///< Test inequality of an ElementId with a raw integer id
|
|
};
|
|
|
|
class AddrSpace;
|
|
class AddrSpaceManager;
|
|
|
|
/// \brief A class for reading structured data from a stream
|
|
///
|
|
/// All data is loosely structured as with an XML document. A document contains a nested set
|
|
/// of \b elements, with labels corresponding to the ElementId class. A single element can hold
|
|
/// zero or more attributes and zero or more child elements. An attribute holds a primitive
|
|
/// data element (bool, integer, string) and is labeled by an AttributeId. The document is traversed
|
|
/// using a sequence of openElement() and closeElement() calls, intermixed with read*() calls to extract
|
|
/// the data. The elements are traversed in a depth first order. Attributes within an element can
|
|
/// be traversed in order using repeated calls to the getNextAttributeId() method, followed by calls to
|
|
/// one of the read*(void) methods to extract the data. Alternately a read*(AttributeId) call can be used
|
|
/// to extract data for an attribute known to be in the element. There is a special content attribute
|
|
/// whose data can be extracted using a read*(AttributeId) call that is passed the special ATTRIB_CONTENT id.
|
|
/// This attribute will not be traversed by getNextAttributeId().
|
|
class Decoder {
|
|
protected:
|
|
const AddrSpaceManager *spcManager; ///< Manager for decoding address space attributes
|
|
public:
|
|
Decoder(const AddrSpaceManager *spc) { spcManager = spc; } ///< Base constructor
|
|
|
|
const AddrSpaceManager *getAddrSpaceManager(void) const { return spcManager; } ///< Get the manager used for address space decoding
|
|
virtual ~Decoder(void) {} ///< Destructor
|
|
|
|
/// \brief Prepare to decode a given stream
|
|
///
|
|
/// Called once before any decoding. Currently this is assumed to make an internal copy of the stream data,
|
|
/// i.e. the input stream is cleared before any decoding takes place.
|
|
/// \param s is the given input stream to be decode
|
|
/// \return \b true if the stream was fully ingested
|
|
virtual void ingestStream(istream &s)=0;
|
|
|
|
/// \brief Peek at the next child element of the current parent, without traversing in (opening) it.
|
|
///
|
|
/// The element id is returned, which can be compared to ElementId labels.
|
|
/// If there are no remaining child elements to traverse, 0 is returned.
|
|
/// \return the element id or 0
|
|
virtual uint4 peekElement(void)=0;
|
|
|
|
/// \brief Open (traverse into) the next child element of the current parent.
|
|
///
|
|
/// The child becomes the current parent. The list of attributes is initialized for use with getNextAttributeId.
|
|
/// \return the id of the child element
|
|
virtual uint4 openElement(void)=0;
|
|
|
|
/// \brief Open (traverse into) the next child element, which must be of a specific type
|
|
///
|
|
/// The child becomes the current parent, and its attributes are initialized for use with getNextAttributeId.
|
|
/// The child must match the given element id or an exception is thrown.
|
|
/// \param elemId is the given element id to match
|
|
/// \return the id of the child element
|
|
virtual uint4 openElement(const ElementId &elemId)=0;
|
|
|
|
/// \brief Close the current element
|
|
///
|
|
/// The data for the current element is considered fully processed. If the element has additional children,
|
|
/// an exception is thrown. The stream must indicate the end of the element in some way.
|
|
/// \param id is the id of the element to close (which must be the current element)
|
|
virtual void closeElement(uint4 id)=0;
|
|
|
|
/// \brief Close the current element, skipping any child elements that have not yet been parsed
|
|
///
|
|
/// This closes the given element, which must be current. If there are child elements that have not been
|
|
/// parsed, this is not considered an error, and they are skipped over in the parse.
|
|
/// \param id is the id of the element to close (which must be the current element)
|
|
virtual void closeElementSkipping(uint4 id)=0;
|
|
|
|
/// \brief Get the next attribute id for the current element
|
|
///
|
|
/// Attributes are automatically set up for traversal using this method, when the element is opened.
|
|
/// If all attributes have been traversed (or there are no attributes), 0 is returned.
|
|
/// \return the id of the next attribute or 0
|
|
virtual uint4 getNextAttributeId(void)=0;
|
|
|
|
/// \brief Get the id for the (current) attribute, assuming it is indexed
|
|
///
|
|
/// Assuming the previous call to getNextAttributeId() returned the id of ATTRIB_UNKNOWN,
|
|
/// reinterpret the attribute as being an indexed form of the given attribute. If the attribute
|
|
/// matches, return this indexed id, otherwise return ATTRIB_UNKNOWN.
|
|
/// \param attribId is the attribute being indexed
|
|
/// \return the indexed id or ATTRIB_UNKNOWN
|
|
virtual uint4 getIndexedAttributeId(const AttributeId &attribId)=0;
|
|
|
|
/// \brief Reset attribute traversal for the current element
|
|
///
|
|
/// Attributes for a single element can be traversed more than once using the getNextAttributeId method.
|
|
virtual void rewindAttributes(void)=0;
|
|
|
|
/// \brief Parse the current attribute as a boolean value
|
|
///
|
|
/// The last attribute, as returned by getNextAttributeId, is treated as a boolean, and its value is returned.
|
|
/// \return the boolean value associated with the current attribute.
|
|
virtual bool readBool(void)=0;
|
|
|
|
/// \brief Find and parse a specific attribute in the current element as a boolean value
|
|
///
|
|
/// The set of attributes for the current element is searched for a match to the given attribute id.
|
|
/// This attribute is then parsed as a boolean and its value returned.
|
|
/// If there is no attribute matching the id, an exception is thrown.
|
|
/// Parsing via getNextAttributeId is reset.
|
|
/// \param attribId is the specific attribute id to match
|
|
/// \return the boolean value
|
|
virtual bool readBool(const AttributeId &attribId)=0;
|
|
|
|
/// \brief Parse the current attribute as a signed integer value
|
|
///
|
|
/// The last attribute, as returned by getNextAttributeId, is treated as a signed integer, and its value is returned.
|
|
/// \return the signed integer value associated with the current attribute.
|
|
virtual intb readSignedInteger(void)=0;
|
|
|
|
/// \brief Find and parse a specific attribute in the current element as a signed integer
|
|
///
|
|
/// The set of attributes for the current element is searched for a match to the given attribute id.
|
|
/// This attribute is then parsed as a signed integer and its value returned.
|
|
/// If there is no attribute matching the id, an exception is thrown.
|
|
/// Parsing via getNextAttributeId is reset.
|
|
/// \param attribId is the specific attribute id to match
|
|
/// \return the signed integer value
|
|
virtual intb readSignedInteger(const AttributeId &attribId)=0;
|
|
|
|
/// \brief Parse the current attribute as either a signed integer value or a string.
|
|
///
|
|
/// If the attribute is an integer, its value is returned. If the attribute is a string, it must match an
|
|
/// expected string passed to the method, and a predetermined integer value associated with the string is returned.
|
|
/// If the attribute neither matches the expected string nor is an integer, the return value is undefined.
|
|
/// \param expect is the string value to expect if the attribute is encoded as a string
|
|
/// \param expectval is the integer value to return if the attribute matches the expected string
|
|
/// \return the encoded integer or the integer value associated with the expected string
|
|
virtual intb readSignedIntegerExpectString(const string &expect,intb expectval)=0;
|
|
|
|
/// \brief Find and parse a specific attribute in the current element as either a signed integer or a string.
|
|
///
|
|
/// If the attribute is an integer, its value is parsed and returned.
|
|
/// If the attribute is encoded as a string, it must match an expected string passed to this method.
|
|
/// In this case, a predetermined integer value is passed back, indicating a matching string was parsed.
|
|
/// If the attribute neither matches the expected string nor is an integer, the return value is undefined.
|
|
/// If there is no attribute matching the id, an exception is thrown.
|
|
/// \param attribId is the specific attribute id to match
|
|
/// \param expect is the string to expect, if the attribute is not encoded as an integer
|
|
/// \param expectval is the integer value to return if the attribute matches the expected string
|
|
/// \return the encoded integer or the integer value associated with the expected string
|
|
virtual intb readSignedIntegerExpectString(const AttributeId &attribId,const string &expect,intb expectval)=0;
|
|
|
|
/// \brief Parse the current attribute as an unsigned integer value
|
|
///
|
|
/// The last attribute, as returned by getNextAttributeId, is treated as an unsigned integer, and its value is returned.
|
|
/// \return the unsigned integer value associated with the current attribute.
|
|
virtual uintb readUnsignedInteger(void)=0;
|
|
|
|
/// \brief Find and parse a specific attribute in the current element as an unsigned integer
|
|
///
|
|
/// The set of attributes for the current element is searched for a match to the given attribute id.
|
|
/// This attribute is then parsed as an unsigned integer and its value returned.
|
|
/// If there is no attribute matching the id, an exception is thrown.
|
|
/// Parsing via getNextAttributeId is reset.
|
|
/// \param attribId is the specific attribute id to match
|
|
/// \return the unsigned integer value
|
|
virtual uintb readUnsignedInteger(const AttributeId &attribId)=0;
|
|
|
|
/// \brief Parse the current attribute as a string
|
|
///
|
|
/// The last attribute, as returned by getNextAttributeId, is returned as a string.
|
|
/// \return the string associated with the current attribute.
|
|
virtual string readString(void)=0;
|
|
|
|
/// \brief Find the specific attribute in the current element and return it as a string
|
|
///
|
|
/// The set of attributes for the current element is searched for a match to the given attribute id.
|
|
/// This attribute is then returned as a string. If there is no attribute matching the id, and exception is thrown.
|
|
/// Parse via getNextAttributeId is reset.
|
|
/// \param attribId is the specific attribute id to match
|
|
/// \return the string associated with the attribute
|
|
virtual string readString(const AttributeId &attribId)=0;
|
|
|
|
/// \brief Parse the current attribute as an address space
|
|
///
|
|
/// The last attribute, as returned by getNextAttributeId, is returned as an address space.
|
|
/// \return the address space associated with the current attribute.
|
|
virtual AddrSpace *readSpace(void)=0;
|
|
|
|
/// \brief Find the specific attribute in the current element and return it as an address space
|
|
///
|
|
/// Search attributes from the current element for a match to the given attribute id.
|
|
/// Return this attribute as an address space. If there is no attribute matching the id, an exception is thrown.
|
|
/// Parse via getNextAttributeId is reset.
|
|
/// \param attribId is the specific attribute id to match
|
|
/// \return the address space associated with the attribute
|
|
virtual AddrSpace *readSpace(const AttributeId &attribId)=0;
|
|
|
|
/// \brief Parse the current attribute as a p-code OpCode
|
|
///
|
|
/// The last attribute, as returned by getNextAttributeId, is returned as an OpCode.
|
|
/// \return the OpCode associated with the current attribute
|
|
virtual OpCode readOpcode(void)=0;
|
|
|
|
/// \brief Find the specific attribute in the current element and return it as an OpCode
|
|
///
|
|
/// Search attributes from the current element for a match to the given attribute id.
|
|
/// Return this attribute as an OpCode. If there is no matching attribute id, an exception is thrown.
|
|
/// Parse via getNextAttributeId is reset.
|
|
/// \param attribId is the specific attribute id to match
|
|
/// \return the OpCode associated with the attribute
|
|
virtual OpCode readOpcode(AttributeId &attribId)=0;
|
|
|
|
/// \brief Skip parsing of the next element
|
|
///
|
|
/// The element skipped is the one that would be opened by the next call to openElement.
|
|
void skipElement(void) {
|
|
uint4 elemId = openElement();
|
|
closeElementSkipping(elemId);
|
|
}
|
|
};
|
|
|
|
/// \brief A class for writing structured data to a stream
|
|
///
|
|
/// The resulting encoded data is structured similarly to an XML document. The document contains a nested set
|
|
/// of \b elements, with labels corresponding to the ElementId class. A single element can hold
|
|
/// zero or more attributes and zero or more child elements. An \b attribute holds a primitive
|
|
/// data element (bool, integer, string) and is labeled by an AttributeId. The document is written
|
|
/// using a sequence of openElement() and closeElement() calls, intermixed with write*() calls to encode
|
|
/// the data primitives. All primitives written using a write*() call are associated with the current open element,
|
|
/// and all write*() calls for one element must come before opening any child element.
|
|
/// The traditional XML element text content can be written using the special ATTRIB_CONTENT AttributeId, which
|
|
/// must be the last write*() call associated with the specific element.
|
|
class Encoder {
|
|
public:
|
|
virtual ~Encoder(void) {} ///< Destructor
|
|
|
|
/// \brief Begin a new element in the encoding
|
|
///
|
|
/// The element will have the given ElementId annotation and becomes the \e current element.
|
|
/// \param elemId is the given ElementId annotation
|
|
virtual void openElement(const ElementId &elemId)=0;
|
|
|
|
/// \brief End the current element in the encoding
|
|
///
|
|
/// The current element must match the given annotation or an exception is thrown.
|
|
/// \param elemId is the given (expected) annotation for the current element
|
|
virtual void closeElement(const ElementId &elemId)=0;
|
|
|
|
/// \brief Write an annotated boolean value into the encoding
|
|
///
|
|
/// The boolean data is associated with the given AttributeId annotation and the current open element.
|
|
/// \param attribId is the given AttributeId annotation
|
|
/// \param val is boolean value to encode
|
|
virtual void writeBool(const AttributeId &attribId,bool val)=0;
|
|
|
|
/// \brief Write an annotated signed integer value into the encoding
|
|
///
|
|
/// The integer is associated with the given AttributeId annotation and the current open element.
|
|
/// \param attribId is the given AttributeId annotation
|
|
/// \param val is the signed integer value to encode
|
|
virtual void writeSignedInteger(const AttributeId &attribId,intb val)=0;
|
|
|
|
/// \brief Write an annotated unsigned integer value into the encoding
|
|
///
|
|
/// The integer is associated with the given AttributeId annotation and the current open element.
|
|
/// \param attribId is the given AttributeId annotation
|
|
/// \param val is the unsigned integer value to encode
|
|
virtual void writeUnsignedInteger(const AttributeId &attribId,uintb val)=0;
|
|
|
|
/// \brief Write an annotated string into the encoding
|
|
///
|
|
/// The string is associated with the given AttributeId annotation and the current open element.
|
|
/// \param attribId is the given AttributeId annotation
|
|
/// \param val is the string to encode
|
|
virtual void writeString(const AttributeId &attribId,const string &val)=0;
|
|
|
|
/// \brief Write an annotated string, using an indexed attribute, into the encoding
|
|
///
|
|
/// Multiple attributes with a shared name can be written to the same element by calling this method
|
|
/// multiple times with a different \b index value. The encoding will use attribute ids up to the base id
|
|
/// plus the maximum index passed in. Implementors must be careful to not use other attributes with ids
|
|
/// bigger than the base id within the element taking the indexed attribute.
|
|
/// \param attribId is the shared AttributeId
|
|
/// \param index is the unique index to associated with the string
|
|
/// \param val is the string to encode
|
|
virtual void writeStringIndexed(const AttributeId &attribId,uint4 index,const string &val)=0;
|
|
|
|
/// \brief Write an address space reference into the encoding
|
|
///
|
|
/// The address space is associated with the given AttributeId annotation and the current open element.
|
|
/// \param attribId is the given AttributeId annotation
|
|
/// \param spc is the address space to encode
|
|
virtual void writeSpace(const AttributeId &attribId,const AddrSpace *spc)=0;
|
|
|
|
/// \brief Write a p-code operation opcode into the encoding, associating it with the given annotation
|
|
///
|
|
/// \param attribId is the given annotation
|
|
/// \param opc is the opcode
|
|
virtual void writeOpcode(const AttributeId &attribId,OpCode opc)=0;
|
|
|
|
};
|
|
|
|
/// \brief An XML based decoder
|
|
///
|
|
/// The underlying transfer encoding is an XML document. The decoder can either be initialized with an
|
|
/// existing Element as the root of the data to transfer, or the ingestStream() method can be invoked
|
|
/// to read the XML document from an input stream, in which case the decoder manages the Document object.
|
|
class XmlDecode : public Decoder {
|
|
Document *document; ///< An ingested XML document, owned by \b this decoder
|
|
const Element *rootElement; ///< The root XML element to be decoded
|
|
vector<const Element *> elStack; ///< Stack of currently \e open elements
|
|
vector<List::const_iterator> iterStack; ///< Index of next child for each \e open element
|
|
int4 attributeIndex; ///< Position of \e current attribute to parse (in \e current element)
|
|
int4 scope; ///< Scope of element/attribute tags to look up
|
|
int4 findMatchingAttribute(const Element *el,const string &attribName);
|
|
public:
|
|
XmlDecode(const AddrSpaceManager *spc,const Element *root,int4 sc=0) : Decoder(spc) {
|
|
document = (Document *)0; rootElement = root; attributeIndex = -1; scope = sc; } ///< Constructor with preparsed root
|
|
XmlDecode(const AddrSpaceManager *spc,int4 sc=0) : Decoder(spc) {
|
|
document = (Document *)0; rootElement = (const Element *)0; attributeIndex = -1; scope=sc; } ///< Constructor for use with ingestStream
|
|
const Element *getCurrentXmlElement(void) const { return elStack.back(); } ///< Get pointer to underlying XML element object
|
|
virtual ~XmlDecode(void);
|
|
virtual void ingestStream(istream &s);
|
|
virtual uint4 peekElement(void);
|
|
virtual uint4 openElement(void);
|
|
virtual uint4 openElement(const ElementId &elemId);
|
|
virtual void closeElement(uint4 id);
|
|
virtual void closeElementSkipping(uint4 id);
|
|
virtual void rewindAttributes(void);
|
|
virtual uint4 getNextAttributeId(void);
|
|
virtual uint4 getIndexedAttributeId(const AttributeId &attribId);
|
|
virtual bool readBool(void);
|
|
virtual bool readBool(const AttributeId &attribId);
|
|
virtual intb readSignedInteger(void);
|
|
virtual intb readSignedInteger(const AttributeId &attribId);
|
|
virtual intb readSignedIntegerExpectString(const string &expect,intb expectval);
|
|
virtual intb readSignedIntegerExpectString(const AttributeId &attribId,const string &expect,intb expectval);
|
|
virtual uintb readUnsignedInteger(void);
|
|
virtual uintb readUnsignedInteger(const AttributeId &attribId);
|
|
virtual string readString(void);
|
|
virtual string readString(const AttributeId &attribId);
|
|
virtual AddrSpace *readSpace(void);
|
|
virtual AddrSpace *readSpace(const AttributeId &attribId);
|
|
virtual OpCode readOpcode(void);
|
|
virtual OpCode readOpcode(AttributeId &attribId);
|
|
};
|
|
|
|
/// \brief An XML based encoder
|
|
///
|
|
/// The underlying transfer encoding is an XML document. The encoder is initialized with a stream which will
|
|
/// receive the XML document as calls are made on the encoder.
|
|
class XmlEncode : public Encoder {
|
|
friend class XmlDecode;
|
|
enum {
|
|
tag_start = 0, ///< Tag has been opened, attributes can be written
|
|
tag_content = 1, ///< Opening tag and content have been written
|
|
tag_stop = 2 ///< No tag is currently being written
|
|
};
|
|
static const char spaces[]; ///< Array of ' ' characters for emitting indents
|
|
static const int4 MAX_SPACES; ///< Maximum number of leading spaces when indenting XML
|
|
ostream &outStream; ///< The stream receiving the encoded data
|
|
int4 tagStatus; ///< Stage of writing an element tag
|
|
int4 depth; ///< Depth of open elements
|
|
bool doFormatting; ///< \b true if encoder should indent and emit newlines
|
|
void newLine(void); ///< Emit a newline and proper indenting for the next tag
|
|
public:
|
|
XmlEncode(ostream &s,bool doFormat=true) : outStream(s) { depth=0; tagStatus=tag_stop; doFormatting=doFormat; } ///< Construct from a stream
|
|
virtual void openElement(const ElementId &elemId);
|
|
virtual void closeElement(const ElementId &elemId);
|
|
virtual void writeBool(const AttributeId &attribId,bool val);
|
|
virtual void writeSignedInteger(const AttributeId &attribId,intb val);
|
|
virtual void writeUnsignedInteger(const AttributeId &attribId,uintb val);
|
|
virtual void writeString(const AttributeId &attribId,const string &val);
|
|
virtual void writeStringIndexed(const AttributeId &attribId,uint4 index,const string &val);
|
|
virtual void writeSpace(const AttributeId &attribId,const AddrSpace *spc);
|
|
virtual void writeOpcode(const AttributeId &attribId,OpCode opc);
|
|
};
|
|
|
|
/// \brief Protocol format for PackedEncode and PackedDecode classes
|
|
///
|
|
/// All bytes in the encoding are expected to be non-zero. Element encoding looks like
|
|
/// - 01xiiiii is an element start
|
|
/// - 10xiiiii is an element end
|
|
/// - 11xiiiii is an attribute start
|
|
///
|
|
/// Where iiiii is the (first) 5 bits of the element/attribute id.
|
|
/// If x=0, the id is complete. If x=1, the next byte contains 7 more bits of the id: 1iiiiiii
|
|
///
|
|
/// After an attribute start, there follows a \e type byte: ttttllll, where the first 4 bits indicate the
|
|
/// type of attribute and final 4 bits are a \b length \b code. The types are:
|
|
/// - 1 = boolean (lengthcode=0 for false, lengthcode=1 for true)
|
|
/// - 2 = positive signed integer
|
|
/// - 3 = negative signed integer (stored in negated form)
|
|
/// - 4 = unsigned integer
|
|
/// - 5 = basic address space (encoded as the integer index of the space)
|
|
/// - 6 = special address space (lengthcode 0=>stack 1=>join 2=>fspec 3=>iop 4=>spacebase)
|
|
/// - 7 = string
|
|
///
|
|
/// All attribute types except \e boolean and \e special, have an encoded integer after the \e type byte.
|
|
/// The \b length \b code indicates the number of bytes used to encode the integer, 7-bits of info per byte, 1iiiiiii.
|
|
/// A \b length \b code of zero is used to encode an integer value of 0, with no following bytes.
|
|
///
|
|
/// For strings, the integer encoded after the \e type byte, is the actual length of the string. The
|
|
/// string data itself is stored immediately after the length integer using UTF8 format.
|
|
namespace PackedFormat {
|
|
static const uint1 HEADER_MASK = 0xc0; ///< Bits encoding the record type
|
|
static const uint1 ELEMENT_START = 0x40; ///< Header for an element start record
|
|
static const uint1 ELEMENT_END = 0x80; ///< Header for an element end record
|
|
static const uint1 ATTRIBUTE = 0xc0; ///< Header for an attribute record
|
|
static const uint1 HEADEREXTEND_MASK = 0x20; ///< Bit indicating the id extends into the next byte
|
|
static const uint1 ELEMENTID_MASK = 0x1f; ///< Bits encoding (part of) the id in the record header
|
|
static const uint1 RAWDATA_MASK = 0x7f; ///< Bits of raw data in follow-on bytes
|
|
static const int4 RAWDATA_BITSPERBYTE = 7; ///< Number of bits used in a follow-on byte
|
|
static const uint1 RAWDATA_MARKER = 0x80; ///< The unused bit in follow-on bytes. (Always set to 1)
|
|
static const int4 TYPECODE_SHIFT = 4; ///< Bit position of the type code in the type byte
|
|
static const uint1 LENGTHCODE_MASK = 0xf; ///< Bits in the type byte forming the length code
|
|
static const uint1 TYPECODE_BOOLEAN = 1; ///< Type code for the \e boolean type
|
|
static const uint1 TYPECODE_SIGNEDINT_POSITIVE = 2; ///< Type code for the \e signed \e positive \e integer type
|
|
static const uint1 TYPECODE_SIGNEDINT_NEGATIVE = 3; ///< Type code for the \e signed \e negative \e integer type
|
|
static const uint1 TYPECODE_UNSIGNEDINT = 4; ///< Type code for the \e unsigned \e integer type
|
|
static const uint1 TYPECODE_ADDRESSSPACE = 5; ///< Type code for the \e address \e space type
|
|
static const uint1 TYPECODE_SPECIALSPACE = 6; ///< Type code for the \e special \e address \e space type
|
|
static const uint1 TYPECODE_STRING = 7; ///< Type code for the \e string type
|
|
static const uint4 SPECIALSPACE_STACK = 0; ///< Special code for the \e stack space
|
|
static const uint4 SPECIALSPACE_JOIN = 1; ///< Special code for the \e join space
|
|
static const uint4 SPECIALSPACE_FSPEC = 2; ///< Special code for the \e fspec space
|
|
static const uint4 SPECIALSPACE_IOP = 3; ///< Special code for the \e iop space
|
|
static const uint4 SPECIALSPACE_SPACEBASE = 4; ///< Special code for a \e spacebase space
|
|
}
|
|
|
|
/// \brief A byte-based decoder designed to marshal info to the decompiler efficiently
|
|
///
|
|
/// The decoder expects an encoding as described in PackedFormat. When ingested, the stream bytes are
|
|
/// held in a sequence of arrays (ByteChunk). During decoding, \b this object maintains a Position in the
|
|
/// stream at the start and end of the current open element, and a Position of the next attribute to read to
|
|
/// facilitate getNextAttributeId() and associated read*() methods.
|
|
class PackedDecode : public Decoder {
|
|
public:
|
|
static const int4 BUFFER_SIZE; ///< The size, in bytes, of a single cached chunk of the input stream
|
|
private:
|
|
/// \brief A bounded array of bytes
|
|
class ByteChunk {
|
|
friend class PackedDecode;
|
|
uint1 *start; ///< Start of the byte array
|
|
uint1 *end; ///< End of the byte array
|
|
public:
|
|
ByteChunk(uint1 *s,uint1 *e) { start = s; end = e; } ///< Constructor
|
|
};
|
|
/// \brief An iterator into input stream
|
|
class Position {
|
|
friend class PackedDecode;
|
|
list<ByteChunk>::const_iterator seqIter; ///< Current byte sequence
|
|
uint1 *current; ///< Current position in sequence
|
|
uint1 *end; ///< End of current sequence
|
|
};
|
|
list<ByteChunk> inStream; ///< Incoming raw data as a sequence of byte arrays
|
|
Position startPos; ///< Position at the start of the current open element
|
|
Position curPos; ///< Position of the next attribute as returned by getNextAttributeId
|
|
Position endPos; ///< Ending position after all attributes in current open element
|
|
bool attributeRead; ///< Has the last attribute returned by getNextAttributeId been read
|
|
uint1 getByte(Position &pos) { return *pos.current; } ///< Get the byte at the current position, do not advance
|
|
uint1 getBytePlus1(Position &pos); ///< Get the byte following the current byte, do not advance position
|
|
uint1 getNextByte(Position &pos); ///< Get the byte at the current position and advance to the next byte
|
|
void advancePosition(Position &pos,int4 skip); ///< Advance the position by the given number of bytes
|
|
uint8 readInteger(int4 len); ///< Read an integer from the \e current position given its length in bytes
|
|
uint4 readLengthCode(uint1 typeByte) { return ((uint4)typeByte & PackedFormat::LENGTHCODE_MASK); } ///< Extract length code from type byte
|
|
void findMatchingAttribute(const AttributeId &attribId); ///< Find attribute matching the given id in open element
|
|
void skipAttribute(void); ///< Skip over the attribute at the current position
|
|
void skipAttributeRemaining(uint1 typeByte); ///< Skip over remaining attribute data, after a mismatch
|
|
protected:
|
|
uint1 *allocateNextInputBuffer(int4 pad); ///< Allocate the next chunk of space in the input stream
|
|
void endIngest(int4 bufPos); ///< Finish set up for reading input stream
|
|
public:
|
|
PackedDecode(const AddrSpaceManager *spcManager) : Decoder(spcManager) {} ///< Constructor
|
|
virtual ~PackedDecode(void);
|
|
virtual void ingestStream(istream &s);
|
|
virtual uint4 peekElement(void);
|
|
virtual uint4 openElement(void);
|
|
virtual uint4 openElement(const ElementId &elemId);
|
|
virtual void closeElement(uint4 id);
|
|
virtual void closeElementSkipping(uint4 id);
|
|
virtual void rewindAttributes(void);
|
|
virtual uint4 getNextAttributeId(void);
|
|
virtual uint4 getIndexedAttributeId(const AttributeId &attribId);
|
|
virtual bool readBool(void);
|
|
virtual bool readBool(const AttributeId &attribId);
|
|
virtual intb readSignedInteger(void);
|
|
virtual intb readSignedInteger(const AttributeId &attribId);
|
|
virtual intb readSignedIntegerExpectString(const string &expect,intb expectval);
|
|
virtual intb readSignedIntegerExpectString(const AttributeId &attribId,const string &expect,intb expectval);
|
|
virtual uintb readUnsignedInteger(void);
|
|
virtual uintb readUnsignedInteger(const AttributeId &attribId);
|
|
virtual string readString(void);
|
|
virtual string readString(const AttributeId &attribId);
|
|
virtual AddrSpace *readSpace(void);
|
|
virtual AddrSpace *readSpace(const AttributeId &attribId);
|
|
virtual OpCode readOpcode(void);
|
|
virtual OpCode readOpcode(AttributeId &attribId);
|
|
};
|
|
|
|
/// \brief A byte-based encoder designed to marshal from the decompiler efficiently
|
|
///
|
|
/// See PackedDecode for details of the encoding format.
|
|
class PackedEncode : public Encoder {
|
|
ostream &outStream; ///< The stream receiving the encoded data
|
|
void writeHeader(uint1 header,uint4 id); ///< Write a header, element or attribute, to stream
|
|
void writeInteger(uint1 typeByte,uint8 val); ///< Write an integer value to the stream
|
|
public:
|
|
PackedEncode(ostream &s) : outStream(s) {} ///< Construct from a stream
|
|
virtual void openElement(const ElementId &elemId);
|
|
virtual void closeElement(const ElementId &elemId);
|
|
virtual void writeBool(const AttributeId &attribId,bool val);
|
|
virtual void writeSignedInteger(const AttributeId &attribId,intb val);
|
|
virtual void writeUnsignedInteger(const AttributeId &attribId,uintb val);
|
|
virtual void writeString(const AttributeId &attribId,const string &val);
|
|
virtual void writeStringIndexed(const AttributeId &attribId,uint4 index,const string &val);
|
|
virtual void writeSpace(const AttributeId &attribId,const AddrSpace *spc);
|
|
virtual void writeOpcode(const AttributeId &attribId,OpCode opc);
|
|
};
|
|
|
|
/// An exception is thrown if the position currently points to the last byte in the stream
|
|
/// \param pos is the position in the stream to look ahead from
|
|
/// \return the next byte
|
|
inline uint1 PackedDecode::getBytePlus1(Position &pos)
|
|
|
|
{
|
|
uint1 *ptr = pos.current + 1;
|
|
if (ptr == pos.end) {
|
|
list<ByteChunk>::const_iterator iter = pos.seqIter;
|
|
++iter;
|
|
if (iter == inStream.end())
|
|
throw DecoderError("Unexpected end of stream");
|
|
ptr = (*iter).start;
|
|
}
|
|
return *ptr;
|
|
}
|
|
|
|
/// An exception is thrown if there are no additional bytes in the stream
|
|
/// \param pos is the position of the byte
|
|
/// \return the byte at the current position
|
|
inline uint1 PackedDecode::getNextByte(Position &pos)
|
|
|
|
{
|
|
uint1 res = *pos.current;
|
|
pos.current += 1;
|
|
if (pos.current != pos.end)
|
|
return res;
|
|
++pos.seqIter;
|
|
if (pos.seqIter == inStream.end())
|
|
throw DecoderError("Unexpected end of stream");
|
|
pos.current = (*pos.seqIter).start;
|
|
pos.end = (*pos.seqIter).end;
|
|
return res;
|
|
}
|
|
|
|
/// An exception is thrown of position is advanced past the end of the stream
|
|
/// \param pos is the position being advanced
|
|
/// \param skip is the number of bytes to advance
|
|
inline void PackedDecode::advancePosition(Position &pos,int4 skip)
|
|
|
|
{
|
|
while(pos.end - pos.current <= skip) {
|
|
skip -= (pos.end - pos.current);
|
|
++pos.seqIter;
|
|
if (pos.seqIter == inStream.end())
|
|
throw DecoderError("Unexpected end of stream");
|
|
pos.current = (*pos.seqIter).start;
|
|
pos.end = (*pos.seqIter).end;
|
|
}
|
|
pos.current += skip;
|
|
}
|
|
|
|
/// Allocate an array of BUFFER_SIZE bytes and add it to the in-memory stream
|
|
/// \param pad is the number of bytes of padding to add to the allocation size, above BUFFER_SIZE
|
|
/// \return the newly allocated buffer
|
|
inline uint1 *PackedDecode::allocateNextInputBuffer(int4 pad)
|
|
|
|
{
|
|
uint1 *buf = new uint1[BUFFER_SIZE + pad];
|
|
inStream.emplace_back(buf,buf+BUFFER_SIZE);
|
|
return buf;
|
|
}
|
|
|
|
/// \param header is the type of header
|
|
/// \param id is the id associated with the element or attribute
|
|
inline void PackedEncode::writeHeader(uint1 header,uint4 id)
|
|
|
|
{
|
|
if (id > 0x1f) {
|
|
header |= PackedFormat::HEADEREXTEND_MASK;
|
|
header |= (id >> PackedFormat::RAWDATA_BITSPERBYTE);
|
|
uint1 extendByte = (id & PackedFormat::RAWDATA_MASK) | PackedFormat::RAWDATA_MARKER;
|
|
outStream.put(header);
|
|
outStream.put(extendByte);
|
|
}
|
|
else {
|
|
header |= id;
|
|
outStream.put(header);
|
|
}
|
|
}
|
|
|
|
extern ElementId ELEM_UNKNOWN; ///< Special element to represent an element with an unrecognized name
|
|
extern AttributeId ATTRIB_UNKNOWN; ///< Special attribute to represent an attribute with an unrecognized name
|
|
extern AttributeId ATTRIB_CONTENT; ///< Special attribute for XML text content of an element
|
|
|
|
/// The name is looked up in the scoped list of attributes. If the attribute is not in the list, a special
|
|
/// placeholder attribute, ATTRIB_UNKNOWN, is returned as a placeholder for attributes with unrecognized names.
|
|
/// \param nm is the name of the attribute
|
|
/// \param scope is the id of the scope in which to lookup of the name
|
|
/// \return the associated id
|
|
inline uint4 AttributeId::find(const string &nm,int4 scope)
|
|
|
|
{
|
|
if (scope == 0) { // Current only support reverse look up for scope 0
|
|
unordered_map<string,uint4>::const_iterator iter = lookupAttributeId.find(nm);
|
|
if (iter != lookupAttributeId.end())
|
|
return (*iter).second;
|
|
}
|
|
return ATTRIB_UNKNOWN.id;
|
|
}
|
|
|
|
/// The name is looked up in the scoped list of elements. If the element is not in the list, a special
|
|
/// placeholder element, ELEM_UNKNOWN, is returned as a placeholder for elements with unrecognized names.
|
|
/// \param nm is the name of the element
|
|
/// \param scope is the id of the scope in which to search
|
|
/// \return the associated id
|
|
inline uint4 ElementId::find(const string &nm,int4 scope)
|
|
|
|
{
|
|
if (scope == 0) {
|
|
unordered_map<string,uint4>::const_iterator iter = lookupElementId.find(nm);
|
|
if (iter != lookupElementId.end())
|
|
return (*iter).second;
|
|
}
|
|
return ELEM_UNKNOWN.id;
|
|
}
|
|
|
|
extern AttributeId ATTRIB_ALIGN;		///< Marshaling attribute "align"
extern AttributeId ATTRIB_BIGENDIAN;		///< Marshaling attribute "bigendian"
extern AttributeId ATTRIB_CONSTRUCTOR;		///< Marshaling attribute "constructor"
extern AttributeId ATTRIB_DESTRUCTOR;		///< Marshaling attribute "destructor"
extern AttributeId ATTRIB_EXTRAPOP;		///< Marshaling attribute "extrapop"
extern AttributeId ATTRIB_FORMAT;		///< Marshaling attribute "format"
extern AttributeId ATTRIB_HIDDENRETPARM;	///< Marshaling attribute "hiddenretparm"
extern AttributeId ATTRIB_ID;			///< Marshaling attribute "id"
extern AttributeId ATTRIB_INDEX;		///< Marshaling attribute "index"
extern AttributeId ATTRIB_INDIRECTSTORAGE;	///< Marshaling attribute "indirectstorage"
extern AttributeId ATTRIB_METATYPE;		///< Marshaling attribute "metatype"
extern AttributeId ATTRIB_MODEL;		///< Marshaling attribute "model"
extern AttributeId ATTRIB_NAME;			///< Marshaling attribute "name"
extern AttributeId ATTRIB_NAMELOCK;		///< Marshaling attribute "namelock"
extern AttributeId ATTRIB_OFFSET;		///< Marshaling attribute "offset"
extern AttributeId ATTRIB_READONLY;		///< Marshaling attribute "readonly"
extern AttributeId ATTRIB_REF;			///< Marshaling attribute "ref"
extern AttributeId ATTRIB_SIZE;			///< Marshaling attribute "size"
extern AttributeId ATTRIB_SPACE;		///< Marshaling attribute "space"
extern AttributeId ATTRIB_THISPTR;		///< Marshaling attribute "thisptr"
extern AttributeId ATTRIB_TYPE;			///< Marshaling attribute "type"
extern AttributeId ATTRIB_TYPELOCK;		///< Marshaling attribute "typelock"
extern AttributeId ATTRIB_VAL;			///< Marshaling attribute "val"
extern AttributeId ATTRIB_VALUE;		///< Marshaling attribute "value"
extern AttributeId ATTRIB_WORDSIZE;		///< Marshaling attribute "wordsize"
extern AttributeId ATTRIB_STORAGE;		///< Marshaling attribute "storage"
extern AttributeId ATTRIB_STACKSPILL;		///< Marshaling attribute "stackspill"
|
|
|
|
extern ElementId ELEM_DATA;		///< Marshaling element \<data>
extern ElementId ELEM_INPUT;		///< Marshaling element \<input>
extern ElementId ELEM_OFF;		///< Marshaling element \<off>
extern ElementId ELEM_OUTPUT;		///< Marshaling element \<output>
extern ElementId ELEM_RETURNADDRESS;	///< Marshaling element \<returnaddress>
extern ElementId ELEM_SYMBOL;		///< Marshaling element \<symbol>
extern ElementId ELEM_TARGET;		///< Marshaling element \<target>
extern ElementId ELEM_VAL;		///< Marshaling element \<val>
extern ElementId ELEM_VALUE;		///< Marshaling element \<value>
extern ElementId ELEM_VOID;		///< Marshaling element \<void>
|
|
|
|
} // End namespace ghidra
|
|
#endif
|