mirror of
https://github.com/NationalSecurityAgency/ghidra.git
synced 2025-10-04 18:29:37 +02:00
GP-2358 Packed protocol for decompiler marshaling
This commit is contained in:
parent
6a1a649213
commit
79c3508f54
119 changed files with 4238 additions and 2207 deletions
|
@ -17,6 +17,7 @@
|
|||
#define __CPUI_MARSHAL__
|
||||
|
||||
#include "xml.hh"
|
||||
#include <list>
|
||||
#include <unordered_map>
|
||||
|
||||
using namespace std;
|
||||
|
@ -100,11 +101,6 @@ public:
|
|||
const AddrSpaceManager *getAddrSpaceManager(void) const { return spcManager; } ///< Get the manager used for address space decoding
|
||||
virtual ~Decoder(void) {} ///< Destructor
|
||||
|
||||
/// \brief Clear any current decoding state
|
||||
///
|
||||
/// Allows the same decoder to be reused. Object is ready for new call to ingestStream.
|
||||
virtual void clear(void)=0;
|
||||
|
||||
/// \brief Prepare to decode a given stream
|
||||
///
|
||||
/// Called once before any decoding. Currently this is assumed to make an internal copy of the stream data,
|
||||
|
@ -262,11 +258,6 @@ class Encoder {
|
|||
public:
|
||||
virtual ~Encoder(void) {} ///< Destructor
|
||||
|
||||
/// \brief Clear any state associated with the encoder
|
||||
///
|
||||
/// The encoder should be ready to write a new document after this call.
|
||||
virtual void clear(void)=0;
|
||||
|
||||
/// \brief Begin a new element in the encoding
|
||||
///
|
||||
/// The element will have the given ElementId annotation and becomes the \e current element.
|
||||
|
@ -333,7 +324,6 @@ public:
|
|||
XmlDecode(const AddrSpaceManager *spc) : Decoder(spc) {
|
||||
document = (Document *)0; rootElement = (const Element *)0; attributeIndex = -1; } ///< Constructor for use with ingestStream
|
||||
virtual ~XmlDecode(void);
|
||||
virtual void clear(void);
|
||||
virtual void ingestStream(istream &s);
|
||||
virtual uint4 peekElement(void);
|
||||
virtual uint4 openElement(void);
|
||||
|
@ -364,7 +354,6 @@ class XmlEncode : public Encoder {
|
|||
bool elementTagIsOpen; ///< If \b true, new attributes can be written to the current element
|
||||
public:
|
||||
XmlEncode(ostream &s) : outStream(s) { elementTagIsOpen = false; } ///< Construct from a stream
|
||||
virtual void clear(void) { elementTagIsOpen = false; }
|
||||
virtual void openElement(const ElementId &elemId);
|
||||
virtual void closeElement(const ElementId &elemId);
|
||||
virtual void writeBool(const AttributeId &attribId,bool val);
|
||||
|
@ -374,6 +363,206 @@ public:
|
|||
virtual void writeSpace(const AttributeId &attribId,const AddrSpace *spc);
|
||||
};
|
||||
|
||||
/// \brief Protocol format for PackedEncode and PackedDecode classes
|
||||
///
|
||||
/// All bytes in the encoding are expected to be non-zero. Element encoding looks like
|
||||
/// - 01xiiiii is an element start
|
||||
/// - 10xiiiii is an element end
|
||||
/// - 11xiiiii is an attribute start
|
||||
///
|
||||
/// Where iiiii is the (first) 5 bits of the element/attribute id.
|
||||
/// If x=0, the id is complete. If x=1, the next byte contains 7 more bits of the id: 1iiiiiii
|
||||
///
|
||||
/// After an attribute start, there follows a \e type byte: ttttllll, where the first 4 bits indicate the
|
||||
/// type of attribute and final 4 bits are a \b length \b code. The types are:
|
||||
/// - 1 = boolean (lengthcode=0 for false, lengthcode=1 for true)
|
||||
/// - 2 = positive signed integer
|
||||
/// - 3 = negative signed integer (stored in negated form)
|
||||
/// - 4 = unsigned integer
|
||||
/// - 5 = basic address space (encoded as the integer index of the space)
|
||||
/// - 6 = special address space (lengthcode 0=>stack 1=>join 2=>fspec 3=>iop)
|
||||
/// - 7 = string
|
||||
///
|
||||
/// All attribute types except \e boolean and \e special, have an encoded integer after the \e type byte.
|
||||
/// The \b length \b code, indicates the number bytes used to encode the integer, 7-bits of info per byte, 1iiiiiii.
|
||||
/// A \b length \b code of zero is used to encode an integer value of 0, with no following bytes.
|
||||
///
|
||||
/// For strings, the integer encoded after the \e type byte, is the actual length of the string. The
|
||||
/// string data itself is stored immediately after the length integer using UTF8 format.
|
||||
namespace PackedFormat {
|
||||
static const uint1 HEADER_MASK = 0xc0;
|
||||
static const uint1 ELEMENT_START = 0x40;
|
||||
static const uint1 ELEMENT_END = 0x80;
|
||||
static const uint1 ATTRIBUTE = 0xc0;
|
||||
static const uint1 HEADEREXTEND_MASK = 0x20;
|
||||
static const uint1 ELEMENTID_MASK = 0x1f;
|
||||
static const uint1 RAWDATA_MASK = 0x7f;
|
||||
static const int4 RAWDATA_BITSPERBYTE = 7;
|
||||
static const uint1 RAWDATA_MARKER = 0x80;
|
||||
static const int4 TYPECODE_SHIFT = 4;
|
||||
static const uint1 LENGTHCODE_MASK = 0xf;
|
||||
static const uint1 TYPECODE_BOOLEAN = 1;
|
||||
static const uint1 TYPECODE_SIGNEDINT_POSITIVE = 2;
|
||||
static const uint1 TYPECODE_SIGNEDINT_NEGATIVE = 3;
|
||||
static const uint1 TYPECODE_UNSIGNEDINT = 4;
|
||||
static const uint1 TYPECODE_ADDRESSSPACE = 5;
|
||||
static const uint1 TYPECODE_SPECIALSPACE = 6;
|
||||
static const uint1 TYPECODE_STRING = 7;
|
||||
static const uint4 SPECIALSPACE_STACK = 0;
|
||||
static const uint4 SPECIALSPACE_JOIN = 1;
|
||||
static const uint4 SPECIALSPACE_FSPEC = 2;
|
||||
static const uint4 SPECIALSPACE_IOP = 3;
|
||||
static const uint4 SPECIALSPACE_SPACEBASE = 4;
|
||||
}
|
||||
|
||||
/// \brief A byte-based decoder designed to marshal info to the decompiler efficiently
|
||||
///
|
||||
/// The decoder expects an encoding as described in PackedFormat. When ingested, the stream bytes are
|
||||
/// held in a sequence of arrays (ByteChunk). During decoding, \b this object maintains a Position in the
|
||||
/// stream at the start and end of the current open element, and a Position of the next attribute to read to
|
||||
/// facilitate getNextAttributeId() and associated read*() methods.
|
||||
class PackedDecode : public Decoder {
|
||||
public:
|
||||
static const int4 BUFFER_SIZE; ///< The size, in bytes, of a single cached chunk of the input stream
|
||||
private:
|
||||
/// \brief A bounded array of bytes
|
||||
class ByteChunk {
|
||||
friend class PackedDecode;
|
||||
uint1 *start; ///< Start of the byte array
|
||||
uint1 *end; ///< End of the byte array
|
||||
public:
|
||||
ByteChunk(uint1 *s,uint1 *e) { start = s; end = e; } ///< Constructor
|
||||
};
|
||||
/// \brief An iterator into input stream
|
||||
class Position {
|
||||
friend class PackedDecode;
|
||||
list<ByteChunk>::const_iterator seqIter; ///< Current byte sequence
|
||||
uint1 *current; ///< Current position in sequence
|
||||
uint1 *end; ///< End of current sequence
|
||||
};
|
||||
list<ByteChunk> inStream; ///< Incoming raw data as a sequence of byte arrays
|
||||
Position startPos; ///< Position at the start of the current open element
|
||||
Position curPos; ///< Position of the next attribute as returned by getNextAttributeId
|
||||
Position endPos; ///< Ending position after all attributes in current open element
|
||||
bool attributeRead; ///< Has the last attribute returned by getNextAttributeId been read
|
||||
uint1 getByte(Position &pos) { return *pos.current; } ///< Get the byte at the current position, do not advance
|
||||
uint1 getBytePlus1(Position &pos); ///< Get the byte following the current byte, do not advance position
|
||||
uint1 getNextByte(Position &pos); ///< Get the byte at the current position and advance to the next byte
|
||||
void advancePosition(Position &pos,int4 skip); ///< Advance the position by the given number of bytes
|
||||
uint8 readInteger(int4 len); ///< Read an integer from the \e current position given its length in bytes
|
||||
uint4 readLengthCode(uint1 typeByte) { return ((uint4)typeByte & PackedFormat::LENGTHCODE_MASK); } ///< Extract length code from type byte
|
||||
void findMatchingAttribute(const AttributeId &attribId); ///< Find attribute matching the given id in open element
|
||||
void skipAttribute(void); ///< Skip over the attribute at the current position
|
||||
void skipAttributeRemaining(uint1 typeByte); ///< Skip over remaining attribute data, after a mismatch
|
||||
public:
|
||||
PackedDecode(const AddrSpaceManager *spcManager) : Decoder(spcManager) {} ///< Constructor
|
||||
virtual ~PackedDecode(void);
|
||||
virtual void ingestStream(istream &s);
|
||||
virtual uint4 peekElement(void);
|
||||
virtual uint4 openElement(void);
|
||||
virtual uint4 openElement(const ElementId &elemId);
|
||||
virtual void closeElement(uint4 id);
|
||||
virtual void closeElementSkipping(uint4 id);
|
||||
virtual void rewindAttributes(void);
|
||||
virtual uint4 getNextAttributeId(void);
|
||||
virtual bool readBool(void);
|
||||
virtual bool readBool(const AttributeId &attribId);
|
||||
virtual intb readSignedInteger(void);
|
||||
virtual intb readSignedInteger(const AttributeId &attribId);
|
||||
virtual uintb readUnsignedInteger(void);
|
||||
virtual uintb readUnsignedInteger(const AttributeId &attribId);
|
||||
virtual string readString(void);
|
||||
virtual string readString(const AttributeId &attribId);
|
||||
virtual AddrSpace *readSpace(void);
|
||||
virtual AddrSpace *readSpace(const AttributeId &attribId);
|
||||
};
|
||||
|
||||
/// \brief A byte-based encoder designed to marshal from the decompiler efficiently
|
||||
///
|
||||
/// See PackedDecode for details of the encoding format.
|
||||
class PackedEncode : public Encoder {
|
||||
ostream &outStream; ///< The stream receiving the encoded data
|
||||
void writeHeader(uint1 header,uint4 id);
|
||||
void writeInteger(uint1 typeByte,uint8 val);
|
||||
public:
|
||||
PackedEncode(ostream &s) : outStream(s) {} ///< Construct from a stream
|
||||
virtual void openElement(const ElementId &elemId);
|
||||
virtual void closeElement(const ElementId &elemId);
|
||||
virtual void writeBool(const AttributeId &attribId,bool val);
|
||||
virtual void writeSignedInteger(const AttributeId &attribId,intb val);
|
||||
virtual void writeUnsignedInteger(const AttributeId &attribId,uintb val);
|
||||
virtual void writeString(const AttributeId &attribId,const string &val);
|
||||
virtual void writeSpace(const AttributeId &attribId,const AddrSpace *spc);
|
||||
};
|
||||
|
||||
/// An exception is thrown if the position currently points to the last byte in the stream
|
||||
/// \param pos is the position in the stream to look ahead from
|
||||
/// \return the next byte
|
||||
inline uint1 PackedDecode::getBytePlus1(Position &pos)
|
||||
|
||||
{
|
||||
uint1 *ptr = pos.current + 1;
|
||||
if (ptr == pos.end) {
|
||||
list<ByteChunk>::const_iterator iter = pos.seqIter;
|
||||
++iter;
|
||||
if (iter == inStream.end())
|
||||
throw DecoderError("Unexpected end of stream");
|
||||
ptr = (*iter).start;
|
||||
}
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
/// An exception is thrown if there are no additional bytes in the stream
|
||||
/// \param pos is the position of the byte
|
||||
/// \return the byte at the current position
|
||||
inline uint1 PackedDecode::getNextByte(Position &pos)
|
||||
|
||||
{
|
||||
uint1 res = *pos.current;
|
||||
pos.current += 1;
|
||||
if (pos.current != pos.end)
|
||||
return res;
|
||||
++pos.seqIter;
|
||||
if (pos.seqIter == inStream.end())
|
||||
throw DecoderError("Unexpected end of stream");
|
||||
pos.current = (*pos.seqIter).start;
|
||||
pos.end = (*pos.seqIter).end;
|
||||
return res;
|
||||
}
|
||||
|
||||
/// An exception is thrown of position is advanced past the end of the stream
|
||||
/// \param pos is the position being advanced
|
||||
/// \param skip is the number of bytes to advance
|
||||
inline void PackedDecode::advancePosition(Position &pos,int4 skip)
|
||||
|
||||
{
|
||||
while(pos.end - pos.current <= skip) {
|
||||
skip -= (pos.end - pos.current);
|
||||
++pos.seqIter;
|
||||
if (pos.seqIter == inStream.end())
|
||||
throw DecoderError("Unexpected end of stream");
|
||||
pos.current = (*pos.seqIter).start;
|
||||
pos.end = (*pos.seqIter).end;
|
||||
}
|
||||
pos.current += skip;
|
||||
}
|
||||
|
||||
inline void PackedEncode::writeHeader(uint1 header,uint4 id)
|
||||
|
||||
{
|
||||
if (id > 0x1f) {
|
||||
header |= PackedFormat::HEADEREXTEND_MASK;
|
||||
header |= (id >> PackedFormat::RAWDATA_BITSPERBYTE);
|
||||
uint1 extendByte = (id & PackedFormat::RAWDATA_MASK) | PackedFormat::RAWDATA_MARKER;
|
||||
outStream.put(header);
|
||||
outStream.put(extendByte);
|
||||
}
|
||||
else {
|
||||
header |= id;
|
||||
outStream.put(header);
|
||||
}
|
||||
}
|
||||
|
||||
extern ElementId ELEM_UNKNOWN; ///< Special element to represent an element with an unrecognized name
|
||||
extern AttributeId ATTRIB_UNKNOWN; ///< Special attribute to represent an attribute with an unrecognized name
|
||||
extern AttributeId ATTRIB_CONTENT; ///< Special attribute for XML text content of an element
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue