GP-2358 Packed protocol for decompiler marshaling

This commit is contained in:
caheckman 2022-07-26 15:36:05 -04:00
parent 6a1a649213
commit 79c3508f54
119 changed files with 4238 additions and 2207 deletions

View file

@ -17,6 +17,7 @@
#define __CPUI_MARSHAL__
#include "xml.hh"
#include <list>
#include <unordered_map>
using namespace std;
@ -100,11 +101,6 @@ public:
const AddrSpaceManager *getAddrSpaceManager(void) const { return spcManager; } ///< Get the manager used for address space decoding
virtual ~Decoder(void) {} ///< Destructor
/// \brief Clear any current decoding state
///
/// Allows the same decoder to be reused. Object is ready for new call to ingestStream.
virtual void clear(void)=0;
/// \brief Prepare to decode a given stream
///
/// Called once before any decoding. Currently this is assumed to make an internal copy of the stream data,
@ -262,11 +258,6 @@ class Encoder {
public:
virtual ~Encoder(void) {} ///< Destructor
/// \brief Clear any state associated with the encoder
///
/// The encoder should be ready to write a new document after this call.
virtual void clear(void)=0;
/// \brief Begin a new element in the encoding
///
/// The element will have the given ElementId annotation and becomes the \e current element.
@ -333,7 +324,6 @@ public:
XmlDecode(const AddrSpaceManager *spc) : Decoder(spc) {
document = (Document *)0; rootElement = (const Element *)0; attributeIndex = -1; } ///< Constructor for use with ingestStream
virtual ~XmlDecode(void);
virtual void clear(void);
virtual void ingestStream(istream &s);
virtual uint4 peekElement(void);
virtual uint4 openElement(void);
@ -364,7 +354,6 @@ class XmlEncode : public Encoder {
bool elementTagIsOpen; ///< If \b true, new attributes can be written to the current element
public:
XmlEncode(ostream &s) : outStream(s) { elementTagIsOpen = false; } ///< Construct from a stream
virtual void clear(void) { elementTagIsOpen = false; }
virtual void openElement(const ElementId &elemId);
virtual void closeElement(const ElementId &elemId);
virtual void writeBool(const AttributeId &attribId,bool val);
@ -374,6 +363,206 @@ public:
virtual void writeSpace(const AttributeId &attribId,const AddrSpace *spc);
};
/// \brief Protocol format for PackedEncode and PackedDecode classes
///
/// All bytes in the encoding are expected to be non-zero. Element encoding looks like
/// - 01xiiiii is an element start
/// - 10xiiiii is an element end
/// - 11xiiiii is an attribute start
///
/// Where iiiii is the (first) 5 bits of the element/attribute id.
/// If x=0, the id is complete. If x=1, the next byte contains 7 more bits of the id: 1iiiiiii
///
/// After an attribute start, there follows a \e type byte: ttttllll, where the first 4 bits indicate the
/// type of attribute and final 4 bits are a \b length \b code. The types are:
/// - 1 = boolean (lengthcode=0 for false, lengthcode=1 for true)
/// - 2 = positive signed integer
/// - 3 = negative signed integer (stored in negated form)
/// - 4 = unsigned integer
/// - 5 = basic address space (encoded as the integer index of the space)
/// - 6 = special address space (lengthcode 0=>stack 1=>join 2=>fspec 3=>iop)
/// - 7 = string
///
/// All attribute types except \e boolean and \e special, have an encoded integer after the \e type byte.
/// The \b length \b code, indicates the number bytes used to encode the integer, 7-bits of info per byte, 1iiiiiii.
/// A \b length \b code of zero is used to encode an integer value of 0, with no following bytes.
///
/// For strings, the integer encoded after the \e type byte, is the actual length of the string. The
/// string data itself is stored immediately after the length integer using UTF8 format.
namespace PackedFormat {
static const uint1 HEADER_MASK = 0xc0;
static const uint1 ELEMENT_START = 0x40;
static const uint1 ELEMENT_END = 0x80;
static const uint1 ATTRIBUTE = 0xc0;
static const uint1 HEADEREXTEND_MASK = 0x20;
static const uint1 ELEMENTID_MASK = 0x1f;
static const uint1 RAWDATA_MASK = 0x7f;
static const int4 RAWDATA_BITSPERBYTE = 7;
static const uint1 RAWDATA_MARKER = 0x80;
static const int4 TYPECODE_SHIFT = 4;
static const uint1 LENGTHCODE_MASK = 0xf;
static const uint1 TYPECODE_BOOLEAN = 1;
static const uint1 TYPECODE_SIGNEDINT_POSITIVE = 2;
static const uint1 TYPECODE_SIGNEDINT_NEGATIVE = 3;
static const uint1 TYPECODE_UNSIGNEDINT = 4;
static const uint1 TYPECODE_ADDRESSSPACE = 5;
static const uint1 TYPECODE_SPECIALSPACE = 6;
static const uint1 TYPECODE_STRING = 7;
static const uint4 SPECIALSPACE_STACK = 0;
static const uint4 SPECIALSPACE_JOIN = 1;
static const uint4 SPECIALSPACE_FSPEC = 2;
static const uint4 SPECIALSPACE_IOP = 3;
static const uint4 SPECIALSPACE_SPACEBASE = 4;
}
/// \brief A byte-based decoder designed to marshal info to the decompiler efficiently
///
/// The decoder expects an encoding as described in PackedFormat. When ingested, the stream bytes are
/// held in a sequence of arrays (ByteChunk). During decoding, \b this object maintains a Position in the
/// stream at the start and end of the current open element, and a Position of the next attribute to read to
/// facilitate getNextAttributeId() and associated read*() methods.
class PackedDecode : public Decoder {
public:
static const int4 BUFFER_SIZE; ///< The size, in bytes, of a single cached chunk of the input stream
private:
/// \brief A bounded array of bytes
class ByteChunk {
friend class PackedDecode;
uint1 *start; ///< Start of the byte array
uint1 *end; ///< End of the byte array
public:
ByteChunk(uint1 *s,uint1 *e) { start = s; end = e; } ///< Constructor
};
/// \brief An iterator into input stream
class Position {
friend class PackedDecode;
list<ByteChunk>::const_iterator seqIter; ///< Current byte sequence
uint1 *current; ///< Current position in sequence
uint1 *end; ///< End of current sequence
};
list<ByteChunk> inStream; ///< Incoming raw data as a sequence of byte arrays
Position startPos; ///< Position at the start of the current open element
Position curPos; ///< Position of the next attribute as returned by getNextAttributeId
Position endPos; ///< Ending position after all attributes in current open element
bool attributeRead; ///< Has the last attribute returned by getNextAttributeId been read
uint1 getByte(Position &pos) { return *pos.current; } ///< Get the byte at the current position, do not advance
uint1 getBytePlus1(Position &pos); ///< Get the byte following the current byte, do not advance position
uint1 getNextByte(Position &pos); ///< Get the byte at the current position and advance to the next byte
void advancePosition(Position &pos,int4 skip); ///< Advance the position by the given number of bytes
uint8 readInteger(int4 len); ///< Read an integer from the \e current position given its length in bytes
uint4 readLengthCode(uint1 typeByte) { return ((uint4)typeByte & PackedFormat::LENGTHCODE_MASK); } ///< Extract length code from type byte
void findMatchingAttribute(const AttributeId &attribId); ///< Find attribute matching the given id in open element
void skipAttribute(void); ///< Skip over the attribute at the current position
void skipAttributeRemaining(uint1 typeByte); ///< Skip over remaining attribute data, after a mismatch
public:
PackedDecode(const AddrSpaceManager *spcManager) : Decoder(spcManager) {} ///< Constructor
virtual ~PackedDecode(void);
virtual void ingestStream(istream &s);
virtual uint4 peekElement(void);
virtual uint4 openElement(void);
virtual uint4 openElement(const ElementId &elemId);
virtual void closeElement(uint4 id);
virtual void closeElementSkipping(uint4 id);
virtual void rewindAttributes(void);
virtual uint4 getNextAttributeId(void);
virtual bool readBool(void);
virtual bool readBool(const AttributeId &attribId);
virtual intb readSignedInteger(void);
virtual intb readSignedInteger(const AttributeId &attribId);
virtual uintb readUnsignedInteger(void);
virtual uintb readUnsignedInteger(const AttributeId &attribId);
virtual string readString(void);
virtual string readString(const AttributeId &attribId);
virtual AddrSpace *readSpace(void);
virtual AddrSpace *readSpace(const AttributeId &attribId);
};
/// \brief A byte-based encoder designed to marshal from the decompiler efficiently
///
/// See PackedDecode for details of the encoding format.
class PackedEncode : public Encoder {
ostream &outStream; ///< The stream receiving the encoded data
void writeHeader(uint1 header,uint4 id);
void writeInteger(uint1 typeByte,uint8 val);
public:
PackedEncode(ostream &s) : outStream(s) {} ///< Construct from a stream
virtual void openElement(const ElementId &elemId);
virtual void closeElement(const ElementId &elemId);
virtual void writeBool(const AttributeId &attribId,bool val);
virtual void writeSignedInteger(const AttributeId &attribId,intb val);
virtual void writeUnsignedInteger(const AttributeId &attribId,uintb val);
virtual void writeString(const AttributeId &attribId,const string &val);
virtual void writeSpace(const AttributeId &attribId,const AddrSpace *spc);
};
/// An exception is thrown if the position currently points to the last byte in the stream
/// \param pos is the position in the stream to look ahead from
/// \return the next byte
inline uint1 PackedDecode::getBytePlus1(Position &pos)
{
uint1 *ptr = pos.current + 1;
if (ptr == pos.end) {
list<ByteChunk>::const_iterator iter = pos.seqIter;
++iter;
if (iter == inStream.end())
throw DecoderError("Unexpected end of stream");
ptr = (*iter).start;
}
return *ptr;
}
/// An exception is thrown if there are no additional bytes in the stream
/// \param pos is the position of the byte
/// \return the byte at the current position
inline uint1 PackedDecode::getNextByte(Position &pos)
{
uint1 res = *pos.current;
pos.current += 1;
if (pos.current != pos.end)
return res;
++pos.seqIter;
if (pos.seqIter == inStream.end())
throw DecoderError("Unexpected end of stream");
pos.current = (*pos.seqIter).start;
pos.end = (*pos.seqIter).end;
return res;
}
/// An exception is thrown of position is advanced past the end of the stream
/// \param pos is the position being advanced
/// \param skip is the number of bytes to advance
inline void PackedDecode::advancePosition(Position &pos,int4 skip)
{
while(pos.end - pos.current <= skip) {
skip -= (pos.end - pos.current);
++pos.seqIter;
if (pos.seqIter == inStream.end())
throw DecoderError("Unexpected end of stream");
pos.current = (*pos.seqIter).start;
pos.end = (*pos.seqIter).end;
}
pos.current += skip;
}
inline void PackedEncode::writeHeader(uint1 header,uint4 id)
{
if (id > 0x1f) {
header |= PackedFormat::HEADEREXTEND_MASK;
header |= (id >> PackedFormat::RAWDATA_BITSPERBYTE);
uint1 extendByte = (id & PackedFormat::RAWDATA_MASK) | PackedFormat::RAWDATA_MARKER;
outStream.put(header);
outStream.put(extendByte);
}
else {
header |= id;
outStream.put(header);
}
}
extern ElementId ELEM_UNKNOWN; ///< Special element to represent an element with an unrecognized name
extern AttributeId ATTRIB_UNKNOWN; ///< Special attribute to represent an attribute with an unrecognized name
extern AttributeId ATTRIB_CONTENT; ///< Special attribute for XML text content of an element