mirror of
https://github.com/NationalSecurityAgency/ghidra.git
synced 2025-10-04 10:19:23 +02:00
Initial commit of new StringManager
This commit is contained in:
parent
6a15520aa5
commit
0eb48e441f
19 changed files with 740 additions and 236 deletions
|
@ -271,6 +271,7 @@ model {
|
|||
include "database.cc"
|
||||
include "cpool.cc"
|
||||
include "comment.cc"
|
||||
include "stringmanage.cc"
|
||||
include "fspec.cc"
|
||||
include "action.cc"
|
||||
include "loadimage.cc"
|
||||
|
@ -321,6 +322,7 @@ model {
|
|||
include "cpool_ghidra.cc"
|
||||
include "ghidra_process.cc"
|
||||
include "comment_ghidra.cc"
|
||||
include "string_ghidra.cc"
|
||||
// include "callgraph.cc" // uncomment for debug
|
||||
// include "ifacedecomp.cc" // uncomment for debug
|
||||
// include "ifaceterm.cc" // uncomment for debug
|
||||
|
|
|
@ -75,7 +75,7 @@ EXTERNAL_CONSOLEEXT_NAMES=$(subst .cc,,$(notdir $(EXTERNAL_CONSOLEEXT_SOURCE)))
|
|||
CORE= xml space float address pcoderaw translate opcodes globalcontext
|
||||
# Additional core files for any projects that decompile
|
||||
DECCORE=capability architecture options graph cover block cast typeop database cpool \
|
||||
comment fspec action loadimage grammar varnode op \
|
||||
comment stringmanage fspec action loadimage grammar varnode op \
|
||||
type variable varmap jumptable emulate emulateutil flow userop \
|
||||
funcdata funcdata_block funcdata_op funcdata_varnode pcodeinject \
|
||||
heritage prefersplit rangeutil ruleaction subflow blockaction merge double \
|
||||
|
@ -87,7 +87,7 @@ SLEIGH= sleigh pcodeparse pcodecompile sleighbase slghsymbol \
|
|||
# Additional files for the GHIDRA specific build
|
||||
GHIDRA= ghidra_arch inject_ghidra ghidra_translate loadimage_ghidra \
|
||||
typegrp_ghidra database_ghidra ghidra_context cpool_ghidra \
|
||||
ghidra_process comment_ghidra $(GHIDRAEXT_NAMES)
|
||||
ghidra_process comment_ghidra string_ghidra $(GHIDRAEXT_NAMES)
|
||||
# Additional files specific to the sleigh compiler
|
||||
SLACOMP=slgh_compile slghparse slghscan
|
||||
# Additional special files that should not be considered part of the library
|
||||
|
|
|
@ -100,6 +100,7 @@ Architecture::Architecture(void)
|
|||
loader = (LoadImage *)0;
|
||||
pcodeinjectlib = (PcodeInjectLibrary *)0;
|
||||
commentdb = (CommentDatabase *)0;
|
||||
stringManager = (StringManager *)0;
|
||||
cpool = (ConstantPool *)0;
|
||||
symboltab = new Database(this);
|
||||
context = (ContextDatabase *)0;
|
||||
|
@ -152,6 +153,8 @@ Architecture::~Architecture(void)
|
|||
delete pcodeinjectlib;
|
||||
if (commentdb != (CommentDatabase *)0)
|
||||
delete commentdb;
|
||||
if (stringManager != (StringManager *)0)
|
||||
delete stringManager;
|
||||
if (cpool != (ConstantPool *)0)
|
||||
delete cpool;
|
||||
if (context != (ContextDatabase *)0)
|
||||
|
@ -268,6 +271,7 @@ void Architecture::clearAnalysis(Funcdata *fd)
|
|||
fd->clear(); // Clear stuff internal to function
|
||||
// Clear out any analysis generated comments
|
||||
commentdb->clearType(fd->getAddress(),Comment::warning|Comment::warningheader);
|
||||
stringManager->clear();
|
||||
}
|
||||
|
||||
/// Symbols do not necessarily need to be available for the decompiler.
|
||||
|
@ -405,6 +409,7 @@ void Architecture::saveXml(ostream &s) const
|
|||
symboltab->saveXml(s);
|
||||
context->saveXml(s);
|
||||
commentdb->saveXml(s);
|
||||
stringManager->saveXml(s);
|
||||
if (!cpool->empty())
|
||||
cpool->saveXml(s);
|
||||
s << "</save_state>\n";
|
||||
|
@ -437,6 +442,8 @@ void Architecture::restoreXml(DocumentStorage &store)
|
|||
context->restoreXml(subel,this);
|
||||
else if (subel->getName() == "commentdb")
|
||||
commentdb->restoreXml(subel,this);
|
||||
else if (subel->getName() == "stringmanage")
|
||||
stringManager->restoreXml(subel,this);
|
||||
else if (subel->getName() == "constantpool")
|
||||
cpool->restoreXml(subel,*types);
|
||||
else if (subel->getName() == "optionslist")
|
||||
|
@ -575,6 +582,14 @@ void Architecture::buildCommentDB(DocumentStorage &store)
|
|||
commentdb = new CommentDatabaseInternal();
|
||||
}
|
||||
|
||||
/// Build container that holds decoded strings
|
||||
/// \param store may hold configuration information
|
||||
void Architecture::buildStringManager(DocumentStorage &store)
|
||||
|
||||
{
|
||||
stringManager = new StringManagerUnicode(this,2048);
|
||||
}
|
||||
|
||||
/// Some processor models (Java byte-code) need a database of constants.
|
||||
/// The database is always built, but may remain empty.
|
||||
/// \param store may hold configuration information
|
||||
|
@ -1237,6 +1252,7 @@ void Architecture::init(DocumentStorage &store)
|
|||
buildContext(store);
|
||||
buildTypegrp(store);
|
||||
buildCommentDB(store);
|
||||
buildStringManager(store);
|
||||
buildConstantPool(store);
|
||||
|
||||
restoreFromSpec(store);
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "loadimage.hh"
|
||||
#include "globalcontext.hh"
|
||||
#include "comment.hh"
|
||||
#include "stringmanage.hh"
|
||||
#include "userop.hh"
|
||||
#include "options.hh"
|
||||
#include "transform.hh"
|
||||
|
@ -147,6 +148,7 @@ public:
|
|||
PcodeInjectLibrary *pcodeinjectlib; ///< Pcode injection manager
|
||||
RangeList nohighptr; ///< Ranges for which high-level pointers are not possible
|
||||
CommentDatabase *commentdb; ///< Comments for this architecture
|
||||
StringManager *stringManager; ///< Manager of decoded strings
|
||||
ConstantPool *cpool; ///< Deferred constant values
|
||||
PrintLanguage *print; ///< Current high-level language printer
|
||||
vector<PrintLanguage *> printlist; ///< List of high-level language printers supported
|
||||
|
@ -227,6 +229,7 @@ protected:
|
|||
|
||||
virtual void buildTypegrp(DocumentStorage &store); ///< Build the data-type factory/container
|
||||
virtual void buildCommentDB(DocumentStorage &store); ///< Build the comment database
|
||||
virtual void buildStringManager(DocumentStorage &store); ///< Build the string manager
|
||||
virtual void buildConstantPool(DocumentStorage &store); ///< Build the constant pool
|
||||
virtual void buildInstructions(DocumentStorage &store); ///< Register the p-code operations
|
||||
virtual void buildAction(DocumentStorage &store); ///< Build the Action framework
|
||||
|
|
|
@ -615,6 +615,52 @@ void ArchitectureGhidra::getBytes(uint1 *buf,int4 size,const Address &inaddr)
|
|||
readResponseEnd(sin);
|
||||
}
|
||||
|
||||
/// \brief Retrieve decoded string data from the Ghidra client
///
/// A \e getString query is sent, carrying the address (saved together with \b maxBytes),
/// and the name and id of the character data-type.  A successful response starts with
/// a byte count encoded in two characters (each offset by 0x20, low 6 bits first),
/// followed by two characters per data byte, each character being 'A' plus a 4-bit value.
///
/// The whole payload is always consumed from the stream so that the protocol stays
/// aligned, but at most \b maxBytes bytes are written into the output buffer.
/// \param buf is the buffer receiving the decoded bytes (must hold at least \b maxBytes bytes)
/// \param addr is the address of the string
/// \param ct is the character data-type associated with the string
/// \param maxBytes is the maximum number of bytes to store
/// \return the number of bytes stored in \b buf
uint4 ArchitectureGhidra::getStringData(uint1 *buf,const Address &addr,Datatype *ct,int4 maxBytes)

{
  sout.write("\000\000\001\004",4);
  writeStringStream(sout,"getString");
  sout.write("\000\000\001\016",4); // Beginning of string header
  addr.saveXml(sout,maxBytes);
  sout.write("\000\000\001\017",4);
  writeStringStream(sout,ct->getName());
  sout.write("\000\000\001\016",4); // Beginning of string header
  sout << dec << (int8)ct->getId(); // Pass as a signed integer
  sout.write("\000\000\001\017",4);

  sout.write("\000\000\001\005",4);
  sout.flush();

  readToResponse(sin);
  int4 type = readToAnyBurst(sin);
  uint4 size = 0;
  if (type == 12) {
    int4 c = sin.get();
    size ^= (c-0x20);
    c = sin.get();
    size ^= ((c-0x20)<<6);
    uint1 *dblbuf = new uint1[size * 2];
    sin.read((char *)dblbuf,size*2);	// Consume the full payload, even if it will be truncated
    // Never write more than the caller's buffer can hold
    const uint4 capacity = (maxBytes > 0) ? (uint4)maxBytes : 0;
    if (size > capacity)
      size = capacity;
    for (uint4 i=0; i < size; i++) {
      buf[i] = ((dblbuf[i*2]-'A') << 4) | (dblbuf[i*2 + 1]-'A');
    }
    delete [] dblbuf;
  }
  else if ((type&1)==1) {
    ostringstream errmsg;
    errmsg << "GHIDRA has no string in the loadimage at " << addr.getShortcut();
    addr.printRaw(errmsg);
    throw DataUnavailError(errmsg.str());
  }
  else
    throw JavaError("alignment","Expecting bytes or end of query response");
  type = readToAnyBurst(sin);
  if (type != 13)
    throw JavaError("alignment","Expecting byte alignment end");
  readResponseEnd(sin);
  return size;
}
|
||||
|
||||
/// \brief Retrieve p-code to inject for a specific context
|
||||
///
|
||||
/// The particular injection is named and is of one of the types:
|
||||
|
|
|
@ -124,6 +124,7 @@ public:
|
|||
|
||||
bool getSendParamMeasures(void) const { return sendParamMeasures; } ///< Get the current setting for emitting parameter info
|
||||
|
||||
virtual uint4 getStringData(uint1 *buf,const Address &addr,Datatype *ct,int4 maxBytes);
|
||||
virtual void printMessage(const string &message) const;
|
||||
|
||||
static void segvHandler(int4 sig); ///< Handler for a segment violation (SIGSEGV) signal
|
||||
|
|
|
@ -245,6 +245,7 @@ void FlushNative::rawAction(void)
|
|||
ghidra->symboltab->deleteSubScopes(globscope); // Flush cached function and globals database
|
||||
ghidra->types->clearNoncore(); // Reset type information
|
||||
ghidra->commentdb->clear(); // Clear any comments
|
||||
ghidra->stringManager->clear(); // Clear string decodings
|
||||
ghidra->cpool->clear();
|
||||
res = 0;
|
||||
}
|
||||
|
|
|
@ -1164,7 +1164,7 @@ void PrintC::printUnicode(ostream &s,int4 onechar) const
|
|||
s << "\\x" << setfill('0') << setw(8) << hex << onechar;
|
||||
return;
|
||||
}
|
||||
writeUtf8(s, onechar); // emit normally
|
||||
StringManager::writeUtf8(s, onechar); // emit normally
|
||||
}
|
||||
|
||||
void PrintC::pushType(const Datatype *ct)
|
||||
|
@ -1204,32 +1204,6 @@ bool PrintC::doEmitWideCharPrefix(void) const
|
|||
return true;
|
||||
}
|
||||
|
||||
/// \brief Check if the byte buffer has a (unicode) string terminator
|
||||
///
|
||||
/// \param buffer is the byte buffer
|
||||
/// \param size is the number of bytes in the buffer
|
||||
/// \param charsize is the presumed size (in bytes) of character elements
|
||||
/// \return \b true if a string terminator is found
|
||||
bool PrintC::hasCharTerminator(uint1 *buffer,int4 size,int4 charsize)
|
||||
|
||||
{
|
||||
for(int4 i=0;i<size;i+=charsize) {
|
||||
bool isTerminator = true;
|
||||
for(int4 j=0;j<charsize;++j) {
|
||||
if (buffer[i+j] != 0) { // Non-zero bytes means character can't be a null terminator
|
||||
isTerminator = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (isTerminator) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
#define STR_LITERAL_BUFFER_MAXSIZE 2048
|
||||
#define STR_LITERAL_BUFFER_INCREMENT 32
|
||||
|
||||
|
||||
/// \brief Print a quoted (unicode) string at the given address.
|
||||
///
|
||||
/// Data for the string is obtained directly from the LoadImage. The bytes are checked
|
||||
|
@ -1237,43 +1211,27 @@ bool PrintC::hasCharTerminator(uint1 *buffer,int4 size,int4 charsize)
|
|||
/// pass, the string is emitted.
|
||||
/// \param s is the output stream to print to
|
||||
/// \param addr is the address of the string data within the LoadImage
|
||||
/// \param charsize is the number of bytes in an encoded element (i.e. UTF8, UTF16, or UTF32)
|
||||
/// \param charType is the underlying character data-type
|
||||
/// \return \b true if a proper string was found and printed to the stream
|
||||
bool PrintC::printCharacterConstant(ostream &s,const Address &addr,int4 charsize) const
|
||||
bool PrintC::printCharacterConstant(ostream &s,const Address &addr,Datatype *charType) const
|
||||
|
||||
{
|
||||
uint1 buffer[STR_LITERAL_BUFFER_MAXSIZE+4]; // Additional buffer for get_codepoint skip readahead
|
||||
int4 curBufferSize = 0;
|
||||
bool foundTerminator = false;
|
||||
const uint1 *buffer;
|
||||
StringManager *manager = glb->stringManager;
|
||||
try {
|
||||
do {
|
||||
uint4 newBufferSize = curBufferSize + STR_LITERAL_BUFFER_INCREMENT;
|
||||
glb->loader->loadFill(buffer+curBufferSize,STR_LITERAL_BUFFER_INCREMENT,addr + curBufferSize);
|
||||
foundTerminator = hasCharTerminator(buffer+curBufferSize,STR_LITERAL_BUFFER_INCREMENT,charsize);
|
||||
curBufferSize = newBufferSize;
|
||||
} while ((curBufferSize < STR_LITERAL_BUFFER_MAXSIZE)&&(!foundTerminator));
|
||||
buffer = manager->getStringData(addr, charType);
|
||||
} catch(DataUnavailError &err) {
|
||||
return false;
|
||||
}
|
||||
buffer[curBufferSize] = 0; // Make sure bytes for final codepoint read are initialized
|
||||
buffer[curBufferSize+1] = 0;
|
||||
buffer[curBufferSize+2] = 0;
|
||||
buffer[curBufferSize+3] = 0;
|
||||
bool bigend = glb->translate->isBigEndian();
|
||||
bool res;
|
||||
if (isCharacterConstant(buffer,curBufferSize,charsize)) {
|
||||
if (doEmitWideCharPrefix() && charsize > 1)
|
||||
if (doEmitWideCharPrefix() && charType->getSize() > 1)
|
||||
s << 'L'; // Print symbol indicating wide character
|
||||
s << '"';
|
||||
if (!escapeCharacterData(s,buffer,curBufferSize,charsize,bigend))
|
||||
if (!escapeCharacterData(s,buffer,manager->getMaximumBytes(),charType->getSize(),glb->translate->isBigEndian()))
|
||||
s << "...\" /* TRUNCATED STRING LITERAL */";
|
||||
else s << '"';
|
||||
|
||||
res = true;
|
||||
}
|
||||
else
|
||||
res = false;
|
||||
return res;
|
||||
s << '"';
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void PrintC::resetDefaultsPrintC(void)
|
||||
|
@ -1373,7 +1331,7 @@ bool PrintC::pushPtrCharConstant(uintb val,const TypePointer *ct,const Varnode *
|
|||
|
||||
ostringstream str;
|
||||
Datatype *subct = ct->getPtrTo();
|
||||
if (!printCharacterConstant(str,stringaddr,subct->getSize()))
|
||||
if (!printCharacterConstant(str,stringaddr,subct))
|
||||
return false; // Can we get a nice ASCII string
|
||||
|
||||
pushAtom(Atom(str.str(),vartoken,EmitXml::const_color,op,vn));
|
||||
|
@ -1577,7 +1535,7 @@ void PrintC::pushSymbol(const Symbol *sym,const Varnode *vn,const PcodeOp *op)
|
|||
SymbolEntry *entry = sym->getFirstWholeMap();
|
||||
if (entry != (SymbolEntry *)0) {
|
||||
ostringstream s;
|
||||
if (printCharacterConstant(s,entry->getAddr(),subct->getSize())) {
|
||||
if (printCharacterConstant(s,entry->getAddr(),subct)) {
|
||||
pushAtom(Atom(s.str(),vartoken,EmitXml::const_color,op,vn));
|
||||
return;
|
||||
}
|
||||
|
@ -1963,25 +1921,6 @@ void PrintC::setCommentStyle(const string &nm)
|
|||
throw LowlevelError("Unknown comment style. Use \"c\" or \"cplusplus\"");
|
||||
}
|
||||
|
||||
bool PrintC::isCharacterConstant(const uint1 *buf,int4 size,int4 charsize) const
|
||||
|
||||
{
|
||||
// Return true if this looks like a c-string
|
||||
// If the string is encoded in UTF8 or ASCII, we get (on average) a bit of check
|
||||
// per character. For UTF16, the surrogate reserved area gives at least some check.
|
||||
if (buf == (const uint1 *)0) return false;
|
||||
bool bigend = glb->translate->isBigEndian();
|
||||
int4 i=0;
|
||||
int4 skip = charsize;
|
||||
while(i<size) {
|
||||
int4 codepoint = getCodepoint(buf+i,charsize,bigend,skip);
|
||||
if (codepoint < 0) return false;
|
||||
if (codepoint == 0) break;
|
||||
i += skip;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Emit the definition of the given data-type
|
||||
///
|
||||
/// This is currently limited to a 'struct' or 'enum' definitions. The
|
||||
|
|
|
@ -157,8 +157,7 @@ protected:
|
|||
void opFunc(const PcodeOp *op); ///< Push a \e functional expression based on the given p-code op to the RPN stack
|
||||
void opTypeCast(const PcodeOp *op); ///< Push the given p-code op using type-cast syntax to the RPN stack
|
||||
void opHiddenFunc(const PcodeOp *op); ///< Push the given p-code op as a hidden token
|
||||
static bool hasCharTerminator(uint1 *buffer,int4 size,int4 charsize);
|
||||
bool printCharacterConstant(ostream &s,const Address &addr,int4 charsize) const;
|
||||
bool printCharacterConstant(ostream &s,const Address &addr,Datatype *charType) const;
|
||||
void resetDefaultsPrintC(void); ///< Set default values for options specific to PrintC
|
||||
virtual void pushConstant(uintb val,const Datatype *ct,
|
||||
const Varnode *vn,const PcodeOp *op);
|
||||
|
@ -204,7 +203,6 @@ public:
|
|||
virtual void resetDefaults(void);
|
||||
virtual void adjustTypeOperators(void);
|
||||
virtual void setCommentStyle(const string &nm);
|
||||
virtual bool isCharacterConstant(const uint1 *buf,int4 size,int4 charsize) const;
|
||||
virtual void docTypeDefinitions(const TypeFactory *typegrp);
|
||||
virtual void docAllGlobals(void);
|
||||
virtual void docSingleGlobal(const Symbol *sym);
|
||||
|
|
|
@ -190,7 +190,7 @@ void PrintJava::printUnicode(ostream &s,int4 onechar) const
|
|||
s << "\\ux" << setfill('0') << setw(8) << hex << onechar;
|
||||
return;
|
||||
}
|
||||
writeUtf8(s, onechar); // Emit normally
|
||||
StringManager::writeUtf8(s, onechar); // Emit normally
|
||||
}
|
||||
|
||||
void PrintJava::opLoad(const PcodeOp *op)
|
||||
|
|
|
@ -478,136 +478,6 @@ bool PrintLanguage::unicodeNeedsEscape(int4 codepoint)
|
|||
return false;
|
||||
}
|
||||
|
||||
/// Encode the given unicode codepoint as UTF8 (1, 2, 3, or 4 bytes) and
|
||||
/// write the bytes to the stream.
|
||||
/// \param s is the output stream
|
||||
/// \param codepoint is the unicode codepoint
|
||||
void PrintLanguage::writeUtf8(ostream &s,int4 codepoint)
|
||||
|
||||
{
|
||||
uint1 bytes[4];
|
||||
int4 size;
|
||||
|
||||
if (codepoint < 0)
|
||||
throw LowlevelError("Negative unicode codepoint");
|
||||
if (codepoint < 128) {
|
||||
s.put((uint1)codepoint);
|
||||
return;
|
||||
}
|
||||
int4 bits = mostsigbit_set(codepoint) + 1;
|
||||
if (bits > 21)
|
||||
throw LowlevelError("Bad unicode codepoint");
|
||||
if (bits < 12) { // Encode with two bytes
|
||||
bytes[0] = 0xc0 ^ ((codepoint >> 6)&0x1f);
|
||||
bytes[1] = 0x80 ^ (codepoint & 0x3f);
|
||||
size = 2;
|
||||
}
|
||||
else if (bits < 17) {
|
||||
bytes[0] = 0xe0 ^ ((codepoint >> 12)&0xf);
|
||||
bytes[1] = 0x80 ^ ((codepoint >> 6)&0x3f);
|
||||
bytes[2] = 0x80 ^ (codepoint & 0x3f);
|
||||
size = 3;
|
||||
}
|
||||
else {
|
||||
bytes[0] = 0xf0 ^ ((codepoint >> 18) & 7);
|
||||
bytes[1] = 0x80 ^ ((codepoint >> 12) & 0x3f);
|
||||
bytes[2] = 0x80 ^ ((codepoint >> 6) & 0x3f);
|
||||
bytes[3] = 0x80 ^ (codepoint & 0x3f);
|
||||
size = 4;
|
||||
}
|
||||
s.write((char *)bytes, size);
|
||||
}
|
||||
|
||||
/// Pull the first two bytes from the byte array and combine them in the indicated endian order
|
||||
/// \param buf is the byte array
|
||||
/// \param bigend is \b true to request big endian encoding
|
||||
/// \return the decoded UTF16 element
|
||||
inline int4 PrintLanguage::readUtf16(const uint1 *buf,bool bigend)
|
||||
|
||||
{
|
||||
int4 codepoint;
|
||||
if (bigend) {
|
||||
codepoint = buf[0];
|
||||
codepoint <<= 8;
|
||||
codepoint += buf[1];
|
||||
}
|
||||
else {
|
||||
codepoint = buf[1];
|
||||
codepoint <<= 8;
|
||||
codepoint += buf[0];
|
||||
}
|
||||
return codepoint;
|
||||
}
|
||||
|
||||
/// \brief Extract the next \e unicode \e codepoint from an array of character data
|
||||
///
|
||||
/// One or more bytes is consumed from the array, and the number of bytes used is passed back.
|
||||
/// \param buf is a pointer to the bytes in the character array
|
||||
/// \param charsize is 1 for UTF8, 2 for UTF16, or 4 for UTF32
|
||||
/// \param bigend is \b true for big endian encoding of the UTF element
|
||||
/// \param skip is a reference for passing back the number of bytes consumed
|
||||
/// \return the codepoint or -1 if the encoding is invalid
|
||||
int4 PrintLanguage::getCodepoint(const uint1 *buf,int4 charsize,bool bigend,int4 &skip)
|
||||
|
||||
{
|
||||
int4 codepoint;
|
||||
int4 sk = 0;
|
||||
if (charsize==2) { // UTF-16
|
||||
codepoint = readUtf16(buf,bigend);
|
||||
sk += 2;
|
||||
if ((codepoint>=0xD800)&&(codepoint<=0xDBFF)) { // high surrogate
|
||||
int4 trail=readUtf16(buf+2,bigend);
|
||||
sk += 2;
|
||||
if ((trail<0xDC00)||(trail>0xDFFF)) return -1; // Bad trail
|
||||
codepoint = (codepoint<<10) + trail + (0x10000 - (0xD800 << 10) - 0xDC00);
|
||||
}
|
||||
else if ((codepoint>=0xDC00)&&(codepoint<=0xDFFF)) return -1; // trail before high
|
||||
}
|
||||
else if (charsize==1) { // UTF-8
|
||||
int4 val = buf[0];
|
||||
if ((val&0x80)==0) {
|
||||
codepoint = val;
|
||||
sk = 1;
|
||||
}
|
||||
else if ((val&0xe0)==0xc0) {
|
||||
int4 val2 = buf[1];
|
||||
sk = 2;
|
||||
if ((val2&0xc0)!=0x80) return -1; // Not a valid UTF8-encoding
|
||||
codepoint = ((val&0x1f)<<6) | (val2 & 0x3f);
|
||||
}
|
||||
else if ((val&0xf0)==0xe0) {
|
||||
int4 val2 = buf[1];
|
||||
int4 val3 = buf[2];
|
||||
sk = 3;
|
||||
if (((val2&0xc0)!=0x80)||((val3&0xc0)!=0x80)) return -1; // invalid encoding
|
||||
codepoint = ((val&0xf)<<12) | ((val2&0x3f)<<6) | (val3 & 0x3f);
|
||||
}
|
||||
else if ((val&0xf8)==0xf0) {
|
||||
int4 val2 = buf[1];
|
||||
int4 val3 = buf[2];
|
||||
int4 val4 = buf[3];
|
||||
sk = 4;
|
||||
if (((val2&0xc0)!=0x80)||((val3&0xc0)!=0x80)||((val4&0xc0)!=0x80)) return -1; // invalid encoding
|
||||
codepoint = ((val&7)<<18) | ((val2&0x3f)<<12) | ((val3&0x3f)<<6) | (val4 & 0x3f);
|
||||
}
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
else if (charsize == 4) { // UTF-32
|
||||
sk = 4;
|
||||
if (bigend)
|
||||
codepoint = (buf[0]<<24) + (buf[1]<<16) + (buf[2]<<8) + buf[3];
|
||||
else
|
||||
codepoint = (buf[3]<<24) + (buf[2]<<16) + (buf[1]<<8) + buf[0];
|
||||
}
|
||||
else
|
||||
return -1;
|
||||
if (codepoint >= 0xd800 && codepoint <= 0xdfff)
|
||||
return -1; // Reserved for surrogates, invalid codepoints
|
||||
skip = sk;
|
||||
return codepoint;
|
||||
}
|
||||
|
||||
/// \brief Emit a byte buffer to the stream as unicode characters.
|
||||
///
|
||||
/// Characters are emitted until we reach a terminator character or \b count bytes is consumed.
|
||||
|
@ -624,7 +494,7 @@ bool PrintLanguage::escapeCharacterData(ostream &s,const uint1 *buf,int4 count,i
|
|||
int4 skip = charsize;
|
||||
int4 codepoint = 0;
|
||||
while(i<count) {
|
||||
codepoint = getCodepoint(buf+i,charsize,bigend,skip);
|
||||
codepoint = StringManager::getCodepoint(buf+i,charsize,bigend,skip);
|
||||
if (codepoint == 0 || codepoint == -1) break;
|
||||
printUnicode(s,codepoint);
|
||||
i += skip;
|
||||
|
|
|
@ -267,9 +267,6 @@ protected:
|
|||
void emitOp(const ReversePolish &entry); ///< Send an operator token from the RPN to the emitter
|
||||
void emitAtom(const Atom &atom); ///< Send an variable token from the RPN to the emitter
|
||||
static bool unicodeNeedsEscape(int4 codepoint); ///< Determine if the given codepoint needs to be escaped
|
||||
static void writeUtf8(ostream &s,int4 codepoint); ///< Write unicode character to stream in UTF8 encoding
|
||||
static int4 readUtf16(const uint1 *buf,bool bigend); ///< Read a 2-byte UTF16 element from a byte array
|
||||
static int4 getCodepoint(const uint1 *buf,int4 charsize,bool bigend,int4 &skip);
|
||||
bool escapeCharacterData(ostream &s,const uint1 *buf,int4 count,int4 charsize,bool bigend) const;
|
||||
void recurse(void); ///< Emit from the RPN stack as much as possible
|
||||
void opBinary(const OpToken *tok,const PcodeOp *op); ///< Push a binary operator onto the RPN stack
|
||||
|
@ -434,14 +431,6 @@ public:
|
|||
/// \param nm is the configuration description
|
||||
virtual void setCommentStyle(const string &nm)=0;
|
||||
|
||||
/// \brief Decide is the given byte array looks like a character string
|
||||
///
|
||||
/// This looks for encodings and/or a terminator that is appropriate for the high-level language
|
||||
/// \param buf is a pointer to the byte array
|
||||
/// \param size is the number of bytes in the array
|
||||
/// \param charsize is the size in bytes of the encoding element (i.e. UTF8, UTF16, etc.) to assume
|
||||
virtual bool isCharacterConstant(const uint1 *buf,int4 size,int4 charsize) const=0;
|
||||
|
||||
/// \brief Emit definitions of data-types
|
||||
///
|
||||
/// \param typegrp is the container for the data-types that should be defined
|
||||
|
|
|
@ -6409,14 +6409,8 @@ int4 RulePtrsubCharConstant::applyOp(PcodeOp *op,Funcdata &data)
|
|||
if (!scope->isReadOnly(symaddr,1,op->getAddr()))
|
||||
return 0;
|
||||
// Check if data at the address looks like a string
|
||||
uint1 buffer[128];
|
||||
try {
|
||||
data.getArch()->loader->loadFill(buffer,128,symaddr);
|
||||
} catch(DataUnavailError &err) {
|
||||
if (!data.getArch()->stringManager->isString(symaddr, basetype))
|
||||
return 0;
|
||||
}
|
||||
bool isstring = data.getArch()->print->isCharacterConstant(buffer,128,basetype->getSize());
|
||||
if (!isstring) return 0;
|
||||
|
||||
// If we reach here, the PTRSUB should be converted to a (COPY of a) pointer constant.
|
||||
bool removeCopy = false;
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "ghidra_string.hh"
|
||||
|
||||
/// The scratch buffer used to receive bytes from the client is allocated here
/// with room for \b max bytes.
/// \param g is the ghidra client interface
/// \param max is the maximum number of bytes passed on to the StringManager base
GhidraStringManager::GhidraStringManager(ArchitectureGhidra *g,int4 max)
  : StringManager(max), glb(g), testBuffer(new uint1[max])
{
}
|
||||
|
||||
/// Release the scratch buffer allocated by the constructor.
GhidraStringManager::~GhidraStringManager(void)

{
  uint1 *scratch = testBuffer;
  delete [] scratch;
}
|
||||
|
||||
/// \brief Get the string bytes stored for the given address
///
/// If a buffer is already mapped at the address it is returned directly.
/// Otherwise the ghidra client is queried, and the bytes it returns are copied
/// into a newly mapped buffer.
/// \param addr is the address of the string
/// \param charType is the character data-type passed along to the client
/// \return the stored buffer of string bytes
const uint1 *GhidraStringManager::getStringData(const Address &addr,Datatype *charType)

{
  map<Address,const uint1 *>::iterator cached = stringMap.find(addr);
  if (cached == stringMap.end()) {
    // Nothing cached yet: pull the bytes from the client into the scratch buffer
    int4 numBytes = glb->getStringData(testBuffer, addr, charType, maximumBytes);
    return mapBuffer(addr, testBuffer, numBytes);
  }
  return cached->second;
}
|
|
@ -0,0 +1,39 @@
|
|||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/// \file ghidra_string.hh
|
||||
/// \brief Implementation of the StringManager through the ghidra client
|
||||
|
||||
#ifndef __STRING_GHIDRA__
|
||||
#define __STRING_GHIDRA__
|
||||
|
||||
#include "ghidra_arch.hh"
|
||||
|
||||
/// \brief Implementation of the StringManager that queries through the ghidra client
|
||||
///
|
||||
/// This acts as a front end to Ghidra's string formats and encodings.
|
||||
/// The client translates any type of string into a UTF8 representation, and this
|
||||
/// class stores it for final presentation. Escaping the UTF8 string is left up
|
||||
/// to the PrintLanguage.
|
||||
class GhidraStringManager : public StringManager {
|
||||
ArchitectureGhidra *glb; ///< The ghidra client interface
|
||||
uint1 *testBuffer; ///< Temporary storage for storing bytes from client
|
||||
public:
|
||||
GhidraStringManager(ArchitectureGhidra *g,int4 max); ///< Constructor
|
||||
virtual ~GhidraStringManager(void);
|
||||
virtual const uint1 *getStringData(const Address &addr,Datatype *charType);
|
||||
};
|
||||
|
||||
#endif
|
391
Ghidra/Features/Decompiler/src/decompile/cpp/stringmanage.cc
Normal file
391
Ghidra/Features/Decompiler/src/decompile/cpp/stringmanage.cc
Normal file
|
@ -0,0 +1,391 @@
|
|||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "stringmanage.hh"
|
||||
#include "architecture.hh"
|
||||
|
||||
/// Before calling, we must check that there is no other buffer stored at the address.
|
||||
/// \param addr is the Address to store the buffer at
|
||||
/// \param buf is the buffer to be copied into storage
|
||||
/// \param size is the number of bytes in the buffer
|
||||
/// \return the new permanent copy of the buffer
|
||||
const uint1 *StringManager::mapBuffer(const Address &addr,const uint1 *buf,int4 size)
|
||||
|
||||
{
|
||||
uint1 *storeBuf = new uint1[size + 1];
|
||||
stringMap[addr] = storeBuf;
|
||||
memcpy(storeBuf,buf,size);
|
||||
storeBuf[size] = 0;
|
||||
return storeBuf;
|
||||
}
|
||||
|
||||
/// \param max is the maximum number of bytes to allow in a decoded string
|
||||
StringManager::StringManager(int4 max)
|
||||
|
||||
{
|
||||
maximumBytes = max;
|
||||
}
|
||||
|
||||
/// Stored string buffers are released via clear().
StringManager::~StringManager(void)

{
  this->clear();
}
|
||||
|
||||
void StringManager::clear(void)
|
||||
|
||||
{
|
||||
map<Address,const uint1 *>::iterator iter;
|
||||
|
||||
for(iter=stringMap.begin();iter!=stringMap.end();++iter) {
|
||||
delete [] (*iter).second;
|
||||
}
|
||||
}
|
||||
|
||||
/// Encode the given unicode codepoint as UTF8 (1, 2, 3, or 4 bytes) and
|
||||
/// write the bytes to the stream.
|
||||
/// \param s is the output stream
|
||||
/// \param codepoint is the unicode codepoint
|
||||
void StringManager::writeUtf8(ostream &s,int4 codepoint)
|
||||
|
||||
{
|
||||
uint1 bytes[4];
|
||||
int4 size;
|
||||
|
||||
if (codepoint < 0)
|
||||
throw LowlevelError("Negative unicode codepoint");
|
||||
if (codepoint < 128) {
|
||||
s.put((uint1)codepoint);
|
||||
return;
|
||||
}
|
||||
int4 bits = mostsigbit_set(codepoint) + 1;
|
||||
if (bits > 21)
|
||||
throw LowlevelError("Bad unicode codepoint");
|
||||
if (bits < 12) { // Encode with two bytes
|
||||
bytes[0] = 0xc0 ^ ((codepoint >> 6)&0x1f);
|
||||
bytes[1] = 0x80 ^ (codepoint & 0x3f);
|
||||
size = 2;
|
||||
}
|
||||
else if (bits < 17) {
|
||||
bytes[0] = 0xe0 ^ ((codepoint >> 12)&0xf);
|
||||
bytes[1] = 0x80 ^ ((codepoint >> 6)&0x3f);
|
||||
bytes[2] = 0x80 ^ (codepoint & 0x3f);
|
||||
size = 3;
|
||||
}
|
||||
else {
|
||||
bytes[0] = 0xf0 ^ ((codepoint >> 18) & 7);
|
||||
bytes[1] = 0x80 ^ ((codepoint >> 12) & 0x3f);
|
||||
bytes[2] = 0x80 ^ ((codepoint >> 6) & 0x3f);
|
||||
bytes[3] = 0x80 ^ (codepoint & 0x3f);
|
||||
size = 4;
|
||||
}
|
||||
s.write((char *)bytes, size);
|
||||
}
|
||||
|
||||
/// Returns \b true if the data is some kind of complete string.
|
||||
/// A given character data-type can be used as a hint for the encoding.
|
||||
/// The string decoding can be cached internally.
|
||||
/// \param addr is the given address
|
||||
/// \param charType is the given character data-type
|
||||
/// \return \b true if the address represents string data
|
||||
bool StringManager::isString(const Address &addr,Datatype *charType)
|
||||
|
||||
{
|
||||
const uint1 *buffer = (const uint1 *)0;
|
||||
try {
|
||||
buffer = getStringData(addr,charType);
|
||||
}
|
||||
catch(DataUnavailError &err) {
|
||||
return false;
|
||||
}
|
||||
return (buffer != (const uint1 *)0);
|
||||
}
|
||||
|
||||
/// Write \<stringmanage> tag, with \<string> sub-tags.
|
||||
/// \param s is the stream to write to
|
||||
void StringManager::saveXml(ostream &s) const
|
||||
|
||||
{
|
||||
s << "<stringmanage>\n";
|
||||
|
||||
map<Address,const uint1 *>::const_iterator iter1;
|
||||
for(iter1=stringMap.begin();iter1!=stringMap.end();++iter1) {
|
||||
s << "<string>\n";
|
||||
(*iter1).first.saveXml(s);
|
||||
const uint1 *buf = (*iter1).second;
|
||||
s << " <bytes>\n" << setfill('0');
|
||||
for(int4 i=0;;++i) {
|
||||
if (buf[i] == 0) break;
|
||||
s << hex << setw(2) << (int4)buf[i];
|
||||
if (i%20 == 19)
|
||||
s << "\n ";
|
||||
}
|
||||
s << "\n </bytes>\n";
|
||||
}
|
||||
s << "</stringmanage>\n";
|
||||
}
|
||||
|
||||
/// Read \<stringmanage> tag, with \<string> sub-tags.
|
||||
/// \param el is the root tag element
|
||||
/// \param m is the manager for looking up AddressSpaces
|
||||
void StringManager::restoreXml(const Element *el,const AddrSpaceManager *m)
|
||||
|
||||
{
|
||||
const List &list(el->getChildren());
|
||||
List::const_iterator iter;
|
||||
iter = list.begin();
|
||||
Address addr = Address::restoreXml(*iter, m);
|
||||
++iter;
|
||||
vector<uint1> vec;
|
||||
istringstream is((*iter)->getContent());
|
||||
int4 val;
|
||||
char c1, c2;
|
||||
is >> ws;
|
||||
c1 = is.get();
|
||||
c2 = is.get();
|
||||
while ((c1 > 0) && (c2 > 0)) {
|
||||
if (c1 <= '9')
|
||||
c1 = c1 - '0';
|
||||
else if (c1 <= 'F')
|
||||
c1 = c1 + 10 - 'A';
|
||||
else
|
||||
c1 = c1 + 10 - 'a';
|
||||
if (c2 <= '9')
|
||||
c2 = c2 - '0';
|
||||
else if (c2 <= 'F')
|
||||
c2 = c2 + 10 - 'A';
|
||||
else
|
||||
c2 = c2 + 10 - 'a';
|
||||
val = c1 * 16 + c2;
|
||||
vec.push_back((uint1) val);
|
||||
is >> ws;
|
||||
c1 = is.get();
|
||||
c2 = is.get();
|
||||
}
|
||||
mapBuffer(addr,vec.data(),vec.size());
|
||||
}
|
||||
|
||||
/// \param buffer is the byte buffer
|
||||
/// \param size is the number of bytes in the buffer
|
||||
/// \param charsize is the presumed size (in bytes) of character elements
|
||||
/// \return \b true if a string terminator is found
|
||||
bool StringManager::hasCharTerminator(const uint1 *buffer,int4 size,int4 charsize)
|
||||
|
||||
{
|
||||
for(int4 i=0;i<size;i+=charsize) {
|
||||
bool isTerminator = true;
|
||||
for(int4 j=0;j<charsize;++j) {
|
||||
if (buffer[i+j] != 0) { // Non-zero bytes means character can't be a null terminator
|
||||
isTerminator = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (isTerminator) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Pull the first two bytes from the byte array and combine them in the indicated endian order
|
||||
/// \param buf is the byte array
|
||||
/// \param bigend is \b true to request big endian encoding
|
||||
/// \return the decoded UTF16 element
|
||||
inline int4 StringManager::readUtf16(const uint1 *buf,bool bigend)
|
||||
|
||||
{
|
||||
int4 codepoint;
|
||||
if (bigend) {
|
||||
codepoint = buf[0];
|
||||
codepoint <<= 8;
|
||||
codepoint += buf[1];
|
||||
}
|
||||
else {
|
||||
codepoint = buf[1];
|
||||
codepoint <<= 8;
|
||||
codepoint += buf[0];
|
||||
}
|
||||
return codepoint;
|
||||
}
|
||||
|
||||
/// One or more bytes is consumed from the array, and the number of bytes used is passed back.
|
||||
/// \param buf is a pointer to the bytes in the character array
|
||||
/// \param charsize is 1 for UTF8, 2 for UTF16, or 4 for UTF32
|
||||
/// \param bigend is \b true for big endian encoding of the UTF element
|
||||
/// \param skip is a reference for passing back the number of bytes consumed
|
||||
/// \return the codepoint or -1 if the encoding is invalid
|
||||
int4 StringManager::getCodepoint(const uint1 *buf,int4 charsize,bool bigend,int4 &skip)
|
||||
|
||||
{
|
||||
int4 codepoint;
|
||||
int4 sk = 0;
|
||||
if (charsize==2) { // UTF-16
|
||||
codepoint = readUtf16(buf,bigend);
|
||||
sk += 2;
|
||||
if ((codepoint>=0xD800)&&(codepoint<=0xDBFF)) { // high surrogate
|
||||
int4 trail=readUtf16(buf+2,bigend);
|
||||
sk += 2;
|
||||
if ((trail<0xDC00)||(trail>0xDFFF)) return -1; // Bad trail
|
||||
codepoint = (codepoint<<10) + trail + (0x10000 - (0xD800 << 10) - 0xDC00);
|
||||
}
|
||||
else if ((codepoint>=0xDC00)&&(codepoint<=0xDFFF)) return -1; // trail before high
|
||||
}
|
||||
else if (charsize==1) { // UTF-8
|
||||
int4 val = buf[0];
|
||||
if ((val&0x80)==0) {
|
||||
codepoint = val;
|
||||
sk = 1;
|
||||
}
|
||||
else if ((val&0xe0)==0xc0) {
|
||||
int4 val2 = buf[1];
|
||||
sk = 2;
|
||||
if ((val2&0xc0)!=0x80) return -1; // Not a valid UTF8-encoding
|
||||
codepoint = ((val&0x1f)<<6) | (val2 & 0x3f);
|
||||
}
|
||||
else if ((val&0xf0)==0xe0) {
|
||||
int4 val2 = buf[1];
|
||||
int4 val3 = buf[2];
|
||||
sk = 3;
|
||||
if (((val2&0xc0)!=0x80)||((val3&0xc0)!=0x80)) return -1; // invalid encoding
|
||||
codepoint = ((val&0xf)<<12) | ((val2&0x3f)<<6) | (val3 & 0x3f);
|
||||
}
|
||||
else if ((val&0xf8)==0xf0) {
|
||||
int4 val2 = buf[1];
|
||||
int4 val3 = buf[2];
|
||||
int4 val4 = buf[3];
|
||||
sk = 4;
|
||||
if (((val2&0xc0)!=0x80)||((val3&0xc0)!=0x80)||((val4&0xc0)!=0x80)) return -1; // invalid encoding
|
||||
codepoint = ((val&7)<<18) | ((val2&0x3f)<<12) | ((val3&0x3f)<<6) | (val4 & 0x3f);
|
||||
}
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
else if (charsize == 4) { // UTF-32
|
||||
sk = 4;
|
||||
if (bigend)
|
||||
codepoint = (buf[0]<<24) + (buf[1]<<16) + (buf[2]<<8) + buf[3];
|
||||
else
|
||||
codepoint = (buf[3]<<24) + (buf[2]<<16) + (buf[1]<<8) + buf[0];
|
||||
}
|
||||
else
|
||||
return -1;
|
||||
if (codepoint >= 0xd800 && codepoint <= 0xdfff)
|
||||
return -1; // Reserved for surrogates, invalid codepoints
|
||||
skip = sk;
|
||||
return codepoint;
|
||||
}
|
||||
|
||||
/// \param g is the underlying architecture (and loadimage)
|
||||
/// \param max is the maximum number of bytes to allow in a decoded string
|
||||
StringManagerUnicode::StringManagerUnicode(Architecture *g,int4 max)
|
||||
: StringManager(max)
|
||||
{
|
||||
glb = g;
|
||||
testBuffer = new uint1[max];
|
||||
}
|
||||
|
||||
/// Release the scratch buffer used for pulling in loadimage bytes
StringManagerUnicode::~StringManagerUnicode(void)

{
  delete [] testBuffer;
}
|
||||
|
||||
/// \brief Retrieve string data at the given address as a UTF8 byte array
///
/// A previously cached decoding is returned immediately.  Otherwise bytes are pulled from the
/// loadimage, 32 at a time, until a null terminator for the character size is seen or the
/// maximum number of bytes has been read.  Single byte data is validated as UTF8 and cached
/// as is; wider data is translated to UTF8 (limited to the maximum number of bytes) and cached.
/// \param addr is the given address
/// \param charType is a character data-type indicating the encoding
/// \return the byte array of UTF8 data, or null if the bytes are not a valid encoding
const uint1 *StringManagerUnicode::getStringData(const Address &addr,Datatype *charType)

{
  map<Address,const uint1 *>::iterator iter;
  iter = stringMap.find(addr);
  if (iter != stringMap.end())
    return (*iter).second;

  int4 charsize = charType->getSize();
  if (charsize != 1 && charsize != 2 && charsize != 4)
    return (const uint1 *)0;	// Not the element size of UTF8, UTF16, or UTF32

  int4 curBufferSize = 0;
  bool foundTerminator = false;

  do {
    int4 amount = 32;	// Grab 32 bytes of image at a time
    int4 newBufferSize = curBufferSize + amount;
    if (newBufferSize > maximumBytes) {
      newBufferSize = maximumBytes;
      amount = newBufferSize - curBufferSize;
      if (amount <= 0) break;	// The size limit has been reached
    }
    glb->loader->loadFill(testBuffer+curBufferSize,amount,addr + curBufferSize);
    // Scan whole characters only, a partial trailing character would read past the loaded bytes
    foundTerminator = hasCharTerminator(testBuffer+curBufferSize,amount - (amount % charsize),charsize);
    curBufferSize = newBufferSize;
  } while (!foundTerminator);

  const uint1 *resBuffer;
  if (charsize == 1) {
    if (!isCharacterConstant(testBuffer,curBufferSize,charsize))
      return (const uint1 *)0;
    resBuffer = mapBuffer(addr,testBuffer,curBufferSize);
  }
  else {
    // We need to translate to UTF8
    ostringstream s;
    if (!writeUnicode(s, testBuffer, curBufferSize, charsize))
      return (const uint1 *)0;
    string resString = s.str();
    int4 newSize = resString.size();
    if (newSize > maximumBytes)
      newSize = maximumBytes;
    resBuffer = mapBuffer(addr,(const uint1 *)resString.c_str(),newSize);
  }
  return resBuffer;
}
|
||||
|
||||
/// If the string is encoded in UTF8 or ASCII, we get (on average) a bit of check
|
||||
/// per character. For UTF16, the surrogate reserved area gives at least some check.
|
||||
/// \param buf is the byte array to check
|
||||
/// \param size is the size of the buffer in bytes
|
||||
/// \param charsize is the UTF encoding (1=UTF8, 2=UTF16, 4=UTF32)
|
||||
/// \return \b true if the buffer is filled with valid unicode
|
||||
bool StringManagerUnicode::isCharacterConstant(const uint1 *buf,int4 size,int4 charsize) const
|
||||
|
||||
{
|
||||
if (buf == (const uint1 *)0) return false;
|
||||
bool bigend = glb->translate->isBigEndian();
|
||||
int4 i=0;
|
||||
int4 skip = charsize;
|
||||
while(i<size) {
|
||||
int4 codepoint = getCodepoint(buf+i,charsize,bigend,skip);
|
||||
if (codepoint < 0) return false;
|
||||
if (codepoint == 0) break;
|
||||
i += skip;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Assume the buffer contains a null terminated unicode encoded string.
|
||||
/// Write the characters out (as UTF8) to the stream.
|
||||
/// \param s is the output stream
|
||||
/// \param buffer is the given byte buffer
|
||||
/// \param size is the number of bytes in the buffer
|
||||
/// \param charsize specifies the encoding (1=UTF8 2=UTF16 4=UTF32)
|
||||
/// \return \b true if the byte array contains valid unicode
|
||||
bool StringManagerUnicode::writeUnicode(ostream &s,uint1 *buffer,int4 size,int4 charsize)
|
||||
|
||||
{
|
||||
bool bigend = glb->translate->isBigEndian();
|
||||
int4 i=0;
|
||||
int4 skip = charsize;
|
||||
while(i<size) {
|
||||
int4 codepoint = getCodepoint(buffer+i,charsize,bigend,skip);
|
||||
if (codepoint < 0) return false;
|
||||
if (codepoint == 0) break; // Terminator
|
||||
writeUtf8(s, codepoint);
|
||||
i += skip;
|
||||
}
|
||||
return true;
|
||||
}
|
80
Ghidra/Features/Decompiler/src/decompile/cpp/stringmanage.hh
Normal file
80
Ghidra/Features/Decompiler/src/decompile/cpp/stringmanage.hh
Normal file
|
@ -0,0 +1,80 @@
|
|||
/* ###
|
||||
* IP: GHIDRA
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/// \file stringmanage.hh
|
||||
/// \brief Classes for decoding and storing string data
|
||||
|
||||
#ifndef __STRING_MANAGE__
|
||||
#define __STRING_MANAGE__
|
||||
|
||||
#include "type.hh"
|
||||
|
||||
class Architecture;
|
||||
|
||||
/// \brief Storage for decoding and storing strings associated with an address
|
||||
///
|
||||
/// Looks at data in the loadimage to determine if it represents a "string".
|
||||
/// Decodes the string for presentation in the output.
|
||||
/// Stores the decoded string until its needed for presentation.
|
||||
class StringManager {
|
||||
protected:
|
||||
map<Address,const uint1 *> stringMap; ///< Map from address to string (in UTF8 format)
|
||||
int4 maximumBytes; ///< Maximum bytes (in UTF8 encoding) allowed
|
||||
|
||||
const uint1 *mapBuffer(const Address &addr,const uint1 *buf,int4 size); ///< Move a decoded buffer into storage
|
||||
public:
|
||||
StringManager(int4 max); ///< Constructor
|
||||
virtual ~StringManager(void); ///< Destructor
|
||||
|
||||
int4 getMaximumBytes(void) const { return maximumBytes; } ///< Return the maximum bytes allowed in a string decoding
|
||||
void clear(void); ///< Clear out any cached strings
|
||||
|
||||
bool isString(const Address &addr,Datatype *charType); // Determine if data at the given address is a string
|
||||
|
||||
/// \brief Retrieve string data at the given address as a UTF8 byte array
|
||||
///
|
||||
/// If the address does not represent string data, null is returned. Otherwise,
|
||||
/// the string data is fetched, converted to a UTF8 encoding, cached and returned.
|
||||
/// \param addr is the given address
|
||||
/// \param charType is a character data-type indicating the encoding
|
||||
/// \return the byte array of UTF8 data (or null)
|
||||
virtual const uint1 *getStringData(const Address &addr,Datatype *charType)=0;
|
||||
|
||||
void saveXml(ostream &s) const; ///< Save cached strings to a stream as XML
|
||||
void restoreXml(const Element *el,const AddrSpaceManager *m); ///< Restore string cache from XML
|
||||
|
||||
static bool hasCharTerminator(const uint1 *buffer,int4 size,int4 charsize); ///< Check for a unicode string terminator
|
||||
static int4 readUtf16(const uint1 *buf,bool bigend); ///< Read a UTF16 code point from a byte array
|
||||
static void writeUtf8(ostream &s,int4 codepoint); ///< Write unicode character to stream in UTF8 encoding
|
||||
static int4 getCodepoint(const uint1 *buf,int4 charsize,bool bigend,int4 &skip); ///< Extract next \e unicode \e codepoint
|
||||
};
|
||||
|
||||
/// \brief An implementation of StringManager that understands terminated unicode strings
|
||||
///
|
||||
/// This class understands UTF8, UTF16, and UTF32 encodings. It reports a string if its
|
||||
/// sees a valid encoding that is null terminated.
|
||||
class StringManagerUnicode : public StringManager {
|
||||
Architecture *glb; ///< Underlying architecture
|
||||
uint1 *testBuffer; ///< Temporary buffer for pulling in loadimage bytes
|
||||
public:
|
||||
StringManagerUnicode(Architecture *g,int4 max); ///< Constructor
|
||||
virtual ~StringManagerUnicode(void);
|
||||
|
||||
virtual const uint1 *getStringData(const Address &addr,Datatype *charType);
|
||||
bool isCharacterConstant(const uint1 *buf,int4 size,int4 charsize) const; ///< Return \b true if buffer looks like unicode
|
||||
bool writeUnicode(ostream &s,uint1 *buffer,int4 size,int4 charsize); ///< Write unicode byte array to stream (as UTF8)
|
||||
};
|
||||
|
||||
#endif
|
|
@ -18,6 +18,7 @@ package ghidra.app.decompiler;
|
|||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.math.BigInteger;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import javax.xml.parsers.SAXParser;
|
||||
|
@ -27,14 +28,15 @@ import org.xml.sax.*;
|
|||
import org.xml.sax.helpers.DefaultHandler;
|
||||
|
||||
import ghidra.app.cmd.function.CallDepthChangeInfo;
|
||||
import ghidra.docking.settings.Settings;
|
||||
import ghidra.docking.settings.SettingsImpl;
|
||||
import ghidra.program.disassemble.Disassembler;
|
||||
import ghidra.program.model.address.*;
|
||||
import ghidra.program.model.data.DataType;
|
||||
import ghidra.program.model.data.*;
|
||||
import ghidra.program.model.lang.*;
|
||||
import ghidra.program.model.lang.ConstantPool.Record;
|
||||
import ghidra.program.model.listing.*;
|
||||
import ghidra.program.model.mem.MemoryAccessException;
|
||||
import ghidra.program.model.mem.MemoryBlock;
|
||||
import ghidra.program.model.mem.*;
|
||||
import ghidra.program.model.pcode.*;
|
||||
import ghidra.program.model.symbol.*;
|
||||
import ghidra.util.Msg;
|
||||
|
@ -65,6 +67,7 @@ public class DecompileCallback {
|
|||
private AddressFactory addrfactory;
|
||||
private ConstantPool cpool;
|
||||
private PcodeDataTypeManager dtmanage;
|
||||
private Charset utf8Charset;
|
||||
private String nativeMessage;
|
||||
private boolean showNamespace;
|
||||
|
||||
|
@ -84,6 +87,7 @@ public class DecompileCallback {
|
|||
cpool = null;
|
||||
nativeMessage = null;
|
||||
debug = null;
|
||||
utf8Charset = Charset.availableCharsets().get("UTF-8");
|
||||
}
|
||||
|
||||
private static SAXParser getSAXParser() throws PcodeXMLException {
|
||||
|
@ -1177,6 +1181,66 @@ public class DecompileCallback {
|
|||
return listing.getFunctionAt(addr);
|
||||
}
|
||||
|
||||
public byte[] getStringData(String addrString, String dtName, String dtId) {
|
||||
Address addr;
|
||||
int maxBytes;
|
||||
try {
|
||||
maxBytes = readXMLSize(addrString);
|
||||
addr = Varnode.readXMLAddress(addrString, addrfactory, funcEntry.getAddressSpace());
|
||||
if (addr == Address.NO_ADDRESS) {
|
||||
throw new PcodeXMLException("Address does not physically map");
|
||||
}
|
||||
}
|
||||
catch (PcodeXMLException e) {
|
||||
Msg.error(this, "Decompiling " + funcEntry + ": " + e.getMessage());
|
||||
return null;
|
||||
}
|
||||
Data data = program.getListing().getDataContaining(addr);
|
||||
Settings settings = SettingsImpl.NO_SETTINGS;
|
||||
AbstractStringDataType dataType = null;
|
||||
if (data != null) {
|
||||
settings = data;
|
||||
if (data.getDataType() instanceof AbstractStringDataType) {
|
||||
dataType = (AbstractStringDataType) data.getDataType();
|
||||
}
|
||||
}
|
||||
if (dataType == null) {
|
||||
DataType dt = dtmanage.findBaseType(dtName, dtId);
|
||||
if (dt instanceof AbstractStringDataType) {
|
||||
dataType = (AbstractStringDataType) dt;
|
||||
}
|
||||
else {
|
||||
if (dt != null) {
|
||||
int size = dt.getLength();
|
||||
if (size == 2) {
|
||||
dataType = TerminatedUnicodeDataType.dataType;
|
||||
}
|
||||
else if (size == 4) {
|
||||
dataType = TerminatedUnicode32DataType.dataType;
|
||||
}
|
||||
else {
|
||||
dataType = TerminatedStringDataType.dataType;
|
||||
}
|
||||
}
|
||||
else {
|
||||
dataType = TerminatedStringDataType.dataType;
|
||||
}
|
||||
}
|
||||
}
|
||||
MemoryBufferImpl buf = new MemoryBufferImpl(program.getMemory(), addr, 64);
|
||||
Object value = dataType.getValue(buf, settings, maxBytes);
|
||||
if (!(value instanceof String)) {
|
||||
return null;
|
||||
}
|
||||
String stringVal = (String) value;
|
||||
byte[] res = stringVal.getBytes(utf8Charset);
|
||||
if (res.length > maxBytes) {
|
||||
byte[] trim = new byte[maxBytes];
|
||||
System.arraycopy(res, 0, trim, 0, maxBytes);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
//==================================================================================================
|
||||
// Inner Classes
|
||||
//==================================================================================================
|
||||
|
|
|
@ -326,7 +326,12 @@ public class DecompileProcess {
|
|||
}
|
||||
break;
|
||||
case 'S':
|
||||
if (name.equals("getString")) {
|
||||
getStringData();
|
||||
}
|
||||
else {
|
||||
getSymbol(); // getSymbol
|
||||
}
|
||||
break;
|
||||
case 'T':
|
||||
if (name.equals("getType")) {
|
||||
|
@ -778,6 +783,31 @@ public class DecompileProcess {
|
|||
write(query_response_end);
|
||||
}
|
||||
|
||||
private void getStringData() throws IOException {
|
||||
String addr = readQueryString();
|
||||
String dtName = readQueryString();
|
||||
String dtId = readQueryString();
|
||||
byte[] res = callback.getStringData(addr, dtName, dtId);
|
||||
write(query_response_start);
|
||||
if ((res != null) && (res.length > 0)) {
|
||||
int sz = res.length;
|
||||
int sz1 = (sz & 0x3f) + 0x20;
|
||||
sz >>>= 6;
|
||||
int sz2 = (sz & 0x3f) + 0x20;
|
||||
write(byte_start);
|
||||
write(sz1);
|
||||
write(sz2);
|
||||
byte[] dblres = new byte[res.length * 2];
|
||||
for (int i = 0; i < res.length; i++) {
|
||||
dblres[i * 2] = (byte) (((res[i] >> 4) & 0xf) + 65);
|
||||
dblres[i * 2 + 1] = (byte) ((res[i] & 0xf) + 65);
|
||||
}
|
||||
write(dblres);
|
||||
write(byte_end);
|
||||
}
|
||||
write(query_response_end);
|
||||
}
|
||||
|
||||
private void write(byte[] bytes) throws IOException {
|
||||
if (nativeOut == null) {
|
||||
return;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue