StringData record

This commit is contained in:
caheckman 2020-04-21 15:36:18 -04:00
parent 1529e635fc
commit 7dee97339c
9 changed files with 141 additions and 88 deletions

View file

@ -622,7 +622,7 @@ void ArchitectureGhidra::getBytes(uint1 *buf,int4 size,const Address &inaddr)
readResponseEnd(sin);
}
void ArchitectureGhidra::getStringData(vector<uint1> &buffer,const Address &addr,Datatype *ct,int4 maxBytes)
void ArchitectureGhidra::getStringData(vector<uint1> &buffer,const Address &addr,Datatype *ct,int4 maxBytes,bool &isTrunc)
{
sout.write("\000\000\001\004",4);
@ -645,6 +645,7 @@ void ArchitectureGhidra::getStringData(vector<uint1> &buffer,const Address &addr
uint4 size = (c-0x20);
c = sin.get();
size ^= ((c-0x20)<<6);
isTrunc = (sin.get() != 0);
buffer.reserve(size);
uint1 *dblbuf = new uint1[size * 2];
sin.read((char *)dblbuf,size*2);

View file

@ -125,7 +125,7 @@ public:
bool getSendParamMeasures(void) const { return sendParamMeasures; } ///< Get the current setting for emitting parameter info
virtual void getStringData(vector<uint1> &buffer,const Address &addr,Datatype *ct,int4 maxBytes);
virtual void getStringData(vector<uint1> &buffer,const Address &addr,Datatype *ct,int4 maxBytes,bool &isTrunc);
virtual void printMessage(const string &message) const;
static void segvHandler(int4 sig); ///< Handler for a segment violation (SIGSEGV) signal

View file

@ -1219,20 +1219,18 @@ bool PrintC::printCharacterConstant(ostream &s,const Address &addr,Datatype *cha
StringManager *manager = glb->stringManager;
// Retrieve UTF8 version of string
const vector<uint1> &buffer(manager->getStringData(addr, charType));
bool isTrunc = false;
const vector<uint1> &buffer(manager->getStringData(addr, charType, isTrunc));
if (buffer.empty())
return false;
if (doEmitWideCharPrefix() && charType->getSize() > 1)
s << 'L'; // Print symbol indicating wide character
s << '"';
if (!escapeCharacterData(s,buffer.data(),buffer.size(),1,glb->translate->isBigEndian()))
s << "...\" /* TRUNCATED STRING LITERAL */";
else {
if (buffer.size() > manager->getMaximumBytes())
escapeCharacterData(s,buffer.data(),buffer.size(),1,glb->translate->isBigEndian());
if (isTrunc)
s << "...\" /* TRUNCATED STRING LITERAL */";
else
s << '"';
}
return true;
}

View file

@ -28,15 +28,19 @@ GhidraStringManager::~GhidraStringManager(void)
delete [] testBuffer;
}
const vector<uint1> &GhidraStringManager::getStringData(const Address &addr,Datatype *charType)
const vector<uint1> &GhidraStringManager::getStringData(const Address &addr,Datatype *charType,bool &isTrunc)
{
map<Address,vector<uint1> >::iterator iter;
map<Address,StringData>::iterator iter;
iter = stringMap.find(addr);
if (iter != stringMap.end())
return (*iter).second;
if (iter != stringMap.end()) {
isTrunc = (*iter).second.isTruncated;
return (*iter).second.byteData;
}
vector<uint1> &buffer(stringMap[addr]);
glb->getStringData(buffer, addr, charType, maximumBytes);
return buffer;
StringData &stringData(stringMap[addr]);
stringData.isTruncated = false;
glb->getStringData(stringData.byteData, addr, charType, maximumChars,stringData.isTruncated);
isTrunc = stringData.isTruncated;
return stringData.byteData;
}

View file

@ -33,7 +33,7 @@ class GhidraStringManager : public StringManager {
public:
GhidraStringManager(ArchitectureGhidra *g,int4 max); ///< Constructor
virtual ~GhidraStringManager(void);
virtual const vector<uint1> &getStringData(const Address &addr,Datatype *charType);
virtual const vector<uint1> &getStringData(const Address &addr,Datatype *charType,bool &isTrunc);
};
#endif

View file

@ -16,11 +16,11 @@
#include "stringmanage.hh"
#include "architecture.hh"
/// \param max is the maximum number of bytes to allow in a decoded string
/// \param max is the maximum number of characters to allow before truncating string
StringManager::StringManager(int4 max)
{
maximumBytes = max;
maximumChars = max;
}
StringManager::~StringManager(void)
@ -78,7 +78,8 @@ void StringManager::writeUtf8(ostream &s,int4 codepoint)
bool StringManager::isString(const Address &addr,Datatype *charType)
{
const vector<uint1> &buffer(getStringData(addr,charType));
bool isTrunc; // unused here
const vector<uint1> &buffer(getStringData(addr,charType,isTrunc));
return !buffer.empty();
}
@ -89,14 +90,16 @@ void StringManager::saveXml(ostream &s) const
{
s << "<stringmanage>\n";
map<Address,vector<uint1> >::const_iterator iter1;
map<Address,StringData>::const_iterator iter1;
for(iter1=stringMap.begin();iter1!=stringMap.end();++iter1) {
s << "<string>\n";
(*iter1).first.saveXml(s);
const vector<uint1> &vec( (*iter1).second );
s << " <bytes>\n" << setfill('0');
for(int4 i=0;vec.size();++i) {
s << hex << setw(2) << (int4)vec[i];
const StringData &stringData( (*iter1).second );
s << " <bytes";
a_v_b(s, "trunc", stringData.isTruncated);
s << ">\n" << setfill('0');
for(int4 i=0;stringData.byteData.size();++i) {
s << hex << setw(2) << (int4)stringData.byteData[i];
if (i%20 == 19)
s << "\n ";
}
@ -116,7 +119,8 @@ void StringManager::restoreXml(const Element *el,const AddrSpaceManager *m)
iter = list.begin();
Address addr = Address::restoreXml(*iter, m);
++iter;
vector<uint1> &vec(stringMap[addr]);
StringData &stringData(stringMap[addr]);
stringData.isTruncated = xml_readbool((*iter)->getAttributeValue("trunc"));
istringstream is((*iter)->getContent());
int4 val;
char c1, c2;
@ -137,7 +141,7 @@ void StringManager::restoreXml(const Element *el,const AddrSpaceManager *m)
else
c2 = c2 + 10 - 'a';
val = c1 * 16 + c2;
vec.push_back((uint1) val);
stringData.byteData.push_back((uint1) val);
is >> ws;
c1 = is.get();
c2 = is.get();
@ -267,15 +271,19 @@ StringManagerUnicode::~StringManagerUnicode(void)
delete [] testBuffer;
}
const vector<uint1> &StringManagerUnicode::getStringData(const Address &addr,Datatype *charType)
const vector<uint1> &StringManagerUnicode::getStringData(const Address &addr,Datatype *charType,bool &isTrunc)
{
map<Address,vector<uint1> >::iterator iter;
map<Address,StringData>::iterator iter;
iter = stringMap.find(addr);
if (iter != stringMap.end())
return (*iter).second;
if (iter != stringMap.end()) {
isTrunc = (*iter).second.isTruncated;
return (*iter).second.byteData;
}
vector<uint1> &vec(stringMap[addr]); // Allocate (initially empty) byte vector
StringData &stringData(stringMap[addr]); // Allocate (initially empty) byte vector
stringData.isTruncated = false;
isTrunc = false;
int4 curBufferSize = 0;
int4 charsize = charType->getSize();
@ -285,11 +293,12 @@ const vector<uint1> &StringManagerUnicode::getStringData(const Address &addr,Dat
do {
int4 amount = 32; // Grab 32 bytes of image at a time
uint4 newBufferSize = curBufferSize + amount;
if (newBufferSize > maximumBytes) {
newBufferSize = maximumBytes;
if (newBufferSize > maximumChars) {
newBufferSize = maximumChars;
amount = newBufferSize - curBufferSize;
if (amount == 0)
break;
if (amount == 0) {
return stringData.byteData; // Could not find terminator
}
}
glb->loader->loadFill(testBuffer + curBufferSize, amount,
addr + curBufferSize);
@ -298,52 +307,56 @@ const vector<uint1> &StringManagerUnicode::getStringData(const Address &addr,Dat
curBufferSize = newBufferSize;
} while (!foundTerminator);
} catch (DataUnavailError &err) {
return vec; // Return the empty buffer
return stringData.byteData; // Return the empty buffer
}
if (charsize == 1) {
if (!isCharacterConstant(testBuffer,curBufferSize,charsize))
return vec; // Return the empty buffer
vec.reserve(curBufferSize);
vec.assign(testBuffer,testBuffer+curBufferSize);
int4 numChars = checkCharacters(testBuffer, curBufferSize, charsize);
if (numChars < 0)
return stringData.byteData; // Return the empty buffer (invalid encoding)
if (charsize == 1 && numChars < maximumChars) {
stringData.byteData.reserve(curBufferSize);
stringData.byteData.assign(testBuffer,testBuffer+curBufferSize);
}
else {
// We need to translate to UTF8
// We need to translate to UTF8 and/or truncate
ostringstream s;
if (!writeUnicode(s, testBuffer, curBufferSize, charsize))
return vec; // Return the empty buffer
return stringData.byteData; // Return the empty buffer
string resString = s.str();
int4 newSize = resString.size();
if (newSize > maximumBytes)
newSize = maximumBytes;
vector<uint1> &vec(stringMap[addr]);
vec.reserve(newSize);
stringData.byteData.reserve(newSize + 1);
const uint1 *ptr = (const uint1 *)resString.c_str();
vec.assign(ptr,ptr+newSize);
stringData.byteData.assign(ptr,ptr+newSize);
stringData.byteData[newSize] = 0; // Make sure there is a null terminator
}
return vec;
stringData.isTruncated = (numChars >= maximumChars);
isTrunc = stringData.isTruncated;
return stringData.byteData;
}
/// Check that the given buffer contains valid unicode.
/// If the string is encoded in UTF8 or ASCII, we get (on average) a bit of check
/// per character. For UTF16, the surrogate reserved area gives at least some check.
/// \param buf is the byte array to check
/// \param size is the size of the buffer in bytes
/// \param charsize is the UTF encoding (1=UTF8, 2=UTF16, 4=UTF32)
/// \return \b true if the buffer is filled with valid unicode
bool StringManagerUnicode::isCharacterConstant(const uint1 *buf,int4 size,int4 charsize) const
/// \return the number of characters or -1 if there is an invalid encoding
int4 StringManagerUnicode::checkCharacters(const uint1 *buf,int4 size,int4 charsize) const
{
if (buf == (const uint1 *)0) return false;
if (buf == (const uint1 *)0) return -1;
bool bigend = glb->translate->isBigEndian();
int4 i=0;
int4 count=0;
int4 skip = charsize;
while(i<size) {
int4 codepoint = getCodepoint(buf+i,charsize,bigend,skip);
if (codepoint < 0) return false;
if (codepoint < 0) return -1;
if (codepoint == 0) break;
count += 1;
i += skip;
}
return true;
return count;
}
/// Assume the buffer contains a null terminated unicode encoded string.
@ -358,6 +371,7 @@ bool StringManagerUnicode::writeUnicode(ostream &s,uint1 *buffer,int4 size,int4
{
bool bigend = glb->translate->isBigEndian();
int4 i=0;
int4 count=0;
int4 skip = charsize;
while(i<size) {
int4 codepoint = getCodepoint(buffer+i,charsize,bigend,skip);
@ -365,6 +379,9 @@ bool StringManagerUnicode::writeUnicode(ostream &s,uint1 *buffer,int4 size,int4
if (codepoint == 0) break; // Terminator
writeUtf8(s, codepoint);
i += skip;
count += 1;
if (count >= maximumChars)
break;
}
return true;
}

View file

@ -30,13 +30,17 @@ class Architecture;
/// Stores the decoded string until its needed for presentation.
class StringManager {
protected:
map<Address,vector<uint1> > stringMap; ///< Map from address to string (in UTF8 format)
int4 maximumBytes; ///< Maximum bytes (in UTF8 encoding) allowed
/// \brief The data held for a single string: its UTF8 bytes and whether it was truncated
class StringData {
public:
bool isTruncated; ///< \b true if the string is truncated
vector<uint1> byteData; ///< UTF8 encoded string data
};
map<Address,StringData> stringMap; ///< Map from address to string data
int4 maximumChars; ///< Maximum characters in a string before truncating
public:
StringManager(int4 max); ///< Constructor
virtual ~StringManager(void); ///< Destructor
int4 getMaximumBytes(void) const { return maximumBytes; } ///< Return the maximum bytes allowed in a string decoding
void clear(void) { stringMap.clear(); } ///< Clear out any cached strings
bool isString(const Address &addr,Datatype *charType); // Determine if data at the given address is a string
@ -47,8 +51,9 @@ public:
/// the string data is fetched, converted to a UTF8 encoding, cached and returned.
/// \param addr is the given address
/// \param charType is a character data-type indicating the encoding
/// \param isTrunc passes back whether the string is truncated
/// \return the byte array of UTF8 data
virtual const vector<uint1> &getStringData(const Address &addr,Datatype *charType)=0;
virtual const vector<uint1> &getStringData(const Address &addr,Datatype *charType,bool &isTrunc)=0;
void saveXml(ostream &s) const; ///< Save cached strings to a stream as XML
void restoreXml(const Element *el,const AddrSpaceManager *m); ///< Restore string cache from XML
@ -66,13 +71,13 @@ public:
class StringManagerUnicode : public StringManager {
Architecture *glb; ///< Underlying architecture
uint1 *testBuffer; ///< Temporary buffer for pulling in loadimage bytes
int4 checkCharacters(const uint1 *buf,int4 size,int4 charsize) const; ///< Make sure buffer has valid bounded set of unicode
public:
StringManagerUnicode(Architecture *g,int4 max); ///< Constructor
virtual ~StringManagerUnicode(void);
virtual const vector<uint1> &getStringData(const Address &addr,Datatype *charType);
bool isCharacterConstant(const uint1 *buf,int4 size,int4 charsize) const; ///< Return \b true if buffer looks like unicode
bool writeUnicode(ostream &s,uint1 *buffer,int4 size,int4 charsize); ///< Write unicode byte array to stream (as UTF8)
virtual const vector<uint1> &getStringData(const Address &addr,Datatype *charType,bool &isTrunc);
bool writeUnicode(ostream &s,uint1 *buffer,int4 size,int4 charsize); ///< Translate/copy unicode to UTF8
};
#endif

View file

@ -54,6 +54,13 @@ import ghidra.util.xml.XmlUtilities;
*/
public class DecompileCallback {
/**
* Data returned for a query about strings: the UTF8 encoded bytes of the
* string together with a flag recording whether the string was truncated.
*/
public static class StringData {
boolean isTruncated; // Did we truncate the string (true if it exceeded the maximum character limit)
public byte[] byteData; // The UTF8 encoding of the (possibly truncated) string
}
private DecompileDebug debug;
private Program program;
private Listing listing;
@ -1202,7 +1209,7 @@ public class DecompileCallback {
* If there is already data present at the address, use this to determine the
* string encoding. Otherwise use the data-type info passed in to determine the encoding.
* Check that the bytes at the address represent a valid string encoding that doesn't
* exceed the maximum byte limit passed in. Return null if the string is invalid.
* exceed the maximum character limit passed in. Return null if the string is invalid.
* Return the string translated into a UTF8 byte array otherwise. A (valid) empty
* string is returned as a zero length array.
* @param addrString is the XML encoded address and maximum character limit
@ -1210,11 +1217,11 @@ public class DecompileCallback {
* @param dtId is the id associated with the character data-type
* @return the StringData record (UTF8 encoded byte array and truncation flag) or null
*/
public byte[] getStringData(String addrString, String dtName, String dtId) {
public StringData getStringData(String addrString, String dtName, String dtId) {
Address addr;
int maxBytes;
int maxChars;
try {
maxBytes = readXMLSize(addrString);
maxChars = readXMLSize(addrString);
addr = Varnode.readXMLAddress(addrString, addrfactory, funcEntry.getAddressSpace());
if (addr == Address.NO_ADDRESS) {
throw new PcodeXMLException("Address does not physically map");
@ -1227,25 +1234,31 @@ public class DecompileCallback {
Data data = program.getListing().getDataContaining(addr);
Settings settings = SettingsImpl.NO_SETTINGS;
AbstractStringDataType dataType = null;
StringDataInstance stringInstance = null;
int length = 0;
if (data != null) {
if (data.getDataType() instanceof AbstractStringDataType) {
// There is already a string here. Use its configuration to
// set up the StringDataInstance
settings = data;
dataType = (AbstractStringDataType) data.getDataType();
int len = data.getLength();
if (len > 0) {
long diff = addr.subtract(data.getAddress()) *
addr.getAddressSpace().getAddressableUnitSize();
if (diff < 0 || diff >= len) {
length = data.getLength();
if (length <= 0) {
return null;
}
len -= diff;
if (len < maxBytes) {
maxBytes = len;
long diff = addr.subtract(data.getAddress()) *
addr.getAddressSpace().getAddressableUnitSize();
if (diff < 0 || diff >= length) {
return null;
}
length -= diff;
MemoryBufferImpl buf = new MemoryBufferImpl(program.getMemory(), addr, 64);
stringInstance = dataType.getStringDataInstance(buf, settings, length);
}
}
}
}
if (dataType == null) {
if (stringInstance == null) {
// There is no string and/or something else at the address.
// Setup StringDataInstance based on raw memory
DataType dt = dtmanage.findBaseType(dtName, dtId);
if (dt instanceof AbstractStringDataType) {
dataType = (AbstractStringDataType) dt;
@ -1267,19 +1280,32 @@ public class DecompileCallback {
dataType = TerminatedStringDataType.dataType;
}
}
}
MemoryBufferImpl buf = new MemoryBufferImpl(program.getMemory(), addr, 64);
StringDataInstance stringInstance = dataType.getStringDataInstance(buf, settings, maxBytes);
int len = stringInstance.getStringLength();
if (len < 0 || len > maxBytes) {
stringInstance = dataType.getStringDataInstance(buf, settings, maxChars);
length = stringInstance.getStringLength();
if (length < 0 || length > maxChars) {
return null;
}
}
String stringVal;
if (stringInstance.isShowTranslation() && stringInstance.getTranslatedValue() != null) {
stringVal = stringInstance.getTranslatedValue();
}
else {
stringVal = stringInstance.getStringValue();
}
String stringVal = stringInstance.getStringValue();
if (!isValidChars(stringVal)) {
return null;
}
return stringVal.getBytes(utf8Charset);
StringData stringData = new StringData();
stringData.isTruncated = false;
if (stringVal.length() > maxChars) {
stringData.isTruncated = true;
stringVal = stringVal.substring(0, maxChars);
}
stringData.byteData = stringVal.getBytes(utf8Charset);
return stringData;
}
//==================================================================================================

View file

@ -787,9 +787,10 @@ public class DecompileProcess {
String addr = readQueryString();
String dtName = readQueryString();
String dtId = readQueryString();
byte[] res = callback.getStringData(addr, dtName, dtId);
DecompileCallback.StringData stringData = callback.getStringData(addr, dtName, dtId);
write(query_response_start);
if (res != null) {
if (stringData != null) {
byte[] res = stringData.byteData;
int sz = res.length + 1; // We add a null terminator character
int sz1 = (sz & 0x3f) + 0x20;
sz >>>= 6;
@ -797,6 +798,7 @@ public class DecompileProcess {
write(byte_start);
write(sz1);
write(sz2);
write(stringData.isTruncated ? 1 : 0);
byte[] dblres = new byte[res.length * 2 + 2];
for (int i = 0; i < res.length; i++) {
dblres[i * 2] = (byte) (((res[i] >> 4) & 0xf) + 65);