Merge remote-tracking branch 'origin/GP-1053_DecompilerConversion' into patch

This commit is contained in:
ghidra1 2021-07-06 14:22:07 -04:00
commit 2143c4961d
35 changed files with 2430 additions and 281 deletions

View file

@ -639,6 +639,21 @@ void FunctionSymbol::restoreXml(const Element *el)
}
}
/// Build a symbol that either attaches a name to a constant (an equate) or simply
/// forces a particular display conversion on that constant.
///
/// \param sc is the scope owning the new symbol
/// \param nm is the name of the equate (an empty string can be used for a convert)
/// \param format is the desired display conversion (0 for no conversion)
/// \param val is the constant value whose display is being altered
EquateSymbol::EquateSymbol(Scope *sc,const string &nm,uint4 format,uintb val)
  : Symbol(sc, nm, (Datatype *)0)
{
  TypeFactory *typeFactory = sc->getArch()->types;

  value = val;
  category = 1;			// Category 1 identifies the symbol as an equate
  // The symbol is given the size 1 TYPE_UNKNOWN base data-type
  type = typeFactory->getBase(1,TYPE_UNKNOWN);
  dispflags |= format;		// Fold the requested conversion into the display flags
}
/// An EquateSymbol should survive certain kinds of transforms during decompilation,
/// such as negation, twos-complementing, adding or subtracting 1.
/// Return \b true if the given value looks like a transform of this type relative
@ -688,7 +703,6 @@ void EquateSymbol::restoreXml(const Element *el)
TypeFactory *types = scope->getArch()->types;
type = types->getBase(1,TYPE_UNKNOWN);
checkSizeTypeLock();
}
/// Label symbols don't really have a data-type, so we just put
@ -1661,6 +1675,26 @@ Symbol *Scope::addDynamicSymbol(const string &nm,Datatype *ct,const Address &cad
return sym;
}
/// \brief Create a symbol that forces a constant display conversion
///
/// An unnamed EquateSymbol is created and attached to \b this Scope through a dynamic
/// mapping keyed by the given hash.  If the address is valid, the mapping is restricted
/// to that single address; otherwise the range list is left empty.
/// \param format is the type of conversion (Symbol::force_hex, Symbol::force_dec, etc.)
/// \param value is the constant value being converted
/// \param addr is the address of the p-code op reading the constant
/// \param hash is the dynamic hash identifying the constant
/// \return the new EquateSymbol
Symbol *Scope::addConvertSymbol(uint4 format,uintb value,Address &addr,uint8 hash)
{
  RangeList usePoints;
  if (!addr.isInvalid()) {
    // A single-address range: first and last offset are the same
    usePoints.insertRange(addr.getSpace(),addr.getOffset(),addr.getOffset());
  }

  Symbol *convertSym = new EquateSymbol(owner,"",format,value);
  addDynamicMapInternal(convertSym,Varnode::mapped,hash,0,1,usePoints);
  return convertSym;
}
/// Create default name given information in the Symbol and possibly a representative Varnode.
/// This method extracts the crucial properties and then uses the buildVariableName method to
/// construct the actual name.

View file

@ -270,6 +270,7 @@ public:
class EquateSymbol : public Symbol {
uintb value; ///< Value of the constant being equated
public:
EquateSymbol(Scope *sc,const string &nm,uint4 format,uintb val); ///< Constructor
EquateSymbol(Scope *sc) : Symbol(sc) { value = 0; category = 1; } ///< Constructor for use with restoreXml
uintb getValue(void) const { return value; } ///< Get the constant value
bool isValueClose(uintb op2Value,int4 size) const; ///< Is the given value similar to \b this equate
@ -712,6 +713,7 @@ public:
ExternRefSymbol *addExternalRef(const Address &addr,const Address &refaddr,const string &nm);
LabSymbol *addCodeLabel(const Address &addr,const string &nm);
Symbol *addDynamicSymbol(const string &nm,Datatype *ct,const Address &caddr,uint8 hash);
Symbol *addConvertSymbol(uint4 format,uintb value,Address &addr,uint8 hash);
string buildDefaultName(Symbol *sym,int4 &base,Varnode *vn) const; ///< Create a default name for the given Symbol
bool isReadOnly(const Address &addr,int4 size,const Address &usepoint) const;
void printBounds(ostream &s) const { rangetree.printBounds(s); } ///< Print a description of \b this Scope's \e owned memory ranges

View file

@ -1207,6 +1207,10 @@ bool Funcdata::attemptDynamicMappingLate(SymbolEntry *entry,DynamicHash &dhash)
return false;
if (vn->getSymbolEntry() == entry) return false; // Already applied it
Symbol *sym = entry->getSymbol();
if (sym->getCategory() == 1) { // Equate symbol does not depend on size
vn->setSymbolEntry(entry);
return true;
}
if (vn->getSize() != entry->getSize()) {
ostringstream s;
s << "Unable to use symbol ";

View file

@ -57,6 +57,7 @@ void IfaceDecompCapability::registerCommands(IfaceStatus *status)
status->registerCom(new IfcMapfunction(),"map","function");
status->registerCom(new IfcMapexternalref(),"map","externalref");
status->registerCom(new IfcMaplabel(),"map","label");
status->registerCom(new IfcMapconvert(),"map","convert");
status->registerCom(new IfcPrintdisasm(),"disassemble");
status->registerCom(new IfcDecompile(),"decompile");
status->registerCom(new IfcDump(),"dump");
@ -665,6 +666,45 @@ void IfcMaplabel::execute(istream &s)
scope->setAttribute(sym,Varnode::namelock|Varnode::typelock);
}
/// \class IfcMapconvert
/// \brief Create a convert directive: `map convert <format> <value> <address> <hash>`
///
/// Creates a \e convert directive that causes a targeted constant value to be displayed
/// with the specified integer format.  The constant is specified by \e value, and the
/// \e address of the p-code op using the constant plus a dynamic \e hash is also given.
///
/// The \e format token must be one of `hex`, `dec`, `bin`, `oct` or `char`.  Both the
/// \e value and the \e hash are read as hexadecimal.  An IfaceExecutionError is thrown
/// if no function is currently loaded, and an IfaceParseError is thrown if the format
/// token is unrecognized or if the value or hash cannot be parsed.
void IfcMapconvert::execute(istream &s)
{
  if (dcp->fd == (Funcdata *)0)
    throw IfaceExecutionError("No function loaded");
  string name;
  uintb value = 0;
  uint8 hash = 0;
  int4 size = 0;		// Filled in by parse_machaddr, otherwise unused here
  uint4 format = 0;

  s >> name;			// Parse the format token
  if (name == "hex")
    format = Symbol::force_hex;
  else if (name == "dec")
    format = Symbol::force_dec;
  else if (name == "bin")
    format = Symbol::force_bin;
  else if (name == "oct")
    format = Symbol::force_oct;
  else if (name == "char")
    format = Symbol::force_char;
  else
    throw IfaceParseError("Bad convert format");

  s >> ws >> hex >> value;	// Parse the constant value
  if (s.fail())			// Missing or malformed value must not silently become a directive
    throw IfaceParseError("Bad convert value");
  Address addr = parse_machaddr(s,size,*dcp->conf->types); // Read pc address of hash

  s >> hex >> hash;		// Parse the hash value
  if (s.fail())			// Reaching end-of-input after a good hash sets only eofbit, which is fine
    throw IfaceParseError("Bad convert hash");

  dcp->fd->getScopeLocal()->addConvertSymbol(format, value, addr, hash);
}
/// \class IfcPrintdisasm
/// \brief Print disassembly of a memory range: `disassemble [<address1> <address2>]`
///

View file

@ -178,6 +178,11 @@ public:
virtual void execute(istream &s);
};
// Console command object for the `map convert` directive (it is registered under the
// words "map" "convert").  It is a decompiler command deriving from IfaceDecompCommand.
class IfcMapconvert : public IfaceDecompCommand {
public:
// Parse the command's arguments from the stream \b s and carry out the command
virtual void execute(istream &s);
};
class IfcPrintdisasm : public IfaceDecompCommand {
public:
virtual void execute(istream &s);

View file

@ -1099,7 +1099,7 @@ void PrintC::push_integer(uintb val,int4 sz,bool sign,
displayFormat = sym->getDisplayFormat();
}
}
if (sign) { // Print the constant as signed
if (sign && displayFormat != Symbol::force_char) { // Print the constant as signed
uintb mask = calc_mask(sz);
uintb flip = val^mask;
print_negsign = (flip < val);
@ -1136,21 +1136,14 @@ void PrintC::push_integer(uintb val,int4 sz,bool sign,
else if (displayFormat == Symbol::force_oct)
t << oct << '0' << val;
else if (displayFormat == Symbol::force_char) {
int4 internalSize = 4;
if (val < 256)
internalSize = 1;
else if (val < 65536)
internalSize = 2;
if ((internalSize==1)&&((val<7)||(val>0x7e)||((val>13)&&(val<0x20)))) { // not a good character constant
t << dec << val; // Just emit as decimal
}
else {
if (doEmitWideCharPrefix() && internalSize > 1)
t << 'L'; // Print symbol indicating wide character
t << '\''; // char is surrounded with single quotes
if (doEmitWideCharPrefix() && sz > 1)
t << 'L'; // Print symbol indicating wide character
t << '\''; // char is surrounded with single quotes
if (sz == 1 && val >= 0x80)
printCharHexEscape(t,(int4)val);
else
printUnicode(t,(int4)val);
t << '\'';
}
t << '\'';
}
else { // Must be Symbol::force_bin
t << "0b";
@ -1270,15 +1263,8 @@ void PrintC::printUnicode(ostream &s,int4 onechar) const
s << "\\\'";
return;
}
// Generic unicode escape
if (onechar < 256) {
s << "\\x" << setfill('0') << setw(2) << hex << onechar;
}
else if (onechar < 65536) {
s << "\\x" << setfill('0') << setw(4) << hex << onechar;
}
else
s << "\\x" << setfill('0') << setw(8) << hex << onechar;
// Generic escape code
printCharHexEscape(s, onechar);
return;
}
StringManager::writeUtf8(s, onechar); // emit normally
@ -1321,6 +1307,22 @@ bool PrintC::doEmitWideCharPrefix(void) const
return true;
}
/// Emit the given value as a character hexadecimal escape sequence: a `\x` prefix
/// followed by the value in hexadecimal, zero padded to a minimum of 2, 4, or 8 digits
/// depending on the magnitude of the value.  The fill character and hexadecimal base
/// set on the stream are left in place when the method returns.
/// \param s is the stream to write to
/// \param val is the given value
void PrintC::printCharHexEscape(ostream &s,int4 val)

{
  int4 numDigits;
  if (val < 256)
    numDigits = 2;		// Fits in a single byte
  else if (val < 65536)
    numDigits = 4;		// Fits in two bytes
  else
    numDigits = 8;		// Everything else gets the full four byte width

  s << "\\x" << setfill('0') << setw(numDigits) << hex << val;
}
/// \brief Print a quoted (unicode) string at the given address.
///
/// Data for the string is obtained directly from the LoadImage. The bytes are checked
@ -1397,28 +1399,50 @@ void PrintC::resetDefaultsPrintC(void)
/// \param ct is data-type attached to the value
/// \param vn is the Varnode holding the value
/// \param op is the PcodeOp using the value
void PrintC::pushCharConstant(uintb val,const TypeChar *ct,const Varnode *vn,const PcodeOp *op)
void PrintC::pushCharConstant(uintb val,const Datatype *ct,const Varnode *vn,const PcodeOp *op)
{
ostringstream t;
uint4 displayFormat = 0;
bool isSigned = (ct->getMetatype() == TYPE_INT);
if ((vn != (const Varnode *)0)&&(!vn->isAnnotation())) {
Symbol *sym = vn->getHigh()->getSymbol();
if (sym != (Symbol *)0) {
if (sym->isNameLocked() && (sym->getCategory() == 1)) {
if (pushEquate(val,vn->getSize(),(EquateSymbol *)sym,vn,op))
return;
}
displayFormat = sym->getDisplayFormat();
if (displayFormat == Symbol::force_bin || displayFormat == Symbol::force_dec || displayFormat == Symbol::force_oct) {
push_integer(val, ct->getSize(), isSigned, vn, op);
return;
}
}
}
if ((ct->getSize()==1)&&(val >= 0x80)) {
// For byte characters, the encoding is assumed to be ASCII, UTF-8, or some other
// code-page that extends ASCII. At 0x80 and above, we cannot treat the value as a
// unicode code-point. It's either part of a multi-byte UTF-8 encoding or an unknown
// code-page value. In either case, we print it as an integer.
push_integer(val,1,true,vn,op);
// code-page value. In either case, we print as an integer or an escape sequence.
if (displayFormat != Symbol::force_hex && displayFormat != Symbol::force_char) {
push_integer(val, 1, isSigned, vn, op);
return;
}
displayFormat = Symbol::force_hex; // Fallthru but force a hex representation
}
else {
// From here we assume, the constant value is a direct unicode code-point.
// The value could be an illegal code-point (surrogates or beyond the max code-point),
// but this will just be emitted as an escape sequence.
if (doEmitWideCharPrefix() && ct->getSize() > 1)
t << 'L'; // Print symbol indicating wide character
t << '\''; // char is surrounded with single quotes
ostringstream t;
// From here we assume the constant value is a direct unicode code-point.
// The value could be an illegal code-point (surrogates or beyond the max code-point),
// but this will just be emitted as an escape sequence.
if (doEmitWideCharPrefix() && ct->getSize() > 1)
t << 'L'; // Print symbol indicating wide character
t << '\''; // char is surrounded with single quotes
if (displayFormat == Symbol::force_hex) {
printCharHexEscape(t,(int4)val);
}
else
printUnicode(t,(int4)val);
t << '\'';
pushAtom(Atom(t.str(),vartoken,EmitXml::const_color,op,vn));
}
t << '\'';
pushAtom(Atom(t.str(),vartoken,EmitXml::const_color,op,vn));
}
/// \brief Push an enumerated value to the RPN stack

View file

@ -131,7 +131,7 @@ protected:
virtual void pushTypeEnd(const Datatype *ct); ///< Push the tail ends of a data-type declaration onto the RPN stack
void pushBoolConstant(uintb val,const TypeBase *ct,const Varnode *vn,
const PcodeOp *op);
void pushCharConstant(uintb val,const TypeChar *ct,const Varnode *vn,
void pushCharConstant(uintb val,const Datatype *ct,const Varnode *vn,
const PcodeOp *op);
void pushEnumConstant(uintb val,const TypeEnum *ct,const Varnode *vn,
const PcodeOp *op);
@ -162,6 +162,7 @@ protected:
void opFunc(const PcodeOp *op); ///< Push a \e functional expression based on the given p-code op to the RPN stack
void opTypeCast(const PcodeOp *op); ///< Push the given p-code op using type-cast syntax to the RPN stack
void opHiddenFunc(const PcodeOp *op); ///< Push the given p-code op as a hidden token
static void printCharHexEscape(ostream &s,int4 val); ///< Print value as an escaped hex sequence
bool printCharacterConstant(ostream &s,const Address &addr,Datatype *charType) const;
int4 getHiddenThisSlot(const PcodeOp *op,FuncProto *fc); ///< Get position of "this" pointer needing to be hidden
void resetDefaultsPrintC(void); ///< Set default values for options specific to PrintC

View file

@ -431,7 +431,7 @@ bool PrintLanguage::unicodeNeedsEscape(int4 codepoint)
if (codepoint > 0xa0) { // Printable codepoints A1-FF
return false;
}
return true;
return true; // Delete + C1 Control characters
}
if (codepoint >= 0x2fa20) { // Up to last currently defined language
return true;
@ -483,6 +483,8 @@ bool PrintLanguage::unicodeNeedsEscape(int4 codepoint)
return true; // zero width non-breaking space
}
if (codepoint >= 0xfff0 && codepoint <= 0xffff) {
if ((codepoint == 0xfffc || codepoint == 0xfffd))
return false;
return true; // interlinear specials
}
return false;
@ -772,11 +774,11 @@ void PrintLanguage::formatBinary(ostream &s,uintb val)
s << '0';
return;
}
else if (pos < 7)
else if (pos <= 7)
pos = 7;
else if (pos < 15)
else if (pos <= 15)
pos = 15;
else if (pos < 31)
else if (pos <= 31)
pos = 31;
else
pos = 63;

View file

@ -2464,6 +2464,7 @@ int4 RuleZextEliminate::applyOp(PcodeOp *op,Funcdata &data)
val = vn2->getOffset();
if ((val>>(8*smallsize))==0) { // Is zero extension unnecessary
newvn = data.newConstant(smallsize,val);
newvn->copySymbolIfValid(vn2);
data.opSetInput(op,zext->getIn(0),zextslot);
data.opSetInput(op,newvn,otherslot);
return 1;
@ -3696,7 +3697,9 @@ int4 RuleXorCollapse::applyOp(PcodeOp *op,Funcdata &data)
}
coeff2 = xorvn->getOffset();
if (coeff2 == 0) return 0;
data.opSetInput(op,data.newConstant(op->getIn(1)->getSize(),coeff1^coeff2),1);
Varnode *constvn = data.newConstant(op->getIn(1)->getSize(),coeff1^coeff2);
constvn->copySymbolIfValid(xorvn);
data.opSetInput(op,constvn,1);
data.opSetInput(op,xorop->getIn(0),0);
return 1;
}
@ -3757,6 +3760,10 @@ int4 RuleAddMultCollapse::applyOp(PcodeOp *op,Funcdata &data)
uintb val = op->getOpcode()->evaluateBinary(c[0]->getSize(),c[0]->getSize(),c[0]->getOffset(),c[1]->getOffset());
newvn = data.newConstant(c[0]->getSize(),val);
if (c[0]->getSymbolEntry() != (SymbolEntry *)0)
newvn->copySymbolIfValid(c[0]);
else if (c[1]->getSymbolEntry() != (SymbolEntry *)0)
newvn->copySymbolIfValid(c[1]);
PcodeOp *newop = data.newOp(2,op->getAddr());
data.opSetOpcode(newop,CPUI_INT_ADD);
Varnode *newout = data.newUniqueOut(c[0]->getSize(),newop);
@ -3774,6 +3781,10 @@ int4 RuleAddMultCollapse::applyOp(PcodeOp *op,Funcdata &data)
uintb val = op->getOpcode()->evaluateBinary(c[0]->getSize(),c[0]->getSize(),c[0]->getOffset(),c[1]->getOffset());
newvn = data.newConstant(c[0]->getSize(),val);
if (c[0]->getSymbolEntry() != (SymbolEntry *)0)
newvn->copySymbolIfValid(c[0]);
else if (c[1]->getSymbolEntry() != (SymbolEntry *)0)
newvn->copySymbolIfValid(c[1]);
data.opSetInput(op,newvn,1); // Replace c[0] with c[0]+c[1] or c[0]*c[1]
data.opSetInput(op,sub2,0); // Replace sub with sub2
return 1;

View file

@ -83,7 +83,7 @@ SubvariableFlow::ReplaceVarnode *SubvariableFlow::setReplacement(Varnode *vn,uin
if (sextval != cval)
return (ReplaceVarnode *)0;
}
return addConstant((ReplaceOp *)0,mask,0,vn->getOffset());
return addConstant((ReplaceOp *)0,mask,0,vn);
}
if (vn->isFree())
@ -628,7 +628,7 @@ bool SubvariableFlow::traceBackward(ReplaceVarnode *rvn)
sa = doesAndClear(op,rvn->mask);
if (sa != -1) {
rop = createOp(CPUI_COPY,1,rvn);
addConstant(rop,rvn->mask,0,op->getIn(sa)->getOffset());
addConstant(rop,rvn->mask,0,op->getIn(sa));
}
else {
rop = createOp(CPUI_INT_AND,2,rvn);
@ -640,7 +640,7 @@ bool SubvariableFlow::traceBackward(ReplaceVarnode *rvn)
sa = doesOrSet(op,rvn->mask);
if (sa != -1) {
rop = createOp(CPUI_COPY,1,rvn);
addConstant(rop,rvn->mask,0,op->getIn(sa)->getOffset());
addConstant(rop,rvn->mask,0,op->getIn(sa));
}
else {
rop = createOp(CPUI_INT_OR,2,rvn);
@ -676,7 +676,7 @@ bool SubvariableFlow::traceBackward(ReplaceVarnode *rvn)
newmask = rvn->mask >> sa; // What mask looks like before shift
if (newmask == 0) { // Subvariable filled with shifted zero
rop = createOp(CPUI_COPY,1,rvn);
addConstant(rop,rvn->mask,0,(uintb)0);
addNewConstant(rop,0,(uintb)0);
return true;
}
if ((newmask<<sa) != rvn->mask)
@ -690,7 +690,7 @@ bool SubvariableFlow::traceBackward(ReplaceVarnode *rvn)
newmask = (rvn->mask << sa) & calc_mask(op->getIn(0)->getSize());
if (newmask == 0) { // Subvariable filled with shifted zero
rop = createOp(CPUI_COPY,1,rvn);
addConstant(rop,rvn->mask,0,(uintb)0);
addNewConstant(rop,0,(uintb)0);
return true;
}
if ((newmask>>sa) != rvn->mask)
@ -772,7 +772,7 @@ bool SubvariableFlow::traceBackward(ReplaceVarnode *rvn)
if ((rvn->mask&1)==1) break; // Not normal variable flow
// Variable is filled with zero
rop = createOp(CPUI_COPY,1,rvn);
addConstant(rop,rvn->mask,0,(uintb)0);
addNewConstant(rop,0,(uintb)0);
return true;
default:
break; // Everything else we abort
@ -825,7 +825,7 @@ bool SubvariableFlow::traceForwardSext(ReplaceVarnode *rvn)
if (!op->getIn(1)->isConstant()) return false; // Right now we only deal with constant shifts
rop = createOpDown(CPUI_INT_SRIGHT,2,op,rvn,0);
if (!createLink(rop,rvn->mask,-1,outvn)) return false; // Keep the same mask size
addConstant(rop,calc_mask(op->getIn(1)->getSize()),1,op->getIn(1)->getOffset()); // Preserve the shift amount
addConstant(rop,calc_mask(op->getIn(1)->getSize()),1,op->getIn(1)); // Preserve the shift amount
hcount += 1;
break;
case CPUI_SUBPIECE:
@ -916,7 +916,7 @@ bool SubvariableFlow::traceBackwardSext(ReplaceVarnode *rvn)
rop = createOp(CPUI_INT_SRIGHT,2,rvn);
if (!createLink(rop,rvn->mask,0,op->getIn(0))) return false; // Keep the same mask
if (rop->input.size()==1)
addConstant(rop,calc_mask(op->getIn(1)->getSize()),1,op->getIn(1)->getOffset()); // Preserve the shift amount
addConstant(rop,calc_mask(op->getIn(1)->getSize()),1,op->getIn(1)); // Preserve the shift amount
return true;
case CPUI_CALL:
case CPUI_CALLIND:
@ -993,23 +993,40 @@ bool SubvariableFlow::createCompareBridge(PcodeOp *op,ReplaceVarnode *inrvn,int4
/// \brief Add a constant variable node to the logical subgraph
///
/// Unlike other subgraph variable nodes, this one does not maintain a mirror with the original containing Varnode.
/// \param rop is the logical operation taking the constant as input
/// \param mask is the set of bits holding the logical value (within a bigger value)
/// \param slot is the input slot to the operation
/// \param val is the bigger constant value holding the logical value
/// \param constvn is the original constant
SubvariableFlow::ReplaceVarnode *SubvariableFlow::addConstant(ReplaceOp *rop,uintb mask,
uint4 slot,uintb val)
{ // Add a constant to the replacement tree
uint4 slot,Varnode *constvn)
{
newvarlist.emplace_back();
ReplaceVarnode *res = &newvarlist.back();
res->vn = (Varnode *)0;
res->vn = constvn;
res->replacement = (Varnode *)0;
res->mask = mask;
// Calculate the actual constant value
int4 sa = leastsigbit_set(mask);
res->val = (mask & val) >> sa;
res->val = (mask & constvn->getOffset()) >> sa;
res->def = (ReplaceOp *)0;
if (rop != (ReplaceOp *)0) {
while(rop->input.size() <= slot)
rop->input.push_back((ReplaceVarnode *)0);
rop->input[slot] = res;
}
return res;
}
SubvariableFlow::ReplaceVarnode *SubvariableFlow::addNewConstant(ReplaceOp *rop,uint4 slot,uintb val)
{
newvarlist.emplace_back();
ReplaceVarnode *res = &newvarlist.back();
res->vn = (Varnode *)0;
res->replacement = (Varnode *)0;
res->mask = 0;
res->val = val;
res->def = (ReplaceOp *)0;
if (rop != (ReplaceOp *)0) {
while(rop->input.size() <= slot)
@ -1214,13 +1231,17 @@ Varnode *SubvariableFlow::getReplaceVarnode(ReplaceVarnode *rvn)
{
if (rvn->replacement != (Varnode *)0)
return rvn->replacement;
// Only a constant if BOTH replacement and vn fields are null
if (rvn->vn == (Varnode *)0) {
if (rvn->def==(ReplaceOp *)0) // A constant
if (rvn->def==(ReplaceOp *)0) // A constant that did not come from an original Varnode
return fd->newConstant(flowsize,rvn->val);
rvn->replacement = fd->newUnique(flowsize);
return rvn->replacement;
}
if (rvn->vn->isConstant()) {
Varnode *newVn = fd->newConstant(flowsize,rvn->val);
newVn->copySymbolIfValid(rvn->vn);
return newVn;
}
bool isinput = rvn->vn->isInput();
if (useSameAddress(rvn)) {

View file

@ -111,7 +111,8 @@ class SubvariableFlow {
void addBooleanPatch(PcodeOp *pullop,ReplaceVarnode *rvn,int4 slot);
void addSuggestedPatch(ReplaceVarnode *rvn,PcodeOp *pushop,int4 sa);
void addComparePatch(ReplaceVarnode *in1,ReplaceVarnode *in2,PcodeOp *op);
ReplaceVarnode *addConstant(ReplaceOp *rop,uintb mask,uint4 slot,uintb val);
ReplaceVarnode *addConstant(ReplaceOp *rop,uintb mask,uint4 slot,Varnode *constvn);
ReplaceVarnode *addNewConstant(ReplaceOp *rop,uint4 slot,uintb val);
void createNewOut(ReplaceOp *rop,uintb mask);
void replaceInput(ReplaceVarnode *rvn);
bool useSameAddress(ReplaceVarnode *rvn);