diff --git a/Ghidra/Features/Decompiler/buildNatives.gradle b/Ghidra/Features/Decompiler/buildNatives.gradle index a05736f686..da6bb3a0ff 100644 --- a/Ghidra/Features/Decompiler/buildNatives.gradle +++ b/Ghidra/Features/Decompiler/buildNatives.gradle @@ -86,6 +86,7 @@ model { include "funcdata.cc" include "funcdata_block.cc" include "funcdata_varnode.cc" + include "unionresolve.cc" include "funcdata_op.cc" include "pcodeinject.cc" include "heritage.cc" diff --git a/Ghidra/Features/Decompiler/certification.manifest b/Ghidra/Features/Decompiler/certification.manifest index b633ca91dd..37ce688c67 100644 --- a/Ghidra/Features/Decompiler/certification.manifest +++ b/Ghidra/Features/Decompiler/certification.manifest @@ -21,6 +21,7 @@ src/decompile/datatests/forloop_loaditer.xml||GHIDRA||||END| src/decompile/datatests/forloop_thruspecial.xml||GHIDRA||||END| src/decompile/datatests/forloop_varused.xml||GHIDRA||||END| src/decompile/datatests/forloop_withskip.xml||GHIDRA||||END| +src/decompile/datatests/impliedfield.xml||GHIDRA||||END| src/decompile/datatests/indproto.xml||GHIDRA||||END| src/decompile/datatests/loopcomment.xml||GHIDRA||||END| src/decompile/datatests/multiret.xml||GHIDRA||||END| @@ -38,6 +39,7 @@ src/decompile/datatests/readvolatile.xml||GHIDRA||||END| src/decompile/datatests/sbyte.xml||GHIDRA||||END| src/decompile/datatests/threedim.xml||GHIDRA||||END| src/decompile/datatests/twodim.xml||GHIDRA||||END| +src/decompile/datatests/union_datatype.xml||GHIDRA||||END| src/decompile/datatests/wayoffarray.xml||GHIDRA||||END| src/main/doc/commonprofile.xsl||GHIDRA||||END| src/main/doc/cspec.xml||GHIDRA||||END| diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/Makefile b/Ghidra/Features/Decompiler/src/decompile/cpp/Makefile index 5d6e5643ba..6b22889fd7 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/Makefile +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/Makefile @@ -82,7 +82,7 @@ CORE= xml space float address pcoderaw translate opcodes 
globalcontext DECCORE=capability architecture options graph cover block cast typeop database cpool \ comment stringmanage fspec action loadimage grammar varnode op \ type variable varmap jumptable emulate emulateutil flow userop \ - funcdata funcdata_block funcdata_op funcdata_varnode pcodeinject \ + funcdata funcdata_block funcdata_op funcdata_varnode unionresolve pcodeinject \ heritage prefersplit rangeutil ruleaction subflow blockaction merge double \ transform coreaction condexe override dynamic crc32 prettyprint \ printlanguage printc printjava memstate opbehavior paramid $(COREEXT_NAMES) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/block.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/block.cc index 985e3e97db..b57fda99dd 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/block.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/block.cc @@ -3375,7 +3375,7 @@ const Datatype *BlockSwitch::getSwitchType(void) const { PcodeOp *op = jump->getIndirectOp(); - return op->getIn(0)->getHigh()->getType(); + return op->getIn(0)->getHighTypeReadFacing(op); } void BlockSwitch::markUnstructured(void) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/cast.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/cast.cc index 5a199686d8..daf4bdf883 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/cast.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/cast.cc @@ -58,10 +58,10 @@ bool CastStrategyC::checkIntPromotionForExtension(const PcodeOp *op) const return true; // Otherwise we need a cast before we extend } -int4 CastStrategyC::localExtensionType(const Varnode *vn) const +int4 CastStrategyC::localExtensionType(const Varnode *vn,const PcodeOp *op) const { - type_metatype meta = vn->getHigh()->getType()->getMetatype(); + type_metatype meta = vn->getHighTypeReadFacing(op)->getMetatype(); int4 natural; // 1= natural zero extension, 2= natural sign extension if ((meta == TYPE_UINT)||(meta == TYPE_BOOL)||(meta == TYPE_UNKNOWN)) natural = 
UNSIGNED_EXTENSION; @@ -78,14 +78,14 @@ int4 CastStrategyC::localExtensionType(const Varnode *vn) const return natural; if (!vn->isWritten()) return UNKNOWN_PROMOTION; - const PcodeOp *op = vn->getDef(); - if (op->isBoolOutput()) + const PcodeOp *defOp = vn->getDef(); + if (defOp->isBoolOutput()) return EITHER_EXTENSION; - OpCode opc = op->code(); - if ((opc == CPUI_CAST)||(opc == CPUI_LOAD)||op->isCall()) + OpCode opc = defOp->code(); + if ((opc == CPUI_CAST)||(opc == CPUI_LOAD)||defOp->isCall()) return natural; if (opc == CPUI_INT_AND) { // This is kind of recursing - const Varnode *tmpvn = op->getIn(1); + const Varnode *tmpvn = defOp->getIn(1); if (tmpvn->isConstant()) { if (!signbit_negative(tmpvn->getOffset(),tmpvn->getSize())) return EITHER_EXTENSION; @@ -102,7 +102,7 @@ int4 CastStrategyC::intPromotionType(const Varnode *vn) const if (vn->getSize() >= promoteSize) return NO_PROMOTION; if (vn->isConstant()) - return localExtensionType(vn); + return localExtensionType(vn,vn->loneDescend()); if (vn->isExplicit()) return NO_PROMOTION; if (!vn->isWritten()) return UNKNOWN_PROMOTION; @@ -111,21 +111,21 @@ int4 CastStrategyC::intPromotionType(const Varnode *vn) const switch(op->code()) { case CPUI_INT_AND: othervn = op->getIn(1); - if ((localExtensionType(othervn) & UNSIGNED_EXTENSION) != 0) + if ((localExtensionType(othervn,op) & UNSIGNED_EXTENSION) != 0) return UNSIGNED_EXTENSION; othervn = op->getIn(0); - if ((localExtensionType(othervn) & UNSIGNED_EXTENSION) != 0) + if ((localExtensionType(othervn,op) & UNSIGNED_EXTENSION) != 0) return UNSIGNED_EXTENSION; // If either side has zero extension, result has zero extension break; case CPUI_INT_RIGHT: othervn = op->getIn(0); - val = localExtensionType(othervn); + val = localExtensionType(othervn,op); if ((val & UNSIGNED_EXTENSION) != 0) // If the input provably zero extends return val; // then the result is a zero extension (plus possibly a sign extension) break; case CPUI_INT_SRIGHT: othervn = op->getIn(0); - val = 
localExtensionType(othervn); + val = localExtensionType(othervn,op); if ((val & SIGNED_EXTENSION) != 0) // If input can be construed as a sign-extension return val; // then the result is a sign extension (plus possibly a zero extension) break; @@ -134,25 +134,25 @@ int4 CastStrategyC::intPromotionType(const Varnode *vn) const case CPUI_INT_DIV: case CPUI_INT_REM: othervn = op->getIn(0); - if ((localExtensionType(othervn) & UNSIGNED_EXTENSION) == 0) + if ((localExtensionType(othervn,op) & UNSIGNED_EXTENSION) == 0) return UNKNOWN_PROMOTION; othervn = op->getIn(1); - if ((localExtensionType(othervn) & UNSIGNED_EXTENSION) == 0) + if ((localExtensionType(othervn,op) & UNSIGNED_EXTENSION) == 0) return UNKNOWN_PROMOTION; return UNSIGNED_EXTENSION; // If both sides have zero extension, result has zero extension case CPUI_INT_SDIV: case CPUI_INT_SREM: othervn = op->getIn(0); - if ((localExtensionType(othervn) & SIGNED_EXTENSION) == 0) + if ((localExtensionType(othervn,op) & SIGNED_EXTENSION) == 0) return UNKNOWN_PROMOTION; othervn = op->getIn(1); - if ((localExtensionType(othervn) & SIGNED_EXTENSION) == 0) + if ((localExtensionType(othervn,op) & SIGNED_EXTENSION) == 0) return UNKNOWN_PROMOTION; return SIGNED_EXTENSION; // If both sides have sign extension, result has sign extension case CPUI_INT_NEGATE: case CPUI_INT_2COMP: othervn = op->getIn(0); - if ((localExtensionType(othervn) & SIGNED_EXTENSION) != 0) + if ((localExtensionType(othervn,op) & SIGNED_EXTENSION) != 0) return SIGNED_EXTENSION; break; case CPUI_INT_ADD: @@ -176,7 +176,7 @@ bool CastStrategyC::isExtensionCastImplied(const PcodeOp *op,const PcodeOp *read else { if (readOp == (PcodeOp *) 0) return false; - type_metatype metatype = outVn->getHigh()->getType()->getMetatype(); + type_metatype metatype = outVn->getHighTypeReadFacing(readOp)->getMetatype(); const Varnode *otherVn; int4 slot; switch (readOp->code()) { @@ -206,7 +206,7 @@ bool CastStrategyC::isExtensionCastImplied(const PcodeOp *op,const PcodeOp 
*read } else if (!otherVn->isExplicit()) return false; - if (otherVn->getHigh()->getType()->getMetatype() != metatype) + if (otherVn->getHighTypeReadFacing(readOp)->getMetatype() != metatype) return false; break; default: @@ -298,13 +298,13 @@ Datatype *CastStrategyC::castStandard(Datatype *reqtype,Datatype *curtype, Datatype *CastStrategyC::arithmeticOutputStandard(const PcodeOp *op) { - Datatype *res1 = op->getIn(0)->getHigh()->getType(); + Datatype *res1 = op->getIn(0)->getHighTypeReadFacing(op); if (res1->getMetatype() == TYPE_BOOL) // Treat boolean as if it is cast to an integer res1 = tlst->getBase(res1->getSize(),TYPE_INT); Datatype *res2; for(int4 i=1;i<op->numInput();++i) { - res2 = op->getIn(i)->getHigh()->getType(); + res2 = op->getIn(i)->getHighTypeReadFacing(op); if (res2->getMetatype() == TYPE_BOOL) continue; if (0>res2->typeOrder(*res1)) res1 = res2; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/cast.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/cast.hh index e6a8e25062..c4b1c2d3c7 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/cast.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/cast.hh @@ -61,8 +61,9 @@ public: /// \brief Decide on integer promotion by examining just local properties of the given Varnode /// /// \param vn is the given Varnode + /// \param op is the PcodeOp reading the Varnode /// \return an IntPromotionCode (excluding NO_PROMOTION) - virtual int4 localExtensionType(const Varnode *vn) const=0; + virtual int4 localExtensionType(const Varnode *vn,const PcodeOp *op) const=0; /// \brief Calculate the integer promotion code of a given Varnode /// @@ -157,7 +158,7 @@ public: /// \brief Casting strategies that are specific to the C language class CastStrategyC : public CastStrategy { public: - virtual int4 localExtensionType(const Varnode *vn) const; + virtual int4 localExtensionType(const Varnode *vn,const PcodeOp *op) const; virtual int4 intPromotionType(const Varnode *vn) const; virtual bool 
checkIntPromotionForCompare(const PcodeOp *op,int4 slot) const; virtual bool checkIntPromotionForExtension(const PcodeOp *op) const; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/consolemain.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/consolemain.cc index 576dccbe74..23932bd539 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/consolemain.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/consolemain.cc @@ -212,11 +212,17 @@ int main(int argc,char **argv) status->registerCom(new IfcRestore(),"restore"); if (initscript != (const char *)0) { - status->pushScript(initscript,"init> "); - status->setErrorIsDone(true); + try { + status->setErrorIsDone(true); + status->pushScript(initscript,"init> "); + } catch(IfaceParseError &err) { + *status->optr << err.explain << endl; + status->done = true; + } } - mainloop(status); + if (!status->done) + mainloop(status); int4 retval = status->isInError() ? 1 : 0; #ifdef CPUI_STATISTICS diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc index ae0b1c3a3c..2c2ef1c936 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc @@ -1029,7 +1029,7 @@ SymbolEntry *ActionConstantPtr::isPointer(AddrSpace *spc,Varnode *vn,PcodeOp *op bool needexacthit; Architecture *glb = data.getArch(); Varnode *outvn; - if (vn->getType()->getMetatype() == TYPE_PTR) { // Are we explicitly marked as a pointer + if (vn->getTypeReadFacing(op)->getMetatype() == TYPE_PTR) { // Are we explicitly marked as a pointer rampoint = glb->resolveConstant(spc,vn->getOffset(),vn->getSize(),op->getAddr(),fullEncoding); needexacthit = false; } @@ -1057,9 +1057,9 @@ SymbolEntry *ActionConstantPtr::isPointer(AddrSpace *spc,Varnode *vn,PcodeOp *op break; case CPUI_INT_ADD: outvn = op->getOut(); - if (outvn->getType()->getMetatype()==TYPE_PTR) { + if 
(outvn->getTypeDefFacing()->getMetatype()==TYPE_PTR) { // Is there another pointer base in this expression - if (op->getIn(1-slot)->getType()->getMetatype()==TYPE_PTR) + if (op->getIn(1-slot)->getTypeReadFacing(op)->getMetatype()==TYPE_PTR) return (SymbolEntry *)0; // If so, we are not a pointer // FIXME: need to fully explore additive tree needexacthit = false; @@ -1197,7 +1197,7 @@ int4 ActionDeindirect::apply(Funcdata &data) } if (data.isTypeRecoveryOn()) { // Check for a function pointer that has an attached prototype - Datatype *ct = op->getIn(0)->getType(); + Datatype *ct = op->getIn(0)->getTypeReadFacing(op); if ((ct->getMetatype()==TYPE_PTR)&& (((TypePointer *)ct)->getPtrTo()->getMetatype()==TYPE_CODE)) { TypeCode *tc = (TypeCode *)((TypePointer *)ct)->getPtrTo(); @@ -2173,14 +2173,15 @@ int4 ActionDefaultParams::apply(Funcdata &data) /// the data-type as a pointer to the structure's first field will get it to match the /// desired data-type. /// \param vn is the given Varnode +/// \param op is the PcodeOp reading the Varnode /// \param ct is the desired data-type /// \param castStrategy is used to determine if the data-types are compatible /// \return \b true if a pointer to the first field makes sense -bool ActionSetCasts::testStructOffset0(Varnode *vn,Datatype *ct,CastStrategy *castStrategy) +bool ActionSetCasts::testStructOffset0(Varnode *vn,PcodeOp *op,Datatype *ct,CastStrategy *castStrategy) { if (ct->getMetatype() != TYPE_PTR) return false; - Datatype *highType = vn->getHigh()->getType(); + Datatype *highType = vn->getHighTypeReadFacing(op); if (highType->getMetatype() != TYPE_PTR) return false; Datatype *highPtrTo = ((TypePointer *)highType)->getPtrTo(); if (highPtrTo->getMetatype() != TYPE_STRUCT) return false; @@ -2197,6 +2198,52 @@ bool ActionSetCasts::testStructOffset0(Varnode *vn,Datatype *ct,CastStrategy *ca return (castStrategy->castStandard(reqtype, curtype, true, true) == (Datatype *)0); } +/// \brief Try to adjust the input and output 
Varnodes to eliminate a CAST +/// +/// If input/output data-types are different, it may be due to late merges. For +/// unions, the CAST can sometimes be eliminated by adjusting the data-type resolutions +/// of the Varnodes relative to the PcodeOp +/// \param op is the PcodeOp reading the input Varnode and writing the output Varnode +/// \param slot is the index of the input Varnode +/// \param data is the function +/// \return \b true if an adjustment is made so that a CAST is no longer needed +bool ActionSetCasts::tryResolutionAdjustment(PcodeOp *op,int4 slot,Funcdata &data) + +{ + Varnode *outvn = op->getOut(); + if (outvn == (Varnode *)0) + return false; + Datatype *outType = outvn->getHigh()->getType(); + Datatype *inType = op->getIn(slot)->getHigh()->getType(); + if (!inType->needsResolution() && !outType->needsResolution()) return false; + int4 inResolve = -1; + int4 outResolve = -1; + if (inType->needsResolution()) { + inResolve = inType->findCompatibleResolve(outType); + if (inResolve < 0) return false; + } + if (outType->needsResolution()) { + if (inResolve >= 0) + outResolve = outType->findCompatibleResolve(inType->getDepend(inResolve)); + else + outResolve = outType->findCompatibleResolve(inType); + if (outResolve < 0) return false; + } + + TypeFactory *typegrp = data.getArch()->types; + if (inType->needsResolution()) { + ResolvedUnion resolve(inType,inResolve,*typegrp); + if (!data.setUnionField(inType, op, slot, resolve)) + return false; + } + if (outType->needsResolution()) { + ResolvedUnion resolve(outType,outResolve,*typegrp); + if (!data.setUnionField(outType, op, -1, resolve)) + return false; + } + return true; +} + /// \brief Test if two data-types are operation identical /// /// If, at a source code level, a variable with data-type \b ct1 can be @@ -2219,6 +2266,42 @@ bool ActionSetCasts::isOpIdentical(Datatype *ct1,Datatype *ct2) return (ct1 == ct2); } +/// \brief If the given op reads a pointer to a union, insert the CPUI_PTRSUB that 
resolves the union +/// +/// \param op is the given PcodeOp +/// \param slot is index of the input slot being read +/// \param data is the containing function +/// \return 1 if a PTRSUB is inserted, 0 otherwise +int4 ActionSetCasts::resolveUnion(PcodeOp *op,int4 slot,Funcdata &data) + +{ + Varnode *vn = op->getIn(slot); + if (vn->isAnnotation()) return 0; + Datatype *dt = vn->getHigh()->getType(); + if (!dt->needsResolution()) + return 0; + const ResolvedUnion *resUnion = data.getUnionField(dt, op,slot); + if (resUnion != (ResolvedUnion*)0 && resUnion->getFieldNum() >= 0) { + // Insert specific placeholder indicating which field is accessed + if (dt->getMetatype() == TYPE_PTR) { + PcodeOp *ptrsub = insertPtrsubZero(op,slot,resUnion->getDatatype(),data); + data.setUnionField(dt, ptrsub,-1,*resUnion); // Attach the resolution to the PTRSUB + } + else if (vn->isImplied()) { + if (vn->isWritten()) { + // If the writefacing and readfacing resolutions for vn (an implied variable) are the same, + // the resolutions are unnecessary and we treat the vn as if it had the field data-type + const ResolvedUnion *writeRes = data.getUnionField(dt, vn->getDef(), -1); + if (writeRes != (const ResolvedUnion *)0 && writeRes->getFieldNum() == resUnion->getFieldNum()) + return 0; // Don't print implied fields for vn + } + vn->setImpliedField(); + } + return 1; + } + return 0; +} + /// \brief Insert cast to output Varnode type after given PcodeOp if it is necessary /// /// \param op is the given PcodeOp @@ -2231,12 +2314,23 @@ int4 ActionSetCasts::castOutput(PcodeOp *op,Funcdata &data,CastStrategy *castStr Datatype *outct,*ct,*tokenct; Varnode *vn,*outvn; PcodeOp *newop; - HighVariable *outHigh; + Datatype *outHighType; bool force=false; tokenct = op->getOpcode()->getOutputToken(op,castStrategy); outvn = op->getOut(); - outHigh = outvn->getHigh(); + outHighType = outvn->getHigh()->getType(); + if (tokenct == outHighType) { + if (tokenct->needsResolution()) { + // operation copies 
directly to outvn AS a union + ResolvedUnion resolve(tokenct); + data.setUnionField(tokenct, op, -1, resolve); + } + // Short circuit more sophisticated casting tests. If they are the same type, there is no cast + return 0; + } + if (outHighType->needsResolution()) + outHighType = outHighType->findResolve(op, -1); // Finish fetching DefFacing data-type if (outvn->isImplied()) { // implied varnode must have parse type if (outvn->isTypeLock()) { @@ -2244,26 +2338,30 @@ int4 ActionSetCasts::castOutput(PcodeOp *op,Funcdata &data,CastStrategy *castStr // The Varnode input to a CPUI_RETURN is marked as implied but // casting should act as if it were explicit if (outOp == (PcodeOp *)0 || outOp->code() != CPUI_RETURN) { - force = !isOpIdentical(outHigh->getType(), tokenct); + force = !isOpIdentical(outHighType, tokenct); } } - else if (outHigh->getType()->getMetatype() != TYPE_PTR) // If implied varnode has an atomic (non-pointer) type + else if (outHighType->getMetatype() != TYPE_PTR) { // If implied varnode has an atomic (non-pointer) type outvn->updateType(tokenct,false,false); // Ignore it in favor of the token type + outHighType = outvn->getHighTypeDefFacing(); + } else if (tokenct->getMetatype() == TYPE_PTR) { // If the token is a pointer AND implied varnode is pointer - outct = ((TypePointer *)outHigh->getType())->getPtrTo(); + outct = ((TypePointer *)outHighType)->getPtrTo(); type_metatype meta = outct->getMetatype(); // Preserve implied pointer if it points to a composite - if ((meta!=TYPE_ARRAY)&&(meta!=TYPE_STRUCT)) + if ((meta!=TYPE_ARRAY)&&(meta!=TYPE_STRUCT)&&(meta!=TYPE_UNION)) { outvn->updateType(tokenct,false,false); // Otherwise ignore it in favor of the token type + outHighType = outvn->getHighTypeDefFacing(); + } } } if (!force) { - outct = outHigh->getType(); // Type of result + outct = outHighType; // Type of result ct = castStrategy->castStandard(outct,tokenct,false,true); if (ct == (Datatype *)0) return 0; } // Generate the cast op - vn = 
data.newUnique(op->getOut()->getSize()); + vn = data.newUnique(outvn->getSize()); vn->updateType(tokenct,false,false); vn->setImplied(); newop = data.newOp(1,op->getAddr()); @@ -2271,13 +2369,44 @@ int4 ActionSetCasts::castOutput(PcodeOp *op,Funcdata &data,CastStrategy *castStr data.getArch()->stats->countCast(); #endif data.opSetOpcode(newop,CPUI_CAST); - data.opSetOutput(newop,op->getOut()); + data.opSetOutput(newop,outvn); data.opSetInput(newop,vn,0); data.opSetOutput(op,vn); data.opInsertAfter(newop,op); // Cast comes AFTER this operation + if (outHighType->needsResolution()) + data.forceFacingType(outHighType, -1, newop, -1); + return 1; } +/// \brief Insert a PTRSUB with offset 0 that accesses a field of the given data-type +/// +/// The data-type can be a structure, in which case the field at offset zero is being accessed. +/// The data-type can reference a union, in which case a specific field is being accessed +/// as indicated by Funcdata::getUnionField. The PTRSUB is inserted right before the given +/// PcodeOp. The indicated input Varnode becomes the PTRSUB input, and the PTRSUB output +/// replaces the Varnode in the PcodeOp. 
+/// \param op is the given PcodeOp where the PTRSUB is inserted +/// \param slot is the slot corresponding to the indicated Varnode +/// \param ct is the data-type produced by the PTRSUB +/// \param data is containing Function +/// \return the new PTRSUB op +PcodeOp *ActionSetCasts::insertPtrsubZero(PcodeOp *op,int4 slot,Datatype *ct,Funcdata &data) + +{ + Varnode *vn = op->getIn(slot); + PcodeOp *newop = data.newOp(2,op->getAddr()); + Varnode *vnout = data.newUniqueOut(vn->getSize(), newop); + vnout->updateType(ct,false,false); + vnout->setImplied(); + data.opSetOpcode(newop, CPUI_PTRSUB); + data.opSetInput(newop,vn,0); + data.opSetInput(newop,data.newConstant(4, 0),1); + data.opSetInput(op,vnout,slot); + data.opInsertBefore(newop,op); + return newop; +} + /// \brief Insert cast to produce the input Varnode to a given PcodeOp if necessary /// /// This method can also mark a Varnode as an explicit integer constant. @@ -2315,17 +2444,12 @@ int4 ActionSetCasts::castInput(PcodeOp *op,int4 slot,Funcdata &data,CastStrategy if (vn->getType() == ct) return 1; } - else if (testStructOffset0(vn, ct, castStrategy)) { + else if (testStructOffset0(vn, op, ct, castStrategy)) { // Insert a PTRSUB(vn,#0) instead of a CAST - newop = data.newOp(2,op->getAddr()); - vnout = data.newUniqueOut(vn->getSize(), newop); - vnout->updateType(ct,false,false); - vnout->setImplied(); - data.opSetOpcode(newop, CPUI_PTRSUB); - data.opSetInput(newop,vn,0); - data.opSetInput(newop,data.newConstant(4, 0),1); - data.opSetInput(op,vnout,slot); - data.opInsertBefore(newop,op); + insertPtrsubZero(op, slot, ct, data); + return 1; + } + else if (tryResolutionAdjustment(op, slot, data)) { return 1; } newop = data.newOp(1,op->getAddr()); @@ -2339,6 +2463,12 @@ int4 ActionSetCasts::castInput(PcodeOp *op,int4 slot,Funcdata &data,CastStrategy data.opSetInput(newop,vn,0); data.opSetInput(op,vnout,slot); data.opInsertBefore(newop,op); // Cast comes AFTER operation + if (ct->needsResolution()) { + 
data.forceFacingType(ct, -1, newop, -1); + } + if (vn->getHigh()->getType()->needsResolution()) { + data.inheritReadResolution(newop, 0, op, slot); + } return 1; } @@ -2362,12 +2492,12 @@ int4 ActionSetCasts::apply(Funcdata &data) if (opc == CPUI_CAST) continue; if (opc == CPUI_PTRADD) { // Check for PTRADD that no longer fits its pointer int4 sz = (int4)op->getIn(2)->getOffset(); - TypePointer *ct = (TypePointer *)op->getIn(0)->getHigh()->getType(); + TypePointer *ct = (TypePointer *)op->getIn(0)->getHighTypeReadFacing(op); if ((ct->getMetatype() != TYPE_PTR)||(ct->getPtrTo()->getSize() != AddrSpace::addressToByteInt(sz, ct->getWordSize()))) data.opUndoPtradd(op,true); } else if (opc == CPUI_PTRSUB) { // Check for PTRSUB that no longer fits pointer - if (!op->getIn(0)->getHigh()->getType()->isPtrsubMatching(op->getIn(1)->getOffset())) { + if (!op->getIn(0)->getHighTypeReadFacing(op)->isPtrsubMatching(op->getIn(1)->getOffset())) { if (op->getIn(1)->getOffset() == 0) { data.opRemoveInput(op, 1); data.opSetOpcode(op, CPUI_COPY); @@ -2376,17 +2506,20 @@ int4 ActionSetCasts::apply(Funcdata &data) data.opSetOpcode(op, CPUI_INT_ADD); } } - for(int4 i=0;i<op->numInput();++i) // Do input casts first, as output may depend on input + // Do input casts first, as output may depend on input + for(int4 i=0;i<op->numInput();++i) { + count += resolveUnion(op, i, data); count += castInput(op,i,data,castStrategy); + } if (opc == CPUI_LOAD) { - TypePointer *ptrtype = (TypePointer *)op->getIn(1)->getHigh()->getType(); + TypePointer *ptrtype = (TypePointer *)op->getIn(1)->getHighTypeReadFacing(op); int4 valsize = op->getOut()->getSize(); if ((ptrtype->getMetatype()!=TYPE_PTR)|| (ptrtype->getPtrTo()->getSize() != valsize)) data.warning("Load size is inaccurate",op->getAddr()); } else if (opc == CPUI_STORE) { - TypePointer *ptrtype = (TypePointer *)op->getIn(1)->getHigh()->getType(); + TypePointer *ptrtype = (TypePointer *)op->getIn(1)->getHighTypeReadFacing(op); int4 valsize = 
op->getIn(2)->getSize(); if ((ptrtype->getMetatype()!=TYPE_PTR)|| (ptrtype->getPtrTo()->getSize() != valsize)) @@ -4240,7 +4373,13 @@ void ActionInferTypes::buildLocaltypes(Funcdata &data) if (vn->isAnnotation()) continue; if ((!vn->isWritten())&&(vn->hasNoDescend())) continue; bool needsBlock = false; - ct = vn->getLocalType(needsBlock); + if (vn->getSymbolEntry() != (SymbolEntry *)0) { + ct = data.checkSymbolType(vn); + if (ct == (Datatype *)0) + ct = vn->getLocalType(needsBlock); + } + else + ct = vn->getLocalType(needsBlock); if (needsBlock) vn->setStopUpPropagation(); #ifdef TYPEPROP_DEBUG @@ -4273,200 +4412,6 @@ bool ActionInferTypes::writeBack(Funcdata &data) return change; } -/// Determine if the given data-type edge looks like a pointer -/// propagating through an "add a constant" operation. We assume the input -/// to the edge has a pointer data-type. This routine returns one the commands: -/// - 0 indicates this is "add a constant" adding a zero (PTRSUB or PTRADD) -/// - 1 indicates this is "add a constant" and the constant is passed back -/// - 2 indicating the pointer does not propagate through -/// - 3 the input data-type propagates through untransformed -/// -/// \param off passes back the constant offset if the command is '0' or '1' -/// \param op is the PcodeOp propagating the data-type -/// \param slot is the input edge being propagated -/// \param sz is the size of the data-type being pointed to -/// \return a command indicating how the op should be treated -int4 ActionInferTypes::propagateAddPointer(uintb &off,PcodeOp *op,int4 slot,int4 sz) - -{ - if (op->code() == CPUI_PTRADD) { - if (slot != 0) return 2; - Varnode *constvn = op->getIn(1); - uintb mult = op->getIn(2)->getOffset(); - if (constvn->isConstant()) { - off = (constvn->getOffset() * mult) & calc_mask(constvn->getSize()) ; - return (off == 0) ? 
0 : 1; - } - if (sz != 0 && (mult % sz) != 0) - return 2; - return 3; - } - if (op->code() == CPUI_PTRSUB) { - if (slot != 0) return 2; - off = op->getIn(1)->getOffset(); - return (off == 0) ? 0 : 1; - } - if (op->code() == CPUI_INT_ADD) { - Varnode *othervn = op->getIn(1-slot); - // Check if othervn is an offset - if (!othervn->isConstant()) { - if (othervn->isWritten()) { - PcodeOp *multop = othervn->getDef(); - if (multop->code() == CPUI_INT_MULT) { - Varnode *constvn = multop->getIn(1); - if (constvn->isConstant()) { - uintb mult = constvn->getOffset(); - if (mult == calc_mask(constvn->getSize())) // If multiplying by -1 - return 2; // Assume this is a pointer difference and don't propagate - if (sz != 0 && (mult % sz) !=0) - return 2; - } - return 3; - } - } - if (sz == 1) - return 3; - return 2; - } - if (othervn->getTempType()->getMetatype() == TYPE_PTR) // Check if othervn marked as ptr - return 2; - off = othervn->getOffset(); - return (off == 0) ? 0 : 1; - } - return 2; -} - -/// \brief Propagate a pointer data-type through an ADD operation. -/// -/// Assuming a pointer data-type from an ADD PcodeOp propagates from an input to -/// its output, calculate the transformed data-type of the output Varnode, which -/// will depend on details of the operation. If the edge doesn't make sense as -/// "an ADD to a pointer", prevent the propagation by returning the output Varnode's -/// current data-type. 
-/// \param typegrp is the TypeFactory for constructing the transformed Datatype -/// \param op is the ADD operation -/// \param inslot is the edge to propagate along -/// \return the transformed Datatype or the original output Datatype -Datatype *ActionInferTypes::propagateAddIn2Out(TypeFactory *typegrp,PcodeOp *op,int4 inslot) - -{ - TypePointer *pointer = (TypePointer *)op->getIn(inslot)->getTempType(); // We know this is a pointer type - uintb uoffset; - int4 command = propagateAddPointer(uoffset,op,inslot,pointer->getPtrTo()->getSize()); - if (command == 2) return op->getOut()->getTempType(); // Doesn't look like a good pointer add - TypePointer *parent = (TypePointer *)0; - uintb parentOff; - if (command != 3) { - uoffset = AddrSpace::addressToByte(uoffset,pointer->getWordSize()); - bool allowWrap = (op->code() != CPUI_PTRSUB); - do { - pointer = pointer->downChain(uoffset,parent,parentOff,allowWrap,*typegrp); - if (pointer == (TypePointer *)0) - break; - } while(uoffset != 0); - } - if (parent != (TypePointer *)0) { - // If the innermost containing object is a TYPE_STRUCT or TYPE_ARRAY - // preserve info about this container - Datatype *pt; - if (pointer == (TypePointer *)0) - pt = typegrp->getBase(1,TYPE_UNKNOWN); // Offset does not point at a proper sub-type - else - pt = pointer->getPtrTo(); // The sub-type being directly pointed at - pointer = typegrp->getTypePointerRel(parent, pt, parentOff); - } - if (pointer == (TypePointer *)0) { - if (command == 0) - return op->getIn(inslot)->getTempType(); - return op->getOut()->getTempType(); - } - if (op->getIn(inslot)->isSpacebase()) { - if (pointer->getPtrTo()->getMetatype() == TYPE_SPACEBASE) - pointer = typegrp->getTypePointer(pointer->getSize(),typegrp->getBase(1,TYPE_UNKNOWN),pointer->getWordSize()); - } - return pointer; -} - -/// \brief Determine if propagation should happen along the given edge -/// -/// This enforces a series of rules about how a data-type can propagate -/// between the input and output 
Varnodes of a single PcodeOp. An input to the -/// edge may either an input or output to the PcodeOp. A \e slot value of -1 -/// indicates the PcodeOp output, a non-negative value indicates a PcodeOp input index. -/// \param op is the PcodeOp to test propagation through -/// \param inslot indicates the edge's input Varnode -/// \param outslot indicates the edge's output Varnode -/// \param invn is the input Varnode -/// \return \b false if edge cannot propagate type -bool ActionInferTypes::propagateGoodEdge(PcodeOp *op,int4 inslot,int4 outslot,Varnode *invn) - -{ - if (inslot == outslot) return false; // don't backtrack - type_metatype metain = invn->getTempType()->getMetatype(); - switch(op->code()) { - case CPUI_NEW: - if ((inslot != 0)||(outslot != -1)) return false; - break; - case CPUI_INDIRECT: - if (op->isIndirectCreation()) return false; - if ((inslot==1)||(outslot==1)) return false; - if ((inslot!=-1)&&(outslot!=-1)) return false; // Must propagate input <-> output - break; - case CPUI_COPY: - if ((inslot!=-1)&&(outslot!=-1)) return false; // Must propagate input <-> output - break; - case CPUI_MULTIEQUAL: - if ((inslot!=-1)&&(outslot!=-1)) return false; // Must propagate input <-> output - break; - case CPUI_INT_SLESS: - case CPUI_INT_SLESSEQUAL: - case CPUI_INT_LESS: - case CPUI_INT_LESSEQUAL: - if ((inslot==-1)||(outslot==-1)) return false; // Must propagate input <-> input - break; - case CPUI_INT_EQUAL: - case CPUI_INT_NOTEQUAL: - if ((inslot==-1)||(outslot==-1)) return false; // Must propagate input <-> input - break; - case CPUI_LOAD: - case CPUI_STORE: - if ((inslot==0)||(outslot==0)) return false; // Don't propagate along this edge - if (invn->isSpacebase()) return false; - break; - case CPUI_PTRADD: - if ((inslot==2)||(outslot==2)) return false; // Don't propagate along this edge - case CPUI_PTRSUB: - if ((inslot!=-1)&&(outslot!=-1)) return false; // Must propagate input <-> output - if (metain != TYPE_PTR) return false; - break; - case 
CPUI_INT_ADD: - if (metain != TYPE_PTR) { - if ((metain == TYPE_INT)||(metain == TYPE_UINT)) { - if ((outslot==1) && (op->getIn(1)->isConstant())) - return true; - } - return false; - } - if ((inslot!=-1)&&(outslot!=-1)) return false; // Must propagate input <-> output - break; - case CPUI_SEGMENTOP: - // Must propagate slot2 <-> output - if ((inslot==0)||(inslot==1)) return false; - if ((outslot==0)||(outslot==1)) return false; - if (invn->isSpacebase()) return false; - if (metain != TYPE_PTR) return false; - break; - case CPUI_INT_AND: - case CPUI_INT_OR: - case CPUI_INT_XOR: - if (!invn->getTempType()->isPowerOfTwo()) return false; // Only propagate flag enums - break; - default: - return false; - } - return true; -} - /// \brief Attempt to propagate a data-type across a single PcodeOp edge /// /// Given an \e input Varnode and an \e output Varnode defining a directed edge @@ -4483,126 +4428,31 @@ bool ActionInferTypes::propagateTypeEdge(TypeFactory *typegrp,PcodeOp *op,int4 i { Varnode *invn,*outvn; - Datatype *newtype; + if (inslot == outslot) return false; // don't backtrack if (outslot < 0) outvn = op->getOut(); else { outvn = op->getIn(outslot); - if (outvn->stopsUpPropagation()) - return false; + if (outvn->isAnnotation()) return false; } - if (outvn->isAnnotation()) return false; - if (outvn->isTypeLock()) return false; // Can't propagate through typelock invn = (inslot==-1) ? 
op->getOut() : op->getIn(inslot); - if (!propagateGoodEdge(op,inslot,outslot,invn)) - return false; - Datatype *alttype = invn->getTempType(); + if (alttype->needsResolution()) { + alttype = alttype->resolveInFlow(op, inslot); + } + if (outvn->isTypeLock()) return false; // Can't propagate through typelock + if (outvn->stopsUpPropagation() && outslot >= 0) return false; // Propagation is blocked + if (alttype->getMetatype() == TYPE_BOOL) { // Only propagate boolean if (outvn->getNZMask() > 1) // If we know output can only take boolean values return false; } - switch(op->code()) { - case CPUI_INT_LESS: - case CPUI_INT_LESSEQUAL: - case CPUI_INT_EQUAL: - case CPUI_INT_NOTEQUAL: - if (invn->isSpacebase()) { - AddrSpace *spc = typegrp->getArch()->getDefaultDataSpace(); - newtype = typegrp->getTypePointer(alttype->getSize(),typegrp->getBase(1,TYPE_UNKNOWN),spc->getWordSize()); - } - else if (alttype->isPointerRel() && !outvn->isConstant()) { - TypePointerRel *relPtr = (TypePointerRel *)alttype; - if (relPtr->getParent()->getMetatype() == TYPE_STRUCT && relPtr->getPointerOffset() >= 0) { - // If we know the pointer is in the middle of a structure, don't propagate across comparison operators - // as the two sides of the operator are likely to be different types , and the other side can also - // get data-type information from the structure pointer - newtype = typegrp->getTypePointer(relPtr->getSize(),typegrp->getBase(1,TYPE_UNKNOWN),relPtr->getWordSize()); - } - else - newtype = alttype; - } - else - newtype = alttype; - break; - case CPUI_INDIRECT: - case CPUI_COPY: - case CPUI_MULTIEQUAL: - case CPUI_INT_AND: - case CPUI_INT_OR: - case CPUI_INT_XOR: - if (invn->isSpacebase()) { - AddrSpace *spc = typegrp->getArch()->getDefaultDataSpace(); - newtype = typegrp->getTypePointer(alttype->getSize(),typegrp->getBase(1,TYPE_UNKNOWN),spc->getWordSize()); - } - else - newtype = alttype; - break; - case CPUI_INT_SLESS: - case CPUI_INT_SLESSEQUAL: - if (alttype->getMetatype() != 
TYPE_INT) return false; // Only propagate signed things - newtype = alttype; - break; - case CPUI_NEW: - { - Varnode *vn0 = op->getIn(0); - if (!vn0->isWritten()) return false; // Don't propagate - if (vn0->getDef()->code() != CPUI_CPOOLREF) return false; - newtype = alttype; // Propagate cpool result as result of new operator - } - break; - case CPUI_SEGMENTOP: - { - AddrSpace *spc = typegrp->getArch()->getDefaultDataSpace(); - Datatype *btype = ((TypePointer *)alttype)->getPtrTo(); - newtype = typegrp->getTypePointer(outvn->getSize(),btype,spc->getWordSize()); - } - break; - case CPUI_LOAD: - if (inslot == -1) { // Propagating output to input (value to ptr) - AddrSpace *spc = Address::getSpaceFromConst(op->getIn(0)->getAddr()); - newtype = typegrp->getTypePointerNoDepth(outvn->getTempType()->getSize(),alttype,spc->getWordSize()); - } - else if (alttype->getMetatype()==TYPE_PTR) { - newtype = ((TypePointer *)alttype)->getPtrTo(); - if (newtype->getSize() != outvn->getTempType()->getSize() || newtype->isVariableLength()) // Size must be appropriate - newtype = outvn->getTempType(); - } - else - newtype = outvn->getTempType(); // Don't propagate anything - break; - case CPUI_STORE: - if (inslot==2) { // Propagating value to ptr - AddrSpace *spc = Address::getSpaceFromConst(op->getIn(0)->getAddr()); - newtype = typegrp->getTypePointerNoDepth(outvn->getTempType()->getSize(),alttype,spc->getWordSize()); - } - else if (alttype->getMetatype()==TYPE_PTR) { - newtype = ((TypePointer *)alttype)->getPtrTo(); - if (newtype->getSize() != outvn->getTempType()->getSize() || newtype->isVariableLength()) - newtype = outvn->getTempType(); - } - else - newtype = outvn->getTempType(); // Don't propagate anything - break; - case CPUI_PTRADD: - case CPUI_PTRSUB: - if (inslot == -1) // Propagating output to input - newtype = op->getIn(outslot)->getTempType(); // Don't propagate pointer types this direction - else - newtype = propagateAddIn2Out(typegrp,op,inslot); - break; - case 
CPUI_INT_ADD: - if (outvn->isConstant() && (alttype->getMetatype() != TYPE_PTR)) - newtype = alttype; - else if (inslot == -1) // Propagating output to input - newtype = op->getIn(outslot)->getTempType(); // Don't propagate pointer types this direction - else - newtype = propagateAddIn2Out(typegrp,op,inslot); - break; - default: - return false; // Don't propagate along this edge - } + + Datatype *newtype = op->getOpcode()->propagateType(alttype, op, invn, outvn, inslot, outslot); + if (newtype == (Datatype *)0) + return false; + if (0>newtype->typeOrder(*outvn->getTempType())) { #ifdef TYPEPROP_DEBUG propagationDebug(typegrp->getArch(),outvn,newtype,op,inslot,(Varnode *)0); @@ -4939,11 +4789,11 @@ void TermOrder::collect(void) for(int4 i=0;inumInput();++i) { curvn = curop->getIn(i); // curvn is a node of the subtree IF if (!curvn->isWritten()) { // curvn is not defined by another operation - terms.push_back(PcodeOpEdge(curop,i,multop)); + terms.push_back(AdditiveEdge(curop,i,multop)); continue; } if (curvn->loneDescend() == (PcodeOp *)0) { // curvn has more then one use - terms.push_back(PcodeOpEdge(curop,i,multop)); + terms.push_back(AdditiveEdge(curop,i,multop)); continue; } subop = curvn->getDef(); @@ -4958,7 +4808,7 @@ void TermOrder::collect(void) } } } - terms.push_back(PcodeOpEdge(curop,i,multop)); + terms.push_back(AdditiveEdge(curop,i,multop)); continue; } opstack.push_back(subop); @@ -4970,7 +4820,7 @@ void TermOrder::collect(void) void TermOrder::sortTerms(void) { - for(vector::iterator iter=terms.begin();iter!=terms.end();++iter) + for(vector::iterator iter=terms.begin();iter!=terms.end();++iter) sorter.push_back( &(*iter) ); sort(sorter.begin(),sorter.end(),additiveCompare); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.hh index 6cd43c576e..047992912f 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.hh +++ 
b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.hh @@ -310,10 +310,13 @@ public: /// input. In this case, it casts to the necessary pointer type /// immediately. class ActionSetCasts : public Action { - static bool testStructOffset0(Varnode *vn,Datatype *ct,CastStrategy *castStrategy); + static bool testStructOffset0(Varnode *vn,PcodeOp *op,Datatype *ct,CastStrategy *castStrategy); + static bool tryResolutionAdjustment(PcodeOp *op,int4 slot,Funcdata &data); static bool isOpIdentical(Datatype *ct1,Datatype *ct2); + static int4 resolveUnion(PcodeOp *op,int4 slot,Funcdata &data); static int4 castOutput(PcodeOp *op,Funcdata &data,CastStrategy *castStrategy); static int4 castInput(PcodeOp *op,int4 slot,Funcdata &data,CastStrategy *castStrategy); + static PcodeOp *insertPtrsubZero(PcodeOp *op,int4 slot,Datatype *ct,Funcdata &data); public: ActionSetCasts(const string &g) : Action(rule_onceperfunc,"setcasts",g) {} ///< Constructor virtual Action *clone(const ActionGroupList &grouplist) const { @@ -926,9 +929,6 @@ class ActionInferTypes : public Action { int4 localcount; ///< Number of passes performed for this function static void buildLocaltypes(Funcdata &data); ///< Assign initial data-type based on local info static bool writeBack(Funcdata &data); ///< Commit the final propagated data-types to Varnodes - static int4 propagateAddPointer(uintb &off,PcodeOp *op,int4 slot,int4 sz); ///< Test if edge is pointer plus a constant - static Datatype *propagateAddIn2Out(TypeFactory *typegrp,PcodeOp *op,int4 inslot); - static bool propagateGoodEdge(PcodeOp *op,int4 inslot,int4 outslot,Varnode *invn); static bool propagateTypeEdge(TypeFactory *typegrp,PcodeOp *op,int4 inslot,int4 outslot); static void propagateOneType(TypeFactory *typegrp,Varnode *vn); static void propagateRef(Funcdata &data,Varnode *vn,const Address &addr); @@ -1034,13 +1034,13 @@ public: }; /// Class representing a \e term in an additive expression -class PcodeOpEdge { +class AdditiveEdge { PcodeOp *op; 
///< Lone descendant reading the term int4 slot; ///< The input slot of the term Varnode *vn; ///< The term Varnode PcodeOp *mult; ///< The (optional) multiplier being applied to the term public: - PcodeOpEdge(PcodeOp *o,int4 s,PcodeOp *m) { op = o; slot = s; vn = op->getIn(slot); mult=m; } ///< Constructor + AdditiveEdge(PcodeOp *o,int4 s,PcodeOp *m) { op = o; slot = s; vn = op->getIn(slot); mult=m; } ///< Constructor PcodeOp *getMultiplier(void) const { return mult; } ///< Get the multiplier PcodeOp PcodeOp *getOp(void) const { return op; } ///< Get the component PcodeOp adding in the term int4 getSlot(void) const { return slot; } ///< Get the slot reading the term @@ -1054,15 +1054,15 @@ public: /// sorting of the terms to facilitate constant collapse and factoring simplifications. class TermOrder { PcodeOp *root; ///< The final PcodeOp in the expression - vector terms; ///< Collected terms - vector sorter; ///< An array of references to terms for quick sorting - static bool additiveCompare(const PcodeOpEdge *op1,const PcodeOpEdge *op2); + vector terms; ///< Collected terms + vector sorter; ///< An array of references to terms for quick sorting + static bool additiveCompare(const AdditiveEdge *op1,const AdditiveEdge *op2); public: TermOrder(PcodeOp *rt) { root = rt; } ///< Construct given root PcodeOp int4 getSize(void) const { return terms.size(); } ///< Get the number of terms in the expression void collect(void); ///< Collect all the terms in the expression void sortTerms(void); ///< Sort the terms using additiveCompare() - const vector &getSort(void) { return sorter; } ///< Get the sorted list of references + const vector &getSort(void) { return sorter; } ///< Get the sorted list of references }; /// \brief A comparison operator for ordering terms in a sum @@ -1072,7 +1072,7 @@ public: /// \param op1 is the first term to compare /// \param op2 is the second term /// \return \b true if the first term is less than the second -inline bool 
TermOrder::additiveCompare(const PcodeOpEdge *op1,const PcodeOpEdge *op2) { +inline bool TermOrder::additiveCompare(const AdditiveEdge *op1,const AdditiveEdge *op2) { return (-1 == op1->getVarnode()->termOrder(op2->getVarnode())); } #endif diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc index c54843332d..498d4aef1e 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc @@ -127,20 +127,18 @@ bool SymbolEntry::updateType(Varnode *vn) const Datatype *SymbolEntry::getSizedType(const Address &inaddr,int4 sz) const { - Datatype *last,*cur; uintb off; if (isDynamic()) off = offset; else off = (inaddr.getOffset() - addr.getOffset()) + offset; - cur = symbol->getType(); + Datatype *cur = symbol->getType(); do { - last = cur; + if (offset == 0 && cur->getSize() == sz) + return cur; cur = cur->getSubType(off,&off); } while(cur != (Datatype *)0); - if (last->getSize() == sz) - return last; // else { // This case occurs if the varnode is a "partial type" of some sort // This PROBABLY means the varnode shouldn't be considered addrtied @@ -400,7 +398,7 @@ void Symbol::restoreXmlHeader(const Element *el) { name.clear(); - category = -1; + category = no_category; symbolId = 0; for(int4 i=0;igetNumAttributes();++i) { const string &attName(el->getAttributeName(i)); @@ -488,7 +486,7 @@ void Symbol::restoreXmlHeader(const Element *el) break; } } - if (category == 0) { + if (category == function_parameter) { istringstream s2(el->getAttributeValue("index")); s2.unsetf(ios::dec | ios::hex | ios::oct); s2 >> catindex; @@ -649,7 +647,7 @@ EquateSymbol::EquateSymbol(Scope *sc,const string &nm,uint4 format,uintb val) : Symbol(sc, nm, (Datatype *)0) { value = val; - category = 1; + category = equate; type = sc->getArch()->types->getBase(1,TYPE_UNKNOWN); dispflags |= format; } @@ -705,6 +703,49 @@ void EquateSymbol::restoreXml(const 
Element *el) type = types->getBase(1,TYPE_UNKNOWN); } +/// Create a symbol that forces a particular field of a union to propagate +/// +/// \param sc is the scope owning the new symbol +/// \param nm is the name of the symbol +/// \param unionDt is the union data-type being forced +/// \param fldNum is the particular field to force (-1 indicates the whole union) +UnionFacetSymbol::UnionFacetSymbol(Scope *sc,const string &nm,Datatype *unionDt,int4 fldNum) + : Symbol(sc, nm, unionDt) +{ + fieldNum = fldNum; + category = union_facet; +} + +void UnionFacetSymbol::saveXml(ostream &s) const + +{ + s << "\n"; + saveXmlBody(s); + s << "\n"; +} + +void UnionFacetSymbol::restoreXml(const Element *el) + +{ + restoreXmlHeader(el); + istringstream s(el->getAttributeValue("field")); + s.unsetf(ios::dec | ios::hex | ios::oct); + s >> fieldNum; + const List &list(el->getChildren()); + + restoreXmlBody(list.begin()); + Datatype *testType = type; + if (testType->getMetatype() == TYPE_PTR) + testType = ((TypePointer *)testType)->getPtrTo(); + if (testType->getMetatype() != TYPE_UNION) + throw LowlevelError(" does not have a union type"); + if (fieldNum < -1 || fieldNum >= testType->numDepend()) + throw LowlevelError(" field attribute is out of bounds"); +} + /// Label symbols don't really have a data-type, so we just put /// a size 1 placeholder. 
void LabSymbol::buildType(void) @@ -1556,6 +1597,8 @@ Symbol *Scope::addMapSym(const Element *el) sym = new LabSymbol(owner); else if (symname == "externrefsymbol") sym = new ExternRefSymbol(owner); + else if (symname == "facetsymbol") + sym = new UnionFacetSymbol(owner); else throw LowlevelError("Unknown symbol type: "+symname); try { // Protect against duplicate scope errors @@ -1711,9 +1754,9 @@ string Scope::buildDefaultName(Symbol *sym,int4 &base,Varnode *vn) const if (!vn->isAddrTied() && fd != (Funcdata *)0) usepoint = vn->getUsePoint(*fd); HighVariable *high = vn->getHigh(); - if (sym->getCategory() == 0 || high->isInput()) { + if (sym->getCategory() == Symbol::function_parameter || high->isInput()) { int4 index = -1; - if (sym->getCategory()==0) + if (sym->getCategory()==Symbol::function_parameter) index = sym->getCategoryIndex()+1; return buildVariableName(vn->getAddr(),usepoint,sym->getType(),index,vn->getFlags() | Varnode::input); } @@ -1724,7 +1767,7 @@ string Scope::buildDefaultName(Symbol *sym,int4 &base,Varnode *vn) const Address addr = entry->getAddr(); Address usepoint = entry->getFirstUseAddress(); uint4 flags = usepoint.isInvalid() ? 
Varnode::addrtied : 0; - if (sym->getCategory() == 0) { // If this is a parameter + if (sym->getCategory() == Symbol::function_parameter) { flags |= Varnode::input; int4 index = sym->getCategoryIndex() + 1; return buildVariableName(addr, usepoint, sym->getType(), index, flags); @@ -1958,7 +2001,7 @@ void ScopeInternal::categorySanity(void) for(int4 j=0;jisSizeTypeLocked()) resetSizeLockType(sym); } - else if (sym->getCategory() == 1) { + else if (sym->getCategory() == Symbol::equate) { // Note we treat EquateSymbols as locked for purposes of this method // as a typelock (which traditionally prevents a symbol from being cleared) // does not make sense for an equate @@ -2579,8 +2622,11 @@ void ScopeInternal::saveXml(ostream &s) const int4 symbolType = 0; if (!sym->mapentry.empty()) { const SymbolEntry &entry( *sym->mapentry.front() ); - if (entry.isDynamic()) - symbolType = (sym->getCategory() == 1) ? 2 : 1; + if (entry.isDynamic()) { + if (sym->getCategory() == Symbol::union_facet) + continue; // Don't save override + symbolType = (sym->getCategory() == Symbol::equate) ? 2 : 1; + } } s << " EntryMap; ///< A rangemap of SymbolEntry /// At its most basic, a Symbol is a \b name and a \b data-type. /// Practically a Symbol knows what Scope its in, how it should be /// displayed, and the symbols \e category. A category is a subset -/// of symbols that are stored together for quick access. The -/// \b category field can be: -/// - -1 for no category -/// - 0 indicates a function parameter -/// - 1 indicates an equate symbol +/// of symbols that are stored together for quick access. class Symbol { friend class Scope; friend class ScopeInternal; @@ -162,7 +158,7 @@ protected: // only typelock,namelock,readonly,externref // addrtied, persist inherited from scope uint4 dispflags; ///< Flags affecting the display of this symbol - int2 category; ///< Special category (-1==none 0=parameter 1=equate) + int2 category; ///< Special category (\b function_parameter, \b equate, etc.) 
uint2 catindex; ///< Index within category uint8 symbolId; ///< Unique id, 0=unassigned vector::iterator> mapentry; ///< List of storage locations labeled with \b this Symbol @@ -187,6 +183,14 @@ public: is_this_ptr = 64 ///< We are the "this" symbol for a class method }; + /// \brief The possible specialize Symbol \e categories + enum { + no_category = -1, ///< Symbol is not in a special category + function_parameter = 0, ///< The Symbol is a parameter to a function + equate = 1, ///< The Symbol holds \e equate information about a constant + union_facet = 2 ///< Symbol holding read or write facing union field information + }; + Symbol(Scope *sc,const string &nm,Datatype *ct); ///< Construct given a name and data-type Symbol(Scope *sc); ///< Construct for use with restoreXml() const string &getName(void) const { return name; } ///< Get the local name of the symbol @@ -271,13 +275,23 @@ class EquateSymbol : public Symbol { uintb value; ///< Value of the constant being equated public: EquateSymbol(Scope *sc,const string &nm,uint4 format,uintb val); ///< Constructor - EquateSymbol(Scope *sc) : Symbol(sc) { value = 0; category = 1; } ///< Constructor for use with restoreXml + EquateSymbol(Scope *sc) : Symbol(sc) { value = 0; category = equate; } ///< Constructor for use with restoreXml uintb getValue(void) const { return value; } ///< Get the constant value bool isValueClose(uintb op2Value,int4 size) const; ///< Is the given value similar to \b this equate virtual void saveXml(ostream &s) const; virtual void restoreXml(const Element *el); }; +class UnionFacetSymbol : public Symbol { + int4 fieldNum; ///< Particular field to associate with Symbol access +public: + UnionFacetSymbol(Scope *sc,const string &nm,Datatype *unionDt,int4 fldNum); ///< Constructor from components + UnionFacetSymbol(Scope *sc) : Symbol(sc) { fieldNum = -1; category = union_facet; } ///< Constructor for restoreXml + int4 getFieldNumber(void) const { return fieldNum; } ///< Get the particular field 
associate with \b this + virtual void saveXml(ostream &s) const; + virtual void restoreXml(const Element *el); +}; + /// \brief A Symbol that labels code internal to a function class LabSymbol : public Symbol { void buildType(void); ///< Build placeholder data-type @@ -894,7 +908,7 @@ inline Symbol::Symbol(Scope *sc,const string &nm,Datatype *ct) type=ct; flags=0; dispflags=0; - category=-1; + category=no_category; catindex = 0; symbolId=0; wholeCount=0; @@ -911,7 +925,7 @@ inline Symbol::Symbol(Scope *sc) type = (Datatype *)0; flags=0; dispflags=0; - category=-1; + category=no_category; catindex = 0; symbolId = 0; wholeCount=0; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/dynamic.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/dynamic.cc index d503de1955..86492e63ce 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/dynamic.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/dynamic.cc @@ -196,6 +196,38 @@ void DynamicHash::clear(void) opedge.clear(); } +void DynamicHash::calcHash(const PcodeOp *op,int4 slot,uint4 method) + +{ + vnproc = 0; + opproc = 0; + opedgeproc = 0; + + const Varnode *root = (slot < 0) ? op->getOut() : op->getIn(slot); + opedge.push_back(ToOpEdge(op,slot)); + switch(method) { + case 4: + break; + case 5: + gatherUnmarkedOp(); + for(;opproc < markop.size();++opproc) { + buildOpUp(markop[opproc]); + } + gatherUnmarkedVn(); + break; + case 6: + gatherUnmarkedOp(); + for(;opproc < markop.size();++opproc) { + buildOpDown(markop[opproc]); + } + gatherUnmarkedVn(); + break; + default: + break; + } + pieceTogetherHash(root,method); +} + /// A sub-graph is formed extending from the given Varnode as the root. The /// method specifies how the sub-graph is extended. 
In particular: /// - Method 0 is extends to just immediate p-code ops reading or writing root @@ -254,7 +286,17 @@ void DynamicHash::calcHash(const Varnode *root,uint4 method) default: break; } - + pieceTogetherHash(root,method); +} + +/// Assume all the elements of the hash have been calculated. Calculate the internal 32-bit hash +/// based on these elements. Construct the 64-bit hash by piecing together the 32-bit hash +/// together with the core opcode, slot, and method. +/// \param root is the Varnode to extract root characteristics from +/// \param method is the method used to compute the hash elements +void DynamicHash::pieceTogetherHash(const Varnode *root,uint4 method) + +{ for(uint4 i=0;iclearMark(); for(uint4 i=0;i oplist; + vector oplist2; + vector champion; + uint4 method; + uint8 tmphash; + Address tmpaddr; + uint4 maxduplicates = 8; + + gatherOpsAtAddress(oplist,fd,op->getAddr()); + for(method=4;method<7;++method) { + clear(); + calcHash(op,slot,method); + if (hash == 0) return; // Can't get a good hash + tmphash = hash; + tmpaddr = addrresult; + oplist.clear(); + oplist2.clear(); + for(uint4 i=0;i= tmpop->numInput()) continue; + clear(); + calcHash(tmpop,slot,method); + if (hash == tmphash) { // Hash collision + oplist2.push_back(tmpop); + if (oplist2.size()>maxduplicates) + break; + } + } + if (oplist2.size() <= maxduplicates) { + if ((champion.size()==0)||(oplist2.size() < champion.size())) { + champion = oplist2; + if (champion.size()==1) + break; // Current hash is unique + } + } + } + if (champion.empty()) { + hash = (uint8)0; + addrresult = Address(); // Couldn't find a unique hash + return; + } + uint4 total = (uint4)champion.size() - 1; // total is in range [0,maxduplicates-1] + uint4 pos; + for(pos=0;pos<=total;++pos) + if (champion[pos] == op) + break; + if (pos > total) { + hash = (uint8)0; + addrresult = Address(); + return; + } + hash = tmphash | ((uint8)pos << 49); // Store three bits for position with list of duplicate hashes + hash |= 
((uint8)total << 52); // Store three bits for total number of duplicate hashes + addrresult = tmpaddr; +} + /// \brief Given an address and hash, find the unique matching Varnode /// /// The method, number of collisions, and position are pulled out of the hash. @@ -414,6 +515,41 @@ Varnode *DynamicHash::findVarnode(const Funcdata *fd,const Address &addr,uint8 h return vnlist2[pos]; } +/// \brief Given an address and hash, find the unique matching PcodeOp +/// +/// The method, slot, number of collisions, and position are pulled out of the hash. +/// Hashes for the method are performed at PcodeOps linked to the given address, +/// and the PcodeOp which matches the hash (and the position) is returned. +/// If the number of collisions for the hash does not match, this method +/// will not return a PcodeOp, even if the position looks valid. +/// \param fd is the function containing the data-flow +/// \param addr is the given address +/// \param h is the hash +/// \return the matching PcodeOp or NULL +PcodeOp *DynamicHash::findOp(const Funcdata *fd,const Address &addr,uint8 h) + +{ + int method = getMethodFromHash(h); + int slot = getSlotFromHash(h); + int total = getTotalFromHash(h); + int pos = getPositionFromHash(h); + clearTotalPosition(h); + vector oplist; + vector oplist2; + gatherOpsAtAddress(oplist,fd,addr); + for(uint4 i=0;i= tmpop->numInput()) continue; + clear(); + calcHash(tmpop,slot,method); + if (hash == h) + oplist2.push_back(tmpop); + } + if (total != oplist2.size()) + return (PcodeOp *)0; + return oplist2[pos]; +} + /// \brief Get the Varnodes immediately attached to PcodeOps at the given address /// /// Varnodes can be either inputs or outputs to the PcodeOps. 
The op-code, slot, and @@ -463,6 +599,22 @@ void DynamicHash::gatherFirstLevelVars(vector &varlist,const Funcdata } } +/// \brief Place all PcodeOps at the given address in the provided container +/// +/// \param opList is the container to hold the PcodeOps +/// \param fd is the function +/// \param addr is the given address +void DynamicHash::gatherOpsAtAddress(vector &opList,const Funcdata *fd,const Address &addr) + +{ + PcodeOpTree::const_iterator iter,enditer; + enditer = fd->endOp(addr); + for(iter = fd->beginOp(addr); iter != enditer; ++iter) { + PcodeOp *op = (*iter).second; + opList.push_back(op); + } +} + /// The hash encodes the input \e slot the root Varnode was attached to in its PcodeOp. /// \param h is the hash value /// \return the slot index or -1 if the Varnode was attached as output diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/dynamic.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/dynamic.hh index d84d0e017f..2607b603c4 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/dynamic.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/dynamic.hh @@ -75,15 +75,20 @@ class DynamicHash { void buildOpDown(const PcodeOp *op); ///< Move the output Varnode for the given PcodeOp into staging void gatherUnmarkedVn(void); ///< Move staged Varnodes into the sub-graph and mark them void gatherUnmarkedOp(void); ///< Mark any new PcodeOps in the sub-graph + void pieceTogetherHash(const Varnode *root,uint4 method); ///< Clean-up and piece together formal hash value public: void clear(void); ///< Called for each additional hash (after the first) void calcHash(const Varnode *root,uint4 method); ///< Calculate the hash for given Varnode and method + void calcHash(const PcodeOp *op,int4 slot,uint4 method); ///< Calculate hash for given PcodeOp, slot, and method void uniqueHash(const Varnode *root,Funcdata *fd); ///< Select a unique hash for the given Varnode + void uniqueHash(const PcodeOp *op,int4 slot,Funcdata *fd); ///< Select unique hash for 
given PcodeOp and slot Varnode *findVarnode(const Funcdata *fd,const Address &addr,uint8 h); + PcodeOp *findOp(const Funcdata *fd,const Address &addr,uint8 h); uint8 getHash(void) const { return hash; } ///< Get the (current) hash const Address &getAddress(void) const { return addrresult; } ///< Get the (current) address static void gatherFirstLevelVars(vector &varlist,const Funcdata *fd,const Address &addr,uint8 h); + static void gatherOpsAtAddress(vector &opList,const Funcdata *fd,const Address &addr); static int4 getSlotFromHash(uint8 h); ///< Retrieve the encoded slot from a hash static uint4 getMethodFromHash(uint8 h); ///< Retrieve the encoded method from a hash static OpCode getOpCodeFromHash(uint8 h); ///< Retrieve the encoded op-code from a hash diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.cc index 5756f1e590..31d14c9a2a 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.cc @@ -2700,7 +2700,7 @@ ProtoParameter *ProtoStoreSymbol::setInput(int4 i, const string &nm,const Parame { ParameterSymbol *res = getSymbolBacked(i); - res->sym = scope->getCategorySymbol(0,i); + res->sym = scope->getCategorySymbol(Symbol::function_parameter,i); SymbolEntry *entry; Address usepoint; @@ -2717,7 +2717,7 @@ ProtoParameter *ProtoStoreSymbol::setInput(int4 i, const string &nm,const Parame if (scope->discoverScope(pieces.addr,pieces.type->getSize(),usepoint) == (Scope *)0) usepoint = restricted_usepoint; res->sym = scope->addSymbol(nm,pieces.type,pieces.addr,usepoint)->getSymbol(); - scope->setCategory(res->sym,0,i); + scope->setCategory(res->sym,Symbol::function_parameter,i); if (isindirect || ishidden) { uint4 mirror = 0; if (isindirect) @@ -2750,17 +2750,17 @@ ProtoParameter *ProtoStoreSymbol::setInput(int4 i, const string &nm,const Parame void ProtoStoreSymbol::clearInput(int4 i) { - Symbol *sym = scope->getCategorySymbol(0,i); 
+ Symbol *sym = scope->getCategorySymbol(Symbol::function_parameter,i); if (sym != (Symbol *)0) { - scope->setCategory(sym,-1,0); // Remove it from category list + scope->setCategory(sym,Symbol::no_category,0); // Remove it from category list scope->removeSymbol(sym); // Remove it altogether } // Renumber any category 0 symbol with index greater than i - int4 sz = scope->getCategorySize(0); + int4 sz = scope->getCategorySize(Symbol::function_parameter); for(int4 j=i+1;jgetCategorySymbol(0,j); + sym = scope->getCategorySymbol(Symbol::function_parameter,j); if (sym != (Symbol *)0) - scope->setCategory(sym,0,j-1); + scope->setCategory(sym,Symbol::function_parameter,j-1); } } @@ -2773,13 +2773,13 @@ void ProtoStoreSymbol::clearAllInputs(void) int4 ProtoStoreSymbol::getNumInputs(void) const { - return scope->getCategorySize(0); + return scope->getCategorySize(Symbol::function_parameter); } ProtoParameter *ProtoStoreSymbol::getInput(int4 i) { - Symbol *sym = scope->getCategorySymbol(0,i); + Symbol *sym = scope->getCategorySymbol(Symbol::function_parameter,i); if (sym == (Symbol *)0) return (ProtoParameter *)0; ParameterSymbol *res = getSymbolBacked(i); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.cc index e5c361eaff..fd2f814f68 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.cc @@ -85,6 +85,7 @@ void Funcdata::clear(void) clearActiveOutput(); funcp.clearUnlockedOutput(); // Inputs are cleared by localmap + unionMap.clear(); clearBlocks(); obank.clear(); vbank.clear(); @@ -852,6 +853,105 @@ void PcodeEmitFd::dump(const Address &addr,OpCode opc,VarnodeData *outvar,Varnod } } +/// \brief Get the resolved union field associated with the given edge +/// +/// If there is no field associated with the edge, null is returned +/// \param parent is the data-type being resolved +/// \param op is the PcodeOp component of 
the given edge +/// \param slot is the slot component of the given edge +/// \return the associated field as a ResolvedUnion or null +const ResolvedUnion *Funcdata::getUnionField(const Datatype *parent,const PcodeOp *op,int4 slot) const + +{ + map::const_iterator iter; + ResolveEdge edge(parent,op,slot); + iter = unionMap.find(edge); + if (iter != unionMap.end()) + return &(*iter).second; + return (const ResolvedUnion *)0; +} + +/// \brief Associate a union field with the given edge +/// +/// If there was a previous association, it is overwritten unless it was \e locked. +/// The method returns \b true except in this case where a previous locked association exists. +/// \param parent is the parent union data-type +/// \param op is the PcodeOp component of the given edge +/// \param slot is the slot component of the given edge +/// \param resolve is the resolved union +/// \return \b true unless there was a locked association +bool Funcdata::setUnionField(const Datatype *parent,const PcodeOp *op,int4 slot,const ResolvedUnion &resolve) + +{ + ResolveEdge edge(parent,op,slot); + pair::iterator,bool> res; + res = unionMap.emplace(edge,resolve); + if (!res.second) { + if ((*res.first).second.isLocked()) { + return false; + } + (*res.first).second = resolve; + } + return true; +} + +/// \brief Force a specific union field resolution for the given edge +/// +/// The \b parent data-type is taken directly from the given Varnode. 
+/// \param parent is the parent data-type +/// \param fieldNum is the index of the field to force +/// \param op is PcodeOp of the edge +/// \param slot is -1 for the write edge or >=0 indicating the particular read edge +void Funcdata::forceFacingType(Datatype *parent,int4 fieldNum,PcodeOp *op,int4 slot) + +{ + Datatype *baseType = parent; + if (baseType->getMetatype() == TYPE_PTR) + baseType = ((TypePointer *)baseType)->getPtrTo(); + if (parent->isPointerRel()) { + // Don't associate a relative pointer with the resolution, but convert to a standard pointer + parent = glb->types->getTypePointer(parent->getSize(), baseType, ((TypePointer *)parent)->getWordSize()); + } + ResolvedUnion resolve(parent,fieldNum,*glb->types); + setUnionField(parent, op, slot, resolve); +} + +/// \brief Copy a Varnode's read facing resolve to another PcodeOp +/// +/// \param op is the new PcodeOp reading the Varnode +/// \param slot is the new read slot +/// \param oldOp is the PcodeOp to inherit the resolve from +/// \param oldSlot is the old read slot +void Funcdata::inheritReadResolution(const PcodeOp *op,int4 slot,PcodeOp *oldOp,int4 oldSlot) + +{ + Datatype *ct = op->getIn(slot)->getType(); + if (!ct->needsResolution()) return; + map::const_iterator iter; + ResolveEdge edge(ct,oldOp,oldSlot); + iter = unionMap.find(edge); + if (iter == unionMap.end()) return; + setUnionField(ct,op,slot,(*iter).second); +} + +/// \brief Copy any write facing for a specific data-type from one PcodeOp to another +/// +/// \param parent is the data-type that needs resolution +/// \param op is the destination PcodeOp +/// \param oldOp is the source PcodeOp +/// \return the resolution index that was copied or -1 if there was no resolution +int4 Funcdata::inheritWriteResolution(Datatype *parent,const PcodeOp *op,PcodeOp *oldOp) + +{ + map::const_iterator iter; + ResolveEdge edge(parent,oldOp,-1); + iter = unionMap.find(edge); + if (iter == unionMap.end()) + return -1; + 
setUnionField(parent,op,-1,(*iter).second); + return (*iter).second.getFieldNum(); +} + #ifdef OPACTION_DEBUG /// The current state of the op is recorded for later comparison after diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh index 0df1dd9195..7e4af8b173 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh @@ -24,6 +24,7 @@ #include "heritage.hh" #include "merge.hh" #include "dynamic.hh" +#include "unionresolve.hh" class FlowInfo; @@ -88,6 +89,7 @@ class Funcdata { ParamActive *activeoutput; ///< Data for assessing which parameters are passed to \b this function Override localoverride; ///< Overrides of data-flow, prototypes, etc. that are local to \b this function map lanedMap; ///< Current storage locations which may be laned registers + map unionMap; ///< A map from data-flow edges to the resolved field of TypeUnion being accessed // Low level Varnode functions void setVarnodeProperties(Varnode *vn) const; ///< Look-up boolean properties and data-type information @@ -100,6 +102,7 @@ class Funcdata { Varnode *cloneVarnode(const Varnode *vn); ///< Clone a Varnode (between copies of the function) void destroyVarnode(Varnode *vn); ///< Delete the given Varnode from \b this function void coverVarnodes(SymbolEntry *entry,vector &list); + bool applyUnionFacet(SymbolEntry *entry,DynamicHash &dhash); // Low level op functions void opZeroMulti(PcodeOp *op); ///< Transform trivial CPUI_MULTIEQUAL to CPUI_COPY // Low level block functions @@ -373,6 +376,7 @@ public: bool onlyOpUse(const Varnode *invn,const PcodeOp *opmatch,const ParamTrial &trial,uint4 mainFlags) const; bool ancestorOpUse(int4 maxlevel,const Varnode *invn,const PcodeOp *op,ParamTrial &trial,uint4 mainFlags) const; bool syncVarnodesWithSymbols(const ScopeLocal *lm,bool typesyes); + Datatype *checkSymbolType(Varnode *vn); ///< Check for any delayed 
symbol data-type information on the given Varnode void transferVarnodeProperties(Varnode *vn,Varnode *newVn,int4 lsbOffset); bool fillinReadOnly(Varnode *vn); ///< Replace the given Varnode with its (constant) value in the load image bool replaceVolatile(Varnode *vn); ///< Replace accesses of the given Varnode with \e volatile operations @@ -489,6 +493,12 @@ public: bool moveRespectingCover(PcodeOp *op,PcodeOp *lastOp); ///< Move given op past \e lastOp respecting covers if possible + const ResolvedUnion *getUnionField(const Datatype *parent,const PcodeOp *op,int4 slot) const; + bool setUnionField(const Datatype *parent,const PcodeOp *op,int4 slot,const ResolvedUnion &resolve); + void forceFacingType(Datatype *parent,int4 fieldNum,PcodeOp *op,int4 slot); + void inheritReadResolution(const PcodeOp *op,int4 slot,PcodeOp *oldOp,int4 oldSlot); + int4 inheritWriteResolution(Datatype *parent,const PcodeOp *op,PcodeOp *oldOp); + // Jumptable routines JumpTable *linkJumpTable(PcodeOp *op); ///< Link jump-table with a given BRANCHIND JumpTable *findJumpTable(const PcodeOp *op) const; ///< Find a jump-table associated with a given BRANCHIND diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc index 9b40fb1f24..d546e58b67 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc @@ -544,7 +544,7 @@ void Funcdata::opUndoPtradd(PcodeOp *op,bool finalize) newVal &= calc_mask(offVn->getSize()); Varnode *newOffVn = newConstant(offVn->getSize(), newVal); if (finalize) - newOffVn->updateType(offVn->getType(), false, false); + newOffVn->updateType(offVn->getTypeReadFacing(op), false, false); opSetInput(op,newOffVn,1); return; } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc index dad2994284..41fe8fed5e 100644 --- 
a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc @@ -828,13 +828,8 @@ bool Funcdata::syncVarnodesWithSymbols(const ScopeLocal *lm,bool typesyes) fl = entry->getAllFlags(); if (entry->getSize() >= vnexemplar->getSize()) { if (typesyes) { - uintb off = (vnexemplar->getOffset() - entry->getAddr().getOffset()) + entry->getOffset(); - Datatype *cur = entry->getSymbol()->getType(); - do { - ct = cur; - cur = cur->getSubType(off,&off); - } while(cur != (Datatype *)0); - if ((ct->getSize() != vnexemplar->getSize())||(ct->getMetatype() == TYPE_UNKNOWN)) + ct = entry->getSizedType(vnexemplar->getAddr(), vnexemplar->getSize()); + if (ct != (Datatype *)0 && ct->getMetatype() == TYPE_UNKNOWN) ct = (Datatype *)0; } } @@ -863,6 +858,52 @@ bool Funcdata::syncVarnodesWithSymbols(const ScopeLocal *lm,bool typesyes) return updateoccurred; } +/// If the Varnode is a partial Symbol with \e union data-type, the best description of the Varnode's +/// data-type is delayed until data-type propagation is started. 
+/// We attempt to resolve this description and also lay down any facing resolutions for the Varnode +/// \param vn is the given Varnode +/// \return the best data-type or null +Datatype *Funcdata::checkSymbolType(Varnode *vn) + +{ + if (vn->isTypeLock()) return vn->getType(); + SymbolEntry *entry = vn->getSymbolEntry(); + Symbol *sym = entry->getSymbol(); + if (sym->getType()->getMetatype() != TYPE_UNION) + return (Datatype *)0; + TypeUnion *unionType = (TypeUnion *)sym->getType(); + int4 off = (int4)(vn->getOffset() - entry->getAddr().getOffset()) + entry->getOffset(); + if (off == 0 && unionType->getSize() == vn->getSize()) + return (Datatype *)0; + const TypeField *finalField = (const TypeField *)0; + uintb finalOff = 0; + list::const_iterator iter; + for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) { + PcodeOp *op = *iter; + const TypeField *field = unionType->resolveTruncation(off, op, op->getSlot(vn),off); + if (field != (const TypeField *)0) { + finalField = field; + finalOff = off; + } + } + if (vn->isWritten()) { + const TypeField *field = unionType->resolveTruncation(off, vn->getDef(), -1, off); + if (field != (const TypeField *)0) { + finalField = field; + finalOff = off; + } + } + if (finalField != (const TypeField *)0) { // If any use of the Varnode resolves to a specific field + // Try to truncate down to a final data-type to assign to the Varnode + Datatype *ct = finalField->type; + while(ct != (Datatype *)0 && (finalOff != 0 || ct->getSize() != vn->getSize())) { + ct = ct->getSubType(finalOff, &finalOff); + } + return ct; + } + return (Datatype *)0; +} + /// A Varnode overlaps the given SymbolEntry. Make sure the Varnode is part of the variable /// underlying the Symbol. If not, remap things so that the Varnode maps to a distinct Symbol. 
/// In either case, attach the appropriate Symbol to the Varnode @@ -1175,10 +1216,14 @@ bool Funcdata::attemptDynamicMapping(SymbolEntry *entry,DynamicHash &dhash) if (sym->getScope() != localmap) throw LowlevelError("Cannot currently have a dynamic symbol outside the local scope"); dhash.clear(); + int4 category = sym->getCategory(); + if (category == Symbol::union_facet) { + return applyUnionFacet(entry, dhash); + } Varnode *vn = dhash.findVarnode(this,entry->getFirstUseAddress(),entry->getHash()); if (vn == (Varnode *)0) return false; - if (entry->getSymbol()->getCategory() == 1) { // Is this an equate symbol - if (vn->mapentry != entry) { // Check we haven't marked this before + if (category == Symbol::equate) { // Is this an equate symbol + if (vn->mapentry != entry) { // Check we haven't marked this before vn->setSymbolEntry(entry); return true; } @@ -1202,12 +1247,15 @@ bool Funcdata::attemptDynamicMappingLate(SymbolEntry *entry,DynamicHash &dhash) { dhash.clear(); + Symbol *sym = entry->getSymbol(); + if (sym->getCategory() == Symbol::union_facet) { + return applyUnionFacet(entry, dhash); + } Varnode *vn = dhash.findVarnode(this,entry->getFirstUseAddress(),entry->getHash()); if (vn == (Varnode *)0) return false; if (vn->getSymbolEntry() == entry) return false; // Already applied it - Symbol *sym = entry->getSymbol(); - if (sym->getCategory() == 1) { // Equate symbol does not depend on size + if (sym->getCategory() == Symbol::equate) { // Equate symbol does not depend on size vn->setSymbolEntry(entry); return true; } @@ -1406,6 +1454,28 @@ void Funcdata::coverVarnodes(SymbolEntry *entry,vector &list) } } +/// \brief Cache information from a UnionFacetSymbol +/// +/// The symbol forces a particular union field resolution for the associated PcodeOp and slot, +/// which are extracted from the given \e dynamic SymbolEntry. The resolution is cached +/// in the \b unionMap so that it will get picked up by resolveInFlow() methods etc. 
+/// \param entry is the given SymbolEntry +/// \param dhash is preallocated storage for calculating the dynamic hash +/// \return \b true if the UnionFacetSymbol is successfully cached +bool Funcdata::applyUnionFacet(SymbolEntry *entry,DynamicHash &dhash) + +{ + Symbol *sym = entry->getSymbol(); + PcodeOp *op = dhash.findOp(this, entry->getFirstUseAddress(), entry->getHash()); + if (op == (PcodeOp *)0) + return false; + int4 slot = DynamicHash::getSlotFromHash(entry->getHash()); + int4 fldNum = ((UnionFacetSymbol *)sym)->getFieldNumber(); + ResolvedUnion resolve(sym->getType(), fldNum, *glb->types); + resolve.setLock(true); + return setUnionField(sym->getType(),op,slot,resolve); +} + /// Search for \e addrtied Varnodes whose storage falls in the global Scope, then /// build a new global Symbol if one didn't exist before. void Funcdata::mapGlobals(void) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.cc index d726945484..08f6e9429c 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.cc @@ -2814,7 +2814,7 @@ Datatype *CParse::newStruct(const string &ident,vector *declis { // Build a new structure TypeStruct *res = glb->types->getTypeStruct(ident); // Create stub (for recursion) vector sublist; - + for(uint4 i=0;isize();++i) { TypeDeclarator *decl = (*declist)[i]; if (!decl->isValid()) { @@ -2822,12 +2822,10 @@ Datatype *CParse::newStruct(const string &ident,vector *declis glb->types->destroyType(res); return (Datatype *)0; } - sublist.push_back(TypeField()); - sublist.back().type = decl->buildType(glb); - sublist.back().name = decl->getIdentifier(); - sublist.back().offset = -1; // Let typegrp figure out offset + sublist.emplace_back(0,-1,decl->getIdentifier(),decl->buildType(glb)); } + TypeStruct::assignFieldOffsets(sublist,glb->types->getStructAlign()); if (!glb->types->setFields(sublist,res,-1,0)) { setError("Bad 
structure definition"); glb->types->destroyType(res); @@ -2848,15 +2846,34 @@ Datatype *CParse::oldStruct(const string &ident) Datatype *CParse::newUnion(const string &ident,vector *declist) { - setError("Unions are currently unsupported"); - return (Datatype *)0; + TypeUnion *res = glb->types->getTypeUnion(ident); // Create stub (for recursion) + vector sublist; + + for(uint4 i=0;isize();++i) { + TypeDeclarator *decl = (*declist)[i]; + if (!decl->isValid()) { + setError("Invalid union declarator"); + glb->types->destroyType(res); + return (Datatype *)0; + } + sublist.emplace_back(i,0,decl->getIdentifier(),decl->buildType(glb)); + } + + if (!glb->types->setFields(sublist,res,-1,0)) { + setError("Bad union definition"); + glb->types->destroyType(res); + return (Datatype *)0; + } + return res; } Datatype *CParse::oldUnion(const string &ident) { - setError("Unions are currently unsupported"); - return (Datatype *)0; + Datatype *res = glb->types->findByName(ident); + if ((res==(Datatype *)0)||(res->getMetatype() != TYPE_UNION)) + setError("Identifier does not represent a union as required"); + return res; } Enumerator *CParse::newEnumerator(const string &ident) @@ -3179,6 +3196,9 @@ void parse_C(Architecture *glb,istream &s) else if (decl->getBaseType()->getMetatype()==TYPE_STRUCT) { // We parsed a struct, treat as a typedef } + else if (decl->getBaseType()->getMetatype()==TYPE_UNION) { + // We parsed a union, treat as a typedef + } else if (decl->getBaseType()->isEnumType()) { // We parsed an enum, treat as a typedef } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y index 01f435b3a8..3de9192312 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y @@ -1029,7 +1029,7 @@ Datatype *CParse::newStruct(const string &ident,vector *declis { // Build a new structure TypeStruct *res = glb->types->getTypeStruct(ident); // Create 
stub (for recursion) vector sublist; - + for(uint4 i=0;isize();++i) { TypeDeclarator *decl = (*declist)[i]; if (!decl->isValid()) { @@ -1037,12 +1037,10 @@ Datatype *CParse::newStruct(const string &ident,vector *declis glb->types->destroyType(res); return (Datatype *)0; } - sublist.push_back(TypeField()); - sublist.back().type = decl->buildType(glb); - sublist.back().name = decl->getIdentifier(); - sublist.back().offset = -1; // Let typegrp figure out offset + sublist.emplace_back(0,-1,decl->getIdentifier(),decl->buildType(glb)); } + TypeStruct::assignFieldOffsets(sublist,glb->types->getStructAlign()); if (!glb->types->setFields(sublist,res,-1,0)) { setError("Bad structure definition"); glb->types->destroyType(res); @@ -1063,15 +1061,34 @@ Datatype *CParse::oldStruct(const string &ident) Datatype *CParse::newUnion(const string &ident,vector *declist) { - setError("Unions are currently unsupported"); - return (Datatype *)0; + TypeUnion *res = glb->types->getTypeUnion(ident); // Create stub (for recursion) + vector sublist; + + for(uint4 i=0;isize();++i) { + TypeDeclarator *decl = (*declist)[i]; + if (!decl->isValid()) { + setError("Invalid union declarator"); + glb->types->destroyType(res); + return (Datatype *)0; + } + sublist.emplace_back(i,0,decl->getIdentifier(),decl->buildType(glb)); + } + + if (!glb->types->setFields(sublist,res,-1,0)) { + setError("Bad union definition"); + glb->types->destroyType(res); + return (Datatype *)0; + } + return res; } Datatype *CParse::oldUnion(const string &ident) { - setError("Unions are currently unsupported"); - return (Datatype *)0; + Datatype *res = glb->types->findByName(ident); + if ((res==(Datatype *)0)||(res->getMetatype() != TYPE_UNION)) + setError("Identifier does not represent a union as required"); + return res; } Enumerator *CParse::newEnumerator(const string &ident) @@ -1394,6 +1411,9 @@ void parse_C(Architecture *glb,istream &s) else if (decl->getBaseType()->getMetatype()==TYPE_STRUCT) { // We parsed a struct, 
treat as a typedef } + else if (decl->getBaseType()->getMetatype()==TYPE_UNION) { + // We parsed a union, treat as a typedef + } else if (decl->getBaseType()->isEnumType()) { // We parsed an enum, treat as a typedef } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.cc index b10e490f8d..68553330a9 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.cc @@ -1266,7 +1266,7 @@ void IfcRename::execute(istream &s) else throw IfaceExecutionError("More than one symbol named: "+oldname); - if (sym->getCategory() == 0) + if (sym->getCategory() == Symbol::function_parameter) dcp->fd->getFuncProto().setInputLock(true); sym->getScope()->renameSymbol(sym,newname); sym->getScope()->setAttribute(sym,Varnode::namelock|Varnode::typelock); @@ -1330,7 +1330,7 @@ void IfcRetype::execute(istream &s) else sym = symList[0]; - if (sym->getCategory()==0) + if (sym->getCategory()==Symbol::function_parameter) dcp->fd->getFuncProto().setInputLock(true); sym->getScope()->retypeSymbol(sym,ct); sym->getScope()->setAttribute(sym,Varnode::typelock); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/interface.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/interface.cc index a95982ecb5..c404d9f125 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/interface.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/interface.cc @@ -144,7 +144,7 @@ void IfaceStatus::pushScript(const string &filename,const string &newprompt) { ifstream *s = new ifstream(filename.c_str()); if (!*s) - throw IfaceParseError("Unable to open script file"); + throw IfaceParseError("Unable to open script file: "+filename); pushScript(s,newprompt); } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/merge.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/merge.cc index 1f3b4cf33d..903a567fd4 100644 --- 
a/Ghidra/Features/Decompiler/src/decompile/cpp/merge.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/merge.cc @@ -344,18 +344,24 @@ void Merge::mergeByDatatype(VarnodeLocSet::const_iterator startiter,VarnodeLocSe /// A COPY is allocated with the given input and data-type. A \e unique space /// output is created. /// \param inVn is the given input Varnode for the new COPY -/// \param ct is the data-type to assign to the new unique output /// \param addr is the address associated with the new COPY /// \return the newly allocated COPY -PcodeOp *Merge::allocateCopyTrim(Varnode *inVn,Datatype *ct,const Address &addr) +PcodeOp *Merge::allocateCopyTrim(Varnode *inVn,const Address &addr) { PcodeOp *copyOp = data.newOp(1,addr); data.opSetOpcode(copyOp,CPUI_COPY); + Datatype *ct = inVn->getType(); Varnode *outVn = data.newUnique(inVn->getSize(),ct); data.opSetOutput(copyOp,outVn); data.opSetInput(copyOp,inVn,0); copyTrims.push_back(copyOp); + if (ct->needsResolution()) { // If the data-type needs resolution + if (inVn->isWritten()) { + int4 fieldNum = data.inheritWriteResolution(ct, copyOp, inVn->getDef()); + data.forceFacingType(ct, fieldNum, copyOp, 0); + } + } return copyOp; } @@ -374,7 +380,6 @@ void Merge::snipReads(Varnode *vn,list &markedop) PcodeOp *copyop,*op; BlockBasic *bl; Address pc; - int4 slot; PcodeOp *afterop; // Figure out where copy is inserted @@ -392,7 +397,7 @@ void Merge::snipReads(Varnode *vn,list &markedop) else afterop = vn->getDef(); } - copyop = allocateCopyTrim(vn, vn->getType(), pc); + copyop = allocateCopyTrim(vn, pc); if (afterop == (PcodeOp *)0) data.opInsertBegin(copyop,bl); else @@ -401,8 +406,7 @@ void Merge::snipReads(Varnode *vn,list &markedop) list::iterator iter; for(iter=markedop.begin();iter!=markedop.end();++iter) { op = *iter; - for(slot=0;slotnumInput();++slot) - if (op->getIn(slot)==vn) break; // Find the correct slot + int4 slot = op->getSlot(vn); data.opSetInput(op,copyop->getOut(),slot); } } @@ -561,7 +565,7 @@ void 
Merge::trimOpOutput(PcodeOp *op) else afterop = op; vn = op->getOut(); - uniq = data.newUnique(vn->getSize(),vn->getType()); + uniq = data.newUnique(vn->getSize(),vn->getTypeDefFacing()); copyop = data.newOp(1,op->getAddr()); data.opSetOutput(op,uniq); // Output of op is now stubby uniq data.opSetOpcode(copyop,CPUI_COPY); @@ -592,7 +596,7 @@ void Merge::trimOpInput(PcodeOp *op,int4 slot) else pc = op->getAddr(); vn = op->getIn(slot); - copyop = allocateCopyTrim(vn, vn->getType(), pc); + copyop = allocateCopyTrim(vn, pc); data.opSetInput(op,copyop->getOut(),slot); if (op->code() == CPUI_MULTIEQUAL) data.opInsertEnd(copyop,(BlockBasic *)op->getParent()->getIn(slot)); @@ -748,7 +752,7 @@ void Merge::snipIndirect(PcodeOp *indop) // an instance of the output high must // all intersect so the varnodes must all be // traceable via COPY to the same root - snipop = allocateCopyTrim(refvn, refvn->getType(), op->getAddr()); + snipop = allocateCopyTrim(refvn, op->getAddr()); data.opInsertBefore(snipop,op); list::iterator oiter; int4 i,slot; @@ -774,8 +778,10 @@ void Merge::mergeIndirect(PcodeOp *indop) return; } - if (mergeTestRequired(outvn->getHigh(),invn0->getHigh())) - if (merge(invn0->getHigh(),outvn->getHigh(),false)) return; + if (mergeTestRequired(outvn->getHigh(),invn0->getHigh())) { + if (merge(invn0->getHigh(),outvn->getHigh(),false)) + return; + } snipIndirect(indop); // If we cannot merge, the only thing that can go // wrong with an input trim, is if the output of // indop is involved in the input to the op causing @@ -783,7 +789,11 @@ void Merge::mergeIndirect(PcodeOp *indop) PcodeOp *newop; - newop = allocateCopyTrim(invn0, outvn->getType(), indop->getAddr()); + newop = allocateCopyTrim(invn0, indop->getAddr()); + SymbolEntry *entry = outvn->getSymbolEntry(); + if (entry != (SymbolEntry *)0 && entry->getSymbol()->getType()->needsResolution()) { + data.inheritWriteResolution(entry->getSymbol()->getType(), newop, indop); + } 
data.opSetInput(indop,newop->getOut(),0); data.opInsertBefore(newop,indop); if (!mergeTestRequired(outvn->getHigh(),indop->getIn(0)->getHigh()) || diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/merge.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/merge.hh index 7e6b8fc1cc..a300d9bbf4 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/merge.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/merge.hh @@ -96,7 +96,7 @@ class Merge { void collectCovering(vector &vlist,HighVariable *high,PcodeOp *op); bool collectCorrectable(const vector &vlist,list &oplist,vector &slotlist, PcodeOp *op); - PcodeOp *allocateCopyTrim(Varnode *inVn,Datatype *ct,const Address &addr); + PcodeOp *allocateCopyTrim(Varnode *inVn,const Address &addr); void snipReads(Varnode *vn,list &markedop); void snipIndirect(PcodeOp *indop); void eliminateIntersect(Varnode *vn,const vector &blocksort); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/prettyprint.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/prettyprint.cc index 49b30e439a..20e3953e08 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/prettyprint.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/prettyprint.cc @@ -233,7 +233,8 @@ void EmitXml::tagType(const char *ptr,syntax_highlight hl,const Datatype *ct) { /// \param hl indicates how the identifier should be highlighted /// \param ct is the data-type associated with the field /// \param o is the (byte) offset of the field within its structured data-type -void EmitXml::tagField(const char *ptr,syntax_highlight hl,const Datatype *ct,int4 o) { +/// \param op is the PcodeOp associated with the field (usually PTRSUB or SUBPIECE) +void EmitXml::tagField(const char *ptr,syntax_highlight hl,const Datatype *ct,int4 o,const PcodeOp *op) { *s << "getTime() << "\""; } *s << '>'; xml_escape(*s,ptr); @@ -419,7 +422,7 @@ void TokenSplit::print(EmitXml *emit) const emit->tagType(tok.c_str(),hl,ptr_second.ct); break; case field_t: // tagField - 
emit->tagField(tok.c_str(),hl,ptr_second.ct,(int4)off); + emit->tagField(tok.c_str(),hl,ptr_second.ct,(int4)off,op); break; case comm_t: // tagComment emit->tagComment(tok.c_str(),hl,ptr_second.spc,off); @@ -1054,12 +1057,12 @@ void EmitPrettyPrint::tagType(const char *ptr,syntax_highlight hl,const Datatype scan(); } -void EmitPrettyPrint::tagField(const char *ptr,syntax_highlight hl,const Datatype *ct,int4 o) +void EmitPrettyPrint::tagField(const char *ptr,syntax_highlight hl,const Datatype *ct,int4 o,const PcodeOp *op) { checkstring(); TokenSplit &tok( tokqueue.push() ); - tok.tagField(ptr,hl,ct,o); + tok.tagField(ptr,hl,ct,o,op); scan(); } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/prettyprint.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/prettyprint.hh index 5874631330..418e582da8 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/prettyprint.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/prettyprint.hh @@ -121,7 +121,7 @@ public: virtual void tagOp(const char *ptr,syntax_highlight hl,const PcodeOp *op); virtual void tagFuncName(const char *ptr,syntax_highlight hl,const Funcdata *fd,const PcodeOp *op); virtual void tagType(const char *ptr,syntax_highlight hl,const Datatype *ct); - virtual void tagField(const char *ptr,syntax_highlight hl,const Datatype *ct,int4 off); + virtual void tagField(const char *ptr,syntax_highlight hl,const Datatype *ct,int4 off,const PcodeOp *op); virtual void tagComment(const char *ptr,syntax_highlight hl,const AddrSpace *spc,uintb off); virtual void tagLabel(const char *ptr,syntax_highlight hl,const AddrSpace *spc,uintb off); virtual void print(const char *str,syntax_highlight hl=no_color); @@ -270,7 +270,7 @@ public: *s << ptr; } virtual void tagType(const char *ptr,syntax_highlight hl,const Datatype *ct) { *s << ptr; } - virtual void tagField(const char *ptr,syntax_highlight hl,const Datatype *ct,int4 off) { + virtual void tagField(const char *ptr,syntax_highlight hl,const Datatype *ct,int4 
off,const PcodeOp *op) { *s << ptr; } virtual void tagComment(const char *ptr,syntax_highlight hl, const AddrSpace *spc,uintb off) { @@ -504,9 +504,10 @@ public: /// \param h indicates how the identifier should be highlighted /// \param ct is the data-type associated with the field /// \param o is the (byte) offset of the field within its structured data-type - void tagField(const char *ptr,EmitXml::syntax_highlight h,const Datatype *ct,int4 o) { + /// \param inOp is the PcodeOp associated with the field (usually PTRSUB or SUBPIECE) + void tagField(const char *ptr,EmitXml::syntax_highlight h,const Datatype *ct,int4 o,const PcodeOp *inOp) { tok = ptr; size = tok.size(); - tagtype=field_t; delimtype=tokenstring; hl=h; ptr_second.ct=ct; off=(uintb)o; } + tagtype=field_t; delimtype=tokenstring; hl=h; ptr_second.ct=ct; off=(uintb)o; op=inOp; } /// \brief Create a comment string in the generated source code /// @@ -773,7 +774,7 @@ public: virtual void tagOp(const char *ptr,syntax_highlight hl,const PcodeOp *op); virtual void tagFuncName(const char *ptr,syntax_highlight hl,const Funcdata *fd,const PcodeOp *op); virtual void tagType(const char *ptr,syntax_highlight hl,const Datatype *ct); - virtual void tagField(const char *ptr,syntax_highlight hl,const Datatype *ct,int4 off); + virtual void tagField(const char *ptr,syntax_highlight hl,const Datatype *ct,int4 off,const PcodeOp *op); virtual void tagComment(const char *ptr,syntax_highlight hl, const AddrSpace *spc,uintb off); virtual void tagLabel(const char *ptr,syntax_highlight hl, diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc index 64ecda14c9..161c07ad5b 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc @@ -360,7 +360,7 @@ void PrintC::opFunc(const PcodeOp *op) // implied vn's pushed on in reverse order for efficiency // see PrintLanguage::pushVnImplied for(int4 
i=op->numInput()-1;i>=0;--i) - pushVnImplied(op->getIn(i),op,mods); + pushVn(op->getIn(i),op,mods); } else // Push empty token for void pushAtom(Atom("",blanktoken,EmitXml::no_color)); @@ -375,9 +375,9 @@ void PrintC::opTypeCast(const PcodeOp *op) { if (!option_nocasts) { pushOp(&typecast,op); - pushType(op->getOut()->getHigh()->getType()); + pushType(op->getOut()->getHighTypeDefFacing()); } - pushVnImplied(op->getIn(0),op,mods); + pushVn(op->getIn(0),op,mods); } /// The syntax represents the given op using a function with one input, @@ -392,13 +392,13 @@ void PrintC::opHiddenFunc(const PcodeOp *op) { pushOp(&hidden,op); - pushVnImplied(op->getIn(0),op,mods); + pushVn(op->getIn(0),op,mods); } void PrintC::opCopy(const PcodeOp *op) { - pushVnImplied(op->getIn(0),op,mods); + pushVn(op->getIn(0),op,mods); } void PrintC::opLoad(const PcodeOp *op) @@ -411,7 +411,7 @@ void PrintC::opLoad(const PcodeOp *op) else { pushOp(&dereference,op); } - pushVnImplied(op->getIn(1),op,m); + pushVn(op->getIn(1),op,m); } void PrintC::opStore(const PcodeOp *op) @@ -430,8 +430,8 @@ void PrintC::opStore(const PcodeOp *op) } // implied vn's pushed on in reverse order for efficiency // see PrintLanguage::pushVnImplied - pushVnImplied(op->getIn(2),op,mods); - pushVnImplied(op->getIn(1),op,m); + pushVn(op->getIn(2),op,mods); + pushVn(op->getIn(1),op,m); } void PrintC::opBranch(const PcodeOp *op) @@ -441,7 +441,7 @@ void PrintC::opBranch(const PcodeOp *op) // Assume the BRANCH is a statement emit->tagOp("goto",EmitXml::keyword_color,op); emit->spaces(1); - pushVnImplied(op->getIn(0),op,mods); + pushVn(op->getIn(0),op,mods); } } @@ -480,7 +480,7 @@ void PrintC::opCbranch(const PcodeOp *op) } if (booleanflip) pushOp(&boolean_not,op); - pushVnImplied(op->getIn(1),op,m); + pushVn(op->getIn(1),op,m); // Make sure stack is clear before emitting more recurse(); if (yesparen) @@ -492,7 +492,7 @@ void PrintC::opCbranch(const PcodeOp *op) emit->spaces(1); emit->print("goto",EmitXml::keyword_color); 
emit->spaces(1); - pushVnImplied(op->getIn(0),op,mods); + pushVn(op->getIn(0),op,mods); } } @@ -502,7 +502,7 @@ void PrintC::opBranchind(const PcodeOp *op) // FIXME: This routine shouldn't emit directly emit->tagOp("switch",EmitXml::keyword_color,op); // Print header for switch int4 id = emit->openParen('('); - pushVnImplied(op->getIn(0),op,mods); + pushVn(op->getIn(0),op,mods); recurse(); emit->closeParen(')',id); } @@ -544,7 +544,7 @@ void PrintC::opCall(const PcodeOp *op) // see PrintLanguage::pushVnImplied for(int4 i=op->numInput()-1;i>=1;--i) { if (i == skip) continue; - pushVnImplied(op->getIn(i),op,mods); + pushVn(op->getIn(i),op,mods); } } else // Push empty token for void @@ -564,25 +564,25 @@ void PrintC::opCallind(const PcodeOp *op) int4 count = op->numInput() - 1; count -= (skip < 0) ? 0 : 1; if (count > 1) { // Multiple parameters - pushVnImplied(op->getIn(0),op,mods); + pushVn(op->getIn(0),op,mods); for(int4 i=0;inumInput()-1;i>=1;--i) { if (i == skip) continue; - pushVnImplied(op->getIn(i),op,mods); + pushVn(op->getIn(i),op,mods); } } else if (count == 1) { // One parameter if (skip == 1) - pushVnImplied(op->getIn(2),op,mods); + pushVn(op->getIn(2),op,mods); else - pushVnImplied(op->getIn(1),op,mods); - pushVnImplied(op->getIn(0),op,mods); + pushVn(op->getIn(1),op,mods); + pushVn(op->getIn(0),op,mods); } else { // A void function - pushVnImplied(op->getIn(0),op,mods); + pushVn(op->getIn(0),op,mods); pushAtom(Atom("",blanktoken,EmitXml::no_color)); } } @@ -599,7 +599,7 @@ void PrintC::opCallother(const PcodeOp *op) // implied vn's pushed on in reverse order for efficiency // see PrintLanguage::pushVnImplied for(int4 i=op->numInput()-1;i>=1;--i) - pushVnImplied(op->getIn(i),op,mods); + pushVn(op->getIn(i),op,mods); } else // Push empty token for void pushAtom(Atom("",blanktoken,EmitXml::no_color)); @@ -614,7 +614,7 @@ void PrintC::opConstructor(const PcodeOp *op,bool withNew) const Varnode *outvn = newop->getOut(); pushOp(&new_op,newop); 
pushAtom(Atom("new",optoken,EmitXml::keyword_color,newop,outvn)); - dt = outvn->getType(); + dt = outvn->getTypeDefFacing(); } else { const Varnode *thisvn = op->getIn(1); @@ -632,10 +632,10 @@ void PrintC::opConstructor(const PcodeOp *op,bool withNew) for(int4 i=2;inumInput()-1;++i) pushOp(&comma,op); for(int4 i=op->numInput()-1;i>=2;--i) - pushVnImplied(op->getIn(i),op,mods); + pushVn(op->getIn(i),op,mods); } else if (op->numInput()==3) { // One parameter - pushVnImplied(op->getIn(2),op,mods); + pushVn(op->getIn(2),op,mods); } else { // A void function pushAtom(Atom("",blanktoken,EmitXml::no_color)); @@ -652,7 +652,7 @@ void PrintC::opReturn(const PcodeOp *op) emit->tagOp("return",EmitXml::keyword_color,op); if (op->numInput()>1) { emit->spaces(1); - pushVnImplied(op->getIn(1),op,mods); + pushVn(op->getIn(1),op,mods); } return; case PcodeOp::noreturn: // Previous instruction does not exit @@ -677,7 +677,7 @@ void PrintC::opReturn(const PcodeOp *op) void PrintC::opIntZext(const PcodeOp *op,const PcodeOp *readOp) { - if (castStrategy->isZextCast(op->getOut()->getHigh()->getType(),op->getIn(0)->getHigh()->getType())) { + if (castStrategy->isZextCast(op->getOut()->getHighTypeDefFacing(),op->getIn(0)->getHighTypeReadFacing(op))) { if (option_hide_exts && castStrategy->isExtensionCastImplied(op,readOp)) opHiddenFunc(op); else @@ -690,7 +690,7 @@ void PrintC::opIntZext(const PcodeOp *op,const PcodeOp *readOp) void PrintC::opIntSext(const PcodeOp *op,const PcodeOp *readOp) { - if (castStrategy->isSextCast(op->getOut()->getHigh()->getType(),op->getIn(0)->getHigh()->getType())) { + if (castStrategy->isSextCast(op->getOut()->getHighTypeDefFacing(),op->getIn(0)->getHighTypeReadFacing(op))) { if (option_hide_exts && castStrategy->isExtensionCastImplied(op,readOp)) opHiddenFunc(op); else @@ -707,22 +707,33 @@ void PrintC::opBoolNegate(const PcodeOp *op) { if (isSet(negatetoken)) { // Check if we are negated by a previous BOOL_NEGATE unsetMod(negatetoken); // If so, mark that 
negatetoken is consumed - pushVnImplied(op->getIn(0),op,mods); // Don't print ourselves, but print our input unmodified + pushVn(op->getIn(0),op,mods); // Don't print ourselves, but print our input unmodified } else if (checkPrintNegation(op->getIn(0))) { // If the next operator can be flipped - pushVnImplied(op->getIn(0),op,mods|negatetoken); // Don't print ourselves, but print a modified input + pushVn(op->getIn(0),op,mods|negatetoken); // Don't print ourselves, but print a modified input } else { pushOp(&boolean_not,op); // Otherwise print ourselves - pushVnImplied(op->getIn(0),op,mods); // And print our input + pushVn(op->getIn(0),op,mods); // And print our input } } void PrintC::opSubpiece(const PcodeOp *op) { - if (castStrategy->isSubpieceCast(op->getOut()->getHigh()->getType(), - op->getIn(0)->getHigh()->getType(), + if (op->doesSpecialPrinting()) { // Special printing means it is a field extraction + int4 offset; + Datatype *ct; + const TypeField *field = TypeOpSubpiece::testExtraction(true, op, ct, offset); + if (field != (const TypeField *)0 && offset == 0) { + pushOp(&object_member,op); + pushVn(op->getIn(0), op, mods); + pushAtom(Atom(field->name,fieldtoken,EmitXml::no_color,ct,field->ident,op)); + return; + } + } + if (castStrategy->isSubpieceCast(op->getOut()->getHighTypeDefFacing(), + op->getIn(0)->getHighTypeReadFacing(op), (uint4)op->getIn(1)->getOffset())) opTypeCast(op); else @@ -735,7 +746,7 @@ void PrintC::opPtradd(const PcodeOp *op) bool printval = isSet(print_load_value|print_store_value); uint4 m = mods & ~(print_load_value|print_store_value); if (!printval) { - TypePointer *tp = (TypePointer *)op->getIn(0)->getHigh()->getType(); + TypePointer *tp = (TypePointer *)op->getIn(0)->getHighTypeReadFacing(op); if (tp->getMetatype() == TYPE_PTR) { if (tp->getPtrTo()->getMetatype() == TYPE_ARRAY) printval = true; @@ -747,8 +758,8 @@ void PrintC::opPtradd(const PcodeOp *op) pushOp(&binary_plus,op); // implied vn's pushed on in reverse order for 
efficiency // see PrintLanguage::pushVnImplied - pushVnImplied(op->getIn(1),op,m); - pushVnImplied(op->getIn(0),op,m); + pushVn(op->getIn(1),op,m); + pushVn(op->getIn(0),op,m); } static bool isValueFlexible(const Varnode *vn) @@ -791,7 +802,7 @@ void PrintC::opPtrsub(const PcodeOp *op) in0 = op->getIn(0); in1const = op->getIn(1)->getOffset(); - ptype = (TypePointer *)in0->getHigh()->getType(); + ptype = (TypePointer *)in0->getHighTypeReadFacing(op); if (ptype->getMetatype() != TYPE_PTR) { clear(); throw LowlevelError("PTRSUB off of non-pointer type"); @@ -808,7 +819,7 @@ void PrintC::opPtrsub(const PcodeOp *op) valueon = (mods & (print_load_value|print_store_value)) != 0; flex = isValueFlexible(in0); - if (ct->getMetatype() == TYPE_STRUCT) { + if (ct->getMetatype() == TYPE_STRUCT || ct->getMetatype() == TYPE_UNION) { uintb suboff = in1const; // How far into container if (ptrel != (TypePointerRel *)0) { suboff += ptrel->getPointerOffset(); @@ -817,34 +828,48 @@ void PrintC::opPtrsub(const PcodeOp *op) // Special case where we do not print a field pushTypePointerRel(op); if (flex) - pushVnImplied(in0,op,m | print_load_value); + pushVn(in0,op,m | print_load_value); else - pushVnImplied(in0,op,m); + pushVn(in0,op,m); return; } } suboff = AddrSpace::addressToByte(suboff,ptype->getWordSize()); string fieldname; Datatype *fieldtype; - int4 fieldoffset; + int4 fieldid; int4 newoff; - const TypeField *fld = ((TypeStruct *)ct)->getField((int4)suboff,0,&newoff); - if (fld == (const TypeField *)0) { - if (ct->getSize() <= suboff) { - clear(); - throw LowlevelError("PTRSUB out of bounds into struct"); - } - // Try to match the Ghidra's default field name from DataTypeComponent.getDefaultFieldName - ostringstream s; - s << "field_0x" << hex << suboff; - fieldname = s.str(); - fieldtype = (Datatype *)0; - fieldoffset = suboff; - } - else { + if (ct->getMetatype() == TYPE_UNION) { + if (suboff != 0) + throw LowlevelError("PTRSUB accesses union with non-zero offset"); + const 
Funcdata *fd = op->getParent()->getFuncdata(); + const ResolvedUnion *resUnion = fd->getUnionField(ptype, op, -1); + if (resUnion == (const ResolvedUnion *)0 || resUnion->getFieldNum() < 0) + throw LowlevelError("PTRSUB for union that does not resolve to a field"); + const TypeField *fld = ((TypeUnion *)ct)->getField(resUnion->getFieldNum()); + fieldid = fld->ident; fieldname = fld->name; fieldtype = fld->type; - fieldoffset = fld->offset; + } + else { // TYPE_STRUCT + const TypeField *fld = ((TypeStruct*)ct)->resolveTruncation((int4)suboff,0,&newoff); + if (fld == (const TypeField*)0) { + if (ct->getSize() <= suboff) { + clear(); + throw LowlevelError("PTRSUB out of bounds into struct"); + } + // Try to match the Ghidra's default field name from DataTypeComponent.getDefaultFieldName + ostringstream s; + s << "field_0x" << hex << suboff; + fieldname = s.str(); + fieldtype = (Datatype*)0; + fieldid = suboff; + } + else { + fieldname = fld->name; + fieldtype = fld->type; + fieldid = fld->ident; + } } arrayvalue = false; // The '&' is dropped if the output type is an array @@ -859,16 +884,16 @@ void PrintC::opPtrsub(const PcodeOp *op) pushOp(&object_member,op); if (ptrel != (TypePointerRel *)0) pushTypePointerRel(op); - pushVnImplied(in0,op,m | print_load_value); - pushAtom(Atom(fieldname,fieldtoken,EmitXml::no_color,ct,fieldoffset)); + pushVn(in0,op,m | print_load_value); + pushAtom(Atom(fieldname,fieldtoken,EmitXml::no_color,ct,fieldid,op)); } else { // EMIT &( )->name pushOp(&addressof,op); pushOp(&pointer_member,op); if (ptrel != (TypePointerRel *)0) pushTypePointerRel(op); - pushVnImplied(in0,op,m); - pushAtom(Atom(fieldname,fieldtoken,EmitXml::no_color,ct,fieldoffset)); + pushVn(in0,op,m); + pushAtom(Atom(fieldname,fieldtoken,EmitXml::no_color,ct,fieldid,op)); } } else { // Not printing an ampersand @@ -878,15 +903,15 @@ void PrintC::opPtrsub(const PcodeOp *op) pushOp(&object_member,op); if (ptrel != (TypePointerRel *)0) pushTypePointerRel(op); - 
pushVnImplied(in0,op,m | print_load_value); - pushAtom(Atom(fieldname,fieldtoken,EmitXml::no_color,ct,fieldoffset)); + pushVn(in0,op,m | print_load_value); + pushAtom(Atom(fieldname,fieldtoken,EmitXml::no_color,ct,fieldid,op)); } else { // EMIT ( )->name pushOp(&pointer_member,op); if (ptrel != (TypePointerRel *)0) pushTypePointerRel(op); - pushVnImplied(in0,op,m); - pushAtom(Atom(fieldname,fieldtoken,EmitXml::no_color,ct,fieldoffset)); + pushVn(in0,op,m); + pushAtom(Atom(fieldname,fieldtoken,EmitXml::no_color,ct,fieldid,op)); } if (arrayvalue) push_integer(0,4,false,(Varnode *)0,op); @@ -927,7 +952,7 @@ void PrintC::opPtrsub(const PcodeOp *op) // we can't use a cast in its description, so turn off // casting when printing the partial symbol // Datatype *exttype = ((mods & print_store_value)!=0) ? (Datatype *)0 : ct; - pushPartialSymbol(symbol,off,0,(Varnode *)0,op,(Datatype *)0); + pushPartialSymbol(symbol,off,0,(Varnode *)0,op,-1); } } if (arrayvalue) @@ -947,13 +972,13 @@ void PrintC::opPtrsub(const PcodeOp *op) // becomes struct->arrayfield[i] if (ptrel != (TypePointerRel *)0) pushTypePointerRel(op); - pushVnImplied(in0,op,m); + pushVn(in0,op,m); } else { // EMIT *( ) pushOp(&dereference,op); if (ptrel != (TypePointerRel *)0) pushTypePointerRel(op); - pushVnImplied(in0,op,m); + pushVn(in0,op,m); } } else { @@ -961,7 +986,7 @@ void PrintC::opPtrsub(const PcodeOp *op) pushOp(&subscript,op); if (ptrel != (TypePointerRel *)0) pushTypePointerRel(op); - pushVnImplied(in0,op,m); + pushVn(in0,op,m); push_integer(0,4,false,(Varnode *)0,op); } else { // EMIT (* )[0] @@ -969,7 +994,7 @@ void PrintC::opPtrsub(const PcodeOp *op) pushOp(&dereference,op); if (ptrel != (TypePointerRel *)0) pushTypePointerRel(op); - pushVnImplied(in0,op,m); + pushVn(in0,op,m); push_integer(0,4,false,(Varnode *)0,op); } } @@ -988,7 +1013,7 @@ void PrintC::opPtrsub(const PcodeOp *op) void PrintC::opSegmentOp(const PcodeOp *op) { - pushVnImplied(op->getIn(2),op,mods); + 
pushVn(op->getIn(2),op,mods); } void PrintC::opCpoolRefOp(const PcodeOp *op) @@ -1033,7 +1058,7 @@ void PrintC::opCpoolRefOp(const PcodeOp *op) pushOp(&function_call,op); pushAtom(Atom(rec->getToken(),functoken,EmitXml::funcname_color,op,outvn)); pushOp(&comma,(const PcodeOp *)0); - pushVnImplied(vn0,op,mods); + pushVn(vn0,op,mods); pushAtom(Atom(dt->getName(),syntax,EmitXml::type_color,op,outvn)); break; } @@ -1056,7 +1081,7 @@ void PrintC::opCpoolRefOp(const PcodeOp *op) } else { pushOp(&pointer_member, op); - pushVnImplied(vn0, op, mods); + pushVn(vn0, op, mods); pushAtom(Atom(rec->getToken(), syntax, color, op, outvn)); } break; @@ -1081,7 +1106,7 @@ void PrintC::opNewOp(const PcodeOp *op) nm = ""; } else { - Datatype *dt = outvn->getType(); + Datatype *dt = outvn->getTypeDefFacing(); while (dt->getMetatype() == TYPE_PTR) { dt = ((TypePointer *)dt)->getPtrTo(); } @@ -1089,14 +1114,14 @@ void PrintC::opNewOp(const PcodeOp *op) } pushOp(&subscript,op); pushAtom(Atom(nm,optoken,EmitXml::type_color,op)); - pushVnImplied(vn1,op,mods); + pushVn(vn1,op,mods); return; } } // This printing is used only if the 'new' operator doesn't feed directly into a constructor pushOp(&function_call,op); pushAtom(Atom("new",optoken,EmitXml::keyword_color,op,outvn)); - pushVnImplied(vn0,op,mods); + pushVn(vn0,op,mods); } void PrintC::opInsertOp(const PcodeOp *op) @@ -1133,7 +1158,7 @@ void PrintC::push_integer(uintb val,int4 sz,bool sign, if ((vn != (const Varnode *)0)&&(!vn->isAnnotation())) { Symbol *sym = vn->getHigh()->getSymbol(); if (sym != (Symbol *)0) { - if (sym->isNameLocked() && (sym->getCategory() == 1)) { + if (sym->isNameLocked() && (sym->getCategory() == Symbol::equate)) { if (pushEquate(val,sz,(EquateSymbol *)sym,vn,op)) return; } @@ -1448,7 +1473,7 @@ void PrintC::pushCharConstant(uintb val,const Datatype *ct,const Varnode *vn,con if ((vn != (const Varnode *)0)&&(!vn->isAnnotation())) { Symbol *sym = vn->getHigh()->getSymbol(); if (sym != (Symbol *)0) { - if 
(sym->isNameLocked() && (sym->getCategory() == 1)) { + if (sym->isNameLocked() && (sym->getCategory() == Symbol::equate)) { if (pushEquate(val,vn->getSize(),(EquateSymbol *)sym,vn,op)) return; } @@ -1627,6 +1652,7 @@ void PrintC::pushConstant(uintb val,const Datatype *ct, case TYPE_CODE: case TYPE_ARRAY: case TYPE_STRUCT: + case TYPE_UNION: case TYPE_PARTIALSTRUCT: break; } @@ -1723,7 +1749,7 @@ void PrintC::pushAnnotation(const Varnode *vn,const PcodeOp *op) pushSymbol(entry->getSymbol(),vn,op); else { int4 symboloff = vn->getOffset() - entry->getFirst(); - pushPartialSymbol(entry->getSymbol(),symboloff,size,vn,op,(Datatype *)0); + pushPartialSymbol(entry->getSymbol(),symboloff,size,vn,op,-1); } } else { @@ -1743,7 +1769,7 @@ void PrintC::pushSymbol(const Symbol *sym,const Varnode *vn,const PcodeOp *op) EmitXml::syntax_highlight tokenColor; if (sym->getScope()->isGlobal()) tokenColor = EmitXml::global_color; - else if (sym->getCategory() == 0) + else if (sym->getCategory() == Symbol::function_parameter) tokenColor = EmitXml::param_color; else tokenColor = EmitXml::var_color; @@ -1777,7 +1803,7 @@ void PrintC::pushUnnamedLocation(const Address &addr, void PrintC::pushPartialSymbol(const Symbol *sym,int4 off,int4 sz, const Varnode *vn,const PcodeOp *op, - Datatype *outtype) + int4 inslot) { // We need to print "bottom up" in order to get parentheses right // I.e. 
we want to print globalstruct.arrayfield[0], rather than @@ -1788,12 +1814,19 @@ void PrintC::pushPartialSymbol(const Symbol *sym,int4 off,int4 sz, Datatype *ct = sym->getType(); while(ct != (Datatype *)0) { - if (((sz==0)||(sz==ct->getSize()))&&(off==0)) - break; // Found the full partial + if (off == 0) { + if (sz == 0 || (sz == ct->getSize() && (!ct->needsResolution() || ct->getMetatype()==TYPE_PTR))) + break; + } bool succeeded = false; if (ct->getMetatype()==TYPE_STRUCT) { + if (ct->needsResolution() && ct->getSize() == sz) { + Datatype *outtype = ct->findResolve(op, inslot); + if (outtype == ct) + break; // Turns out we don't resolve to the field + } const TypeField *field; - field = ((TypeStruct *)ct)->getField(off,sz,&off); + field = ((TypeStruct *)ct)->resolveTruncation(off,sz,&off); if (field != (const TypeField *)0) { stack.emplace_back(); PartialSymbolEntry &entry( stack.back() ); @@ -1822,13 +1855,32 @@ void PrintC::pushPartialSymbol(const Symbol *sym,int4 off,int4 sz, succeeded = true; } } - else if ((outtype != (Datatype *)0)&& - castStrategy->isSubpieceCastEndian(outtype,ct,off, - sym->getFirstWholeMap()->getAddr().getSpace()->isBigEndian())) { - // Treat truncation as SUBPIECE style cast - finalcast = outtype; - ct = (Datatype *)0; - succeeded = true; + else if (ct->getMetatype() == TYPE_UNION) { + const TypeField *field; + field = ((TypeUnion *)ct)->findTruncation(off,op,inslot,off); + if (field != (const TypeField*)0) { + stack.emplace_back(); + PartialSymbolEntry &entry(stack.back()); + entry.token = &object_member; + entry.field = field; + entry.parent = ct; + entry.fieldname = entry.field->name; + entry.hilite = EmitXml::no_color; + ct = field->type; + succeeded = true; + } + else if (ct->getSize() == sz) + break; // Turns out we don't need to resolve the field + } + else if (inslot >= 0) { + Datatype *outtype = vn->getHigh()->getType(); + if (castStrategy->isSubpieceCastEndian(outtype,ct,off, + 
sym->getFirstWholeMap()->getAddr().getSpace()->isBigEndian())) { + // Treat truncation as SUBPIECE style cast + finalcast = outtype; + ct = (Datatype*)0; + succeeded = true; + } } if (!succeeded) { // Subtype was not good stack.emplace_back(); @@ -1860,7 +1912,7 @@ void PrintC::pushPartialSymbol(const Symbol *sym,int4 off,int4 sz, if (field == (const TypeField *)0) pushAtom(Atom(stack[i].fieldname,syntax,stack[i].hilite,op)); else - pushAtom(Atom(stack[i].fieldname,fieldtoken,stack[i].hilite,stack[i].parent,field->offset)); + pushAtom(Atom(stack[i].fieldname,fieldtoken,stack[i].hilite,stack[i].parent,field->ident,op)); } } @@ -1882,6 +1934,39 @@ void PrintC::pushMismatchSymbol(const Symbol *sym,int4 off,int4 sz, pushUnnamedLocation(vn->getAddr(),vn,op); } +void PrintC::pushImpliedField(const Varnode *vn,const PcodeOp *op) + +{ + bool proceed = false; + Datatype *parent = vn->getHigh()->getType(); + const TypeField *field; + if (parent->needsResolution() && parent->getMetatype() != TYPE_PTR) { + const Funcdata *fd = op->getParent()->getFuncdata(); + int4 slot = op->getSlot(vn); + const ResolvedUnion *res = fd->getUnionField(parent, op, slot); + if (res != (const ResolvedUnion *)0 && res->getFieldNum() >= 0) { + if (parent->getMetatype() == TYPE_STRUCT && res->getFieldNum() == 0) { + field = &(*((TypeStruct *)parent)->beginField()); + proceed = true; + } + else if (parent->getMetatype() == TYPE_UNION) { + field = ((TypeUnion *)parent)->getField(res->getFieldNum()); + proceed = true; + } + } + } + + const PcodeOp *defOp = vn->getDef(); + if (!proceed) { + // Just push original op + defOp->getOpcode()->push(this,defOp,op); + return; + } + pushOp(&object_member,op); + defOp->getOpcode()->push(this,defOp,op); + pushAtom(Atom(field->name,fieldtoken,EmitXml::no_color,parent,field->ident,op)); +} + /// Print all the components making up the data-type, using the \b struct keyword /// \param ct is the structure data-type void PrintC::emitStructDefinition(const TypeStruct *ct) 
@@ -2038,14 +2123,14 @@ void PrintC::emitLocalVarDecls(const Funcdata *fd) { bool notempty = false; - if (emitScopeVarDecls(fd->getScopeLocal(),-1)) + if (emitScopeVarDecls(fd->getScopeLocal(),Symbol::no_category)) notempty = true; ScopeMap::const_iterator iter,enditer; iter = fd->getScopeLocal()->childrenBegin(); enditer = fd->getScopeLocal()->childrenEnd(); while(iter!=enditer) { Scope *l1 = (*iter).second; - if (emitScopeVarDecls(l1,-1)) + if (emitScopeVarDecls(l1,Symbol::no_category)) notempty = true; ++iter; } @@ -2226,7 +2311,7 @@ bool PrintC::emitInplaceOp(const PcodeOp *op) if (op->getOut()->getHigh() != vn->getHigh()) return false; pushOp(tok,op); pushVnExplicit(vn,op); - pushVnImplied(op->getIn(1),op,mods); + pushVn(op->getIn(1),op,mods); recurse(); return true; } @@ -2238,14 +2323,14 @@ void PrintC::emitExpression(const PcodeOp *op) if (outvn != (Varnode *)0) { if (option_inplace_ops && emitInplaceOp(op)) return; pushOp(&assignment,op); - pushVnLHS(outvn,op); + pushSymbolDetail(outvn,op,false); } else if (op->doesSpecialPrinting()) { // Printing of constructor syntax const PcodeOp *newop = op->getIn(1)->getDef(); outvn = newop->getOut(); pushOp(&assignment,newop); - pushVnLHS(outvn,newop); + pushSymbolDetail(outvn,newop,false); opConstructor(op,true); recurse(); return; @@ -2377,7 +2462,7 @@ void PrintC::emitGlobalVarDeclsRecursive(Scope *symScope) { if (!symScope->isGlobal()) return; - emitScopeVarDecls(symScope,-1); + emitScopeVarDecls(symScope,Symbol::no_category); ScopeMap::const_iterator iter,enditer; iter = symScope->childrenBegin(); enditer = symScope->childrenEnd(); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh index 8b0efd3a05..9f5c67a22c 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh @@ -172,14 +172,14 @@ protected: virtual bool pushEquate(uintb val,int4 sz,const EquateSymbol *sym, 
const Varnode *vn,const PcodeOp *op); virtual void pushAnnotation(const Varnode *vn,const PcodeOp *op); - virtual void pushSymbol(const Symbol *sym,const Varnode *vn, - const PcodeOp *op); + virtual void pushSymbol(const Symbol *sym,const Varnode *vn,const PcodeOp *op); virtual void pushUnnamedLocation(const Address &addr, const Varnode *vn,const PcodeOp *op); virtual void pushPartialSymbol(const Symbol *sym,int4 off,int4 sz, - const Varnode *vn,const PcodeOp *op,Datatype *outtype); + const Varnode *vn,const PcodeOp *op,int4 inslot); virtual void pushMismatchSymbol(const Symbol *sym,int4 off,int4 sz, const Varnode *vn,const PcodeOp *op); + virtual void pushImpliedField(const Varnode *vn,const PcodeOp *op); virtual void push_integer(uintb val,int4 sz,bool sign, const Varnode *vn, const PcodeOp *op); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printjava.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/printjava.cc index bf8e9526d3..4d7826a2dc 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printjava.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printjava.cc @@ -230,7 +230,7 @@ void PrintJava::opLoad(const PcodeOp *op) bool printArrayRef = needZeroArray(op->getIn(1)); if (printArrayRef) pushOp(&subscript,op); - pushVnImplied(op->getIn(1),op,m); + pushVn(op->getIn(1),op,m); if (printArrayRef) push_integer(0,4,false,(Varnode *)0,op); } @@ -242,15 +242,15 @@ void PrintJava::opStore(const PcodeOp *op) pushOp(&assignment,op); // This is an assignment if (needZeroArray(op->getIn(1))) { pushOp(&subscript,op); - pushVnImplied(op->getIn(1),op,m); + pushVn(op->getIn(1),op,m); push_integer(0,4,false,(Varnode *)0,op); - pushVnImplied(op->getIn(2),op,mods); + pushVn(op->getIn(2),op,mods); } else { // implied vn's pushed on in reverse order for efficiency // see PrintLanguage::pushVnImplied - pushVnImplied(op->getIn(2),op,mods); - pushVnImplied(op->getIn(1),op,m); + pushVn(op->getIn(2),op,mods); + pushVn(op->getIn(1),op,m); } } @@ -266,25 +266,25 @@ 
void PrintJava::opCallind(const PcodeOp *op) int4 count = op->numInput() - 1; count -= (skip < 0) ? 0 : 1; if (count > 1) { // Multiple parameters - pushVnImplied(op->getIn(0),op,mods); + pushVn(op->getIn(0),op,mods); for(int4 i=0;inumInput()-1;i>=1;--i) { if (i == skip) continue; - pushVnImplied(op->getIn(i),op,mods); + pushVn(op->getIn(i),op,mods); } } else if (count == 1) { // One parameter if (skip == 1) - pushVnImplied(op->getIn(2),op,mods); + pushVn(op->getIn(2),op,mods); else - pushVnImplied(op->getIn(1),op,mods); - pushVnImplied(op->getIn(0),op,mods); + pushVn(op->getIn(1),op,mods); + pushVn(op->getIn(0),op,mods); } else { // A void function - pushVnImplied(op->getIn(0),op,mods); + pushVn(op->getIn(0),op,mods); pushAtom(Atom("",blanktoken,EmitXml::no_color)); } } @@ -329,7 +329,7 @@ void PrintJava::opCpoolRefOp(const PcodeOp *op) dt = ((TypePointer *)dt)->getPtrTo(); } pushOp(&instanceof,op); - pushVnImplied(vn0,op,mods); + pushVn(vn0,op,mods); pushAtom(Atom(dt->getName(),syntax,EmitXml::type_color,op,outvn)); break; } @@ -352,7 +352,7 @@ void PrintJava::opCpoolRefOp(const PcodeOp *op) } else { pushOp(&object_member,op); - pushVnImplied(vn0,op,mods); + pushVn(vn0,op,mods); pushAtom(Atom(rec->getToken(),syntax,color,op,outvn)); } } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc index 30eb781412..d2be8064bf 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc @@ -189,7 +189,7 @@ void PrintLanguage::pushAtom(const Atom &atom) /// \param vn is the given implied Varnode /// \param op is PcodeOp taking the Varnode as input /// \param m is the set of printing modifications to apply for this sub-expression -void PrintLanguage::pushVnImplied(const Varnode *vn,const PcodeOp *op,uint4 m) +void PrintLanguage::pushVn(const Varnode *vn,const PcodeOp *op,uint4 m) { // if (pending == 
nodepend.size()) @@ -202,7 +202,7 @@ void PrintLanguage::pushVnImplied(const Varnode *vn,const PcodeOp *op,uint4 m) // } // But it is more efficient to just call them in reverse order - nodepend.push_back(NodePending(vn,op,m)); + nodepend.emplace_back(vn,op,m); } /// This method pushes a given Varnode as a \b leaf of the current expression. @@ -217,35 +217,20 @@ void PrintLanguage::pushVnExplicit(const Varnode *vn,const PcodeOp *op) pushAnnotation(vn,op); return; } - HighVariable *high = vn->getHigh(); if (vn->isConstant()) { - pushConstant(vn->getOffset(),high->getType(),vn,op); + pushConstant(vn->getOffset(),vn->getHighTypeReadFacing(op),vn,op); return; } - Symbol *sym = high->getSymbol(); - if (sym == (Symbol *)0) { - pushUnnamedLocation(high->getNameRepresentative()->getAddr(),vn,op); - } - else { - int4 symboloff = high->getSymbolOffset(); - if (symboloff == -1) - pushSymbol(sym,vn,op); - else { - if (symboloff + vn->getSize() <= sym->getType()->getSize()) - pushPartialSymbol(sym,symboloff,vn->getSize(),vn,op,vn->getHigh()->getType()); - else - pushMismatchSymbol(sym,symboloff,vn->getSize(),vn,op); - } - } + pushSymbolDetail(vn,op,true); } -/// The given Varnode will ultimately be emitted as an explicit variable on -/// the left-hand side of an \e assignment statement. As with pushVnExplicit(), -/// this method decides how the Varnode will be emitted and pushes the resulting -/// Atom onto the RPN stack. -/// \param vn is the given LSH Varnode -/// \param op is the PcodeOp which produces the Varnode as an output -void PrintLanguage::pushVnLHS(const Varnode *vn,const PcodeOp *op) +/// We know that the given Varnode matches part of a single Symbol. +/// Push a set of tokens that represents the Varnode, which may require +/// extracting subfields or casting to get the correct value. 
+/// \param vn is the given Varnode +/// \param op is the PcodeOp involved in the expression with the Varnode +/// \param isRead is \b true if the PcodeOp reads the Varnode +void PrintLanguage::pushSymbolDetail(const Varnode *vn,const PcodeOp *op,bool isRead) { HighVariable *high = vn->getHigh(); @@ -255,14 +240,19 @@ void PrintLanguage::pushVnLHS(const Varnode *vn,const PcodeOp *op) } else { int4 symboloff = high->getSymbolOffset(); - if (symboloff == -1) - pushSymbol(sym,vn,op); - else { - if (symboloff + vn->getSize() <= sym->getType()->getSize()) - pushPartialSymbol(sym,symboloff,vn->getSize(),vn,op,(Datatype *)0); - else - pushMismatchSymbol(sym,symboloff,vn->getSize(),vn,op); + if (symboloff == -1) { + if (!sym->getType()->needsResolution()) { + pushSymbol(sym,vn,op); + return; + } + symboloff = 0; } + if (symboloff + vn->getSize() <= sym->getType()->getSize()) { + int4 inslot = isRead ? op->getSlot(vn) : -1; + pushPartialSymbol(sym,symboloff,vn->getSize(),vn,op,inslot); + } + else + pushMismatchSymbol(sym,symboloff,vn->getSize(),vn,op); } } @@ -399,7 +389,7 @@ void PrintLanguage::emitAtom(const Atom &atom) emit->tagType(atom.name.c_str(),atom.highlight,atom.ptr_second.ct); break; case fieldtoken: - emit->tagField(atom.name.c_str(),atom.highlight,atom.ptr_second.ct,atom.offset); + emit->tagField(atom.name.c_str(),atom.highlight,atom.ptr_second.ct,atom.offset,atom.op); break; case blanktoken: break; // Print nothing @@ -519,17 +509,22 @@ void PrintLanguage::recurse(void) { uint4 modsave = mods; - int4 final = pending; // Already claimed + int4 lastPending = pending; // Already claimed pending = nodepend.size(); // Lay claim to the rest - while(final < pending) { + while(lastPending < pending) { const Varnode *vn = nodepend.back().vn; const PcodeOp *op = nodepend.back().op; mods = nodepend.back().vnmod; nodepend.pop_back(); pending -= 1; if (vn->isImplied()) { - const PcodeOp *defOp = vn->getDef(); - defOp->getOpcode()->push(this,defOp,op); + if 
(vn->hasImpliedField()) { + pushImpliedField(vn, op); + } + else { + const PcodeOp *defOp = vn->getDef(); + defOp->getOpcode()->push(this,defOp,op); + } } else pushVnExplicit(vn,op); @@ -554,8 +549,8 @@ void PrintLanguage::opBinary(const OpToken *tok,const PcodeOp *op) pushOp(tok,op); // Push on reverse polish notation // implied vn's pushed on in reverse order for efficiency // see PrintLanguage::pushVnImplied - pushVnImplied(op->getIn(1),op,mods); - pushVnImplied(op->getIn(0),op,mods); + pushVn(op->getIn(1),op,mods); + pushVn(op->getIn(0),op,mods); } /// Push an operator onto the stack that has a normal unary format. @@ -568,7 +563,7 @@ void PrintLanguage::opUnary(const OpToken *tok,const PcodeOp *op) pushOp(tok,op); // implied vn's pushed on in reverse order for efficiency // see PrintLanguage::pushVnImplied - pushVnImplied(op->getIn(0),op,mods); + pushVn(op->getIn(0),op,mods); } void PrintLanguage::resetDefaultsInternal(void) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh index c1ccd02457..93c1de1d45 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh @@ -24,6 +24,7 @@ #include "prettyprint.hh" class PrintLanguage; +class ResolvedUnion; /// \brief Base class for high-level language capabilities /// @@ -220,8 +221,8 @@ public: : name(nm) { type = t; highlight = hl; ptr_second.ct = c; } /// \brief Construct a token for a field name - Atom(const string &nm,tagtype t,EmitXml::syntax_highlight hl,const Datatype *c,int4 off) - : name(nm) { type = t; highlight = hl; ptr_second.ct = c; offset = off; } + Atom(const string &nm,tagtype t,EmitXml::syntax_highlight hl,const Datatype *c,int4 off,const PcodeOp *o) + : name(nm) { type = t; highlight = hl; ptr_second.ct = c; offset = off; op = o; } /// \brief Construct a token with an associated PcodeOp Atom(const string &nm,tagtype 
t,EmitXml::syntax_highlight hl,const PcodeOp *o) @@ -268,9 +269,9 @@ protected: void unsetMod(uint4 m) { mods &= ~m; } ///< Deactivate the given printing modification void pushOp(const OpToken *tok,const PcodeOp *op); ///< Push an operator token onto the RPN stack void pushAtom(const Atom &atom); ///< Push a variable token onto the RPN stack - void pushVnImplied(const Varnode *vn,const PcodeOp *op,uint4 m); ///< Push an implied variable onto the RPN stack + void pushVn(const Varnode *vn,const PcodeOp *op,uint4 m); ///< Push an expression rooted at a Varnode onto the RPN stack void pushVnExplicit(const Varnode *vn,const PcodeOp *op); ///< Push an explicit variable onto the RPN stack - void pushVnLHS(const Varnode *vn,const PcodeOp *op); ///< Push a variable as the left-hand side of an expression + void pushSymbolDetail(const Varnode *vn,const PcodeOp *op,bool isRead); ///< Push symbol name with adornments matching given Varnode bool parentheses(const OpToken *op2); ///< Determine if the given token should be emitted in its own parenthetic expression void emitOp(const ReversePolish &entry); ///< Send an operator token from the RPN to the emitter @@ -328,8 +329,7 @@ protected: /// \param sym is the given Symbol /// \param vn is the Varnode holding the Symbol value /// \param op is a PcodeOp associated with the Varnode - virtual void pushSymbol(const Symbol *sym,const Varnode *vn, - const PcodeOp *op)=0; + virtual void pushSymbol(const Symbol *sym,const Varnode *vn,const PcodeOp *op)=0; /// \brief Push an address as a substitute for a Symbol onto the RPN stack /// @@ -338,8 +338,7 @@ protected: /// \param addr is the storage address /// \param vn is the Varnode representing the variable (if present) /// \param op is a PcodeOp associated with the variable - virtual void pushUnnamedLocation(const Address &addr, - const Varnode *vn,const PcodeOp *op)=0; + virtual void pushUnnamedLocation(const Address &addr,const Varnode *vn,const PcodeOp *op)=0; /// \brief Push a 
variable that represents only part of a symbol onto the RPN stack /// @@ -349,9 +348,9 @@ protected: /// \param sz is the number of bytes in the partial variable /// \param vn is the Varnode holding the partial value /// \param op is a PcodeOp associate with the Varnode - /// \param outtype is the data-type expected by expression using the partial variable + /// \param inslot is the input slot of \b vn with \b op, or -1 if \b op writes \b vn virtual void pushPartialSymbol(const Symbol *sym,int4 off,int4 sz, - const Varnode *vn,const PcodeOp *op,Datatype *outtype)=0; + const Varnode *vn,const PcodeOp *op,int4 inslot)=0; /// \brief Push an identifier for a variable that mismatches with its Symbol /// @@ -365,6 +364,15 @@ protected: virtual void pushMismatchSymbol(const Symbol *sym,int4 off,int4 sz, const Varnode *vn,const PcodeOp *op)=0; + /// \brief Push the implied field of a given Varnode as an object member extraction operation + /// + /// If a Varnode is \e implied and has a \e union data-type, the particular read of the varnode + /// may correspond to a particular field that needs to get printed as a token, even though the + /// Varnode itself is printed directly. This method pushes the field name token. 
+ /// \param vn is the given Varnode + /// \param op is the particular PcodeOp reading the Varnode + virtual void pushImpliedField(const Varnode *vn,const PcodeOp *op)=0; + virtual void emitLineComment(int4 indent,const Comment *comm); ///< Emit a comment line /// \brief Emit a variable declaration diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc index 4c5559a4ea..bb5f05e34a 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc @@ -114,7 +114,7 @@ int4 RuleCollectTerms::applyOp(PcodeOp *op,Funcdata &data) termorder.sortTerms(); // Sort them based on termorder Varnode *vn1,*vn2; uintb coef1,coef2; - const vector &order( termorder.getSort() ); + const vector &order( termorder.getSort() ); int4 i=0; if (!order[0]->getVarnode()->isConstant()) { @@ -5661,8 +5661,9 @@ AddTreeState::AddTreeState(Funcdata &d,PcodeOp *op,int4 slot) : data(d) { baseOp = op; + baseSlot = slot; ptr = op->getIn(slot); - ct = (const TypePointer *)ptr->getType(); + ct = (const TypePointer *)ptr->getTypeReadFacing(op); ptrsize = ptr->getSize(); ptrmask = calc_mask(ptrsize); baseType = ct->getPtrTo(); @@ -6038,16 +6039,14 @@ Varnode *AddTreeState::buildMultiples(void) Varnode *AddTreeState::buildExtra(void) { - correct = (correct+offset) & ptrmask; // Total correction that needs to be made - bool offset_corrected= (correct==0); + correct = correct+offset; // Total correction that needs to be made Varnode *resNode = (Varnode *)0; for(int4 i=0;iisConstant())) - if (vn->getOffset() == correct) { - offset_corrected = true; - continue; - } + if (vn->isConstant()) { + correct -= vn->getOffset(); + continue; + } if (resNode == (Varnode *)0) resNode = vn; else { @@ -6055,7 +6054,8 @@ Varnode *AddTreeState::buildExtra(void) resNode = op->getOut(); } } - if (!offset_corrected) { + correct &= ptrmask; + if (correct != 0) { Varnode *vn = 
data.newConstant(ptrsize,uintb_negate(correct-1,ptrsize)); if (resNode == (Varnode *)0) resNode = vn; @@ -6077,7 +6077,7 @@ bool AddTreeState::buildDegenerate(void) // If the size is really less than scale, there is // probably some sort of padding going on return false; // Don't transform at all - if (baseOp->getOut()->getType()->getMetatype() != TYPE_PTR) // Make sure pointer propagates thru INT_ADD + if (baseOp->getOut()->getTypeDefFacing()->getMetatype() != TYPE_PTR) // Make sure pointer propagates thru INT_ADD return false; vector newparams; int4 slot = baseOp->getSlot(ptr); @@ -6152,6 +6152,7 @@ void AddTreeState::buildTree(void) // Create PTRADD portion of operation if (multNode != (Varnode *)0) { newop = data.newOpBefore(baseOp,CPUI_PTRADD,ptr,multNode,data.newConstant(ptrsize,size)); + data.inheritReadResolution(newop, 0, baseOp, baseSlot); multNode = newop->getOut(); } else @@ -6160,6 +6161,7 @@ void AddTreeState::buildTree(void) // Create PTRSUB portion of operation if (isSubtype) { newop = data.newOpBefore(baseOp,CPUI_PTRSUB,multNode,data.newConstant(ptrsize,offset)); + data.inheritReadResolution(newop, 0, baseOp, baseSlot); if (size != 0) newop->setStopPropagation(); multNode = newop->getOut(); @@ -6192,9 +6194,9 @@ bool RulePtrArith::verifyPreferredPointer(PcodeOp *op,int4 slot) PcodeOp *preOp = vn->getDef(); if (preOp->code() != CPUI_INT_ADD) return true; int preslot = 0; - if (preOp->getIn(preslot)->getType()->getMetatype() != TYPE_PTR) { + if (preOp->getIn(preslot)->getTypeReadFacing(preOp)->getMetatype() != TYPE_PTR) { preslot = 1; - if (preOp->getIn(preslot)->getType()->getMetatype() != TYPE_PTR) + if (preOp->getIn(preslot)->getTypeReadFacing(preOp)->getMetatype() != TYPE_PTR) return true; } return (1 != evaluatePointerExpression(preOp, preslot)); // Does earlier varnode look like the base pointer @@ -6220,7 +6222,7 @@ int4 RulePtrArith::evaluatePointerExpression(PcodeOp *op,int4 slot) Varnode *ptrBase = op->getIn(slot); if (ptrBase->isFree() && 
!ptrBase->isConstant()) return 0; - if (op->getIn(1 - slot)->getType()->getMetatype() == TYPE_PTR) + if (op->getIn(1 - slot)->getTypeReadFacing(op)->getMetatype() == TYPE_PTR) res = 2; Varnode *outVn = op->getOut(); list::const_iterator iter; @@ -6232,7 +6234,7 @@ int4 RulePtrArith::evaluatePointerExpression(PcodeOp *op,int4 slot) Varnode *otherVn = decOp->getIn(1 - decOp->getSlot(outVn)); if (otherVn->isFree() && !otherVn->isConstant()) return 0; // No action if the data-flow isn't fully linked - if (otherVn->getType()->getMetatype() == TYPE_PTR) + if (otherVn->getTypeReadFacing(decOp)->getMetatype() == TYPE_PTR) res = 2; // Do not push in the presence of other pointers } else if ((opc == CPUI_LOAD || opc == CPUI_STORE) && decOp->getIn(1) == outVn) { // If use is as pointer for LOAD or STORE @@ -6289,7 +6291,7 @@ int4 RulePtrArith::applyOp(PcodeOp *op,Funcdata &data) if (!data.isTypeRecoveryOn()) return 0; for(slot=0;slotnumInput();++slot) { // Search for pointer type - ct = op->getIn(slot)->getType(); + ct = op->getIn(slot)->getTypeReadFacing(op); if (ct->getMetatype() == TYPE_PTR) break; } if (slot == op->numInput()) return 0; @@ -6334,7 +6336,7 @@ int4 RuleStructOffset0::applyOp(PcodeOp *op,Funcdata &data) else return 0; - Datatype *ct = op->getIn(1)->getType(); + Datatype *ct = op->getIn(1)->getTypeReadFacing(op); if (ct->getMetatype() != TYPE_PTR) return 0; Datatype *baseType = ((TypePointer *)ct)->getPtrTo(); uintb offset = 0; @@ -6373,6 +6375,7 @@ int4 RuleStructOffset0::applyOp(PcodeOp *op,Funcdata &data) return 0; PcodeOp *newop = data.newOpBefore(op,CPUI_PTRSUB,op->getIn(1),data.newConstant(op->getIn(1)->getSize(),0)); + data.inheritReadResolution(newop, 0, op, 1); newop->setStopPropagation(); data.opSetInput(op,newop->getOut(),1); return 1; @@ -6474,7 +6477,7 @@ int4 RulePushPtr::applyOp(PcodeOp *op,Funcdata &data) if (!data.isTypeRecoveryOn()) return 0; for(slot=0;slotnumInput();++slot) { // Search for pointer type vni = op->getIn(slot); - if 
(vni->getType()->getMetatype() == TYPE_PTR) break; + if (vni->getTypeReadFacing(op)->getMetatype() == TYPE_PTR) break; } if (slot == op->numInput()) return 0; @@ -6538,7 +6541,7 @@ int4 RulePtraddUndo::applyOp(PcodeOp *op,Funcdata &data) if (!data.isTypeRecoveryOn()) return 0; int4 size = (int4)op->getIn(2)->getOffset(); // Size the PTRADD thinks we are pointing basevn = op->getIn(0); - tp = (TypePointer *)basevn->getType(); + tp = (TypePointer *)basevn->getTypeReadFacing(op); if (tp->getMetatype() == TYPE_PTR) // Make sure we are still a pointer if (tp->getPtrTo()->getSize()==AddrSpace::addressToByteInt(size,tp->getWordSize())) { // of the correct size Varnode *indVn = op->getIn(1); @@ -6568,7 +6571,7 @@ int4 RulePtrsubUndo::applyOp(PcodeOp *op,Funcdata &data) if (!data.isTypeRecoveryOn()) return 0; Varnode *basevn = op->getIn(0); - if (basevn->getType()->isPtrsubMatching(op->getIn(1)->getOffset())) + if (basevn->getTypeReadFacing(op)->isPtrsubMatching(op->getIn(1)->getOffset())) return 0; data.opSetOpcode(op,CPUI_INT_ADD); @@ -6613,7 +6616,7 @@ int4 RuleAddUnsigned::applyOp(PcodeOp *op,Funcdata &data) Varnode *constvn = op->getIn(1); if (!constvn->isConstant()) return 0; - Datatype *dt = constvn->getType(); + Datatype *dt = constvn->getTypeReadFacing(op); if (dt->getMetatype() != TYPE_UINT) return 0; if (dt->isCharPrint()) return 0; // Only change integer forms if (dt->isEnumType()) return 0; @@ -6661,6 +6664,8 @@ int4 Rule2Comp2Sub::applyOp(PcodeOp *op,Funcdata &data) /// \class RuleSubRight /// \brief Cleanup: Convert truncation to cast: `sub(V,c) => sub(V>>c*8,0)` /// +/// Before attempting the transform, check if the SUBPIECE is really extracting a field +/// from a structure. If so, mark the op as requiring special printing and return. /// If the lone descendant of the SUBPIECE is a INT_RIGHT or INT_SRIGHT, /// we lump that into the shift as well. 
void RuleSubRight::getOpList(vector &oplist) const @@ -6672,6 +6677,16 @@ void RuleSubRight::getOpList(vector &oplist) const int4 RuleSubRight::applyOp(PcodeOp *op,Funcdata &data) { + Datatype *parent; + int4 offset; + + if (op->doesSpecialPrinting()) + return 0; + if (TypeOpSubpiece::testExtraction(false, op, parent, offset) != (const TypeField *)0) { + data.opMarkSpecialPrint(op); // Print this as a field extraction + return 0; + } + int4 c = op->getIn(1)->getOffset(); if (c==0) return 0; // SUBPIECE is not least sig Varnode *a = op->getIn(0); @@ -6764,13 +6779,14 @@ int4 RulePtrsubCharConstant::applyOp(PcodeOp *op,Funcdata &data) { Varnode *sb = op->getIn(0); - if (sb->getType()->getMetatype() != TYPE_PTR) return 0; - TypeSpacebase *sbtype = (TypeSpacebase *)((TypePointer *)sb->getType())->getPtrTo(); + Datatype *sbType = sb->getTypeReadFacing(op); + if (sbType->getMetatype() != TYPE_PTR) return 0; + TypeSpacebase *sbtype = (TypeSpacebase *)((TypePointer *)sbType)->getPtrTo(); if (sbtype->getMetatype() != TYPE_SPACEBASE) return 0; Varnode *vn1 = op->getIn(1); if (!vn1->isConstant()) return 0; Varnode *outvn = op->getOut(); - TypePointer *outtype = (TypePointer *)outvn->getType(); + TypePointer *outtype = (TypePointer *)outvn->getTypeDefFacing(); if (outtype->getMetatype() != TYPE_PTR) return 0; Datatype *basetype = outtype->getPtrTo(); if (!basetype->isCharPrint()) return 0; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh index d62e62df07..17ede9725a 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh @@ -48,6 +48,7 @@ class AddTreeState { const TypePointerRel *pRelType; ///< A copy of \b ct, if it is a relative pointer int4 ptrsize; ///< Size of the pointer int4 size; ///< Size of data-type being pointed to (in address units) or 0 for open ended pointer + int4 baseSlot; ///< Slot of the ADD 
tree base that is holding the pointer uintb ptrmask; ///< Mask for modulo calculations in ptr space uintb offset; ///< Number of bytes we dig into the base data-type uintb correct; ///< Number of bytes being double counted diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/space.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/space.hh index 23eb035417..86e1931139 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/space.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/space.hh @@ -178,8 +178,8 @@ public: virtual void printRaw(ostream &s,uintb offset) const; virtual void saveXml(ostream &s) const; virtual void restoreXml(const Element *el); - static const string NAME; // Reserved name for the address space - static const int4 INDEX; // Reserved index for constant space + static const string NAME; ///< Reserved name for the address space + static const int4 INDEX; ///< Reserved index for constant space }; /// \brief Special AddrSpace for special/user-defined address spaces @@ -189,8 +189,8 @@ public: OtherSpace(AddrSpaceManager *m, const Translate *t); ///< For use with restoreXml virtual void printRaw(ostream &s, uintb offset) const; virtual void saveXml(ostream &s) const; - static const string NAME; // Reserved name for the address space - static const int4 INDEX; // Reserved index for the other space + static const string NAME; ///< Reserved name for the address space + static const int4 INDEX; ///< Reserved index for the other space }; /// \brief The pool of temporary storage registers diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc index 0f9e82ecf1..acb8290aad 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc @@ -18,8 +18,8 @@ /// The base propagation ordering associated with each meta-type. /// The array elements correspond to the ordering of #type_metatype. 
-sub_metatype Datatype::base2sub[13] = { - SUB_STRUCT, SUB_PARTIALSTRUCT, SUB_ARRAY, SUB_PTRREL, SUB_PTR, SUB_FLOAT, SUB_CODE, SUB_BOOL, +sub_metatype Datatype::base2sub[14] = { + SUB_UNION, SUB_STRUCT, SUB_PARTIALSTRUCT, SUB_ARRAY, SUB_PTRREL, SUB_PTR, SUB_FLOAT, SUB_CODE, SUB_BOOL, SUB_UINT_PLAIN, SUB_INT_PLAIN, SUB_UNKNOWN, SUB_SPACEBASE, SUB_VOID }; @@ -190,6 +190,9 @@ void metatype2string(type_metatype metatype,string &res) case TYPE_STRUCT: res = "struct"; break; + case TYPE_UNION: + res = "union"; + break; case TYPE_SPACEBASE: res = "spacebase"; break; @@ -246,6 +249,8 @@ type_metatype string2metatype(const string &metastring) return TYPE_UNKNOWN; else if (metastring=="uint") return TYPE_UINT; + else if (metastring=="union") + return TYPE_UNION; break; case 'i': if (metastring == "int") @@ -370,6 +375,41 @@ Datatype *Datatype::getStripped(void) const return (Datatype *)0; } +/// For certain data-types, particularly \e union, variables of that data-type are transformed into a subtype +/// depending on the particular use. Each read or write of the variable may use a different subtype. +/// This method returns the particular subtype required based on a specific PcodeOp. A slot index >=0 +/// indicates which operand \e reads the variable, or if the index is -1, the variable is \e written. +/// \param op is the specific PcodeOp +/// \param slot indicates the input operand, or the output +/// \return the resolved sub-type +Datatype *Datatype::resolveInFlow(PcodeOp *op,int4 slot) + +{ + return this; +} + +/// This is the constant version of resolveInFlow. If a resulting subtype has already been calculated, +/// for the particular read (\b slot >= 0) or write (\b slot == -1), then return it. +/// Otherwise return the original data-type. 
+/// \param op is the PcodeOp using the Varnode assigned with \b this data-type +/// \param slot is the slot reading or writing the Varnode +/// \return the resolved subtype or the original data-type +Datatype* Datatype::findResolve(const PcodeOp *op,int4 slot) + +{ + return this; +} + +/// If \b this data-type has an alternate data-type form that matches the given data-type, +/// return an index indicating this form, otherwise return -1. +/// \param ct is the given data-type +/// \return the index of the matching form or -1 +int4 Datatype::findCompatibleResolve(Datatype *ct) const + +{ + return -1; +} + /// Restore the basic properties (name,size,id) of a data-type from an XML element /// Properties are read from the attributes of the element /// \param el is the XML element @@ -449,6 +489,53 @@ uint8 Datatype::hashSize(uint8 id,int4 size) return id; } +/// Contruct from the given \ element. +/// \param el is the element +/// \param typegrp is the TypeFactory for parsing data-type info +TypeField::TypeField(const Element *el,TypeFactory &typegrp) + +{ + ident = -1; + offset = -1; + for(int4 i=0;igetNumAttributes();++i) { + const string &attribName(el->getAttributeName(i)); + if (attribName == "name") + name = el->getAttributeValue(i); + else if (attribName == "offset") { + istringstream j(el->getAttributeValue(i)); + j.unsetf(ios::dec | ios::hex | ios::oct); + j >> offset; + } + else if (attribName == "id") { + istringstream j(el->getAttributeValue(i)); + j.unsetf(ios::dec | ios::hex | ios::oct); + j >> ident; + } + } + type = typegrp.restoreXmlType( *el->getChildren().begin() ); + if (name.size()==0) + throw LowlevelError("name attribute must not be empty in tag"); + if (offset < 0) + throw LowlevelError("offset attribute invalid for tag"); + if (ident < 0) + ident = offset; // By default the id is the offset +} + +/// Write out a formal description of \b this as an XML \ tag. 
+/// \param s is the stream to write to +void TypeField::saveXml(ostream &s) const + +{ + s << "'; + type->saveXmlRef(s); + s << "\n"; +} + /// Parse a \ tag for attributes of the character data-type /// \param el is the root XML element /// \param typegrp is the factory owning \b this data-type @@ -591,20 +678,26 @@ void TypePointer::restoreXml(const Element *el,TypeFactory &typegrp) } ptrto = typegrp.restoreXmlType( *el->getChildren().begin() ); calcSubmeta(); - if (name.size() == 0) // Inherit only coretype only if no name - flags = ptrto->getInheritable(); + if (name.size() == 0) // Inherit only if no name + flags |= ptrto->getInheritable(); } /// Pointers to structures may require a specific \b submeta void TypePointer::calcSubmeta(void) { - if (ptrto->getMetatype() == TYPE_STRUCT) { + type_metatype ptrtoMeta = ptrto->getMetatype(); + if (ptrtoMeta == TYPE_STRUCT) { if (ptrto->numDepend() > 1 || ptrto->isIncomplete()) submeta = SUB_PTR_STRUCT; else submeta = SUB_PTR; } + else if (ptrtoMeta == TYPE_UNION) { + submeta = SUB_PTR_STRUCT; + } + if (ptrto->needsResolution() && ptrtoMeta != TYPE_PTR) + flags |= needs_resolution; // Inherit needs_resolution, but only if not a pointer } /// \brief Find a sub-type pointer given an offset into \b this @@ -665,17 +758,49 @@ bool TypePointer::isPtrsubMatching(uintb off) const if (newoff != 0) return false; } - else { + else if (ptrto->getMetatype() == TYPE_ARRAY || ptrto->getMetatype() == TYPE_STRUCT) { int4 sz = off; int4 typesize = ptrto->getSize(); - if ((ptrto->getMetatype() != TYPE_ARRAY)&&(ptrto->getMetatype() != TYPE_STRUCT)) - return false; // Not a pointer to a structured type - else if ((typesize <= AddrSpace::addressToByteInt(sz,wordsize))&&(typesize!=0)) + if ((typesize <= AddrSpace::addressToByteInt(sz,wordsize))&&(typesize!=0)) return false; } + else if (ptrto->getMetatype() == TYPE_UNION) { + // A PTRSUB reaching here cannot be used for a union field resolution + // These are created by 
ActionSetCasts::resolveUnion + return false; // So we always return false + } + else + return false; // Not a pointer to a structured data-type return true; } +Datatype *TypePointer::resolveInFlow(PcodeOp *op,int4 slot) + +{ + if (ptrto->getMetatype() == TYPE_UNION) { + Funcdata *fd = op->getParent()->getFuncdata(); + const ResolvedUnion *res = fd->getUnionField(this,op,slot); + if (res != (ResolvedUnion*)0) + return res->getDatatype(); + ScoreUnionFields scoreFields(*fd->getArch()->types,this,op,slot); + fd->setUnionField(this,op,slot,scoreFields.getResult()); + return scoreFields.getResult().getDatatype(); + } + return this; +} + +Datatype* TypePointer::findResolve(const PcodeOp *op,int4 slot) + +{ + if (ptrto->getMetatype() == TYPE_UNION) { + const Funcdata *fd = op->getParent()->getFuncdata(); + const ResolvedUnion *res = fd->getUnionField(this,op,slot); + if (res != (ResolvedUnion*)0) + return res->getDatatype(); + } + return this; +} + void TypeArray::printRaw(ostream &s) const { @@ -747,6 +872,34 @@ void TypeArray::saveXml(ostream &s) const s << ""; } +Datatype *TypeArray::resolveInFlow(PcodeOp *op,int4 slot) + +{ + // This is currently only called if the array size is 1 + // in which case this should always resolve to the element data-type + return arrayof; +} + +Datatype* TypeArray::findResolve(const PcodeOp *op,int4 slot) + +{ + // This is currently only called if the array size is 1 + // in which case this should always resolve to the element data-type + return arrayof; +} + +int4 TypeArray::findCompatibleResolve(Datatype *ct) const + +{ + if (ct->needsResolution() && !arrayof->needsResolution()) { + if (ct->findCompatibleResolve(arrayof) >= 0) + return 0; + } + if (arrayof == ct) + return 0; + return -1; +} + /// Parse a \ tag with a child describing the array element data-type. 
/// \param el is the root XML element /// \param typegrp is the factory owning \b this data-type @@ -761,6 +914,8 @@ void TypeArray::restoreXml(const Element *el,TypeFactory &typegrp) arrayof = typegrp.restoreXmlType(*el->getChildren().begin()); if ((arraysize<=0)||(arraysize*arrayof->getSize()!=size)) throw LowlevelError("Bad size for array of type "+arrayof->getName()); + if (arraysize == 1) + flags |= needs_resolution; // Array of size 1 needs special treatment } TypeEnum::TypeEnum(const TypeEnum &op) : TypeBase(op) @@ -978,6 +1133,10 @@ void TypeStruct::setFields(const vector &fd) if (end > size) size = end; } + if (field.size() == 1) { // A single field + if (field[0].type->getSize() == size) // that fills the whole structure + flags |= needs_resolution; // needs special attention + } } /// Find the proper subfield given an offset. Return the index of that field @@ -1034,7 +1193,7 @@ int4 TypeStruct::getLowerBoundField(int4 off) const /// \param sz is the size of the byte range /// \param newoff points to the renormalized offset to pass back /// \return the containing field or NULL if the range is not contained -const TypeField *TypeStruct::getField(int4 off,int4 sz,int4 *newoff) const +const TypeField *TypeStruct::resolveTruncation(int4 off,int4 sz,int4 *newoff) const { int4 i; @@ -1196,12 +1355,7 @@ void TypeStruct::saveXml(ostream &s) const s << ">\n"; vector::const_iterator iter; for(iter=field.begin();iter!=field.end();++iter) { - s << "'; - (*iter).type->saveXmlRef(s); - s << "\n"; + (*iter).saveXml(s); } s << ""; } @@ -1216,27 +1370,365 @@ void TypeStruct::restoreFields(const Element *el,TypeFactory &typegrp) List::const_iterator iter; int4 maxoffset = 0; for(iter=list.begin();iter!=list.end();++iter) { - field.push_back( TypeField() ); - field.back().name = (*iter)->getAttributeValue("name"); - istringstream j((*iter)->getAttributeValue("offset")); - j.unsetf(ios::dec | ios::hex | ios::oct); - j >> field.back().offset; - field.back().type = 
typegrp.restoreXmlType( *(*iter)->getChildren().begin() ); + field.emplace_back(*iter,typegrp); int4 trialmax = field.back().offset + field.back().type->getSize(); if (trialmax > maxoffset) maxoffset = trialmax; - if (field.back().name.size()==0) { + if (maxoffset > size) { ostringstream s; - s << "unlabelled" << dec << field.back().offset; - field.back().name = s.str(); + s << "Field " << field.back().name << " does not fit in structure " + name; + throw LowlevelError(s.str()); } } - if (maxoffset > size) - throw LowlevelError("Size too small for fields of structure "+name); if (size == 0) // We can restore an incomplete structure, indicated by 0 size flags |= type_incomplete; else markComplete(); // Otherwise the structure is complete + if (field.size() == 1) { // A single field + if (field[0].type->getSize() == size) // that fills the whole structure + flags |= needs_resolution; // needs special resolution + } +} + +/// We know if this method is called that \b this structure has a single field that fills the entire +/// structure. The indicated Varnode can either be referred either by naming the struture or naming +/// the field. This method returns an indication of the best fit: either 0 for the field or +/// -1 for the structure. 
+/// \param op is the given PcodeOp using the Varnode +/// \param slot is -1 if the Varnode is an output or >=0 indicating the input slot +/// \return either 0 to indicate the field or -1 to indicate the structure +int4 TypeStruct::scoreFill(PcodeOp *op,int4 slot) const + +{ + if (op->code() == CPUI_COPY || op->code() == CPUI_INDIRECT) { + Varnode *vn; + if (slot == 0) + vn = op->getOut(); + else + vn = op->getIn(0); + if (vn->isTypeLock() && vn->getType() == this) + return -1; // COPY of the structure directly, use whole structure + } + else if ((op->code() == CPUI_LOAD && slot == -1)||(op->code() == CPUI_STORE && slot == 2)) { + Varnode *vn = op->getIn(1); + if (vn->isTypeLock()) { + Datatype *ct = vn->getTypeReadFacing(op); + if (ct->getMetatype() == TYPE_PTR && ((TypePointer *)ct)->getPtrTo() == this) + return -1; // LOAD or STORE of the structure directly, use whole structure + } + } + else if (op->isCall()) { + Funcdata *fd = op->getParent()->getFuncdata(); + FuncCallSpecs *fc = fd->getCallSpecs(op); + if (fc != (FuncCallSpecs *)0) { + ProtoParameter *param = (ProtoParameter *)0; + if (slot >= 1 && fc->isInputLocked()) + param = fc->getParam(slot-1); + else if (slot < 0 && fc->isOutputLocked()) + param = fc->getOutput(); + if (param != (ProtoParameter *)0 && param->getType() == this) + return -1; // Function signature refers to structure directly, use whole structure + } + } + return 0; // In all other cases refer to the field +} + +Datatype *TypeStruct::resolveInFlow(PcodeOp *op,int4 slot) + +{ + Funcdata *fd = op->getParent()->getFuncdata(); + const ResolvedUnion *res = fd->getUnionField(this, op, slot); + if (res != (ResolvedUnion *)0) + return res->getDatatype(); + + int4 fieldNum = scoreFill(op,slot); + + ResolvedUnion compFill(this,fieldNum,*fd->getArch()->types); + fd->setUnionField(this, op, slot, compFill); + return compFill.getDatatype(); +} + +Datatype *TypeStruct::findResolve(const PcodeOp *op,int4 slot) + +{ + const Funcdata *fd = 
op->getParent()->getFuncdata(); + const ResolvedUnion *res = fd->getUnionField(this, op, slot); + if (res != (ResolvedUnion *)0) + return res->getDatatype(); + return field[0].type; // If not calculated before, assume referring to field +} + +int4 TypeStruct::findCompatibleResolve(Datatype *ct) const + +{ + Datatype *fieldType = field[0].type; + if (ct->needsResolution() && !fieldType->needsResolution()) { + if (ct->findCompatibleResolve(fieldType) >= 0) + return 0; + } + if (fieldType == ct) + return 0; + return -1; +} + +/// Assign an offset to fields in order so that each field starts at an aligned offset within the structure +/// \param list is the list of fields +/// \param align is the given alignment +void TypeStruct::assignFieldOffsets(vector &list,int4 align) + +{ + int4 offset = 0; + vector::iterator iter; + for(iter=list.begin();iter!=list.end();++iter) { + if ((*iter).offset != -1) continue; + int4 cursize = (*iter).type->getSize(); + int4 curalign = 0; + if (align > 1) { + curalign = align; + while((curalign>>1) >= cursize) + curalign >>= 1; + curalign -= 1; + } + if ((offset & curalign)!=0) + offset = (offset-(offset & curalign) + (curalign+1)); + (*iter).offset = offset; + (*iter).ident = offset; + offset += cursize; + } +} + +/// Copy a list of fields into this union, establishing its size. +/// Should only be called once when constructing the type. TypeField \b offset is assumed to be 0. +/// \param fd is the list of fields to copy in +void TypeUnion::setFields(const vector &fd) + +{ + vector::const_iterator iter; + // Need to calculate size + size = 0; + for(iter=fd.begin();iter!=fd.end();++iter) { + field.push_back(*iter); + int4 end = field.back().type->getSize(); + if (end > size) + size = end; + } +} + +/// Children of the XML element describe each field. 
+/// \param el is the root union element +/// \param typegrp is the factory owning the new union +void TypeUnion::restoreFields(const Element *el,TypeFactory &typegrp) + +{ + const List &list(el->getChildren()); + List::const_iterator iter; + for(iter=list.begin();iter!=list.end();++iter) { + field.emplace_back(*iter,typegrp); + if (field.back().offset + field.back().type->getSize() > size) { + ostringstream s; + s << "Field " << field.back().name << " does not fit in union " << name; + throw LowlevelError(s.str()); + } + } + if (size == 0) // We can restore an incomplete structure, indicated by 0 size + flags |= type_incomplete; + else + markComplete(); // Otherwise the union is complete +} + +TypeUnion::TypeUnion(const TypeUnion &op) + : Datatype(op) +{ + setFields(op.field); + size = op.size; // setFields might have changed the size +} + +int4 TypeUnion::compare(const Datatype &op,int4 level) const + +{ + int4 res = Datatype::compare(op,level); + if (res != 0) return res; + const TypeUnion *tu = (const TypeUnion *)&op; + vector::const_iterator iter1,iter2; + + if (field.size() != tu->field.size()) return (tu->field.size()-field.size()); + iter1 = field.begin(); + iter2 = tu->field.begin(); + // Test only the name and first level metatype first + while(iter1 != field.end()) { + if ((*iter1).name != (*iter2).name) + return ((*iter1).name < (*iter2).name) ? -1:1; + if ((*iter1).type->getMetatype() != (*iter2).type->getMetatype()) + return ((*iter1).type->getMetatype() < (*iter2).type->getMetatype()) ? -1 : 1; + ++iter1; + ++iter2; + } + level -= 1; + if (level < 0) { + if (id == op.getId()) return 0; + return (id < op.getId()) ? 
-1 : 1; + } + // If we are still equal, now go down deep into each field type + iter1 = field.begin(); + iter2 = tu->field.begin(); + while(iter1 != field.end()) { + if ((*iter1).type != (*iter2).type) { // Short circuit recursive loops + int4 c = (*iter1).type->compare( *(*iter2).type, level ); + if (c != 0) return c; + } + ++iter1; + ++iter2; + } + return 0; +} + +int4 TypeUnion::compareDependency(const Datatype &op) const + +{ + int4 res = Datatype::compareDependency(op); + if (res != 0) return res; + const TypeUnion *tu = (const TypeUnion *)&op; + vector::const_iterator iter1,iter2; + + if (field.size() != tu->field.size()) return (tu->field.size()-field.size()); + iter1 = field.begin(); + iter2 = tu->field.begin(); + // Test only the name and first level metatype first + while(iter1 != field.end()) { + if ((*iter1).name != (*iter2).name) + return ((*iter1).name < (*iter2).name) ? -1:1; + Datatype *fld1 = (*iter1).type; + Datatype *fld2 = (*iter2).type; + if (fld1 != fld2) + return (fld1 < fld2) ? 
-1 : 1; // compare the pointers directly + ++iter1; + ++iter2; + } + return 0; +} + +void TypeUnion::saveXml(ostream &s) const + +{ + if (typedefImm != (Datatype *)0) { + saveXmlTypedef(s); + return; + } + s << "\n"; + vector::const_iterator iter; + for(iter=field.begin();iter!=field.end();++iter) { + (*iter).saveXml(s); + } + s << ""; +} + +Datatype *TypeUnion::resolveInFlow(PcodeOp *op,int4 slot) + +{ + Funcdata *fd = op->getParent()->getFuncdata(); + const ResolvedUnion *res = fd->getUnionField(this, op, slot); + if (res != (ResolvedUnion *)0) + return res->getDatatype(); + ScoreUnionFields scoreFields(*fd->getArch()->types,this,op,slot); + fd->setUnionField(this, op, slot, scoreFields.getResult()); + return scoreFields.getResult().getDatatype(); +} + +Datatype* TypeUnion::findResolve(const PcodeOp *op,int4 slot) + +{ + const Funcdata *fd = op->getParent()->getFuncdata(); + const ResolvedUnion *res = fd->getUnionField(this, op, slot); + if (res != (ResolvedUnion *)0) + return res->getDatatype(); + return this; +} + +/// \brief Resolve which union field is being used for a given PcodeOp when a truncation is involved +/// +/// This is used either when a Varnode is backed by a larger Symbol with a union data-type, +/// or if the Varnode is produced by a CPUI_SUBPIECE where the input Varnode has a union data-type. +/// Scoring is done to compute the best field and the result is cached with the function. 
+/// The record of the best field is returned or null if there is no appropriate field +/// \param offset is the byte offset into the union we are truncating to +/// \param op is either the PcodeOp reading the truncated Varnode or the CPUI_SUBPIECE doing the truncation +/// \param slot is either the input slot of the reading PcodeOp or the artificial SUBPIECE slot: 1 +/// \param newoff is used to pass back how much offset is left to resolve +/// \return the field of the union best associated with the truncation or null +const TypeField *TypeUnion::resolveTruncation(int4 offset,PcodeOp *op,int4 slot,int4 &newoff) + +{ + Funcdata *fd = op->getParent()->getFuncdata(); + const ResolvedUnion *res = fd->getUnionField(this, op, slot); + if (res != (ResolvedUnion *)0 && res->getFieldNum() >= 0) { + const TypeField *field = getField(res->getFieldNum()); + newoff = offset - field->offset; + return field; + } + if (op->code() == CPUI_SUBPIECE && slot == 1) { // The slot is artificial in this case + ScoreUnionFields scoreFields(*fd->getArch()->types,this,offset,op); + fd->setUnionField(this, op, slot, scoreFields.getResult()); + if (scoreFields.getResult().getFieldNum() >= 0) { + newoff = 0; + return getField(scoreFields.getResult().getFieldNum()); + } + } + else { + ScoreUnionFields scoreFields(*fd->getArch()->types,this,offset,op,slot); + fd->setUnionField(this, op, slot, scoreFields.getResult()); + if (scoreFields.getResult().getFieldNum() >= 0) { + const TypeField *field = getField(scoreFields.getResult().getFieldNum()); + newoff = offset - field->offset; + return field; + } + } + return (const TypeField *)0; +} + +/// \brief Return a precalculated field associated with a truncation +/// +/// This is the \e const version of resolveTruncation(). No new scoring is done, but if a cached result +/// is available, return it. 
+/// \param offset is the byte offset of the truncation +/// \param op is the PcodeOp reading the truncated value +/// \param slot is the input slot being read +/// \param newoff is used to pass back any remaining offset into the field which still must be resolved +/// \return the field to use with truncation or null if there is no appropriate field +const TypeField *TypeUnion::findTruncation(int4 offset,const PcodeOp *op,int4 slot,int4 &newoff) const + +{ + const Funcdata *fd = op->getParent()->getFuncdata(); + const ResolvedUnion *res = fd->getUnionField(this, op, slot); + if (res != (ResolvedUnion *)0 && res->getFieldNum() >= 0) { + const TypeField *field = getField(res->getFieldNum()); + newoff = offset - field->offset; + return field; + } + return (const TypeField *)0; +} + +int4 TypeUnion::findCompatibleResolve(Datatype *ct) const + +{ + if (!ct->needsResolution()) { + for(int4 i=0;igetSize() != ct->getSize()) continue; + if (fieldType->needsResolution()) continue; + if (ct->findCompatibleResolve(fieldType) >= 0) + return i; + } + } + return -1; } /// Parse a \ tag with children describing the data-type being pointed to and the parent data-type. @@ -1271,9 +1763,9 @@ void TypePointerRel::restoreXml(const Element *el,TypeFactory &typegrp) cacheStrippedType(typegrp); // it is considered ephemeral } -/// For a variable that is a relative pointer, constant offsets off of the variable can be +/// For a variable that is a relative pointer, constant offsets relative to the variable can be /// displayed either as coming from the variable itself or from the parent object. 
-/// \param byteOff is the given offset off of the variable +/// \param addrOff is the given offset in address units /// \return \b true if the variable should be displayed as coming from the parent bool TypePointerRel::evaluateThruParent(uintb addrOff) const @@ -2090,11 +2582,9 @@ Datatype *TypeFactory::setName(Datatype *ct,const string &n) bool TypeFactory::setFields(vector &fd,TypeStruct *ot,int4 fixedsize,uint4 flags) { - int4 offset,cursize,curalign; - if (!ot->isIncomplete()) throw LowlevelError("Can only set fields on an incomplete structure"); - offset = 0; + int4 offset = 0; vector::iterator iter; // Find the maximum offset, from the explicitly set offsets @@ -2111,23 +2601,6 @@ bool TypeFactory::setFields(vector &fd,TypeStruct *ot,int4 fixedsize, } } - // Assign offsets, respecting alignment, where not explicitly set - for(iter=fd.begin();iter!=fd.end();++iter) { - if ((*iter).offset != -1) continue; - cursize = (*iter).type->getSize(); - curalign = 0; - if (align > 1) { - curalign = align; - while((curalign>>1) >= cursize) - curalign >>= 1; - curalign -= 1; - } - if ((offset & curalign)!=0) - offset = (offset-(offset & curalign) + (curalign+1)); - (*iter).offset = offset; - offset += cursize; - } - sort(fd.begin(),fd.end()); // Sort fields by offset // We could check field overlapping here @@ -2148,6 +2621,42 @@ bool TypeFactory::setFields(vector &fd,TypeStruct *ot,int4 fixedsize, return true; } +/// If \b fixedsize is greater than 0, force the final structure to have that size. +/// This method should only be used on an incomplete union. It will mark the union as complete. 
+/// \param fd is the list of fields to set +/// \param ot is the TypeUnion object to modify +/// \param fixedsize is 0 or the forced size of the union +/// \param flags are other flags to set on the union +/// \return true if modification was successful +bool TypeFactory::setFields(vector &fd,TypeUnion *ot,int4 fixedsize,uint4 flags) + +{ + if (!ot->isIncomplete()) + throw LowlevelError("Can only set fields on an incomplete union"); + vector::iterator iter; + + for(iter=fd.begin();iter!=fd.end();++iter) { + Datatype *ct = (*iter).type; + // Do some sanity checks on the field + if (ct->getMetatype() == TYPE_VOID) return false; + if ((*iter).offset != 0) return false; + if ((*iter).name.size() == 0) return false; + } + + tree.erase(ot); + ot->setFields(fd); + ot->flags &= ~(uint4)Datatype::type_incomplete; + ot->flags |= (flags & (Datatype::variable_length | Datatype::type_incomplete)); + if (fixedsize > 0) { // If the caller is trying to force a size + if (fixedsize > ot->size) // If the forced size is bigger than the size required for fields + ot->size = fixedsize; // Force the bigger size + else if (fixedsize < ot->size) // If the forced size is smaller, this is an error + throw LowlevelError("Trying to force too small a size on "+ot->getName()); + } + tree.insert(ot); + return true; +} + /// The given prototype is copied into the given code data-type /// This method should only be used on an incomplete TypeCode. It will mark the TypeCode as complete. /// \param fp is the given prototype to copy @@ -2519,14 +3028,24 @@ TypeArray *TypeFactory::getTypeArray(int4 as,Datatype *ao) TypeStruct *TypeFactory::getTypeStruct(const string &n) { - // We should probably strip offsets here - // But I am currently choosing not to TypeStruct tmp; tmp.name = n; tmp.id = Datatype::hashName(n); return (TypeStruct *) findAdd(tmp); } +/// The created union will be incomplete and have no fields. They must be added later. 
+/// \param n is the name of the union +/// \return the TypeUnion object +TypeUnion *TypeFactory::getTypeUnion(const string &n) + +{ + TypeUnion tmp; + tmp.name = n; + tmp.id = Datatype::hashName(n); + return (TypeUnion *) findAdd(tmp); +} + /// The created enumeration will have no named values and a default configuration /// Named values must be added later. /// \param n is the name of the enumeration @@ -2711,7 +3230,7 @@ void TypeFactory::saveXml(ostream &s) const if ((*iter)->isCoreType()) { // If this would be saved as a coretype type_metatype meta = (*iter)->getMetatype(); if ((meta != TYPE_PTR)&&(meta != TYPE_ARRAY)&& - (meta != TYPE_STRUCT)) + (meta != TYPE_STRUCT)&&(meta != TYPE_UNION)) continue; // Don't save it here } s << ' '; @@ -2736,7 +3255,7 @@ void TypeFactory::saveXmlCoreTypes(ostream &s) const if (!ct->isCoreType()) continue; type_metatype meta = ct->getMetatype(); if ((meta==TYPE_PTR)||(meta==TYPE_ARRAY)|| - (meta==TYPE_STRUCT)) + (meta==TYPE_STRUCT)||(meta==TYPE_UNION)) continue; s << ' '; ct->saveXml(s); @@ -2752,24 +3271,44 @@ void TypeFactory::saveXmlCoreTypes(ostream &s) const Datatype *TypeFactory::restoreTypedef(const Element *el) { - uint8 id; - istringstream s1(el->getAttributeValue("id")); - s1.unsetf(ios::dec | ios::hex | ios::oct); - s1 >> id; - string nm = el->getAttributeValue("name"); + uint8 id = 0; + string nm; + for(int4 i=0;igetNumAttributes();++i) { + const string &attribName(el->getAttributeName(i)); + if (attribName == "id") { + istringstream s1(el->getAttributeValue("id")); + s1.unsetf(ios::dec | ios::hex | ios::oct); + s1 >> id; + } + else if (attribName == "name") { + nm = el->getAttributeValue("name"); + } + } + if (id == 0) { // Its possible the typedef is a builtin + id = Datatype::hashName(nm); // There must be some kind of id + } Datatype *defedType = restoreXmlType( *el->getChildren().begin() ); if (defedType->isVariableLength()) id = Datatype::hashSize(id, defedType->size); - if (defedType->getMetatype() == 
TYPE_STRUCT) { - // Its possible that a typedef of a struct is recursively defined, in which case + if (defedType->getMetatype() == TYPE_STRUCT || defedType->getMetatype() == TYPE_UNION) { + // Its possible that a typedef of a struct/union is recursively defined, in which case // an incomplete version may already be in the container - TypeStruct *prev = (TypeStruct *)findByIdLocal(nm, id); + Datatype *prev = findByIdLocal(nm, id); if (prev != (Datatype *)0) { if (defedType != prev->getTypedef()) throw LowlevelError("Trying to create typedef of existing type: " + prev->name); - TypeStruct *defedStruct = (TypeStruct *)defedType; - if (prev->field.size() != defedStruct->field.size()) - prev->field = defedStruct->field; + if (prev->getMetatype() == TYPE_STRUCT) { + TypeStruct *prevStruct = (TypeStruct *)prev; + TypeStruct *defedStruct = (TypeStruct *)defedType; + if (prevStruct->field.size() != defedStruct->field.size()) + setFields(defedStruct->field,prevStruct,defedStruct->size,defedStruct->flags); + } + else { + TypeUnion *prevUnion = (TypeUnion *)prev; + TypeUnion *defedUnion = (TypeUnion *)defedType; + if (prevUnion->field.size() != defedUnion->field.size()) + setFields(defedUnion->field,prevUnion,defedUnion->size,defedUnion->flags); + } return prev; } } @@ -2805,6 +3344,35 @@ Datatype* TypeFactory::restoreStruct(const Element *el,bool forcecore) return ct; } +/// If necessary create a stub object before parsing the field descriptions, to deal with recursive definitions +/// \param el is the XML element describing the union +/// \param forcecore is \b true if the data-type is considered core +/// \return the newly minted union data-type +Datatype* TypeFactory::restoreUnion(const Element *el,bool forcecore) + +{ + TypeUnion tu; + tu.restoreXmlBasic(el); + if (forcecore) + tu.flags |= Datatype::coretype; + Datatype *ct = findByIdLocal(tu.name,tu.id); + if (ct == (Datatype*)0) { + ct = findAdd(tu); // Create stub to allow recursive definitions + } + else if 
(ct->getMetatype() != TYPE_UNION) + throw LowlevelError("Trying to redefine type: " + tu.name); + tu.restoreFields(el,*this); + if (!ct->isIncomplete()) { // Structure of this name was already present + if (0 != ct->compareDependency(tu)) + throw LowlevelError("Redefinition of union: " + tu.name); + } + else { // If structure is a placeholder stub + if (!setFields(tu.field,(TypeUnion*)ct,tu.size,tu.flags)) // Define structure now by copying fields + throw LowlevelError("Bad union definition"); + } + return ct; +} + /// If necessary create a stub object before parsing the prototype description, to deal with recursive definitions /// \param el is the XML element describing the code object /// \param isConstructor is \b true if any prototype should be treated as a constructor @@ -2886,6 +3454,9 @@ Datatype *TypeFactory::restoreXmlTypeNoRef(const Element *el,bool forcecore) case TYPE_STRUCT: ct = restoreStruct(el,forcecore); break; + case TYPE_UNION: + ct = restoreUnion(el,forcecore); + break; case TYPE_SPACEBASE: { TypeSpacebase tsb((AddrSpace *)0,Address(),glb); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh index 42892615d4..187bd3a3a3 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh @@ -29,46 +29,48 @@ extern void print_data(ostream &s,uint1 *buffer,int4 size,const Address &baseadd /// The core meta-types supported by the decompiler. These are sizeless templates /// for the elements making up the type algebra. enum type_metatype { - TYPE_VOID = 12, ///< Standard "void" type, absence of type - TYPE_SPACEBASE = 11, ///< Placeholder for symbol/type look-up calculations - TYPE_UNKNOWN = 10, ///< An unknown low-level type. Treated as an unsigned integer. - TYPE_INT = 9, ///< Signed integer. 
Signed is considered less specific than unsigned in C - TYPE_UINT = 8, ///< Unsigned integer - TYPE_BOOL = 7, ///< Boolean - TYPE_CODE = 6, ///< Data is actual executable code - TYPE_FLOAT = 5, ///< Floating-point + TYPE_VOID = 13, ///< Standard "void" type, absence of type + TYPE_SPACEBASE = 12, ///< Placeholder for symbol/type look-up calculations + TYPE_UNKNOWN = 11, ///< An unknown low-level type. Treated as an unsigned integer. + TYPE_INT = 10, ///< Signed integer. Signed is considered less specific than unsigned in C + TYPE_UINT = 9, ///< Unsigned integer + TYPE_BOOL = 8, ///< Boolean + TYPE_CODE = 7, ///< Data is actual executable code + TYPE_FLOAT = 6, ///< Floating-point - TYPE_PTR = 4, ///< Pointer data-type - TYPE_PTRREL = 3, ///< Pointer relative to another data-type (specialization of TYPE_PTR) - TYPE_ARRAY = 2, ///< Array data-type, made up of a sequence of "element" datatype - TYPE_PARTIALSTRUCT = 1, ///< Part of a structure, stored separately from the whole - TYPE_STRUCT = 0 ///< Structure data-type, made up of component datatypes + TYPE_PTR = 5, ///< Pointer data-type + TYPE_PTRREL = 4, ///< Pointer relative to another data-type (specialization of TYPE_PTR) + TYPE_ARRAY = 3, ///< Array data-type, made up of a sequence of "element" datatype + TYPE_PARTIALSTRUCT = 2, ///< Part of a structure, stored separately from the whole + TYPE_STRUCT = 1, ///< Structure data-type, made up of component datatypes + TYPE_UNION = 0 ///< An overlapping union of multiple datatypes }; /// Specializations of the core meta-types. Each enumeration is associated with a specific #type_metatype. /// Ordering is important: The lower the number, the more \b specific the data-type, affecting propagation. 
enum sub_metatype { - SUB_VOID = 20, ///< Compare as a TYPE_VOID - SUB_SPACEBASE = 19, ///< Compare as a TYPE_SPACEBASE - SUB_UNKNOWN = 18, ///< Compare as a TYPE_UNKNOWN - SUB_INT_CHAR = 17, ///< Signed 1-byte character, sub-type of TYPE_INT - SUB_UINT_CHAR = 16, ///< Unsigned 1-byte character, sub-type of TYPE_UINT - SUB_INT_PLAIN = 15, ///< Compare as a plain TYPE_INT - SUB_UINT_PLAIN = 14, ///< Compare as a plain TYPE_UINT - SUB_INT_ENUM = 13, ///< Signed enum, sub-type of TYPE_INT - SUB_UINT_ENUM = 12, ///< Unsigned enum, sub-type of TYPE_UINT - SUB_INT_UNICODE = 11, ///< Signed wide character, sub-type of TYPE_INT - SUB_UINT_UNICODE = 10, ///< Unsigned wide character, sub-type of TYPE_UINT - SUB_BOOL = 9, ///< Compare as TYPE_BOOL - SUB_CODE = 8, ///< Compare as TYPE_CODE - SUB_FLOAT = 7, ///< Compare as TYPE_FLOAT - SUB_PTRREL_UNK = 6, ///< Pointer to unknown field of struct, sub-type of TYPE_PTR - SUB_PTR = 5, ///< Compare as TYPE_PTR - SUB_PTRREL = 4, ///< Pointer relative to another data-type, sub-type of TYPE_PTR - SUB_PTR_STRUCT = 3, ///< Pointer into struct, sub-type of TYPE_PTR - SUB_ARRAY = 2, ///< Compare as TYPE_ARRAY - SUB_PARTIALSTRUCT = 1, ///< Compare as TYPE_PARTIALSTRUCT - SUB_STRUCT = 0 ///< Compare as TYPE_STRUCT + SUB_VOID = 21, ///< Compare as a TYPE_VOID + SUB_SPACEBASE = 20, ///< Compare as a TYPE_SPACEBASE + SUB_UNKNOWN = 19, ///< Compare as a TYPE_UNKNOWN + SUB_INT_CHAR = 18, ///< Signed 1-byte character, sub-type of TYPE_INT + SUB_UINT_CHAR = 17, ///< Unsigned 1-byte character, sub-type of TYPE_UINT + SUB_INT_PLAIN = 16, ///< Compare as a plain TYPE_INT + SUB_UINT_PLAIN = 15, ///< Compare as a plain TYPE_UINT + SUB_INT_ENUM = 14, ///< Signed enum, sub-type of TYPE_INT + SUB_UINT_ENUM = 13, ///< Unsigned enum, sub-type of TYPE_UINT + SUB_INT_UNICODE = 12, ///< Signed wide character, sub-type of TYPE_INT + SUB_UINT_UNICODE = 11, ///< Unsigned wide character, sub-type of TYPE_UINT + SUB_BOOL = 10, ///< Compare as TYPE_BOOL + SUB_CODE = 
9, ///< Compare as TYPE_CODE + SUB_FLOAT = 8, ///< Compare as TYPE_FLOAT + SUB_PTRREL_UNK = 7, ///< Pointer to unknown field of struct, sub-type of TYPE_PTR + SUB_PTR = 6, ///< Compare as TYPE_PTR + SUB_PTRREL = 5, ///< Pointer relative to another data-type, sub-type of TYPE_PTR + SUB_PTR_STRUCT = 4, ///< Pointer into struct, sub-type of TYPE_PTR + SUB_ARRAY = 3, ///< Compare as TYPE_ARRAY + SUB_PARTIALSTRUCT = 2, ///< Compare as TYPE_PARTIALSTRUCT + SUB_STRUCT = 1, ///< Compare as TYPE_STRUCT + SUB_UNION = 0 ///< Compare as TYPE_UNION }; /// Convert type \b meta-type to name extern void metatype2string(type_metatype metatype,string &res); @@ -77,6 +79,7 @@ extern void metatype2string(type_metatype metatype,string &res); extern type_metatype string2metatype(const string &metastring); class Architecture; // Forward declarations +class PcodeOp; class Scope; class TypeFactory; struct DatatypeCompare; @@ -86,7 +89,7 @@ struct DatatypeCompare; /// Used for symbols, function prototypes, type propagation etc. 
class Datatype { protected: - static sub_metatype base2sub[13]; + static sub_metatype base2sub[14]; /// Boolean properties of datatypes enum { coretype = 1, ///< This is a basic type which will never be redefined @@ -105,6 +108,7 @@ protected: has_stripped = 0x100, ///< Datatype has a stripped form for formal declarations is_ptrrel = 0x200, ///< Datatype is a TypePointerRel type_incomplete = 0x400, ///< Set if \b this (recursive) data-type has not been fully defined yet + needs_resolution = 0x800 ///< Datatype (union, pointer to union) needs resolution before propagation }; friend class TypeFactory; friend struct DatatypeCompare; @@ -143,6 +147,7 @@ public: bool isFormalPointerRel(void) const { return (flags & (is_ptrrel | has_stripped))==is_ptrrel; } ///< Is \b this a non-ephemeral TypePointerRel bool hasStripped(void) const { return (flags & has_stripped)!=0; } ///< Return \b true if \b this has a stripped form bool isIncomplete(void) const { return (flags & type_incomplete)!=0; } ///< Is \b this an incompletely defined data-type + bool needsResolution(void) const { return (flags & needs_resolution)!=0; } ///< Is \b this a union or a pointer to union uint4 getInheritable(void) const { return (flags & coretype); } ///< Get properties pointers inherit type_metatype getMetatype(void) const { return metatype; } ///< Get the type \b meta-type sub_metatype getSubMeta(void) const { return submeta; } ///< Get the \b sub-metatype @@ -162,17 +167,25 @@ public: virtual void saveXml(ostream &s) const; ///< Serialize the data-type to XML virtual bool isPtrsubMatching(uintb off) const; ///< Is this data-type suitable as input to a CPUI_PTRSUB op virtual Datatype *getStripped(void) const; ///< Get a stripped version of \b this for formal use in formal declarations + virtual Datatype *resolveInFlow(PcodeOp *op,int4 slot); ///< Tailor data-type propagation based on Varnode use + virtual Datatype* findResolve(const PcodeOp *op,int4 slot); ///< Find a previously resolved sub-type + 
virtual int4 findCompatibleResolve(Datatype *ct) const; ///< Find a resolution compatible with the given data-type int4 typeOrder(const Datatype &op) const { if (this==&op) return 0; return compare(op,10); } ///< Order this with -op- datatype int4 typeOrderBool(const Datatype &op) const; ///< Order \b this with -op-, treating \e bool data-type as special void saveXmlRef(ostream &s) const; ///< Write an XML reference of \b this to stream }; -/// \brief Specifies subfields of a structure or what a pointer points to -struct TypeField { - int4 offset; ///< Offset (into containing struct) of subfield +/// \brief A field within a structure or union +class TypeField { +public: + int4 ident; ///< Id for identifying \b this within its containing structure or union + int4 offset; ///< Offset (into containing structure or union) of subfield string name; ///< Name of subfield - Datatype *type; ///< type of subfield + Datatype *type; ///< Data-type of subfield + TypeField(const Element *el,TypeFactory &typegrp); ///< Restore \b this field from an XML stream + TypeField(int4 id,int4 off,const string &nm,Datatype *ct) { ident=id; offset=off; name=nm; type=ct; } ///< Construct from components bool operator<(const TypeField &op2) const { return (offset < op2.offset); } ///< Compare based on offset + void saveXml(ostream &s) const; ///< Save \b this field as XML }; /// Compare two Datatype pointers for equivalence of their description @@ -291,6 +304,8 @@ public: virtual void saveXml(ostream &s) const; virtual TypePointer *downChain(uintb &off,TypePointer *&par,uintb &parOff,bool allowArrayWrap,TypeFactory &typegrp); virtual bool isPtrsubMatching(uintb off) const; + virtual Datatype *resolveInFlow(PcodeOp *op,int4 slot); + virtual Datatype* findResolve(const PcodeOp *op,int4 slot); }; /// \brief Datatype object representing an array of elements @@ -306,8 +321,7 @@ public: /// Construct from another TypeArray TypeArray(const TypeArray &op) : Datatype(op) { arrayof = op.arrayof; 
arraysize = op.arraysize; } /// Construct given an array size and element data-type - TypeArray(int4 n,Datatype *ao) : Datatype(n*ao->getSize(),TYPE_ARRAY) { - arraysize = n; arrayof = ao; } + TypeArray(int4 n,Datatype *ao); Datatype *getBase(void) const { return arrayof; } ///< Get the element data-type int4 numElements(void) const { return arraysize; } ///< Get the number of elements Datatype *getSubEntry(int4 off,int4 sz,int4 *newoff,int4 *el) const; ///< Figure out what a byte range overlaps @@ -320,6 +334,9 @@ public: virtual int4 compareDependency(const Datatype &op) const; // For tree structure virtual Datatype *clone(void) const { return new TypeArray(*this); } virtual void saveXml(ostream &s) const; + virtual Datatype *resolveInFlow(PcodeOp *op,int4 slot); + virtual Datatype* findResolve(const PcodeOp *op,int4 slot); + virtual int4 findCompatibleResolve(Datatype *ct) const; }; /// \brief An enumerated Datatype object: an integer with named values. @@ -351,7 +368,7 @@ public: virtual void saveXml(ostream &s) const; }; -/// \brief A composite Datatype object: A "structure" with component "fields" +/// \brief A composite Datatype object: A \b structure with component \b fields class TypeStruct : public Datatype { protected: friend class TypeFactory; @@ -360,12 +377,13 @@ protected: int4 getFieldIter(int4 off) const; ///< Get index into field list int4 getLowerBoundField(int4 off) const; ///< Get index of last field before or equal to given offset void restoreFields(const Element *el,TypeFactory &typegrp); ///< Restore fields from XML description + int4 scoreFill(PcodeOp *op,int4 slot) const; ///< Determine best type fit for given PcodeOp use public: TypeStruct(const TypeStruct &op); ///< Construct from another TypeStruct TypeStruct(void) : Datatype(0,TYPE_STRUCT) { flags |= type_incomplete; } ///< Construct incomplete/empty TypeStruct vector::const_iterator beginField(void) const { return field.begin(); } ///< Beginning of fields vector::const_iterator 
endField(void) const { return field.end(); } ///< End of fields - const TypeField *getField(int4 off,int4 sz,int4 *newoff) const; ///< Get field based on offset + const TypeField *resolveTruncation(int4 off,int4 sz,int4 *newoff) const; ///< Get field based on offset virtual Datatype *getSubType(uintb off,uintb *newoff) const; virtual Datatype *nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const; virtual Datatype *nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const; @@ -375,10 +393,40 @@ public: virtual int4 compareDependency(const Datatype &op) const; // For tree structure virtual Datatype *clone(void) const { return new TypeStruct(*this); } virtual void saveXml(ostream &s) const; + virtual Datatype *resolveInFlow(PcodeOp *op,int4 slot); + virtual Datatype* findResolve(const PcodeOp *op,int4 slot); + virtual int4 findCompatibleResolve(Datatype *ct) const; + static void assignFieldOffsets(vector &list,int4 align); ///< Assign field offsets given a byte alignment }; -/// \brief A pointer data-type that knows it is offset relative to another data-type +/// \brief A collection of overlapping Datatype objects: A \b union of component \b fields /// +/// The individual components have \b field names, as with a structure, but for a union, the components all +/// share the same memory. 
+class TypeUnion : public Datatype { +protected: + friend class TypeFactory; + vector field; ///< The list of fields + void setFields(const vector &fd); ///< Establish fields for \b this + void restoreFields(const Element *el,TypeFactory &typegrp); ///< Restore fields from XML description +public: + TypeUnion(const TypeUnion &op); ///< Construct from another TypeUnion + TypeUnion(void) : Datatype(0,TYPE_UNION) { flags |= (type_incomplete | needs_resolution); } ///< Construct incomplete TypeUnion + const TypeField *getField(int4 i) const { return &field[i]; } ///< Get the i-th field of the union +// virtual Datatype *getSubType(uintb off,uintb *newoff) const; + virtual int4 numDepend(void) const { return field.size(); } + virtual Datatype *getDepend(int4 index) const { return field[index].type; } + virtual int4 compare(const Datatype &op,int4 level) const; // For tree structure + virtual int4 compareDependency(const Datatype &op) const; // For tree structure + virtual Datatype *clone(void) const { return new TypeUnion(*this); } + virtual void saveXml(ostream &s) const; + virtual Datatype *resolveInFlow(PcodeOp *op,int4 slot); + virtual Datatype* findResolve(const PcodeOp *op,int4 slot); + virtual int4 findCompatibleResolve(Datatype *ct) const; + const TypeField *resolveTruncation(int4 offset,PcodeOp *op,int4 slot,int4 &newoff); + const TypeField *findTruncation(int4 offset,const PcodeOp *op,int4 slot,int4 &newoff) const; +}; + /// The other data, the \b container, is typically a TypeStruct or TypeArray. Even though \b this pointer /// does not point directly to the start of the container, it is possible to access the container through \b this, /// as the distance (the \b offset) to the start of the container is explicitly known. 
@@ -496,6 +544,7 @@ class TypeFactory { void orderRecurse(vector &deporder,DatatypeSet &mark,Datatype *ct) const; ///< Write out dependency list Datatype *restoreTypedef(const Element *el); ///< Restore a \ XML tag describing a typedef Datatype *restoreStruct(const Element *el,bool forcecore); ///< Restore a \ XML tag describing a structure + Datatype *restoreUnion(const Element *el,bool forcecore); ///< Restore a \ XML tag describing a union Datatype *restoreCode(const Element *el,bool isConstructor,bool isDestructor,bool forcecore); ///< Restore XML tag describing a code object Datatype *restoreXmlTypeNoRef(const Element *el,bool forcecore); ///< Restore from an XML tag void clearCache(void); ///< Clear the common type cache @@ -520,6 +569,7 @@ public: Datatype *findByName(const string &n); ///< Return type of given name Datatype *setName(Datatype *ct,const string &n); ///< Set the given types name bool setFields(vector &fd,TypeStruct *ot,int4 fixedsize,uint4 flags); ///< Set fields on a TypeStruct + bool setFields(vector &fd,TypeUnion *ot,int4 fixedsize,uint4 flags); ///< Set fields on a TypeUnion void setPrototype(const FuncProto *fp,TypeCode *newCode,uint4 flags); ///< Set the prototype on a TypeCode bool setEnumValues(const vector &namelist, const vector &vallist, @@ -538,6 +588,7 @@ public: TypePointer *getTypePointerNoDepth(int4 s,Datatype *pt,uint4 ws); ///< Construct a depth limited pointer data-type TypeArray *getTypeArray(int4 as,Datatype *ao); ///< Construct an array data-type TypeStruct *getTypeStruct(const string &n); ///< Create an (empty) structure + TypeUnion *getTypeUnion(const string &n); ///< Create an (empty) union TypeEnum *getTypeEnum(const string &n); ///< Create an (empty) enumeration TypeSpacebase *getTypeSpacebase(AddrSpace *id,const Address &addr); ///< Create a "spacebase" type TypeCode *getTypeCode(ProtoModel *model,Datatype *outtype, @@ -574,6 +625,17 @@ inline int4 Datatype::typeOrderBool(const Datatype &op) const return 
compare(op,10); } +inline TypeArray::TypeArray(int4 n,Datatype *ao) : Datatype(n*ao->getSize(),TYPE_ARRAY) + +{ + arraysize = n; + arrayof = ao; + // A varnode which is an array of size 1, should generally always be treated + // as the element data-type + if (n == 1) + flags |= needs_resolution; +} + /// \brief Set up the base pointer data-type \b this is modeling /// /// This base data-type is used for formal variable declarations in source code output. diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc index 8e3eb2ec21..016be7b88e 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc @@ -215,10 +215,28 @@ Datatype *TypeOp::getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *c const Varnode *vn = op->getIn(slot); if (vn->isAnnotation()) return (Datatype *)0; Datatype *reqtype = op->inputTypeLocal(slot); - Datatype *curtype = vn->getHigh()->getType(); + Datatype *curtype = vn->getHighTypeReadFacing(op); return castStrategy->castStandard(reqtype,curtype,false,true); } +/// The data-type can propagate between any two Varnodes attached to the PcodeOp, either in or out. +/// The pair \b invn and \b inslot indicate the Varnode holding the \e incoming data-type. +/// The pair \b outvn and \b outslot indicate the Varnode that will hold the \e outgoing data-type. +/// The data-type for the outgoing Varnode is returned, which may be different then the incoming data-type +/// as the PcodeOp can transform the data-type as it propagates. 
+/// \param alttype is the incoming data-type +/// \param op is the PcodeOp to propagate across +/// \param invn is the Varnode holding the incoming data-type +/// \param outvn is the Varnode that will hold the outgoing data-type +/// \param inslot indicates how the incoming Varnode is attached to the PcodeOp (-1 indicates output >= indicates input) +/// \param outslot indicates how the outgoing Varnode is attached to the PcodeOp +/// \return the outgoing data-type or null (to indicate no propagation) +Datatype *TypeOp::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + return (Datatype *)0; // Don't propagate by default +} + /// Many languages can mark an integer constant as explicitly \e unsigned. When /// the decompiler is deciding on \e cast operations, this is one of the checks /// it performs. This method checks if the indicated input is an @@ -234,14 +252,14 @@ bool TypeOp::markExplicitUnsigned(PcodeOp *op,int4 slot) const if ((slot==1) && ((addlflags & inherits_sign_zero)!=0)) return false; Varnode *vn = op->getIn(slot); if (!vn->isConstant()) return false; - Datatype *dt = vn->getHigh()->getType(); + Datatype *dt = vn->getHighTypeReadFacing(op); type_metatype meta = dt->getMetatype(); if ((meta != TYPE_UINT)&&(meta != TYPE_UNKNOWN)) return false; if (dt->isCharPrint()) return false; if (dt->isEnumType()) return false; if ((op->numInput() == 2) && ((addlflags & inherits_sign_zero)==0)) { Varnode *firstvn = op->getIn(1-slot); - meta = firstvn->getHigh()->getType()->getMetatype(); + meta = firstvn->getHighTypeReadFacing(op)->getMetatype(); if ((meta == TYPE_UINT)||(meta == TYPE_UNKNOWN)) return false; // Other side of the operation will force the unsigned } @@ -336,15 +354,29 @@ TypeOpCopy::TypeOpCopy(TypeFactory *t) : TypeOp(t,CPUI_COPY,"copy") Datatype *TypeOpCopy::getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const { - Datatype *reqtype = 
op->getOut()->getHigh()->getType(); // Require input to be same type as output - Datatype *curtype = op->getIn(0)->getHigh()->getType(); + Datatype *reqtype = op->getOut()->getHighTypeDefFacing(); // Require input to be same type as output + Datatype *curtype = op->getIn(0)->getHighTypeReadFacing(op); return castStrategy->castStandard(reqtype,curtype,false,true); } Datatype *TypeOpCopy::getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const { - return op->getIn(0)->getHigh()->getType(); + return op->getIn(0)->getHighTypeReadFacing(op); +} + +Datatype *TypeOpCopy::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + if ((inslot!=-1)&&(outslot!=-1)) return (Datatype *)0; // Must propagate input <-> output + Datatype *newtype; + if (invn->isSpacebase()) { + AddrSpace *spc = tlst->getArch()->getDefaultDataSpace(); + newtype = tlst->getTypePointer(alttype->getSize(),tlst->getBase(1,TYPE_UNKNOWN),spc->getWordSize()); + } + else + newtype = alttype; + return newtype; } void TypeOpCopy::printRaw(ostream &s,const PcodeOp *op) @@ -366,9 +398,9 @@ Datatype *TypeOpLoad::getInputCast(const PcodeOp *op,int4 slot,const CastStrateg { if (slot!=1) return (Datatype *)0; - Datatype *reqtype = op->getOut()->getHigh()->getType(); // Cast load pointer to match output + Datatype *reqtype = op->getOut()->getHighTypeDefFacing(); // Cast load pointer to match output const Varnode *invn = op->getIn(1); - Datatype *curtype = invn->getHigh()->getType(); + Datatype *curtype = invn->getHighTypeReadFacing(op); AddrSpace *spc = Address::getSpaceFromConst(op->getIn(0)->getAddr()); // Its possible that the input type is not a pointer to the output type // (or even a pointer) due to cycle trimming in the type propagation algorithms @@ -380,7 +412,7 @@ Datatype *TypeOpLoad::getInputCast(const PcodeOp *op,int4 slot,const CastStrateg // If we have a non-standard in = ptr a out = b (a!=b) // We may want to postpone casting BEFORE the load 
in favor of casting AFTER the load type_metatype curmeta = curtype->getMetatype(); - if ((curmeta!=TYPE_STRUCT)&&(curmeta!=TYPE_ARRAY)&&(curmeta!=TYPE_SPACEBASE)) { + if ((curmeta!=TYPE_STRUCT)&&(curmeta!=TYPE_ARRAY)&&(curmeta!=TYPE_SPACEBASE)&&(curmeta!=TYPE_UNION)) { // if the input is a pointer to a primitive type if ((!invn->isImplied())||(!invn->isWritten())||(invn->getDef()->code() != CPUI_CAST)) return (Datatype *)0; // Postpone cast to output @@ -397,7 +429,7 @@ Datatype *TypeOpLoad::getInputCast(const PcodeOp *op,int4 slot,const CastStrateg Datatype *TypeOpLoad::getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const { - Datatype *ct = op->getIn(1)->getHigh()->getType(); + Datatype *ct = op->getIn(1)->getHighTypeReadFacing(op); if ((ct->getMetatype() == TYPE_PTR)&&(((TypePointer *)ct)->getPtrTo()->getSize() == op->getOut()->getSize())) return ((TypePointer *)ct)->getPtrTo(); // return TypeOp::getOutputToken(op); @@ -406,7 +438,27 @@ Datatype *TypeOpLoad::getOutputToken(const PcodeOp *op,CastStrategy *castStrateg // In this case, there will have to be a cast, so we assume // the cast will cause the load to produce the type matching // its output - return op->getOut()->getHigh()->getType(); + return op->getOut()->getHighTypeDefFacing(); +} + +Datatype *TypeOpLoad::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + if ((inslot==0)||(outslot==0)) return (Datatype *)0; // Don't propagate along this edge + if (invn->isSpacebase()) return (Datatype *)0; + Datatype *newtype; + if (inslot == -1) { // Propagating output to input (value to ptr) + AddrSpace *spc = Address::getSpaceFromConst(op->getIn(0)->getAddr()); + newtype = tlst->getTypePointerNoDepth(outvn->getTempType()->getSize(),alttype,spc->getWordSize()); + } + else if (alttype->getMetatype()==TYPE_PTR) { + newtype = ((TypePointer *)alttype)->getPtrTo(); + if (newtype->getSize() != outvn->getTempType()->getSize() || 
newtype->isVariableLength()) // Size must be appropriate + newtype = outvn->getTempType(); + } + else + newtype = outvn->getTempType(); // Don't propagate anything + return newtype; } void TypeOpLoad::printRaw(ostream &s,const PcodeOp *op) @@ -432,13 +484,14 @@ Datatype *TypeOpStore::getInputCast(const PcodeOp *op,int4 slot,const CastStrate { if (slot==0) return (Datatype *)0; const Varnode *pointerVn = op->getIn(1); - Datatype *pointerType = pointerVn->getHigh()->getType(); - Datatype *valueType = op->getIn(2)->getHigh()->getType(); + Datatype *pointerType = pointerVn->getHighTypeReadFacing(op); + Datatype *pointedToType = pointerType; + Datatype *valueType = op->getIn(2)->getHighTypeReadFacing(op); AddrSpace *spc = Address::getSpaceFromConst(op->getIn(0)->getAddr()); int4 destSize; if (pointerType->getMetatype() == TYPE_PTR) { - pointerType = ((TypePointer *)pointerType)->getPtrTo(); - destSize = pointerType->getSize(); + pointedToType = ((TypePointer *)pointerType)->getPtrTo(); + destSize = pointedToType->getSize(); } else destSize = -1; @@ -453,14 +506,34 @@ Datatype *TypeOpStore::getInputCast(const PcodeOp *op,int4 slot,const CastStrate if (pointerVn->isImplied() && pointerVn->loneDescend() == op) { // CAST is already in place, test if it is casting to the right type Datatype *newType = tlst->getTypePointer(pointerVn->getSize(), valueType, spc->getWordSize()); - if (pointerVn->getHigh()->getType() != newType) + if (pointerType != newType) return newType; } } return (Datatype *)0; } // If we reach here, cast the value, not the pointer - return castStrategy->castStandard(pointerType,valueType,false,true); + return castStrategy->castStandard(pointedToType,valueType,false,true); +} + +Datatype *TypeOpStore::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + if ((inslot==0)||(outslot==0)) return (Datatype *)0; // Don't propagate along this edge + if (invn->isSpacebase()) return (Datatype *)0; + Datatype 
*newtype; + if (inslot==2) { // Propagating value to ptr + AddrSpace *spc = Address::getSpaceFromConst(op->getIn(0)->getAddr()); + newtype = tlst->getTypePointerNoDepth(outvn->getTempType()->getSize(),alttype,spc->getWordSize()); + } + else if (alttype->getMetatype()==TYPE_PTR) { + newtype = ((TypePointer *)alttype)->getPtrTo(); + if (newtype->getSize() != outvn->getTempType()->getSize() || newtype->isVariableLength()) + newtype = outvn->getTempType(); + } + else + newtype = outvn->getTempType(); // Don't propagate anything + return newtype; } void TypeOpStore::printRaw(ostream &s,const PcodeOp *op) @@ -831,16 +904,59 @@ TypeOpEqual::TypeOpEqual(TypeFactory *t) Datatype *TypeOpEqual::getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const { - Datatype *reqtype = op->getIn(0)->getHigh()->getType(); // Input arguments should be the same type - Datatype *othertype = op->getIn(1)->getHigh()->getType(); + Datatype *reqtype = op->getIn(0)->getHighTypeReadFacing(op); // Input arguments should be the same type + Datatype *othertype = op->getIn(1)->getHighTypeReadFacing(op); if (0>othertype->typeOrder(*reqtype)) reqtype = othertype; if (castStrategy->checkIntPromotionForCompare(op,slot)) return reqtype; - othertype = op->getIn(slot)->getHigh()->getType(); + othertype = op->getIn(slot)->getHighTypeReadFacing(op); return castStrategy->castStandard(reqtype,othertype,false,false); } +Datatype *TypeOpEqual::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + return TypeOpEqual::propagateAcrossCompare(alttype, tlst, invn, outvn, inslot, outslot); +} + +/// \brief Propagate a given data-type across a \e comparison PcodeOp +/// +/// This implements the propagateType() method for multiple p-code operators: +/// CPUI_INT_EQUAL, CPUI_INT_NOTEQUAL, CPUI_INT_LESS, etc. +/// The propagation must be across the input Varnodes of the comparison. 
+/// \param alttype is the incoming data-type to propagate +/// \param typegrp is the TypeFactory used for constructing transformed data-types +/// \param invn is the Varnode holding the incoming data-type +/// \param outvn is the Varnode that will hold the outgoing data-type +/// \param inslot indicates how the incoming Varnode is attached to the PcodeOp (-1 indicates output, >=0 indicates input) +/// \param outslot indicates how the outgoing Varnode is attached to the PcodeOp +/// \return the outgoing data-type or null (to indicate no propagation) +Datatype *TypeOpEqual::propagateAcrossCompare(Datatype *alttype,TypeFactory *typegrp,Varnode *invn, + Varnode *outvn,int4 inslot,int4 outslot) +{ + if (inslot == -1 || outslot == -1) return (Datatype *)0; + Datatype *newtype; + if (invn->isSpacebase()) { + AddrSpace *spc = typegrp->getArch()->getDefaultDataSpace(); + newtype = typegrp->getTypePointer(alttype->getSize(),typegrp->getBase(1,TYPE_UNKNOWN),spc->getWordSize()); + } + else if (alttype->isPointerRel() && !outvn->isConstant()) { + TypePointerRel *relPtr = (TypePointerRel *)alttype; + if (relPtr->getParent()->getMetatype() == TYPE_STRUCT && relPtr->getPointerOffset() >= 0) { + // If we know the pointer is in the middle of a structure, don't propagate across comparison operators + // as the two sides of the operator are likely to be different types, and the other side can also + // get data-type information from the structure pointer + newtype = typegrp->getTypePointer(relPtr->getSize(),typegrp->getBase(1,TYPE_UNKNOWN),relPtr->getWordSize()); + } + else + newtype = alttype; + } + else + newtype = alttype; + return newtype; +} + TypeOpNotEqual::TypeOpNotEqual(TypeFactory *t) : TypeOpBinary(t,CPUI_INT_NOTEQUAL,"!=",TYPE_BOOL,TYPE_INT) { @@ -852,16 +968,22 @@ TypeOpNotEqual::TypeOpNotEqual(TypeFactory *t) Datatype *TypeOpNotEqual::getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const { - Datatype *reqtype = 
op->getIn(0)->getHigh()->getType(); // Input arguments should be the same type - Datatype *othertype = op->getIn(1)->getHigh()->getType(); + Datatype *reqtype = op->getIn(0)->getHighTypeReadFacing(op); // Input arguments should be the same type + Datatype *othertype = op->getIn(1)->getHighTypeReadFacing(op); if (0>othertype->typeOrder(*reqtype)) reqtype = othertype; if (castStrategy->checkIntPromotionForCompare(op,slot)) return reqtype; - othertype = op->getIn(slot)->getHigh()->getType(); + othertype = op->getIn(slot)->getHighTypeReadFacing(op); return castStrategy->castStandard(reqtype,othertype,false,false); } +Datatype *TypeOpNotEqual::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + return TypeOpEqual::propagateAcrossCompare(alttype, tlst, invn, outvn, inslot, outslot); +} + TypeOpIntSless::TypeOpIntSless(TypeFactory *t) : TypeOpBinary(t,CPUI_INT_SLESS,"<",TYPE_BOOL,TYPE_INT) { @@ -876,10 +998,18 @@ Datatype *TypeOpIntSless::getInputCast(const PcodeOp *op,int4 slot,const CastStr Datatype *reqtype = op->inputTypeLocal(slot); if (castStrategy->checkIntPromotionForCompare(op,slot)) return reqtype; - Datatype *curtype = op->getIn(slot)->getHigh()->getType(); + Datatype *curtype = op->getIn(slot)->getHighTypeReadFacing(op); return castStrategy->castStandard(reqtype,curtype,true,true); } +Datatype *TypeOpIntSless::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + if ((inslot==-1)||(outslot==-1)) return (Datatype *)0; // Must propagate input <-> input + if (alttype->getMetatype() != TYPE_INT) return (Datatype *)0; // Only propagate signed things + return alttype; +} + TypeOpIntSlessEqual::TypeOpIntSlessEqual(TypeFactory *t) : TypeOpBinary(t,CPUI_INT_SLESSEQUAL,"<=",TYPE_BOOL,TYPE_INT) { @@ -894,10 +1024,18 @@ Datatype *TypeOpIntSlessEqual::getInputCast(const PcodeOp *op,int4 slot,const Ca Datatype *reqtype = op->inputTypeLocal(slot); if 
(castStrategy->checkIntPromotionForCompare(op,slot)) return reqtype; - Datatype *curtype = op->getIn(slot)->getHigh()->getType(); + Datatype *curtype = op->getIn(slot)->getHighTypeReadFacing(op); return castStrategy->castStandard(reqtype,curtype,true,true); } +Datatype *TypeOpIntSlessEqual::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + if ((inslot==-1)||(outslot==-1)) return (Datatype *)0; // Must propagate input <-> input + if (alttype->getMetatype() != TYPE_INT) return (Datatype *)0; // Only propagate signed things + return alttype; +} + TypeOpIntLess::TypeOpIntLess(TypeFactory *t) : TypeOpBinary(t,CPUI_INT_LESS,"<",TYPE_BOOL,TYPE_UINT) { @@ -912,10 +1050,16 @@ Datatype *TypeOpIntLess::getInputCast(const PcodeOp *op,int4 slot,const CastStra Datatype *reqtype = op->inputTypeLocal(slot); if (castStrategy->checkIntPromotionForCompare(op,slot)) return reqtype; - Datatype *curtype = op->getIn(slot)->getHigh()->getType(); + Datatype *curtype = op->getIn(slot)->getHighTypeReadFacing(op); return castStrategy->castStandard(reqtype,curtype,true,false); } +Datatype *TypeOpIntLess::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + return TypeOpEqual::propagateAcrossCompare(alttype, tlst, invn, outvn, inslot, outslot); +} + TypeOpIntLessEqual::TypeOpIntLessEqual(TypeFactory *t) : TypeOpBinary(t,CPUI_INT_LESSEQUAL,"<=",TYPE_BOOL,TYPE_UINT) { @@ -930,10 +1074,16 @@ Datatype *TypeOpIntLessEqual::getInputCast(const PcodeOp *op,int4 slot,const Cas Datatype *reqtype = op->inputTypeLocal(slot); if (castStrategy->checkIntPromotionForCompare(op,slot)) return reqtype; - Datatype *curtype = op->getIn(slot)->getHigh()->getType(); + Datatype *curtype = op->getIn(slot)->getHighTypeReadFacing(op); return castStrategy->castStandard(reqtype,curtype,true,false); } +Datatype *TypeOpIntLessEqual::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 
inslot,int4 outslot) +{ + return TypeOpEqual::propagateAcrossCompare(alttype, tlst, invn, outvn, inslot, outslot); +} + TypeOpIntZext::TypeOpIntZext(TypeFactory *t) : TypeOpFunc(t,CPUI_INT_ZEXT,"ZEXT",TYPE_UINT,TYPE_UINT) { @@ -956,7 +1106,7 @@ Datatype *TypeOpIntZext::getInputCast(const PcodeOp *op,int4 slot,const CastStra Datatype *reqtype = op->inputTypeLocal(slot); if (castStrategy->checkIntPromotionForExtension(op)) return reqtype; - Datatype *curtype = op->getIn(slot)->getHigh()->getType(); + Datatype *curtype = op->getIn(slot)->getHighTypeReadFacing(op); return castStrategy->castStandard(reqtype,curtype,true,false); } @@ -982,7 +1132,7 @@ Datatype *TypeOpIntSext::getInputCast(const PcodeOp *op,int4 slot,const CastStra Datatype *reqtype = op->inputTypeLocal(slot); if (castStrategy->checkIntPromotionForExtension(op)) return reqtype; - Datatype *curtype = op->getIn(slot)->getHigh()->getType(); + Datatype *curtype = op->getIn(slot)->getHighTypeReadFacing(op); return castStrategy->castStandard(reqtype,curtype,true,false); } @@ -1000,6 +1150,143 @@ Datatype *TypeOpIntAdd::getOutputToken(const PcodeOp *op,CastStrategy *castStrat return castStrategy->arithmeticOutputStandard(op); // Use arithmetic typing rules } +Datatype *TypeOpIntAdd::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + type_metatype invnMeta = alttype->getMetatype(); + if (invnMeta != TYPE_PTR) { + if (invnMeta != TYPE_INT && invnMeta != TYPE_UINT) + return (Datatype *)0; + if (outslot != 1 || !op->getIn(1)->isConstant()) + return (Datatype *)0; + } + else if ((inslot!=-1)&&(outslot!=-1)) + return (Datatype *)0; // Must propagate input <-> output for pointers + Datatype *newtype; + if (outvn->isConstant() && (alttype->getMetatype() != TYPE_PTR)) + newtype = alttype; + else if (inslot == -1) // Propagating output to input + newtype = op->getIn(outslot)->getTempType(); // Don't propagate pointer types this direction + else + newtype = 
propagateAddIn2Out(alttype,tlst,op,inslot); + return newtype; +} + +/// \brief Propagate a pointer data-type through an ADD operation. +/// +/// Assuming a pointer data-type from an ADD PcodeOp propagates from an input to +/// its output, calculate the transformed data-type of the output Varnode, which +/// will depend on details of the operation. If the edge doesn't make sense as +/// "an ADD to a pointer", prevent the propagation by returning the output Varnode's +/// current data-type. +/// \param alttype is the resolved input pointer data-type +/// \param typegrp is the TypeFactory for constructing the transformed Datatype +/// \param op is the ADD operation +/// \param inslot is the edge to propagate along +/// \return the transformed Datatype or the original output Datatype +Datatype *TypeOpIntAdd::propagateAddIn2Out(Datatype *alttype,TypeFactory *typegrp,PcodeOp *op,int4 inslot) + +{ + TypePointer *pointer = (TypePointer *)alttype; + uintb uoffset; + int4 command = propagateAddPointer(uoffset,op,inslot,pointer->getPtrTo()->getSize()); + if (command == 2) return op->getOut()->getTempType(); // Doesn't look like a good pointer add + TypePointer *parent = (TypePointer *)0; + uintb parentOff; + if (command != 3) { + uoffset = AddrSpace::addressToByte(uoffset,pointer->getWordSize()); + bool allowWrap = (op->code() != CPUI_PTRSUB); + do { + pointer = pointer->downChain(uoffset,parent,parentOff,allowWrap,*typegrp); + if (pointer == (TypePointer *)0) + break; + } while(uoffset != 0); + } + if (parent != (TypePointer *)0) { + // If the innermost containing object is a TYPE_STRUCT or TYPE_ARRAY + // preserve info about this container + Datatype *pt; + if (pointer == (TypePointer *)0) + pt = typegrp->getBase(1,TYPE_UNKNOWN); // Offset does not point at a proper sub-type + else + pt = pointer->getPtrTo(); // The sub-type being directly pointed at + pointer = typegrp->getTypePointerRel(parent, pt, parentOff); + } + if (pointer == (TypePointer *)0) { + if (command == 0) + 
return alttype; + return op->getOut()->getTempType(); + } + if (op->getIn(inslot)->isSpacebase()) { + if (pointer->getPtrTo()->getMetatype() == TYPE_SPACEBASE) + pointer = typegrp->getTypePointer(pointer->getSize(),typegrp->getBase(1,TYPE_UNKNOWN),pointer->getWordSize()); + } + return pointer; +} + +/// Determine if the given data-type edge looks like a pointer +/// propagating through an "add a constant" operation. We assume the input +/// to the edge has a pointer data-type. This routine returns one of the commands: +/// - 0 indicates this is "add a constant" adding a zero (PTRSUB or PTRADD) +/// - 1 indicates this is "add a constant" and the constant is passed back +/// - 2 indicates the pointer does not propagate through +/// - 3 indicates the input data-type propagates through untransformed +/// +/// \param off passes back the constant offset if the command is '0' or '1' +/// \param op is the PcodeOp propagating the data-type +/// \param slot is the input edge being propagated +/// \param sz is the size of the data-type being pointed to +/// \return a command indicating how the op should be treated +int4 TypeOpIntAdd::propagateAddPointer(uintb &off,PcodeOp *op,int4 slot,int4 sz) + +{ + if (op->code() == CPUI_PTRADD) { + if (slot != 0) return 2; + Varnode *constvn = op->getIn(1); + uintb mult = op->getIn(2)->getOffset(); + if (constvn->isConstant()) { + off = (constvn->getOffset() * mult) & calc_mask(constvn->getSize()) ; + return (off == 0) ? 0 : 1; + } + if (sz != 0 && (mult % sz) != 0) + return 2; + return 3; + } + if (op->code() == CPUI_PTRSUB) { + if (slot != 0) return 2; + off = op->getIn(1)->getOffset(); + return (off == 0) ? 
0 : 1; + } + if (op->code() == CPUI_INT_ADD) { + Varnode *othervn = op->getIn(1-slot); + // Check if othervn is an offset + if (!othervn->isConstant()) { + if (othervn->isWritten()) { + PcodeOp *multop = othervn->getDef(); + if (multop->code() == CPUI_INT_MULT) { + Varnode *constvn = multop->getIn(1); + if (constvn->isConstant()) { + uintb mult = constvn->getOffset(); + if (mult == calc_mask(constvn->getSize())) // If multiplying by -1 + return 2; // Assume this is a pointer difference and don't propagate + if (sz != 0 && (mult % sz) !=0) + return 2; + } + return 3; + } + } + if (sz == 1) + return 3; + return 2; + } + if (othervn->getTempType()->getMetatype() == TYPE_PTR) // Check if othervn marked as ptr + return 2; + off = othervn->getOffset(); + return (off == 0) ? 0 : 1; + } + return 2; +} + TypeOpIntSub::TypeOpIntSub(TypeFactory *t) : TypeOpBinary(t,CPUI_INT_SUB,"-",TYPE_INT,TYPE_INT) { @@ -1101,6 +1388,20 @@ Datatype *TypeOpIntXor::getOutputToken(const PcodeOp *op,CastStrategy *castStrat return castStrategy->arithmeticOutputStandard(op); } +Datatype *TypeOpIntXor::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + if (!alttype->isPowerOfTwo()) return (Datatype *)0; // Only propagate flag enums + Datatype *newtype; + if (invn->isSpacebase()) { + AddrSpace *spc = tlst->getArch()->getDefaultDataSpace(); + newtype = tlst->getTypePointer(alttype->getSize(),tlst->getBase(1,TYPE_UNKNOWN),spc->getWordSize()); + } + else + newtype = alttype; + return newtype; +} + TypeOpIntAnd::TypeOpIntAnd(TypeFactory *t) : TypeOpBinary(t,CPUI_INT_AND,"&",TYPE_UINT,TYPE_UINT) { @@ -1115,6 +1416,20 @@ Datatype *TypeOpIntAnd::getOutputToken(const PcodeOp *op,CastStrategy *castStrat return castStrategy->arithmeticOutputStandard(op); } +Datatype *TypeOpIntAnd::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + if (!alttype->isPowerOfTwo()) return (Datatype *)0; // Only 
propagate flag enums + Datatype *newtype; + if (invn->isSpacebase()) { + AddrSpace *spc = tlst->getArch()->getDefaultDataSpace(); + newtype = tlst->getTypePointer(alttype->getSize(),tlst->getBase(1,TYPE_UNKNOWN),spc->getWordSize()); + } + else + newtype = alttype; + return newtype; +} + TypeOpIntOr::TypeOpIntOr(TypeFactory *t) : TypeOpBinary(t,CPUI_INT_OR,"|",TYPE_UINT,TYPE_UINT) { @@ -1129,6 +1444,20 @@ Datatype *TypeOpIntOr::getOutputToken(const PcodeOp *op,CastStrategy *castStrate return castStrategy->arithmeticOutputStandard(op); } +Datatype *TypeOpIntOr::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + if (!alttype->isPowerOfTwo()) return (Datatype *)0; // Only propagate flag enums + Datatype *newtype; + if (invn->isSpacebase()) { + AddrSpace *spc = tlst->getArch()->getDefaultDataSpace(); + newtype = tlst->getTypePointer(alttype->getSize(),tlst->getBase(1,TYPE_UNKNOWN),spc->getWordSize()); + } + else + newtype = alttype; + return newtype; +} + TypeOpIntLeft::TypeOpIntLeft(TypeFactory *t) : TypeOpBinary(t,CPUI_INT_LEFT,"<<",TYPE_INT,TYPE_INT) { @@ -1148,7 +1477,7 @@ Datatype *TypeOpIntLeft::getInputLocal(const PcodeOp *op,int4 slot) const Datatype *TypeOpIntLeft::getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const { - Datatype *res1 = op->getIn(0)->getHigh()->getType(); + Datatype *res1 = op->getIn(0)->getHighTypeReadFacing(op); if (res1->getMetatype() == TYPE_BOOL) res1 = tlst->getBase(res1->getSize(),TYPE_INT); return res1; @@ -1176,7 +1505,7 @@ Datatype *TypeOpIntRight::getInputCast(const PcodeOp *op,int4 slot,const CastStr if (slot == 0) { const Varnode *vn = op->getIn(0); Datatype *reqtype = op->inputTypeLocal(slot); - Datatype *curtype = vn->getHigh()->getType(); + Datatype *curtype = vn->getHighTypeReadFacing(op); int4 promoType = castStrategy->intPromotionType(vn); if (promoType != CastStrategy::NO_PROMOTION && ((promoType & CastStrategy::UNSIGNED_EXTENSION)==0)) return reqtype; @@ 
-1188,7 +1517,7 @@ Datatype *TypeOpIntRight::getInputCast(const PcodeOp *op,int4 slot,const CastStr Datatype *TypeOpIntRight::getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const { - Datatype *res1 = op->getIn(0)->getHigh()->getType(); + Datatype *res1 = op->getIn(0)->getHighTypeReadFacing(op); if (res1->getMetatype() == TYPE_BOOL) res1 = tlst->getBase(res1->getSize(),TYPE_INT); return res1; @@ -1218,7 +1547,7 @@ Datatype *TypeOpIntSright::getInputCast(const PcodeOp *op,int4 slot,const CastSt if (slot == 0) { const Varnode *vn = op->getIn(0); Datatype *reqtype = op->inputTypeLocal(slot); - Datatype *curtype = vn->getHigh()->getType(); + Datatype *curtype = vn->getHighTypeReadFacing(op); int4 promoType = castStrategy->intPromotionType(vn); if (promoType != CastStrategy::NO_PROMOTION && ((promoType & CastStrategy::SIGNED_EXTENSION)==0)) return reqtype; @@ -1238,7 +1567,7 @@ Datatype *TypeOpIntSright::getInputLocal(const PcodeOp *op,int4 slot) const Datatype *TypeOpIntSright::getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const { - Datatype *res1 = op->getIn(0)->getHigh()->getType(); + Datatype *res1 = op->getIn(0)->getHighTypeReadFacing(op); if (res1->getMetatype() == TYPE_BOOL) res1 = tlst->getBase(res1->getSize(),TYPE_INT); return res1; @@ -1271,7 +1600,7 @@ Datatype *TypeOpIntDiv::getInputCast(const PcodeOp *op,int4 slot,const CastStrat { const Varnode *vn = op->getIn(slot); Datatype *reqtype = op->inputTypeLocal(slot); - Datatype *curtype = vn->getHigh()->getType(); + Datatype *curtype = vn->getHighTypeReadFacing(op); int4 promoType = castStrategy->intPromotionType(vn); if (promoType != CastStrategy::NO_PROMOTION && ((promoType & CastStrategy::UNSIGNED_EXTENSION)==0)) return reqtype; @@ -1291,7 +1620,7 @@ Datatype *TypeOpIntSdiv::getInputCast(const PcodeOp *op,int4 slot,const CastStra { const Varnode *vn = op->getIn(slot); Datatype *reqtype = op->inputTypeLocal(slot); - Datatype *curtype = vn->getHigh()->getType(); + Datatype *curtype 
= vn->getHighTypeReadFacing(op); int4 promoType = castStrategy->intPromotionType(vn); if (promoType != CastStrategy::NO_PROMOTION && ((promoType & CastStrategy::SIGNED_EXTENSION)==0)) return reqtype; @@ -1311,7 +1640,7 @@ Datatype *TypeOpIntRem::getInputCast(const PcodeOp *op,int4 slot,const CastStrat { const Varnode *vn = op->getIn(slot); Datatype *reqtype = op->inputTypeLocal(slot); - Datatype *curtype = vn->getHigh()->getType(); + Datatype *curtype = vn->getHighTypeReadFacing(op); int4 promoType = castStrategy->intPromotionType(vn); if (promoType != CastStrategy::NO_PROMOTION && ((promoType & CastStrategy::UNSIGNED_EXTENSION)==0)) return reqtype; @@ -1331,7 +1660,7 @@ Datatype *TypeOpIntSrem::getInputCast(const PcodeOp *op,int4 slot,const CastStra { const Varnode *vn = op->getIn(slot); Datatype *reqtype = op->inputTypeLocal(slot); - Datatype *curtype = vn->getHigh()->getType(); + Datatype *curtype = vn->getHighTypeReadFacing(op); int4 promoType = castStrategy->intPromotionType(vn); if (promoType != CastStrategy::NO_PROMOTION && ((promoType & CastStrategy::SIGNED_EXTENSION)==0)) return reqtype; @@ -1499,6 +1828,20 @@ TypeOpMulti::TypeOpMulti(TypeFactory *t) : TypeOp(t,CPUI_MULTIEQUAL,"?") behave = new OpBehavior(CPUI_MULTIEQUAL,false,true); // Dummy behavior } +Datatype *TypeOpMulti::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + if ((inslot!=-1)&&(outslot!=-1)) return (Datatype *)0; // Must propagate input <-> output + Datatype *newtype; + if (invn->isSpacebase()) { + AddrSpace *spc = tlst->getArch()->getDefaultDataSpace(); + newtype = tlst->getTypePointer(alttype->getSize(),tlst->getBase(1,TYPE_UNKNOWN),spc->getWordSize()); + } + else + newtype = alttype; + return newtype; +} + void TypeOpMulti::printRaw(ostream &s,const PcodeOp *op) { @@ -1537,6 +1880,23 @@ Datatype *TypeOpIndirect::getInputLocal(const PcodeOp *op,int4 slot) const return 
tlst->getTypePointer(op->getIn(0)->getSize(),ct,spc->getWordSize()); // Second parameter is code pointer } +Datatype *TypeOpIndirect::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + if (op->isIndirectCreation()) return (Datatype *)0; + if ((inslot==1)||(outslot==1)) return (Datatype *)0; + if ((inslot!=-1)&&(outslot!=-1)) return (Datatype *)0; // Must propagate input <-> output + + Datatype *newtype; + if (invn->isSpacebase()) { + AddrSpace *spc = tlst->getArch()->getDefaultDataSpace(); + newtype = tlst->getTypePointer(alttype->getSize(),tlst->getBase(1,TYPE_UNKNOWN),spc->getWordSize()); + } + else + newtype = alttype; + return newtype; +} + void TypeOpIndirect::printRaw(ostream &s,const PcodeOp *op) { @@ -1572,7 +1932,7 @@ Datatype *TypeOpPiece::getOutputToken(const PcodeOp *op,CastStrategy *castStrate { const Varnode *vn = op->getOut(); - Datatype *dt = vn->getHigh()->getType(); + Datatype *dt = vn->getHighTypeDefFacing(); type_metatype meta = dt->getMetatype(); if ((meta == TYPE_INT)||(meta == TYPE_UINT)) // PIECE casts to uint or int, based on output return dt; @@ -1598,13 +1958,99 @@ string TypeOpSubpiece::getOperatorName(const PcodeOp *op) const Datatype *TypeOpSubpiece::getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const { + int4 offset; + Datatype *parent; const Varnode *vn = op->getOut(); - Datatype *dt = vn->getHigh()->getType(); // SUBPIECE prints as cast to whatever its output is + const TypeField *field = testExtraction(true, op, parent, offset); + if (field != (const TypeField *)0) { + if (vn->getSize() == field->type->getSize()) + return field->type; + } + Datatype *dt = vn->getHighTypeDefFacing(); // SUBPIECE prints as cast to whatever its output is if (dt->getMetatype() != TYPE_UNKNOWN) return dt; return tlst->getBase(vn->getSize(),TYPE_INT); // If output is unknown, treat as cast to int } +Datatype *TypeOpSubpiece::propagateType(Datatype *alttype,PcodeOp *op,Varnode 
*invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + if (inslot != 0 || outslot != -1) return (Datatype *)0; // Propagation must be from in0 to out + int4 byteOff; + int4 newoff; + const TypeField *field; + if (alttype->getMetatype() == TYPE_UNION) { + // NOTE: We use an artificial slot here to store the field being truncated to + // as the facing data-type for slot 0 is already to the parent (this TYPE_UNION) + byteOff = computeByteOffsetForComposite(op); + field = ((TypeUnion *)alttype)->resolveTruncation(byteOff,op,1,newoff); + } + else if (alttype->getMetatype() == TYPE_STRUCT) { + int4 byteOff = computeByteOffsetForComposite(op); + field = ((TypeStruct *)alttype)->resolveTruncation(byteOff, outvn->getSize(), &newoff); + } + else + return (Datatype *)0; + if (field != (const TypeField *)0 && newoff == 0 && field->type->getSize() == outvn->getSize()) { + return field->type; + } + return (Datatype *)0; +} + +/// \brief Test if the given SUBPIECE PcodeOp is acting as a field extraction operator +/// +/// For packed structures with small fields, SUBPIECE may be used to extract the field. +/// Test if the HighVariable being truncated is a structure and if the truncation produces +/// part of a \e single field. If so return the TypeField descriptor, and pass back the parent +/// structure and the number of least significant bytes that have been truncated from the field. +/// \param useHigh is \b true if the HighVariable data-type is checked, otherwise the Varnode data-type is used +/// \param op is the given SUBPIECE PcodeOp +/// \param parent holds the parent Datatype being passed back +/// \param offset holds the LSB offset being passed back +/// \return the TypeField if a field is being extracted or null otherwise +const TypeField *TypeOpSubpiece::testExtraction(bool useHigh,const PcodeOp *op,Datatype *&parent,int4 &offset) + +{ + const Varnode *vn = op->getIn(0); + Datatype *ct = useHigh ? 
vn->getHigh()->getType() : vn->getType(); + if (ct->getMetatype() == TYPE_STRUCT) { + parent = ct; + int4 byteOff = computeByteOffsetForComposite(op); + return ((TypeStruct *)ct)->resolveTruncation(byteOff,op->getOut()->getSize(),&offset); + } + else if (ct->getMetatype() == TYPE_UNION) { + const Funcdata *fd = op->getParent()->getFuncdata(); + const ResolvedUnion *res = fd->getUnionField(ct, op, 1); // Use artificial slot + if (res != (const ResolvedUnion *)0 && res->getFieldNum() >= 0) { + parent = ct; + offset = 0; + return ((TypeUnion *)ct)->getField(res->getFieldNum()); + } + } + return (const TypeField *)0; +} + +/// \brief Compute the byte offset into an assumed composite data-type produced by the given CPUI_SUBPIECE +/// +/// If the input Varnode is a composite data-type, the extracted result of the SUBPIECE represents a +/// range of bytes starting at a particular offset within the data-type. Return this offset, which +/// depends on the endianness of the input. +/// \param op is the given CPUI_SUBPIECE +/// \return the byte offset into the composite represented by the output of the SUBPIECE +int4 TypeOpSubpiece::computeByteOffsetForComposite(const PcodeOp *op) + +{ + int4 outSize = op->getOut()->getSize(); + int4 lsb = (int4)op->getIn(1)->getOffset(); + const Varnode *vn = op->getIn(0); + int byteOff; + if (vn->getSpace()->isBigEndian()) + byteOff = vn->getSize() - outSize - lsb; + else + byteOff = lsb; + return byteOff; +} + TypeOpCast::TypeOpCast(TypeFactory *t) : TypeOp(t,CPUI_CAST,"(cast)") { @@ -1642,7 +2088,7 @@ Datatype *TypeOpPtradd::getOutputLocal(const PcodeOp *op) const Datatype *TypeOpPtradd::getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const { - return op->getIn(0)->getHigh()->getType(); // Cast to the input data-type + return op->getIn(0)->getHighTypeReadFacing(op); // Cast to the input data-type } Datatype *TypeOpPtradd::getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const @@ -1650,13 +2096,28 @@ 
Datatype *TypeOpPtradd::getInputCast(const PcodeOp *op,int4 slot,const CastStrat { if (slot==0) { // The operation expects the type of the VARNODE // not the (possibly different) type of the HIGH - Datatype *reqtype = op->getIn(0)->getType(); - Datatype *curtype = op->getIn(0)->getHigh()->getType(); + Datatype *reqtype = op->getIn(0)->getTypeReadFacing(op); + Datatype *curtype = op->getIn(0)->getHighTypeReadFacing(op); return castStrategy->castStandard(reqtype,curtype,false,false); } return TypeOp::getInputCast(op,slot,castStrategy); } +Datatype *TypeOpPtradd::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + if ((inslot == 2) || (outslot == 2)) return (Datatype *)0; // Don't propagate along this edge + if ((inslot != -1) && (outslot != -1)) return (Datatype *)0; // Must propagate input <-> output + type_metatype metain = alttype->getMetatype(); + if (metain != TYPE_PTR) return (Datatype *)0; + Datatype *newtype; + if (inslot == -1) // Propagating output to input + newtype = op->getIn(outslot)->getTempType(); // Don't propagate pointer types this direction + else + newtype = TypeOpIntAdd::propagateAddIn2Out(alttype,tlst,op,inslot); + return newtype; +} + void TypeOpPtradd::printRaw(ostream &s,const PcodeOp *op) { @@ -1698,8 +2159,8 @@ Datatype *TypeOpPtrsub::getInputCast(const PcodeOp *op,int4 slot,const CastStrat { if (slot==0) { // The operation expects the type of the VARNODE // not the (possibly different) type of the HIGH - Datatype *reqtype = op->getIn(0)->getType(); - Datatype *curtype = op->getIn(0)->getHigh()->getType(); + Datatype *reqtype = op->getIn(0)->getTypeReadFacing(op); + Datatype *curtype = op->getIn(0)->getHighTypeReadFacing(op); return castStrategy->castStandard(reqtype,curtype,false,false); } return TypeOp::getInputCast(op,slot,castStrategy); @@ -1708,7 +2169,7 @@ Datatype *TypeOpPtrsub::getInputCast(const PcodeOp *op,int4 slot,const CastStrat Datatype *TypeOpPtrsub::getOutputToken(const 
PcodeOp *op,CastStrategy *castStrategy) const { - TypePointer *ptype = (TypePointer *)op->getIn(0)->getHigh()->getType(); + TypePointer *ptype = (TypePointer *)op->getIn(0)->getHighTypeReadFacing(op); if (ptype->getMetatype() == TYPE_PTR) { uintb offset = AddrSpace::addressToByte(op->getIn(1)->getOffset(),ptype->getWordSize()); uintb unusedOffset; @@ -1722,6 +2183,20 @@ Datatype *TypeOpPtrsub::getOutputToken(const PcodeOp *op,CastStrategy *castStrat return TypeOp::getOutputToken(op,castStrategy); } +Datatype *TypeOpPtrsub::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + if ((inslot!=-1)&&(outslot!=-1)) return (Datatype *)0; // Must propagate input <-> output + type_metatype metain= alttype->getMetatype(); + if (metain != TYPE_PTR) return (Datatype *)0; + Datatype *newtype; + if (inslot == -1) // Propagating output to input + newtype = op->getIn(outslot)->getTempType(); // Don't propagate pointer types this direction + else + newtype = TypeOpIntAdd::propagateAddIn2Out(alttype,tlst,op,inslot); + return newtype; +} + void TypeOpPtrsub::printRaw(ostream &s,const PcodeOp *op) { @@ -1759,7 +2234,7 @@ void TypeOpSegment::printRaw(ostream &s,const PcodeOp *op) Datatype *TypeOpSegment::getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const { - return op->getIn(2)->getHigh()->getType(); // Assume type of ptr portion + return op->getIn(2)->getHighTypeReadFacing(op); // Assume type of ptr portion } Datatype *TypeOpSegment::getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const @@ -1773,6 +2248,20 @@ Datatype *TypeOpSegment::getInputCast(const PcodeOp *op,int4 slot,const CastStra // { // } +Datatype *TypeOpSegment::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + // Must propagate slot2 <-> output + if ((inslot==0)||(inslot==1)) return (Datatype *)0; + if ((outslot==0)||(outslot==1)) return (Datatype *)0; + if 
(invn->isSpacebase()) return (Datatype *)0; + type_metatype metain = alttype->getMetatype(); + if (metain != TYPE_PTR) return (Datatype *)0; + AddrSpace *spc = tlst->getArch()->getDefaultDataSpace(); + Datatype *btype = ((TypePointer *)alttype)->getPtrTo(); + return tlst->getTypePointer(outvn->getSize(),btype,spc->getWordSize()); +} + TypeOpCpoolref::TypeOpCpoolref(TypeFactory *t) : TypeOp(t,CPUI_CPOOLREF,"cpoolref") { @@ -1831,6 +2320,16 @@ TypeOpNew::TypeOpNew(TypeFactory *t) : TypeOp(t,CPUI_NEW,"new") behave = new OpBehavior(CPUI_NEW,false,true); // Dummy behavior } +Datatype *TypeOpNew::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot) +{ + if ((inslot != 0)||(outslot != -1)) return (Datatype *)0; + Varnode *vn0 = op->getIn(0); + if (!vn0->isWritten()) return (Datatype *)0; // Don't propagate + if (vn0->getDef()->code() != CPUI_CPOOLREF) return (Datatype *)0; + return alttype; // Propagate cpool result as result of new operator +} + void TypeOpNew::printRaw(ostream &s,const PcodeOp *op) { diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.hh index 5f4552b909..e5206355b0 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.hh @@ -118,6 +118,10 @@ public: /// \brief Find the data-type of the input to a specific PcodeOp virtual Datatype *getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const; + /// \brief Propagate an incoming data-type across a specific PcodeOp + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); + /// \brief Push the specific PcodeOp to the emitter's RPN stack /// /// Given a specific language and PcodeOp, emit the expression rooted at the operation. 
@@ -208,6 +212,8 @@ public: TypeOpCopy(TypeFactory *t); ///< Constructor virtual Datatype *getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const; virtual Datatype *getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opCopy(op); } virtual void printRaw(ostream &s,const PcodeOp *op); }; @@ -219,6 +225,8 @@ public: // virtual Datatype *getInputLocal(const PcodeOp *op,int4 slot) const; virtual Datatype *getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const; virtual Datatype *getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opLoad(op); } virtual void printRaw(ostream &s,const PcodeOp *op); }; @@ -229,6 +237,8 @@ public: TypeOpStore(TypeFactory *t); ///< Constructor // virtual Datatype *getInputLocal(const PcodeOp *op,int4 slot) const; virtual Datatype *getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opStore(op); } virtual void printRaw(ostream &s,const PcodeOp *op); }; @@ -304,6 +314,10 @@ public: TypeOpEqual(TypeFactory *t); ///< Constructor virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opIntEqual(op); } virtual Datatype *getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode 
*invn,Varnode *outvn, + int4 inslot,int4 outslot); + static Datatype *propagateAcrossCompare(Datatype *alttype,TypeFactory *typegrp,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); }; /// \brief Information about the INT_NOTEQUAL op-code @@ -312,22 +326,28 @@ public: TypeOpNotEqual(TypeFactory *t); ///< Constructor virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opIntNotEqual(op); } virtual Datatype *getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); }; /// \brief Information about the INT_SLESS op-code class TypeOpIntSless : public TypeOpBinary { public: TypeOpIntSless(TypeFactory *t); ///< Constructor - virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opIntSless(op); } virtual Datatype *getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); + virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opIntSless(op); } }; /// \brief Information about the INT_SLESSEQUAL op-code class TypeOpIntSlessEqual : public TypeOpBinary { public: TypeOpIntSlessEqual(TypeFactory *t); ///< Constructor - virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opIntSlessEqual(op); } virtual Datatype *getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); + virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opIntSlessEqual(op); } }; /// \brief Information about the INT_LESS op-code @@ -336,6 +356,8 @@ public: TypeOpIntLess(TypeFactory 
*t); ///< Constructor virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opIntLess(op); } virtual Datatype *getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); }; /// \brief Information about the INT_LESSEQUAL op-code @@ -344,6 +366,8 @@ public: TypeOpIntLessEqual(TypeFactory *t); ///< Constructor virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opIntLessEqual(op); } virtual Datatype *getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); }; /// \brief Information about the INT_ZEXT op-code @@ -370,6 +394,10 @@ public: TypeOpIntAdd(TypeFactory *t); ///< Constructor virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opIntAdd(op); } virtual Datatype *getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); + static Datatype *propagateAddIn2Out(Datatype *alttype,TypeFactory *typegrp,PcodeOp *op,int4 inslot); + static int4 propagateAddPointer(uintb &off,PcodeOp *op,int4 slot,int4 sz); }; /// \brief Information about the INT_SUB op-code @@ -424,24 +452,30 @@ public: class TypeOpIntXor : public TypeOpBinary { public: TypeOpIntXor(TypeFactory *t); ///< Constructor - virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opIntXor(op); } virtual Datatype *getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); + virtual void push(PrintLanguage *lng,const 
PcodeOp *op,const PcodeOp *readOp) const { lng->opIntXor(op); } }; /// \brief Information about the INT_AND op-code class TypeOpIntAnd : public TypeOpBinary { public: TypeOpIntAnd(TypeFactory *t); ///< Constructor - virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opIntAnd(op); } virtual Datatype *getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); + virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opIntAnd(op); } }; /// \brief Information about the INT_OR op-code class TypeOpIntOr : public TypeOpBinary { public: TypeOpIntOr(TypeFactory *t); ///< Constructor - virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opIntOr(op); } virtual Datatype *getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); + virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opIntOr(op); } }; /// \brief Information about the INT_LEFT op-code @@ -672,6 +706,8 @@ public: class TypeOpMulti : public TypeOp { public: TypeOpMulti(TypeFactory *t); ///< Constructor + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opMultiequal(op); } virtual void printRaw(ostream &s,const PcodeOp *op); }; @@ -681,6 +717,8 @@ class TypeOpIndirect : public TypeOp { public: TypeOpIndirect(TypeFactory *t); ///< Constructor virtual Datatype *getInputLocal(const PcodeOp *op,int4 slot) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); virtual void 
push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opIndirect(op); } virtual void printRaw(ostream &s,const PcodeOp *op); }; @@ -701,8 +739,12 @@ public: // virtual Datatype *getOutputLocal(const PcodeOp *op) const; // virtual Datatype *getInputLocal(const PcodeOp *op,int4 slot) const; virtual Datatype *getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); virtual string getOperatorName(const PcodeOp *op) const; virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opSubpiece(op); } + static const TypeField *testExtraction(bool useHigh,const PcodeOp *op,Datatype *&parent,int4 &offset); + static int4 computeByteOffsetForComposite(const PcodeOp *op); }; /// \brief Information about the CAST op-code @@ -723,6 +765,8 @@ public: virtual Datatype *getOutputLocal(const PcodeOp *op) const; virtual Datatype *getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const; virtual Datatype *getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opPtradd(op); } virtual void printRaw(ostream &s,const PcodeOp *op); }; @@ -735,6 +779,8 @@ public: virtual Datatype *getInputLocal(const PcodeOp *op,int4 slot) const; virtual Datatype *getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const; virtual Datatype *getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opPtrsub(op); } virtual void 
printRaw(ostream &s,const PcodeOp *op); }; @@ -754,6 +800,8 @@ public: // virtual Datatype *getInputLocal(const PcodeOp *op,int4 slot) const; virtual Datatype *getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const; virtual Datatype *getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const; + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opSegmentOp(op); } virtual void printRaw(ostream &s,const PcodeOp *op); }; @@ -775,6 +823,8 @@ class TypeOpNew : public TypeOp { public: TypeOpNew(TypeFactory *t); ///< Constructor virtual Datatype *getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const { return (Datatype *)0; } // Never needs casting + virtual Datatype *propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, + int4 inslot,int4 outslot); virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opNewOp(op); } virtual void printRaw(ostream &s,const PcodeOp *op); }; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/unionresolve.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/unionresolve.cc new file mode 100644 index 0000000000..74ad895b8b --- /dev/null +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/unionresolve.cc @@ -0,0 +1,1102 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "unionresolve.hh" +#include "funcdata.hh" + +/// The original parent must either be a union, a pointer to a union, or a partial union. +/// The object is set up initially to resolve to the parent. +/// \param parent is the original parent data-type +ResolvedUnion::ResolvedUnion(Datatype *parent) + +{ + baseType = parent; + if (baseType->getMetatype() == TYPE_PTR) + baseType = ((TypePointer *)baseType)->getPtrTo(); + if (baseType->getMetatype() != TYPE_UNION && baseType->getMetatype() != TYPE_STRUCT) + throw LowlevelError("Unsupported data-type for ResolveUnion"); + resolve = parent; + fieldNum = -1; + lock = false; +} + +/// The original parent must be a union or structure. +/// \param parent is the original parent +/// \param fldNum is the index of the particular field to resolve to (or -1 to resolve to parent) +/// \param typegrp is a TypeFactory used to construct the resolved data-type of the field +ResolvedUnion::ResolvedUnion(Datatype *parent,int4 fldNum,TypeFactory &typegrp) + +{ + baseType = parent; + fieldNum = fldNum; + lock = false; + if (fldNum < 0) + resolve = parent; + else { + if (parent->getMetatype() == TYPE_PTR) { + TypePointer *pointer = (TypePointer *)parent; + Datatype *field = pointer->getPtrTo()->getDepend(fldNum); + resolve = typegrp.getTypePointer(parent->getSize(),field,pointer->getWordSize()); + } + else + resolve = parent->getDepend(fldNum); + } +} + +/// \param parent is a parent data-type that needs to be resolved +/// \param op is the PcodeOp reading/writing the \b parent data-type +/// \param slot is the slot (>=0 for input, -1 for output) accessing the \b parent +ResolveEdge::ResolveEdge(const Datatype *parent,const PcodeOp *op,int4 slot) + +{ + opTime = op->getTime(); + encoding = slot; + if (parent->getMetatype() == TYPE_PTR) { + typeId = ((TypePointer *)parent)->getPtrTo()->getId(); // Strip pointer + encoding 
+= 0x1000; // Encode the fact that a pointer is getting accessed + } + else + typeId = parent->getId(); +} + +const int4 ScoreUnionFields::threshold = 256; +const int4 ScoreUnionFields::maxPasses = 6; +const int4 ScoreUnionFields::maxTrials = 1024; + +/// If the \b op is adding a constant size or a multiple of a constant size to the given input slot, where the +/// size is at least as large as the union, return \b true. +/// \param op is the given PcodeOp +/// \param inslot is given input slot +/// \return \b true if \b op is doing array arithmetic with elements at least as large as the union +bool ScoreUnionFields::testArrayArithmetic(PcodeOp *op,int4 inslot) + +{ + if (op->code() == CPUI_INT_ADD) { + Varnode *vn = op->getIn(1 - inslot); + if (vn->isConstant()) { + if (vn->getOffset() >= result.baseType->getSize()) + return true; // Array with union elements + } + else if (vn->isWritten()) { + PcodeOp *multOp = vn->getDef(); + if (multOp->code() == CPUI_INT_MULT) { + Varnode *vn2 = multOp->getIn(1); + if (vn2->isConstant() && vn2->getOffset() >= result.baseType->getSize()) + return true;// Array with union elements + } + } + } + else if (op->code() == CPUI_PTRADD) { + Varnode *vn = op->getIn(2); + if (vn->getOffset() >= result.baseType->getSize()) + return true; + } + return false; +} + +/// Identify cases where we know the union shouldn't be resolved to a field. 
+/// \param op is the PcodeOp manipulating the union variable +/// \param inslot is -1 if the union is the output, >=0 if the union is an input to the op +/// \param parent is the parent union or pointer to union +/// \return \b true if the union should \e not be resolved to a field +bool ScoreUnionFields::testSimpleCases(PcodeOp *op,int4 inslot,Datatype *parent) + +{ + if (op->isMarker()) + return true; // Propagate raw union across MULTIEQUAL and INDIRECT + if (parent->getMetatype() == TYPE_PTR) { + if (inslot < 0) + return true; // Don't resolve pointers "up", there's only 1 possibility for assignment + if (testArrayArithmetic(op, inslot)) + return true; + } + if (op->code() != CPUI_COPY) + return false; // A more complicated case + if (inslot < 0) + return false; // Generally we don't want to propagate union backward thru COPY + if (op->getOut()->isTypeLock()) + return false; // Do the full scoring + return true; // Assume we don't have to extract a field if copying +} + +/// A trial that encounters a locked data-type does not propagate through it but scores +/// the trial data-type against the locked data-type. 
+/// \param ct is the trial data-type +/// \param lockType is the locked data-type +/// \return the score +int4 ScoreUnionFields::scoreLockedType(Datatype *ct,Datatype *lockType) + +{ + int score = 0; + + if (lockType == ct) + score += 5; // Perfect match + + while(ct->getMetatype() == TYPE_PTR) { + if (lockType->getMetatype() != TYPE_PTR) break; + score += 5; + ct = ((TypePointer *)ct)->getPtrTo(); + lockType = ((TypePointer *)lockType)->getPtrTo(); + } + + type_metatype ctMeta = ct->getMetatype(); + type_metatype vnMeta = lockType->getMetatype(); + if (ctMeta == vnMeta) { + if (ctMeta == TYPE_STRUCT || ctMeta == TYPE_UNION || ctMeta == TYPE_ARRAY || ctMeta == TYPE_CODE) + score += 10; + else + score += 3; + } + else { + if ((ctMeta == TYPE_INT && vnMeta == TYPE_UINT)||(ctMeta == TYPE_UINT && vnMeta == TYPE_INT)) + score -= 1; + else + score -= 5; + if (ct->getSize() != lockType->getSize()) + score -= 2; + } + return score; +} + +/// Look up the call-specs for the given CALL. If the inputs are locked, find the corresponding +/// parameter and score the trial data-type against it. +/// \param ct is the trial data-type +/// \param callOp is the CALL +/// \param paramSlot is the input slot of the trial data-type +/// \return the score +int4 ScoreUnionFields::scoreParameter(Datatype *ct,const PcodeOp *callOp,int4 paramSlot) + +{ + const Funcdata *fd = callOp->getParent()->getFuncdata(); + + FuncCallSpecs *fc = fd->getCallSpecs(callOp); + if (fc != (FuncCallSpecs *)0 && fc->isInputLocked() && fc->numParams() > paramSlot) { + return scoreLockedType(ct,fc->getParam(paramSlot)->getType()); + } + type_metatype meta = ct->getMetatype(); + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE) + return -1; // Vaguely unlikely thing to pass as a param + return 0; +} + +/// Look up the call-specs for the given CALL. If the output is locked, +/// score the trial data-type against it. 
+/// \param ct is the trial data-type +/// \param callOp is the CALL +/// \return the score +int4 ScoreUnionFields::scoreReturnType(Datatype *ct,const PcodeOp *callOp) + +{ + const Funcdata *fd = callOp->getParent()->getFuncdata(); + + FuncCallSpecs *fc = fd->getCallSpecs(callOp); + if (fc != (FuncCallSpecs *)0 && fc->isOutputLocked()) { + return scoreLockedType(ct,fc->getOutputType()); + } + type_metatype meta = ct->getMetatype(); + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE) + return -1; // Vaguely unlikely thing to return from a function + return 0; +} + +/// Test if the data-type is a pointer and if the pointed-to data-type is +/// compatible with the size of the value being loaded or stored. A \b score is +/// passed back for how closely the data-type fits this scenario, and if it +/// does we return the data-type of the pointer value. +/// \param ct is the trial data-type +/// \param vn is the Varnode holding the value being loaded or stored +/// \param score is used to pass back the score +/// \return the data-type of the value or null +Datatype *ScoreUnionFields::derefPointer(Datatype *ct,Varnode *vn,int4 &score) + +{ + Datatype *resType = (Datatype *)0; + score = 0; + if (ct->getMetatype() == TYPE_PTR) { + Datatype *ptrto = ((TypePointer *)ct)->getPtrTo(); + while(ptrto != (Datatype *)0 && ptrto->getSize() > vn->getSize()) { + uintb newoff; + ptrto = ptrto->getSubType(0, &newoff); + } + if (ptrto != (Datatype *)0 && ptrto->getSize() == vn->getSize()) { + score = 10; + resType = ptrto; + } + } + else + score = -10; + return resType; +} + +/// If the Varnode has already been visited, no new trials are created +/// \param vn is the given Varnode +/// \param ct is the data-type to associate with the trial +/// \param scoreIndex is the field index to score the trial against +/// \param isArray is \b true if the data-type to fit is a pointer to an array +void ScoreUnionFields::newTrialsDown(Varnode *vn,Datatype 
*ct,int4 scoreIndex,bool isArray) + +{ + VisitMark mark(vn,scoreIndex); + if (!visited.insert(mark).second) + return; // Already visited this Varnode + if (vn->isTypeLock()) { + scores[scoreIndex] += scoreLockedType(ct, vn->getType()); + return; // Don't propagate through locked Varnode + } + list::const_iterator piter; + for(piter = vn->beginDescend();piter != vn->endDescend();++piter) { + PcodeOp *op = *piter; + trialNext.emplace_back(op,op->getSlot(vn),ct,scoreIndex,isArray); + } +} + +/// If the input slot is a Varnode that has already been visited, no new trial is created +/// \param op is the PcodeOp with the given slot +/// \param slot is the index of the given input slot +/// \param ct is the data-type to associate with the trial +/// \param scoreIndex is the field index to score the trial against +/// \param isArray is \b true if the data-type to fit is a pointer to an array +void ScoreUnionFields::newTrials(PcodeOp *op,int4 slot,Datatype *ct,int4 scoreIndex,bool isArray) + +{ + Varnode *vn = op->getIn(slot); + VisitMark mark(vn,scoreIndex); + if (!visited.insert(mark).second) + return; // Already visited this Varnode + if (vn->isTypeLock()) { + scores[scoreIndex] += scoreLockedType(ct, vn->getType()); + return; // Don't propagate through locked Varnode + } + trialNext.emplace_back(vn,ct,scoreIndex,isArray); // Try to fit up + list::const_iterator iter; + for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) { + PcodeOp *readOp = *iter; + int4 inslot = readOp->getSlot(vn); + if (readOp == op && inslot == slot) + continue; // Don't go down PcodeOp we came from + trialNext.emplace_back(readOp,inslot,ct, scoreIndex,isArray); + } +} + +/// The trial's data-type is fitted to its PcodeOp as the incoming Varnode and a +/// score is computed and added to the score for the trial's union field. The fitting may +/// produce a new data-type which indicates scoring for the trial recurses into the output. 
+/// This method builds trials for any new data-type unless \b lastLevel is \b true +/// Varnode of its PcodeOp. +/// \param trial is the given trial +/// \param lastLevel is \b true if the method should skip building new trials +void ScoreUnionFields::scoreTrialDown(const Trial &trial,bool lastLevel) + +{ + if (trial.direction == Trial::fit_up) + return; // Trial doesn't push in this direction + Datatype *resType = (Datatype *)0; // Assume by default we don't propagate + type_metatype meta = trial.fitType->getMetatype(); + int4 score = 0; + switch(trial.op->code()) { + case CPUI_COPY: + case CPUI_MULTIEQUAL: + case CPUI_INDIRECT: + resType = trial.fitType; // No score, but we can propagate + break; + case CPUI_LOAD: + resType = derefPointer(trial.fitType, trial.op->getOut(), score); + break; + case CPUI_STORE: + if (trial.inslot == 1) { + Datatype *ptrto = derefPointer(trial.fitType,trial.op->getIn(2),score); + if (ptrto != (Datatype*)0) { + if (!lastLevel) + newTrials(trial.op,2,ptrto,trial.scoreIndex,trial.array); // Propagate to value being STOREd + } + } + else if (trial.inslot == 2) { + if (meta == TYPE_CODE) + score = -5; + else + score = 1; + } + break; + case CPUI_CBRANCH: + if (meta == TYPE_BOOL) + score = 10; + else + score = -10; + break; + case CPUI_BRANCHIND: + if (meta == TYPE_PTR || meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || + meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + else + score = 1; + break; + case CPUI_CALL: + case CPUI_CALLOTHER: + if (trial.inslot > 0) + score = scoreParameter(trial.fitType, trial.op, trial.inslot-1); + break; + case CPUI_CALLIND: + if (trial.inslot == 0) { + if (meta == TYPE_PTR) { + Datatype *ptrto = ((TypePointer*)trial.fitType)->getPtrTo(); + if (ptrto->getMetatype() == TYPE_CODE) { + score = 10; + } + else { + score = -10; + } + } + } + else { + score = scoreParameter(trial.fitType, trial.op, trial.inslot-1); + } + break; + case CPUI_RETURN: + // We could check for locked return 
data-type + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE) + score = -1; + break; + case CPUI_INT_EQUAL: + case CPUI_INT_NOTEQUAL: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -1; + else + score = 1; + break; + case CPUI_INT_SLESS: + case CPUI_INT_SLESSEQUAL: + case CPUI_INT_SCARRY: + case CPUI_INT_SBORROW: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + else if (meta == TYPE_PTR || meta == TYPE_UNKNOWN || meta == TYPE_UINT || meta == TYPE_BOOL) + score = -1; + else + score = 5; + break; + case CPUI_INT_LESS: + case CPUI_INT_LESSEQUAL: + case CPUI_INT_CARRY: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + else if (meta == TYPE_PTR || meta == TYPE_UNKNOWN || meta == TYPE_UINT) + score = 5; + else if (meta == TYPE_INT) + score = -5; + break; + case CPUI_INT_ZEXT: + if (meta == TYPE_UINT) + score = 2; + else if (meta == TYPE_INT || meta == TYPE_BOOL) + score = 1; + else if (meta == TYPE_UNKNOWN) + score = 0; + else // struct,union,ptr,array,code,float + score = -5; + break; + case CPUI_INT_SEXT: + if (meta == TYPE_INT) + score = 2; + else if (meta == TYPE_UINT || meta == TYPE_BOOL) + score = 1; + else if (meta == TYPE_UNKNOWN) + score = 0; + else // struct,union,ptr,array,code,float + score = -5; + break; + case CPUI_INT_ADD: + case CPUI_INT_SUB: + case CPUI_PTRSUB: + if (meta == TYPE_PTR) { + if (trial.inslot >= 0) { + Varnode *vn = trial.op->getIn(1-trial.inslot); + if (vn->isConstant()) { + TypePointer *baseType = (TypePointer *)trial.fitType; + uintb off = vn->getOffset(); + uintb parOff; + TypePointer *par; + resType = baseType->downChain(off,par,parOff,trial.array,typegrp); + if (resType != (Datatype*)0) + score = 5; + } + else { + if (trial.array) { + score = 1; + int4 elSize = 1; 
+ if (vn->isWritten()) { + PcodeOp *multOp = vn->getDef(); + if (multOp->code() == CPUI_INT_MULT) { + Varnode *multVn = multOp->getIn(1); + if (multVn->isConstant()) + elSize = (int4)multVn->getOffset(); + } + } + TypePointer *baseType = (TypePointer *)trial.fitType; + if (baseType->getPtrTo()->getSize() == elSize) { + score = 5; + resType = trial.fitType; + } + } + else + score = 5; // Indexing into something that is not an array + } + } + } + else if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + else + score = 1; + break; + case CPUI_INT_2COMP: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + else if (meta == TYPE_PTR || meta == TYPE_UNKNOWN || meta == TYPE_BOOL) + score = -1; + else if (meta == TYPE_INT) + score = 5; + break; + case CPUI_INT_NEGATE: + case CPUI_INT_XOR: + case CPUI_INT_AND: + case CPUI_INT_OR: + case CPUI_POPCOUNT: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + else if (meta == TYPE_PTR || meta == TYPE_BOOL) + score = -1; + else if (meta == TYPE_UINT || meta == TYPE_UNKNOWN) + score = 2; + break; + case CPUI_INT_LEFT: + case CPUI_INT_RIGHT: + if (trial.inslot == 0) { + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + else if (meta == TYPE_PTR || meta == TYPE_BOOL) + score = -1; + else if (meta == TYPE_UINT || meta == TYPE_UNKNOWN) + score = 2; + } + else { + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || + meta == TYPE_FLOAT || meta == TYPE_PTR) + score = -5; + else + score = 1; + } + break; + case CPUI_INT_SRIGHT: + if (trial.inslot == 0) { + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + else if (meta == 
TYPE_PTR || meta == TYPE_BOOL || meta == TYPE_UINT || meta == TYPE_UNKNOWN) + score = -1; + else + score = 2; + } + else { + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || + meta == TYPE_FLOAT || meta == TYPE_PTR) + score = -5; + else + score = 1; + } + break; + case CPUI_INT_MULT: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -10; + else if (meta == TYPE_PTR || meta == TYPE_BOOL) + score = -2; + else + score = 5; + break; + case CPUI_INT_DIV: + case CPUI_INT_REM: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -10; + else if (meta == TYPE_PTR || meta == TYPE_BOOL) + score = -2; + else if (meta == TYPE_UINT || meta == TYPE_UNKNOWN) + score = 5; + break; + case CPUI_INT_SDIV: + case CPUI_INT_SREM: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -10; + else if (meta == TYPE_PTR || meta == TYPE_BOOL) + score = -2; + else if (meta == TYPE_INT) + score = 5; + break; + case CPUI_BOOL_NEGATE: + case CPUI_BOOL_AND: + case CPUI_BOOL_XOR: + case CPUI_BOOL_OR: + if (meta == TYPE_BOOL) + score = 10; + else if (meta == TYPE_INT || meta == TYPE_UINT || meta == TYPE_UNKNOWN) + score = -1; + else + score = -10; + break; + case CPUI_FLOAT_EQUAL: + case CPUI_FLOAT_NOTEQUAL: + case CPUI_FLOAT_LESS: + case CPUI_FLOAT_LESSEQUAL: + case CPUI_FLOAT_NAN: + case CPUI_FLOAT_ADD: + case CPUI_FLOAT_DIV: + case CPUI_FLOAT_MULT: + case CPUI_FLOAT_SUB: + case CPUI_FLOAT_NEG: + case CPUI_FLOAT_ABS: + case CPUI_FLOAT_SQRT: + case CPUI_FLOAT_FLOAT2FLOAT: + case CPUI_FLOAT_TRUNC: + case CPUI_FLOAT_CEIL: + case CPUI_FLOAT_FLOOR: + case CPUI_FLOAT_ROUND: + if (meta == TYPE_FLOAT) + score = 10; + else + score = -10; + break; + case CPUI_FLOAT_INT2FLOAT: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == 
TYPE_CODE || meta == TYPE_FLOAT) + score = -10; + else if (meta == TYPE_PTR) + score = -5; + else if (meta == TYPE_INT) + score = 5; + break; + case CPUI_PIECE: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + break; + case CPUI_SUBPIECE: + { + int4 offset = TypeOpSubpiece::computeByteOffsetForComposite(trial.op); + resType = scoreTruncation(trial.fitType, trial.op->getOut(), offset, trial.scoreIndex); + break; + } + case CPUI_PTRADD: + if (meta == TYPE_PTR) { + if (trial.inslot == 0) { + Datatype *ptrto = ((TypePointer *)trial.fitType)->getPtrTo(); + if (ptrto->getSize() == trial.op->getIn(2)->getOffset()) { + score = 10; + resType = trial.fitType; + } + } + else { + score = -10; + } + } + else if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + else + score = 1; + break; + case CPUI_SEGMENTOP: + if (trial.inslot == 2) { + if (meta == TYPE_PTR) + score = 5; + else if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + else + score = -1; + } + else { + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT || + meta == TYPE_PTR) + score = -2; + } + break; + default: + score = -10; // Doesn't fit + break; + } + scores[trial.scoreIndex] += score; + if (resType != (Datatype *)0 && !lastLevel) + newTrialsDown(trial.op->getOut(), resType, trial.scoreIndex, trial.array); +} + +void ScoreUnionFields::scoreTrialUp(const Trial &trial,bool lastLevel) + +{ + if (trial.direction == Trial::fit_down) + return; // Trial doesn't push in this direction + int score = 0; + if (!trial.vn->isWritten()) { + if (trial.vn->isConstant()) + scoreConstantFit(trial); + return; // Nothing to propagate up through + } + Datatype *resType = (Datatype *)0; // Assume by default we don't propagate + int4 newslot = 0; + 
type_metatype meta = trial.fitType->getMetatype(); + PcodeOp *def = trial.vn->getDef(); + switch(def->code()) { + case CPUI_COPY: + case CPUI_MULTIEQUAL: + case CPUI_INDIRECT: + resType = trial.fitType; // No score, but we can propagate + newslot = 0; + break; + case CPUI_LOAD: + resType = typegrp.getTypePointer(def->getIn(1)->getSize(),trial.fitType,1); + newslot = 1; // No score, but we can propagate + break; + case CPUI_CALL: + case CPUI_CALLOTHER: + case CPUI_CALLIND: + score = scoreReturnType(trial.fitType, def); + break; + case CPUI_INT_EQUAL: + case CPUI_INT_NOTEQUAL: + case CPUI_INT_SLESS: + case CPUI_INT_SLESSEQUAL: + case CPUI_INT_SCARRY: + case CPUI_INT_SBORROW: + case CPUI_INT_LESS: + case CPUI_INT_LESSEQUAL: + case CPUI_INT_CARRY: + case CPUI_BOOL_NEGATE: + case CPUI_BOOL_AND: + case CPUI_BOOL_XOR: + case CPUI_BOOL_OR: + case CPUI_FLOAT_EQUAL: + case CPUI_FLOAT_NOTEQUAL: + case CPUI_FLOAT_LESS: + case CPUI_FLOAT_LESSEQUAL: + case CPUI_FLOAT_NAN: + if (meta == TYPE_BOOL) + score = 10; + else if (trial.fitType->getSize() == 1) + score = 1; + else + score = -10; + break; + case CPUI_INT_ADD: + case CPUI_INT_SUB: + case CPUI_PTRSUB: + if (meta == TYPE_PTR) { + score = 5; // Don't try to back up further + } + else if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + else + score = 1; + break; + case CPUI_INT_2COMP: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + else if (meta == TYPE_PTR || meta == TYPE_UNKNOWN || meta == TYPE_BOOL) + score = -1; + else if (meta == TYPE_INT) + score = 5; + break; + case CPUI_INT_NEGATE: + case CPUI_INT_XOR: + case CPUI_INT_AND: + case CPUI_INT_OR: + case CPUI_POPCOUNT: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + else if (meta == TYPE_PTR || meta == TYPE_BOOL) + score = -1; + else if 
(meta == TYPE_UINT || meta == TYPE_UNKNOWN) + score = 2; + break; + case CPUI_INT_LEFT: + case CPUI_INT_RIGHT: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + else if (meta == TYPE_PTR || meta == TYPE_BOOL) + score = -1; + else if (meta == TYPE_UINT || meta == TYPE_UNKNOWN) + score = 2; + break; + case CPUI_INT_SRIGHT: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + else if (meta == TYPE_PTR || meta == TYPE_BOOL || meta == TYPE_UINT || meta == TYPE_UNKNOWN) + score = -1; + else + score = 2; + break; + case CPUI_INT_MULT: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -10; + else if (meta == TYPE_PTR || meta == TYPE_BOOL) + score = -2; + else + score = 5; + break; + case CPUI_INT_DIV: + case CPUI_INT_REM: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -10; + else if (meta == TYPE_PTR || meta == TYPE_BOOL) + score = -2; + else if (meta == TYPE_UINT || meta == TYPE_UNKNOWN) + score = 5; + break; + case CPUI_INT_SDIV: + case CPUI_INT_SREM: + if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -10; + else if (meta == TYPE_PTR || meta == TYPE_BOOL) + score = -2; + else if (meta == TYPE_INT) + score = 5; + break; + case CPUI_FLOAT_ADD: + case CPUI_FLOAT_DIV: + case CPUI_FLOAT_MULT: + case CPUI_FLOAT_SUB: + case CPUI_FLOAT_NEG: + case CPUI_FLOAT_ABS: + case CPUI_FLOAT_SQRT: + case CPUI_FLOAT_FLOAT2FLOAT: + case CPUI_FLOAT_CEIL: + case CPUI_FLOAT_FLOOR: + case CPUI_FLOAT_ROUND: + case CPUI_FLOAT_INT2FLOAT: + if (meta == TYPE_FLOAT) + score = 10; + else + score = -10; + break; + case CPUI_FLOAT_TRUNC: + if (meta == TYPE_INT || meta == TYPE_UINT) + score = 2; + else + score = -2; + break; + case 
CPUI_PIECE: + if (meta == TYPE_FLOAT || meta == TYPE_BOOL) + score = -5; + else if (meta == TYPE_CODE || meta == TYPE_PTR) + score = -2; + break; + case CPUI_SUBPIECE: + if (meta == TYPE_INT || meta == TYPE_UINT || meta == TYPE_BOOL) { + if (def->getIn(1)->getOffset() == 0) + score = 3; // Likely truncation + else + score = 1; + } + else + score = -5; + break; + case CPUI_PTRADD: + if (meta == TYPE_PTR) { + Datatype *ptrto = ((TypePointer *)trial.fitType)->getPtrTo(); + if (ptrto->getSize() == def->getIn(2)->getOffset()) + score = 10; + else + score = 2; + } + else if (meta == TYPE_ARRAY || meta == TYPE_STRUCT || meta == TYPE_UNION || meta == TYPE_CODE || meta == TYPE_FLOAT) + score = -5; + else + score = 1; + break; + default: + score = -10; // Datatype doesn't fit + break; + } + scores[trial.scoreIndex] += score; + if (resType != (Datatype *)0 && !lastLevel) { + newTrials(def, newslot, resType, trial.scoreIndex, trial.array); + } +} + +/// The truncation may be an explicit CPUI_SUBPIECE, or it may be implied. +/// A score is computed for fitting a given data-type to the truncation, and a possible +/// data-type to recurse is also computed. 
+/// \param ct is the given data-type to truncate
+/// \param vn is the Varnode the truncation will fit into
+/// \param offset is the number of bytes truncated off the start of the data-type
+/// \param scoreIndex is the field being scored
+/// \return the data-type to recurse or null
+Datatype *ScoreUnionFields::scoreTruncation(Datatype *ct,Varnode *vn,int4 offset,int4 scoreIndex)
+
+{
+  int4 score;
+  if (ct->getMetatype() == TYPE_UNION) {
+    TypeUnion *unionDt = (TypeUnion *)ct;
+    ct = (Datatype *)0;                 // Don't recurse a data-type from truncation of a union
+    score = -10;                        // Negative score if the union has no field matching the size
+    int4 num = unionDt->numDepend();
+    for(int4 i=0;i<num;++i) {
+      const TypeField *field = unionDt->getField(i);
+      if (field->offset == offset && field->type->getSize() == vn->getSize()) {
+        score = 10;
+        if (result.getBase() == unionDt)
+          score += 5;
+        break;
+      }
+    }
+  }
+  else {
+    uintb off = offset;
+    score = 10;         // If we can find a size match for the truncation
+    while(ct != (Datatype*)0 && (off != 0 || ct->getSize() != vn->getSize())) {
+      if (ct->getMetatype() == TYPE_INT || ct->getMetatype() == TYPE_UINT) {
+        if (ct->getSize() >= vn->getSize() + off) {
+          score = 1;    // Size doesn't match, but still possibly a reasonable operation
+          break;
+        }
+      }
+      ct = ct->getSubType(off,&off);
+    }
+    if (ct == (Datatype *)0)
+      score = -10;
+  }
+  scores[scoreIndex] += score;
+  return ct;
+}
+
+/// Assume the constant has no data-type of its own to match against.
+/// Evaluate if the constant looks like an integer or pointer etc. and score the trial data-type against that.
+/// \param trial is the trial of the constant Varnode
+void ScoreUnionFields::scoreConstantFit(const Trial &trial)
+
+{
+  int4 size = trial.vn->getSize();
+  uintb val = trial.vn->getOffset();
+  type_metatype meta = trial.fitType->getMetatype();
+  int4 score = 0;
+  if (meta == TYPE_BOOL) {
+    score = (size == 1 && val < 2) ? 2 : -2;
+  }
+  else if (meta == TYPE_FLOAT) {
+    score = -1;
+    const FloatFormat *format = typegrp.getArch()->translate->getFloatFormat(size);
+    if (format != (const FloatFormat *)0) {
+      int4 exp = format->extractExponentCode(val);
+      if (exp < 7 && exp > -4)          // Check for common exponent range
+        score = 2;
+    }
+  }
+  else if (meta == TYPE_INT || meta == TYPE_UINT || meta == TYPE_PTR) {
+    if (val == 0) {
+      score = 2;        // Zero is equally valid as pointer or integer
+    }
+    else {
+      AddrSpace *spc = typegrp.getArch()->getDefaultDataSpace();
+      bool looksLikePointer = false;
+      if (val >= spc->getPointerLowerBound() && val <= spc->getPointerUpperBound()) {
+        if (bit_transitions(val,size) >= 3) {
+          looksLikePointer = true;
+        }
+      }
+      if (meta == TYPE_PTR) {
+        score = looksLikePointer ? 2 : -2;
+      }
+      else {
+        score = looksLikePointer ? 1 : 2;
+      }
+    }
+  }
+  else
+    score = -2;
+  scores[trial.scoreIndex] += score;
+}
+
+/// Run through each trial in the current list and compute a score. If the trial recurses and this is
+/// \e not the final pass, build new trials for the recursion.
+/// \param lastPass is \b true if this is the last pass
+void ScoreUnionFields::runOneLevel(bool lastPass)
+
+{
+  list<Trial>::const_iterator iter;
+  for(iter=trialCurrent.begin();iter!=trialCurrent.end();++iter) {
+    trialCount += 1;
+    if (trialCount > maxTrials)
+      return;                   // Absolute number of trials reached
+    const Trial &trial(*iter);
+    scoreTrialDown(trial,lastPass);
+    scoreTrialUp(trial,lastPass);
+  }
+}
+
+void ScoreUnionFields::computeBestIndex(void)
+
+{
+  int4 bestScore = scores[0];
+  int4 bestIndex = 0;
+  for(int4 i=1;i<scores.size();++i) {
+    if (scores[i] > bestScore) {
+      bestScore = scores[i];
+      bestIndex = i;
+    }
+  }
+  result.fieldNum = bestIndex - 1;      // Renormalize score index to field index
+  result.resolve = fields[bestIndex];
+}
+
+/// Try to fit each possible field over multiple levels of the data-flow.
+/// Only the per-field scores are accumulated here; the best field is picked afterward by
+/// computeBestIndex(), where a field index of -1 means the union itself is the best fit.
+void ScoreUnionFields::run(void)
+
+{
+  trialCount = 0;
+  for(int4 pass=0;pass < maxPasses;++pass) {
+    if (trialCurrent.empty())
+      break;
+    if (trialCount > threshold)
+      break;                    // Threshold reached, don't score any more trials
+    if (pass + 1 == maxPasses)
+      runOneLevel(true);
+    else {
+      runOneLevel(false);
+      trialCurrent.swap(trialNext);
+      trialNext.clear();
+    }
+  }
+}
+
+/// \brief Score a given data-type involving a union against data-flow
+///
+/// The data-type must either be a union or a pointer to union.
+/// Set up the initial set of trials based on the given data-flow edge (PcodeOp and slot).
+/// \param tgrp is the TypeFactory owning the data-types
+/// \param parentType is the given data-type to score
+/// \param op is PcodeOp of the given data-flow edge
+/// \param slot is slot of the given data-flow edge
+ScoreUnionFields::ScoreUnionFields(TypeFactory &tgrp,Datatype *parentType,PcodeOp *op,int4 slot)
+  : typegrp(tgrp), result(parentType)
+{
+  if (testSimpleCases(op, slot, parentType))
+    return;
+  int4 wordSize = (parentType->getMetatype() == TYPE_PTR) ? ((TypePointer *)parentType)->getWordSize() : 0;
+  int4 numFields = result.baseType->numDepend();
+  scores.resize(numFields + 1,0);
+  fields.resize(numFields + 1,(Datatype *)0);
+  Varnode *vn;
+  if (slot < 0) {
+    vn = op->getOut();
+    if (vn->getSize() != parentType->getSize())
+      scores[0] -= 10;          // Data-type does not even match size of Varnode
+    else
+      trialCurrent.emplace_back(vn,parentType,0,false);
+  }
+  else {
+    vn = op->getIn(slot);
+    if (vn->getSize() != parentType->getSize())
+      scores[0] -= 10;
+    else
+      trialCurrent.emplace_back(op,slot,parentType,0,false);
+  }
+  fields[0] = parentType;
+  visited.insert(VisitMark(vn,0));
+  for(int4 i=0;i<numFields;++i) {
+    Datatype *fieldType = result.baseType->getDepend(i);
+    bool isArray = false;
+    if (wordSize != 0) {
+      if (fieldType->getMetatype() == TYPE_ARRAY)
+        isArray = true;
+      fieldType = typegrp.getTypePointerStripArray(parentType->getSize(),fieldType,wordSize);
+    }
+    if (vn->getSize() != fieldType->getSize())
+      scores[i+1] -= 10;        // Data-type does not even match size of Varnode, don't create trial
+    else if (slot < 0) {
+      trialCurrent.emplace_back(vn,fieldType,i+1,isArray);
+    }
+    else {
+      trialCurrent.emplace_back(op,slot,fieldType,i+1,isArray);
+    }
+    fields[i+1] = fieldType;
+    visited.insert(VisitMark(vn,i+1));
+  }
+  run();
+  computeBestIndex();
+}
+
+/// \brief Score a union data-type against data-flow, where there is a SUBPIECE
+///
+/// A truncation is fit to each union field before doing the fit against data-flow.
+/// Only fields that match the offset and the truncation size (of the SUBPIECE) are scored further.
+/// If there is a good fit, the scoring for that field recurses into the given data-flow edge.
+/// This is only used where there is a SUBPIECE and the base scoring indicates the whole union is
+/// the best match for the input.
+/// \param tgrp is the TypeFactory owning the data-types
+/// \param unionType is the data-type to score, which must be a TypeUnion
+/// \param offset is the given starting offset of the truncation
+/// \param op is the SUBPIECE op
+ScoreUnionFields::ScoreUnionFields(TypeFactory &tgrp,TypeUnion *unionType,int4 offset,PcodeOp *op)
+  :typegrp(tgrp), result(unionType)
+{
+  Varnode *vn = op->getOut();
+  int numFields = unionType->numDepend();
+  scores.resize(numFields + 1, 0);
+  fields.resize(numFields + 1, (Datatype *)0);
+  fields[0] = unionType;
+  scores[0] = -10;
+  for(int4 i=0;i<numFields;++i) {
+    const TypeField *unionField = unionType->getField(i);
+    fields[i+1] = unionField->type;
+    if (unionField->type->getSize() != vn->getSize() || unionField->offset != offset) {
+      scores[i+1] = -10;
+      continue;
+    }
+    newTrialsDown(vn, unionField->type, i+1, false);
+  }
+  trialCurrent.swap(trialNext);
+  if (trialCurrent.size() > 1)
+    run();
+  computeBestIndex();
+}
+
+/// \brief Score a union data-type against data-flow, where there is an implied truncation
+///
+/// A truncation is fit to each union field before doing the fit against data-flow, starting with
+/// the given PcodeOp and input slot.
+/// \param tgrp is the TypeFactory owning the data-types
+/// \param unionType is the data-type to score, which must be a TypeUnion
+/// \param offset is the given starting offset of the truncation
+/// \param op is the PcodeOp initially reading/writing the union
+/// \param slot is -1 if the op is writing, >= 0 if reading
+ScoreUnionFields::ScoreUnionFields(TypeFactory &tgrp,TypeUnion *unionType,int4 offset,PcodeOp *op,int4 slot)
+  :typegrp(tgrp), result(unionType)
+{
+  Varnode *vn = (slot < 0) ? op->getOut() : op->getIn(slot);
+  int numFields = unionType->numDepend();
+  scores.resize(numFields + 1, 0);
+  fields.resize(numFields + 1, (Datatype *)0);
+  fields[0] = unionType;
+  scores[0] = -10;              // Assume the untruncated entire union is not a good fit
+  for(int4 i=0;i<numFields;++i) {
+    const TypeField *unionField = unionType->getField(i);
+    fields[i+1] = unionField->type;
+    // Score the implied truncation
+    Datatype *ct = scoreTruncation(unionField->type,vn,offset-unionField->offset,i+1);
+    if (ct != (Datatype *)0) {
+      if (slot < 0)
+        trialCurrent.emplace_back(vn,ct,i+1,false);             // Try to flow backward
+      else
+        trialCurrent.emplace_back(op,slot,ct,i+1,false);        // Flow downward
+      visited.insert(VisitMark(vn,i+1));
+    }
+  }
+  if (trialCurrent.size() > 1)
+    run();
+  computeBestIndex();
+}
+
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/unionresolve.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/unionresolve.hh
new file mode 100644
index 0000000000..c60c706014
--- /dev/null
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/unionresolve.hh
@@ -0,0 +1,180 @@
+/* ###
+ * IP: GHIDRA
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#ifndef __UNION_RESOLVE__ +#define __UNION_RESOLVE__ + +#include "op.hh" + +/// \brief A data-type \e resolved from an associated TypeUnion or TypeStruct +/// +/// A \b parent refers to either: +/// 1) A union +/// 2) A structure that is an effective union (1 field filling the entire structure) OR +/// 3) A pointer to a union/structure +/// +/// This object represents a data-type that is resolved via analysis from the \b parent data-type. +/// The resolved data-type can be either: +/// 1) A specific field of the parent (if the parent is not a pointer) +/// 2) A pointer to a specific field of the underlying union/structure (if the parent is a pointer) +/// 3) The parent data-type itself (either a pointer or not) +/// The \b fieldNum (if non-negative) selects a particular field of the underlying union/structure. +/// If the parent is a pointer, the resolution is a pointer to the field. +/// If the parent is not a pointer, the resolution is the field itself. +/// A \b fieldNum of -1 indicates that the parent data-type itself is the resolution. 
+class ResolvedUnion { + friend class ScoreUnionFields; + Datatype *resolve; ///< The resolved data-type + Datatype *baseType; ///< Union or Structure being resolved + int4 fieldNum; ///< Index of field referenced by \b resolve + bool lock; ///< If \b true, resolution cannot be overridden +public: + ResolvedUnion(Datatype *parent); ///< Construct a data-type that resolves to itself + ResolvedUnion(Datatype *parent,int4 fldNum,TypeFactory &typegrp); ///< Construct a reference to a field + Datatype *getDatatype(void) const { return resolve; } ///< Get the resolved data-type + Datatype *getBase(void) const { return baseType; } ///< Get the union or structure being referenced + int4 getFieldNum(void) const { return fieldNum; } ///< Get the index of the resolved field or -1 + bool isLocked(void) const { return lock; } ///< Is \b this locked against overrides + void setLock(bool val) { lock = val; } ///< Set whether \b this resolution is locked against overrides +}; + +/// \brief A data-flow edge to which a resolved data-type can be assigned +/// +/// The edge is associated with the specific data-type that needs to be resolved, +/// which is typically a union or a pointer to a union. The edge collapses different +/// kinds of pointers to the same base union. +class ResolveEdge { + uint8 typeId; ///< Id of base data-type being resolved + uintm opTime; ///< Id of PcodeOp edge + int4 encoding; ///< Encoding of the slot and pointer-ness +public: + ResolveEdge(const Datatype *parent,const PcodeOp *op,int4 slot); ///< Construct from components + bool operator<(const ResolveEdge &op2) const; ///< Compare two edges +}; + +/// \brief Analyze data-flow to resolve which field of a union data-type is being accessed +/// +/// A Varnode with a data-type that is either a union, a pointer to union, or a part of a union, can +/// be accessed in multiple ways. 
Each individual read (or write) of the Varnode may be accessing either +/// a specific field of the union or accessing the union as a whole. The particular access may not be +/// explicitly known but can sometimes be inferred from data-flow near the Varnode. This class scores +/// all the possible fields of a data-type involving a union for a specific Varnode. +/// +/// Because the answer may be different for different accesses, the Varnode must be specified as an +/// access \e edge, a PcodeOp and a \b slot. A slot >= 0 indicates the index of a Varnode that is being read +/// by the PcodeOp, a slot == -1 indicates the output Varnode being written by the PcodeOp. +/// +/// The result of scoring is returned as a ResolvedUnion record. +class ScoreUnionFields { + /// \brief A trial data-type fitted to a specific place in the data-flow + class Trial { + friend class ScoreUnionFields; + /// \brief An enumerator to distinguish how an individual trial follows data-flow + enum dir_type { + fit_down, ///< Only push the fit down \e with the data-flow + fit_up ///< Only push the fit up \e against the data-flow + }; + Varnode *vn; ///< The Varnode we are testing for data-type fit + PcodeOp *op; ///< The PcodeOp reading the Varnode (or null) + int4 inslot; ///< The slot reading the Varnode (or -1) + dir_type direction; ///< Direction to push fit. 
0=down 1=up
+    bool array;                 ///< Field can be accessed as an array
+    Datatype *fitType;          ///< The putative data-type of the Varnode
+    int4 scoreIndex;            ///< The original field being scored by \b this trial
+  public:
+    /// \brief Construct a downward trial for a Varnode
+    ///
+    /// \param o is the PcodeOp reading the Varnode
+    /// \param slot is the input slot being read
+    /// \param ct is the trial data-type to fit
+    /// \param index is the scoring index
+    /// \param isArray is \b true if the data-type to fit is a pointer to an array
+    Trial(PcodeOp *o,int4 slot,Datatype *ct,int4 index,bool isArray) {
+      op = o; inslot = slot; direction = fit_down; fitType = ct; scoreIndex = index; vn = o->getIn(slot); array=isArray; }
+
+    /// \brief Construct an upward trial for a Varnode
+    ///
+    /// \param v is the Varnode to fit
+    /// \param ct is the trial data-type to fit
+    /// \param index is the scoring index
+    /// \param isArray is \b true if the data-type to fit is a pointer to an array
+    Trial(Varnode *v,Datatype *ct,int4 index,bool isArray) {
+      vn = v; op = (PcodeOp *)0; inslot=-1; direction = fit_up; fitType = ct; scoreIndex = index; array=isArray; }
+  };
+
+  /// \brief A mark accumulated when a given Varnode is visited with a specific field index
+  class VisitMark {
+    Varnode *vn;                ///< Varnode reached by trial field
+    int4 index;                 ///< Index of the trial field
+  public:
+    VisitMark(Varnode *v,int4 i) { vn = v; index = i; }        ///< Constructor
+
+    /// \brief Compare two VisitMarks for use in a set container
+    ///
+    /// \param op2 is the other VisitMark being compared with \b this
+    /// \return \b true if \b this should be ordered before \b op2
+    bool operator<(const VisitMark &op2) const {
+      if (vn != op2.vn)
+        return (vn < op2.vn);
+      return (index < op2.index);
+    }
+  };
+  TypeFactory &typegrp;                 ///< The factory containing data-types
+  vector<int4> scores;                  ///< Score for each field, indexed by fieldNum + 1 (whole union is index=0)
+  vector<Datatype *> fields;            ///< Field corresponding to each score
+  set<VisitMark> visited;               ///< Places that have already been visited
+  list<Trial> trialCurrent;             ///< Current trials being pushed
+  list<Trial> trialNext;                ///< Next set of trials
+  ResolvedUnion result;                 ///< The best result
+  int4 trialCount;                      ///< Number of trials evaluated so far
+  static const int4 maxPasses;          ///< Maximum number of levels to score through
+  static const int4 threshold;          ///< Threshold of trials over which to cancel additional passes
+  static const int4 maxTrials;          ///< Maximum number of trials to evaluate
+  bool testArrayArithmetic(PcodeOp *op,int4 inslot);    ///< Check if given PcodeOp is operating on array with union elements
+  bool testSimpleCases(PcodeOp *op,int4 inslot,Datatype *parent);       ///< Preliminary checks before doing full scoring
+  int4 scoreLockedType(Datatype *ct,Datatype *lockType);        ///< Score trial data-type against a locked data-type
+  int4 scoreParameter(Datatype *ct,const PcodeOp *callOp,int4 paramSlot);       ///< Score trial data-type against a parameter
+  int4 scoreReturnType(Datatype *ct,const PcodeOp *callOp);     ///< Score trial data-type against return data-type of function
+  Datatype *derefPointer(Datatype *ct,Varnode *vn,int4 &score); ///< Score trial data-type as a pointer to LOAD/STORE
+  void newTrialsDown(Varnode *vn,Datatype *ct,int4 scoreIndex,bool isArray);    ///< Create new trials based on reads of given Varnode
+  void newTrials(PcodeOp *op,int4 slot,Datatype *ct,int4 scoreIndex,bool isArray);      ///< Create new trials based on given input slot
+  void scoreTrialDown(const Trial &trial,bool lastLevel);       ///< Try to fit the given trial following data-flow down
+  void scoreTrialUp(const Trial &trial,bool lastLevel);         ///< Try to fit the given trial following data-flow up
+  Datatype *scoreTruncation(Datatype *ct,Varnode *vn,int4 offset,int4 scoreIndex);      ///< Score a truncation in the data-flow
+  void scoreConstantFit(const Trial &trial);            ///< Score trial data-type against a constant
+  void runOneLevel(bool lastPass);              ///< Score all the current trials
+  void 
computeBestIndex(void); ///< Assuming scoring is complete, compute the best index + void run(void); ///< Calculate best fitting field +public: + ScoreUnionFields(TypeFactory &tgrp,Datatype *parentType,PcodeOp *op,int4 slot); + ScoreUnionFields(TypeFactory &tgrp,TypeUnion *unionType,int4 offset,PcodeOp *op); + ScoreUnionFields(TypeFactory &tgrp,TypeUnion *unionType,int4 offset,PcodeOp *op,int4 slot); + const ResolvedUnion &getResult(void) const { return result; } ///< Get the resulting best field resolution +}; + +/// Compare based on the data-type, the \b slot, and the PcodeOp's unique id. +/// \param op2 is the other edge to compare with \b this +/// \return \b true if \b this should be ordered before the other edge +inline bool ResolveEdge::operator<(const ResolveEdge &op2) const + +{ + if (typeId != op2.typeId) + return (typeId < op2.typeId); + if (encoding != op2.encoding) + return (encoding < op2.encoding); + return (opTime < op2.opTime); +} + +#endif diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc index d4f25b44cc..b814f2d39c 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc @@ -51,7 +51,7 @@ void HighVariable::setSymbol(Varnode *vn) const symbol = entry->getSymbol(); if (entry->isDynamic()) // Dynamic symbols match whole variable symboloffset = -1; - else if (symbol->getCategory() == 1) + else if (symbol->getCategory() == Symbol::equate) symboloffset = -1; // For equates, we don't care about size else if (symbol->getType()->getSize() == vn->getSize() && entry->getAddr() == vn->getAddr() && !entry->isPiece()) @@ -466,7 +466,7 @@ void HighVariable::saveXml(ostream &s) const else if (isConstant()) a_v(s,"class",string("constant")); else if (!isPersist() && (symbol != (Symbol *)0)) { - if (symbol->getCategory() == 0) + if (symbol->getCategory() == Symbol::function_parameter) 
a_v(s,"class",string("param")); else a_v(s,"class",string("local")); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.cc index 1cc0fa4ca4..3813489adb 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.cc @@ -395,7 +395,7 @@ void ScopeLocal::markNotMapped(AddrSpace *spc,uintb first,int4 sz,bool parameter // If the symbol and the use are both as parameters // this is likely the special case of a shared return call sharing the parameter location // of the original function in which case we don't print a warning - if ((!parameter) || (sym->getCategory() != 0)) + if ((!parameter) || (sym->getCategory() != Symbol::function_parameter)) fd->warningHeader("Variable defined which should be unmapped: "+sym->getName()); return; } @@ -814,7 +814,7 @@ void MapState::reconcileDatatypes(void) maplist.swap(newList); } -/// The given LoadGuard, which may be a LOAD or STORE is converted into an appropriate +/// The given LoadGuard, which may be a LOAD or STORE, is converted into an appropriate /// RangeHint, attempting to make use of any data-type or index information. 
/// \param guard is the given LoadGuard /// \param opc is the expected op-code (CPUI_LOAD or CPUI_STORE) @@ -825,7 +825,7 @@ void MapState::addGuard(const LoadGuard &guard,OpCode opc,TypeFactory *typeFacto if (!guard.isValid(opc)) return; int4 step = guard.getStep(); if (step == 0) return; // No definitive sign of array access - Datatype *ct = guard.getOp()->getIn(1)->getType(); + Datatype *ct = guard.getOp()->getIn(1)->getTypeReadFacing(guard.getOp()); if (ct->getMetatype() == TYPE_PTR) { ct = ((TypePointer *) ct)->getPtrTo(); while (ct->getMetatype() == TYPE_ARRAY) @@ -1157,7 +1157,7 @@ void ScopeLocal::markUnaliased(const vector &alias) void ScopeLocal::fakeInputSymbols(void) { - int4 lockedinputs = getCategorySize(0); + int4 lockedinputs = getCategorySize(Symbol::function_parameter); VarnodeDefSet::const_iterator iter,enditer; iter = fd->beginDef(Varnode::input); @@ -1194,7 +1194,7 @@ void ScopeLocal::fakeInputSymbols(void) uint4 vflags = 0; SymbolEntry *entry = queryProperties(vn->getAddr(),vn->getSize(),usepoint,vflags); if (entry != (SymbolEntry *)0) { - if (entry->getSymbol()->getCategory()==0) + if (entry->getSymbol()->getCategory()==Symbol::function_parameter) continue; // Found a matching symbol } } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.cc index 5c773d414b..a88025dc15 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.cc @@ -564,6 +564,58 @@ Varnode::~Varnode(void) } } +/// This generally just returns the data-type of the Varnode itself unless it is a \e union data-type. +/// In this case, the data-type of the resolved field of the \e union, associated with writing to the Varnode, +/// is returned. The Varnode \b must be written to, to call this method. 
+/// \return the resolved data-type (resolution is keyed on the defining op \b def with slot -1) +Datatype *Varnode::getTypeDefFacing(void) const + +{ + if (!type->needsResolution()) + return type; + return type->findResolve(def,-1); +} + +/// This generally just returns the data-type of the Varnode itself unless it is a \e union data-type. +/// In this case, the data-type of the resolved field of the \e union, associated with reading the Varnode, +/// is returned. +/// \param op is the PcodeOp reading \b this Varnode (dereferenced to find the read slot when resolution is needed) +/// \return the resolved data-type +Datatype *Varnode::getTypeReadFacing(const PcodeOp *op) const + +{ + if (!type->needsResolution()) + return type; + return type->findResolve(op, op->getSlot(this)); +} + +/// This generally just returns the data-type of the HighVariable associated with \b this, unless it is a +/// \e union data-type. In this case, the data-type of the resolved field of the \e union, associated with +/// writing to the Varnode, is returned. +/// \return the resolved data-type (\b high is dereferenced unchecked, so the HighVariable must already be assigned) +Datatype *Varnode::getHighTypeDefFacing(void) const + +{ + Datatype *ct = high->getType(); + if (!ct->needsResolution()) + return ct; + return ct->findResolve(def,-1); +} + +/// This generally just returns the data-type of the HighVariable associated with \b this, unless it is a +/// \e union data-type. In this case, the data-type of the resolved field of the \e union, associated with +/// reading the Varnode, is returned.
+/// \param op is the PcodeOp reading \b this Varnode (dereferenced to find the read slot when resolution is needed) +/// \return the resolved data-type (\b high is dereferenced unchecked, so the HighVariable must already be assigned) +Datatype *Varnode::getHighTypeReadFacing(const PcodeOp *op) const + +{ + Datatype *ct = high->getType(); + if (!ct->needsResolution()) + return ct; + return ct->findResolve(op, op->getSlot(this)); +} + /// This is a convenience method for quickly finding the unique PcodeOp that reads this Varnode /// \return only descendant (if there is 1 and ONLY 1) or \b null otherwise PcodeOp *Varnode::loneDescend(void) const diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.hh index 2d6fcf1add..bffc500eb5 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.hh @@ -118,7 +118,8 @@ public: locked_input = 0x100, ///< Input that exists even if its unused spacebase_placeholder = 0x200, ///< This varnode is inserted artificially to track a register ///< value at a specific point in the code - stop_uppropagation = 0x400 ///< Data-types do not propagate from an output into \b this + stop_uppropagation = 0x400, ///< Data-types do not propagate from an output into \b this + has_implied_field = 0x800 ///< The varnode is implied but also has a data-type that needs resolution }; private: mutable uint4 flags; ///< The collection of boolean attributes for this Varnode @@ -176,6 +177,10 @@ public: SymbolEntry *getSymbolEntry(void) const { return mapentry; } ///< Get symbol and scope information associated with this Varnode uint4 getFlags(void) const { return flags; } ///< Get all the boolean attributes Datatype *getType(void) const { return type; } ///< Get the Datatype associated with this Varnode + Datatype *getTypeDefFacing(void) const; ///< Return the data-type of \b this when it is written to + Datatype *getTypeReadFacing(const PcodeOp *op) const; ///< Get the data-type of \b this when it is read by the given PcodeOp + Datatype
*getHighTypeDefFacing(void) const; ///< Return the data-type of the HighVariable when \b this is written to + Datatype *getHighTypeReadFacing(const PcodeOp *op) const; ///< Return data-type of the HighVariable when read by the given PcodeOp void setTempType(Datatype *t) const { temp.dataType = t; } ///< Set the temporary Datatype Datatype *getTempType(void) const { return temp.dataType; } ///< Get the temporary Datatype (used during type propagation) void setValueSet(ValueSet *v) const { temp.valueSet = v; } ///< Set the temporary ValueSet record @@ -245,6 +250,7 @@ public: bool isStackStore(void) const { return ((addlflags&Varnode::stack_store)!=0); } ///< Was this originally produced by an explicit STORE bool isLockedInput(void) const { return ((addlflags&Varnode::locked_input)!=0); } ///< Is always an input, even if unused bool stopsUpPropagation(void) const { return ((addlflags&Varnode::stop_uppropagation)!=0); } ///< Is data-type propagation stopped + bool hasImpliedField(void) const { return ((addlflags&Varnode::has_implied_field)!=0); } ///< Does \b this have an implied field /// Is \b this just a special placeholder representing INDIRECT creation? 
bool isIndirectZero(void) const { return ((flags&(Varnode::indirect_creation|Varnode::constant))==(Varnode::indirect_creation|Varnode::constant)); } @@ -305,6 +311,7 @@ public: void setUnsignedPrint(void) { addlflags |= Varnode::unsignedprint; } ///< Force \b this to be printed as unsigned void setStopUpPropagation(void) { addlflags |= Varnode::stop_uppropagation; } ///< Stop up-propagation thru \b this void clearStopUpPropagation(void) { addlflags &= ~Varnode::stop_uppropagation; } ///< Stop up-propagation thru \b this + void setImpliedField(void) { addlflags |= Varnode::has_implied_field; } ///< Mark \b this as having an implied field bool updateType(Datatype *ct,bool lock,bool override); ///< (Possibly) set the Datatype given various restrictions void setStackStore(void) { addlflags |= Varnode::stack_store; } ///< Mark as produced by explicit CPUI_STORE void setLockedInput(void) { addlflags |= Varnode::locked_input; } ///< Mark as existing input, even if unused diff --git a/Ghidra/Features/Decompiler/src/decompile/datatests/impliedfield.xml b/Ghidra/Features/Decompiler/src/decompile/datatests/impliedfield.xml new file mode 100644 index 0000000000..885a176cb7 --- /dev/null +++ b/Ghidra/Features/Decompiler/src/decompile/datatests/impliedfield.xml @@ -0,0 +1,33 @@ + + + + + 4889f848c1f8 +20894424fc85f67419660fefc0f30f5a +4424fcf20f58050d010000f20f2cc001 +f8c30fafc7c3 + + + 0000000000000840 + + + + +return \(int4\)\(val\.u\.myfloat \+ 3\.0\) \+ val\.a; +return val\.u\.myint \* val\.a; + diff --git a/Ghidra/Features/Decompiler/src/decompile/datatests/union_datatype.xml b/Ghidra/Features/Decompiler/src/decompile/datatests/union_datatype.xml new file mode 100644 index 0000000000..901c6a7260 --- /dev/null +++ b/Ghidra/Features/Decompiler/src/decompile/datatests/union_datatype.xml @@ -0,0 +1,210 @@ + + + + + 554889e548897de88975e4 +8955e0c745fc00000000eb45837de000 +751e8b45fc4898488d14850000000048 +8b45e84801d08b55fc83c20a8910eb1d +8b45fc4898488d148500000000488b45
+e84801d0f30f2a45fcf30f11008345fc +018b45fc3b45e47cb3905dc3554889e5 +48897df88975f48955f0837df4007508 +488b45f88b00eb0d488b45f88b55f048 +63d28b04905dc3554889e54883ec2089 +7dec837dec0075278b0d52182000ba56 +55555589c8f7ea89c8c1f81f29c289d0 +8945fc8b45fc89c7e814ffffffeb4683 +7dec01752bf30f100d23182000f30f10 +051f040000f30f58c1f30f1145f8f30f +1045f8f30f2cc089c7e8e3feffffeb15 +8b05fa172000c1e8038945f48b45f489 +05eb17200090c9c3554889e54883ec10 +897dfc837dfc0075130fb605c8172000 +0fbec089c7e880feffffeb0d8b05b617 +200089c7e898feffff90c9c3554889e5 +4883ec10909090908975f4837df40075 +104889f8900fbec089c7e84bfeffffeb +25837df40175124889f8908b0083c00a +89c7e85afeffffeb0d4889f883c06489 +c7e84bfeffff90c9c3554889e548897d +e88975e4837de400750c488b45e883c0 +0a8945fceb2c837de401750e488b45e8 +8b0083c0038945fceb18f20f104de8f2 +0f100531030000f20f58c1f20f2cc089 +45fc8b45fc5dc3554889e54883ec5089 +7dbc64488b042528000000488945f831 +c08b55bc488d45d0be0a0000004889c7 +e8f0fdfffff30f104ddcf30f1045e0f3 +0f58c1f30f1145dcc745cc00000000eb +45837dbc0075158b45cc48988b4485d0 +83c00789c7e897fdffffeb268b45cc48 +98f30f104485d0f30f5ac0f20f100dad +020000f20f58c1f20f2cc089c7e86ffd +ffff8345cc01837dcc097eb590488b45 +f86448330425280000007405e8effbff +ffc9c3554889e548897df88975f4f30f +1145f0488b45f88b0085c0751cf30f10 +4df0f30f10055e020000f30f58c1488b +45f8f30f114004eb0d8b45f48d500348 +8b45f8895004905dc3554889e548897d +e8488b45e88b0085c07522488b45e8f3 +0f104004f30f5ac0f20f100d20020000 +f20f59c1f20f2cc08945fceb0d488b45 +e88b400483c01b8945fc8b45fc5dc355 +4889e548897de88975e4837de4007511 +488b45e8488b008b0083c00a8945fceb +62837de4017525488b45e8488b00f30f +104004f30f5ac0f20f100dc9010000f2 +0f58c1f20f2cc08945fceb37837de402 +7512488b45e8488b000fb6000fbec089 +45fceb1f837de403750f488b45e8488b +008b40048945fceb0a488b45e8488b00 +8945fc8b45fc5dc3554889e54883ec10 +48897df8488b45f88b0085c07514488b +45f80fb640080fbec089c7e8eafbffff +eb2f488b45f88b0083f8017516488b45 +f8488b40088b0083c00a89c7e8f0fbff 
+ffeb0e488b45f88b400889c7e8e0fbff +ff90c9c3554889e548897df88975f489 +55f0837df400750e8b45f08d500a488b +45f88910eb19f30f2a45f0f30f100dfd +000000f30f58c1488b45f8f30f110090 +5dc3554889e50fb605ab1420000fbec0 +89c7e863fbffff905dc3 + + + 000030410000000000804040 +9a999999999909400000b04000000000 +000000000000f83f0000000000001440 +0000a041 + + + + + + + + + + + + + + + + + + +ptr\[i\]\.ifield = i \+ 10; +ptr\[i\]\.ffield = \(float4\)i; +iVar1 = ptr-\>a; +iVar1 = ptr-\>arr\[index\]; +displayint\(glob\.intfield / 3\); +displayint\(\(int4\)\(glob\.floatfield \+ 11\.0\)\); +glob\.uintfield = glob\.uintfield \>\> 3; +displaychar\(\(char\)glob1\.a\); +displayint\(glob1\.a\); +displaychar\(glob1\.c\); +displaychar\(val\.c\); +displayint\(\*val\.ptr \+ 10\); +displayint\(val\.a \+ 100\); +res = \(int4\)val\.mylong \+ 10; +res = \*val\.myptr \+ 3; +res = \(int4\)\(val\.mydouble \+ 33\.0\); +arr\[3\]\.ffield = arr\[4\]\.ffield \+ arr\[3\]\.ffield; +displayint\(arr\[i\]\.ifield \+ 7\); +displayint\(\(int4\)\(arr\[i\]\.ffield \+ 3\.2\)\); +ptr-\>value\)\.myfloat = fval \+ 5\.5; +ptr-\>value\)\.myint = ival \+ 3; +val = \(int4\)\(\(ptr-\>value\)\.myfloat \* 1\.5\); +val = \(ptr-\>value\)\.myint \+ 0x1b; +res = ptr-\>a-\>aIntField \+ 10; +res = \(int4\)\(ptr-\>a-\>aFloatField \+ 5\.0\); +res = \(int4\)ptr-\>b-\>bCharField; +res = ptr-\>b-\>bIntField; +res = \(int4\)ptr-\>longField; +displaychar\(\(s-\>value\)\.c\); +displayint\(\*\(s-\>value\)\.ptr \+ 10\); +displayint\(\(s-\>value\)\.a\); +ptr-\>myint = val \+ 10; +ptr-\>myfloat = \(float4\)val \+ 20\.0; + diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/ClangFieldToken.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/ClangFieldToken.java index 5f7ed8abc0..8fcc77583b 100644 --- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/ClangFieldToken.java +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/ClangFieldToken.java @@ -1,6 +1,5 @@ /* ### * IP: GHIDRA - 
* REVIEWED: YES * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +17,7 @@ package ghidra.app.decompiler; import ghidra.program.model.data.DataType; import ghidra.program.model.pcode.PcodeFactory; +import ghidra.program.model.pcode.PcodeOp; import ghidra.util.xml.SpecXmlUtils; import ghidra.xml.XmlElement; @@ -28,35 +28,49 @@ import ghidra.xml.XmlElement; public class ClangFieldToken extends ClangToken { private DataType datatype; // Structure from which this field is a part private int offset; // Byte offset of the field within the structure + private PcodeOp op; // The op associated with the field extraction public ClangFieldToken(ClangNode par) { super(par); datatype = null; } - + /** * @return the structure datatype associated with this field token */ public DataType getDataType() { return datatype; } - + /** * @return the byte offset of this field with its structure */ public int getOffset() { return offset; } - + @Override - public void restoreFromXML(XmlElement el,XmlElement end,PcodeFactory pfactory) { - super.restoreFromXML(el,end,pfactory); + public PcodeOp getPcodeOp() { + return op; + } + + @Override + public void restoreFromXML(XmlElement el, XmlElement end, PcodeFactory pfactory) { + super.restoreFromXML(el, end, pfactory); String datatypestring = el.getAttribute("name"); // Name of the structure - if (datatypestring != null) - datatype = pfactory.getDataTypeManager().findBaseType(datatypestring,el.getAttribute("id")); + if (datatypestring != null) { + datatype = + pfactory.getDataTypeManager().findBaseType(datatypestring, el.getAttribute("id")); + } String offsetstring = el.getAttribute(ClangXML.OFFSET); - if (offsetstring != null) + if (offsetstring != null) { offset = SpecXmlUtils.decodeInt(offsetstring); + } + String oprefstring = el.getAttribute(ClangXML.OPREF); + if (oprefstring != null) { + int refid = SpecXmlUtils.decodeInt(oprefstring); + op = 
pfactory.getOpRef(refid); + } } } diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileDebug.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileDebug.java index 05859ad5cc..d3389c0fb8 100644 --- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileDebug.java +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileDebug.java @@ -340,9 +340,9 @@ public class DecompileDebug { DataTypeDependencyOrderer TypeOrderer = new DataTypeDependencyOrderer(program.getDataTypeManager(), dtypes); //First output all structures as zero size so to avoid any cyclic dependencies. - for (DataType dataType : TypeOrderer.getStructList()) { + for (DataType dataType : TypeOrderer.getCompositeList()) { debugStream.write( - (dtmanage.buildStructTypeZeroSizeOveride(dataType) + "\n").toString().getBytes()); + (dtmanage.buildCompositeZeroSizePlaceholder(dataType) + "\n").toString().getBytes()); } //Next, use the dependency stack to output types. 
for (DataType dataType : TypeOrderer.getDependencyList()) { diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/DecompilerProvider.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/DecompilerProvider.java index c47ec9baab..c5f1f011f3 100644 --- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/DecompilerProvider.java +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/DecompilerProvider.java @@ -809,6 +809,9 @@ public class DecompilerProvider extends NavigatableComponentProviderAdapter RenameFieldAction renameFieldAction = new RenameFieldAction(); setGroupInfo(renameFieldAction, variableGroup, subGroupPosition++); + ForceUnionAction forceUnionAction = new ForceUnionAction(); + setGroupInfo(forceUnionAction, variableGroup, subGroupPosition++); + RetypeLocalAction retypeLocalAction = new RetypeLocalAction(); setGroupInfo(retypeLocalAction, variableGroup, subGroupPosition++); @@ -982,6 +985,7 @@ public class DecompilerProvider extends NavigatableComponentProviderAdapter addLocalAction(renameLocalAction); addLocalAction(renameGlobalAction); addLocalAction(renameFieldAction); + addLocalAction(forceUnionAction); addLocalAction(setSecondaryHighlightAction); addLocalAction(setSecondaryHighlightColorChooserAction); addLocalAction(removeSecondaryHighlightAction); diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/AbstractDecompilerAction.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/AbstractDecompilerAction.java index f8ef696b1c..a2ef36cd78 100644 --- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/AbstractDecompilerAction.java +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/AbstractDecompilerAction.java @@ -149,11 +149,11 @@ public abstract class AbstractDecompilerAction extends DockingAction { } 
/** - * Get the structure associated with a field token + * Get the structure/union associated with a field token * @param tok is the token representing a field - * @return the structure which contains this field + * @return the structure/union which contains this field */ - public static Structure getStructDataType(ClangToken tok) { + public static Composite getCompositeDataType(ClangToken tok) { // We already know tok is a ClangFieldToken ClangFieldToken fieldtok = (ClangFieldToken) tok; DataType dt = fieldtok.getDataType(); @@ -163,8 +163,8 @@ public abstract class AbstractDecompilerAction extends DockingAction { if (dt instanceof TypeDef) { dt = ((TypeDef) dt).getBaseDataType(); } - if (dt instanceof Structure) { - return (Structure) dt; + if (dt instanceof Composite) { + return (Composite) dt; } return null; } @@ -204,7 +204,8 @@ public abstract class AbstractDecompilerAction extends DockingAction { return false; } - protected DataType chooseDataType(PluginTool tool, Program program, DataType currentDataType) { + protected static DataType chooseDataType(PluginTool tool, Program program, + DataType currentDataType) { DataTypeManager dataTypeManager = program.getDataTypeManager(); DataTypeSelectionDialog chooserDialog = new DataTypeSelectionDialog(tool, dataTypeManager, Integer.MAX_VALUE, AllowedDataTypes.FIXED_LENGTH); diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/ForceUnionAction.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/ForceUnionAction.java new file mode 100644 index 0000000000..f9b6095021 --- /dev/null +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/ForceUnionAction.java @@ -0,0 +1,232 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.core.decompile.actions; + +import java.util.ArrayList; + +import docking.action.MenuData; +import docking.widgets.OkDialog; +import docking.widgets.OptionDialog; +import ghidra.app.decompiler.ClangFieldToken; +import ghidra.app.decompiler.ClangToken; +import ghidra.app.plugin.core.decompile.DecompilerActionContext; +import ghidra.program.model.address.Address; +import ghidra.program.model.data.*; +import ghidra.program.model.listing.Function; +import ghidra.program.model.listing.Program; +import ghidra.program.model.pcode.*; +import ghidra.program.model.symbol.SourceType; +import ghidra.util.Msg; +import ghidra.util.UndefinedFunction; +import ghidra.util.exception.DuplicateNameException; +import ghidra.util.exception.InvalidInputException; +/** Decompiler popup action ("Force Field") that lets the user choose which field of a union a selected field token resolves to. */ +public class ForceUnionAction extends AbstractDecompilerAction { + private Varnode accessVn; // The Varnode being accessed with a union data-type + private PcodeOp accessOp; // PcodeOp accessing the union + private int accessSlot; // Slot containing the union variable (-1 for output >=0 for input) + private int fieldNumber; // The field (number) selected by the user to force + private Union unionDt; // The union data-type + private DataType parentDt; // The data-type associated with accessVn + private Address pcAddr; // Address at which field extraction takes place + + public ForceUnionAction() { + super("Force Union Field"); +// setHelpLocation(new HelpLocation(HelpTopics.DECOMPILER, "ActionRenameField")); + setPopupMenuData(new MenuData(new String[] { "Force Field" },
"Decompile")); +// setKeyBindingData(new KeyBindingData(KeyEvent.VK_L, 0)); + } + + @Override + protected boolean isEnabledForDecompilerContext(DecompilerActionContext context) { + Function function = context.getFunction(); + if (function == null || function instanceof UndefinedFunction) { + return false; + } + ClangToken tokenAtCursor = context.getTokenAtCursor(); + if (!(tokenAtCursor instanceof ClangFieldToken)) { + return false; + } + Composite composite = getCompositeDataType(tokenAtCursor); + return (composite instanceof Union); + } + /** Return the data-type through which the given Varnode refers to unionDt (the union itself or a pointer to it), or null if it is unrelated. */ + private DataType typeIsUnionRelated(Varnode vn) { + if (vn == null) { + return null; + } + HighVariable high = vn.getHigh(); + if (high == null) { + return null; + } + DataType dt = high.getDataType(); + if (dt instanceof TypeDef) { + dt = ((TypeDef) dt).getBaseDataType(); + } + DataType innerType = dt; + if (innerType instanceof Pointer) { + innerType = ((Pointer) innerType).getDataType(); + } + if (innerType == unionDt) { + return dt; + } + HighSymbol symbol = high.getSymbol(); // It's possible the varnode is a truncated symbol + dt = (symbol == null) ? null : symbol.getDataType(); // A variable without a symbol cannot match + if (dt instanceof TypeDef) { + dt = ((TypeDef) dt).getBaseDataType(); + } + return (dt == unionDt) ?
dt : null; + } + /** Determine accessOp, accessVn, accessSlot and parentDt for the union access behind the given field token; accessOp is left null if this fails. */ + private void determineFacet(ClangToken tokenAtCursor) { + accessOp = tokenAtCursor.getPcodeOp(); + if (accessOp == null) { + return; // Token has no associated p-code op, the caller reports the failure + } + int opcode = accessOp.getOpcode(); + if (opcode == PcodeOp.PTRSUB) { + parentDt = typeIsUnionRelated(accessOp.getInput(0)); + if (accessOp.getInput(1).getOffset() == 0) { // Artificial op + accessVn = accessOp.getOutput(); + accessOp = accessVn.getLoneDescend(); + if (accessOp == null) { + return; + } + accessSlot = accessOp.getSlot(accessVn); + } + else { + accessVn = accessOp.getInput(0); + accessSlot = 0; + } + } + else { + for (accessSlot = 0; accessSlot < accessOp.getNumInputs(); ++accessSlot) { + accessVn = accessOp.getInput(accessSlot); + parentDt = typeIsUnionRelated(accessVn); + if (parentDt != null) { + break; + } + } + if (accessSlot >= accessOp.getNumInputs()) { + accessSlot = -1; + accessVn = accessOp.getOutput(); + parentDt = typeIsUnionRelated(accessVn); + if (parentDt == null) { + accessOp = null; + return; // Give up, could not find type associated with field + } + } + if (opcode == PcodeOp.SUBPIECE && accessSlot == 0 && !(parentDt instanceof Pointer)) { + // SUBPIECE acts directly as resolution operator + // Choose field based on output varnode, even though it isn't the union data-type + accessSlot = -1; + accessVn = accessOp.getOutput(); + } + } + } + /** Fill allFields with "(no field)" followed by every field name of unionDt, and return the choices whose length equals size (all of them when size is 0). */ + private String[] buildFieldOptions(ArrayList allFields, int size) { + DataTypeComponent[] components = unionDt.getDefinedComponents(); + ArrayList res = new ArrayList<>(); + allFields.add("(no field)"); + if (size == 0 || unionDt.getLength() == size) { + res.add("(no field)"); + } + for (DataTypeComponent component : components) { + String nm = component.getFieldName(); + if (nm == null) { + nm = component.getDefaultFieldName(); // Unnamed fields are listed under their default name + } + allFields.add(nm); + if (size == 0 || component.getDataType().getLength() == size) { + res.add(nm); + } + } + String[] resArray = new String[res.size()]; + res.toArray(resArray); + return resArray; + } + /** Return the index of the first entry of list equal to value, or -1 if there is none (null entries are tolerated). */ + private static int findStringIndex(ArrayList list, String value) {
+ return list.indexOf(value); + } + /** Ask the user which field to force and store it in fieldNumber (-1 for "(no field)"); return false if there is no real choice, the user cancels, or the selection is unchanged. */ + private boolean selectFieldNumber(String defaultFieldName) { + int size = 0; + if (!(parentDt instanceof Pointer)) { + size = accessVn.getSize(); + } + ArrayList allFields = new ArrayList<>(); + String[] choices = buildFieldOptions(allFields, size); + if (choices.length < 2) { // If only one field fits the Varnode + OkDialog.show("No Field Choices", "Only one field fits the selected variable"); + return false; + } + int currentChoice = findStringIndex(allFields, defaultFieldName); + if (currentChoice < 0) { + defaultFieldName = null; + } + String userChoice = OptionDialog.showInputChoiceDialog(null, + "Select Field for " + unionDt.getName(), "Field for " + unionDt.getName() + ": ", + choices, defaultFieldName, OptionDialog.PLAIN_MESSAGE); + if (userChoice == null) { + return false; // User cancelled when making the choice + } + fieldNumber = findStringIndex(allFields, userChoice); + if (fieldNumber < 0 || fieldNumber == currentChoice) { + return false; // User chose original value or something not in list, treat as cancel + } + fieldNumber -= 1; // Convert choice index to field number + return true; + } + + @Override + protected void decompilerActionPerformed(DecompilerActionContext context) { + Program program = context.getProgram(); + ClangToken tokenAtCursor = context.getTokenAtCursor(); + HighFunction highFunction = context.getHighFunction(); + unionDt = (Union) getCompositeDataType(tokenAtCursor); + determineFacet(tokenAtCursor); + if (accessOp == null || accessVn == null) { + Msg.showError(this, null, "Force Union failed", "Could not recover p-code op"); + return; + } + if (!selectFieldNumber(tokenAtCursor.getText())) { + return; // User cancelled or no options to choose from + } + Function function = highFunction.getFunction(); + DynamicHash dhash = new DynamicHash(accessOp, accessSlot, highFunction); + pcAddr = dhash.getAddress(); + if (pcAddr == Address.NO_ADDRESS) {
+ Msg.showError(this, null, "Force Union failed", "Unable to find a unique hash"); + return; // Without a valid address the facet cannot be recorded + } + int transaction = program.startTransaction("Force Union"); + try { + HighFunctionDBUtil.writeUnionFacet(function, parentDt, fieldNumber, pcAddr, + dhash.getHash(), SourceType.USER_DEFINED); + } + catch (DuplicateNameException e) { + Msg.showError(this, null, "Force Union failed", e.getMessage()); + } + catch (InvalidInputException e) { + Msg.showError(this, null, "Force Union failed", e.getMessage()); + } + finally { + program.endTransaction(transaction, true); + } + } +} diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameFieldAction.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameFieldAction.java index 30ae9c7da1..9cf97ca1b4 100644 --- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameFieldAction.java +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameFieldAction.java @@ -24,6 +24,7 @@ import ghidra.app.decompiler.ClangToken; import ghidra.app.plugin.core.decompile.DecompilerActionContext; import ghidra.app.util.HelpTopics; import ghidra.framework.plugintool.PluginTool; +import ghidra.program.model.data.Composite; import ghidra.program.model.data.Structure; import ghidra.program.model.listing.Function; import ghidra.util.*; @@ -59,7 +60,7 @@ public class RenameFieldAction extends AbstractDecompilerAction { protected void decompilerActionPerformed(DecompilerActionContext context) { PluginTool tool = context.getTool(); ClangToken tokenAtCursor = context.getTokenAtCursor(); - Structure dt = getStructDataType(tokenAtCursor); + Composite dt = getCompositeDataType(tokenAtCursor); if (dt == null) { Msg.showError(this, tool.getToolFrame(), "Rename Failed", "Could not find structure datatype"); @@ -73,9 +74,15 @@ public class RenameFieldAction extends AbstractDecompilerAction { return; } -
RenameStructureFieldTask nameTask = - new RenameStructureFieldTask(tool, context.getProgram(), context.getComponentProvider(), - tokenAtCursor, dt, offset); + RenameTask nameTask; + if (dt instanceof Structure) { + nameTask = new RenameStructFieldTask(tool, context.getProgram(), + context.getComponentProvider(), tokenAtCursor, (Structure) dt, offset); + } + else { + nameTask = new RenameUnionFieldTask(tool, context.getProgram(), + context.getComponentProvider(), tokenAtCursor, dt, offset); + } nameTask.runTask(true); } } diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameStructureFieldTask.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameStructFieldTask.java similarity index 94% rename from Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameStructureFieldTask.java rename to Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameStructFieldTask.java index df414a9e10..4c9538d304 100644 --- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameStructureFieldTask.java +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameStructFieldTask.java @@ -24,12 +24,12 @@ import ghidra.util.Msg; import ghidra.util.exception.DuplicateNameException; import ghidra.util.exception.InvalidInputException; -public class RenameStructureFieldTask extends RenameTask { +public class RenameStructFieldTask extends RenameTask { private Structure structure; public int offset; - public RenameStructureFieldTask(PluginTool tool, Program program, DecompilerProvider provider, + public RenameStructFieldTask(PluginTool tool, Program program, DecompilerProvider provider, ClangToken token, Structure structure, int offset) { super(tool, program, provider, token, token.getText()); this.structure = structure; diff --git 
a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameUnionFieldTask.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameUnionFieldTask.java new file mode 100644 index 0000000000..c024cd1339 --- /dev/null +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameUnionFieldTask.java @@ -0,0 +1,65 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.plugin.core.decompile.actions; + +import ghidra.app.decompiler.ClangToken; +import ghidra.app.plugin.core.decompile.DecompilerProvider; +import ghidra.framework.plugintool.PluginTool; +import ghidra.program.model.data.Composite; +import ghidra.program.model.data.DataTypeComponent; +import ghidra.program.model.listing.Program; +import ghidra.util.exception.DuplicateNameException; +import ghidra.util.exception.InvalidInputException; +/** Task that renames a single field (component) of a composite, selected by its index within the composite. */ +public class RenameUnionFieldTask extends RenameTask { + private Composite composite; + private int ordinal; +/** Create the task; the token's current text is passed to the RenameTask superclass and ordinal is the index later handed to Composite.getComponent(). */ + public RenameUnionFieldTask(PluginTool tool, Program program, DecompilerProvider provider, + ClangToken token, Composite composite, int ordinal) { + super(tool, program, provider, token, token.getText()); + this.composite = composite; + this.ordinal = ordinal; + } +/** Set newName on the component found at ordinal. NOTE(review): the component lookup result is used unchecked - confirm callers always pass a valid ordinal. */ + @Override + public void commit() throws DuplicateNameException, InvalidInputException { + DataTypeComponent dtc = composite.getComponent(ordinal); + dtc.setFieldName(newName); + } + + @Override + public String getTransactionName() { + return "Rename Union Field"; + } +/** Record newNm as the pending name and reject it (setting errorMsg) if any defined component already has that name; unnamed components are skipped. */ + @Override + public boolean isValid(String newNm) { + newName = newNm; + DataTypeComponent[] comp = composite.getDefinedComponents(); + for (DataTypeComponent element : comp) { + String fieldname = element.getFieldName(); + if (fieldname == null) { + continue; + } + if (fieldname.equals(newName)) { + errorMsg = "Duplicate Field Name"; + return false; + } + } + return true; + } +} diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RetypeFieldAction.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RetypeFieldAction.java index 9c9df34fbd..64810b2da9 100644 --- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RetypeFieldAction.java +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RetypeFieldAction.java @@ -20,15 +20,14
@@ import java.awt.event.KeyEvent; import docking.action.KeyBindingData; import docking.action.MenuData; -import docking.widgets.OptionDialog; import ghidra.app.decompiler.ClangFieldToken; import ghidra.app.decompiler.ClangToken; import ghidra.app.plugin.core.decompile.DecompilerActionContext; import ghidra.app.util.HelpTopics; import ghidra.program.model.data.*; import ghidra.program.model.listing.Function; -import ghidra.program.model.listing.Program; -import ghidra.util.*; +import ghidra.util.HelpLocation; +import ghidra.util.UndefinedFunction; /** * Action triggered from a specific token in the decompiler window to change the data-type of @@ -59,7 +58,7 @@ public class RetypeFieldAction extends AbstractDecompilerAction { return false; } if (tokenAtCursor instanceof ClangFieldToken) { - DataType dt = getStructDataType(tokenAtCursor); + DataType dt = getCompositeDataType(tokenAtCursor); return (dt != null); } return false; @@ -69,156 +68,16 @@ public class RetypeFieldAction extends AbstractDecompilerAction { protected void decompilerActionPerformed(DecompilerActionContext context) { ClangToken tokenAtCursor = context.getTokenAtCursor(); - Structure struct = getStructDataType(tokenAtCursor); - if (struct == null) { - Msg.showError(this, null, "Retype Failed", "Failed to retype structure field"); - return; + Composite composite = getCompositeDataType(tokenAtCursor); + RetypeFieldTask retypeTask; + if (composite instanceof Structure) { + retypeTask = new RetypeStructFieldTask(context.getTool(), context.getProgram(), + context.getComponentProvider(), tokenAtCursor, composite); } - - int offset = ((ClangFieldToken) tokenAtCursor).getOffset(); - if (offset < 0 || offset >= struct.getLength()) { - Msg.showError(this, null, "Retype Failed", - "Failed to retype structure field at offset " + offset + ": " + struct.getName()); - return; + else { + retypeTask = new RetypeUnionFieldTask(context.getTool(), context.getProgram(), + context.getComponentProvider(), tokenAtCursor, 
composite); } - - // get original component and datatype - structure may be packed so an offset which - // corresponds to padding byte may return null - DataTypeComponent comp = struct.getComponentContaining(offset); - if (comp != null && comp.getOffset() != offset) { - Msg.showError(this, null, "Retype Failed", - "Retype offset does not correspond to start of field"); - return; - } - - DataType originalType = comp != null ? comp.getDataType() : DataType.DEFAULT; - if (originalType instanceof BitFieldDataType) { - Msg.showError(this, null, "Retype Failed", - "Retype of defind bit-field is not supported."); - return; - } - - Program program = context.getProgram(); - DataType newType = chooseDataType(context.getTool(), program, originalType); - if (newType == null || newType.isEquivalent(originalType)) { - return; // cancelled - } - - // check for permitted datatype - if (newType instanceof FactoryDataType || newType.getLength() <= 0) { - Msg.showError(this, null, "Retype Failed", - "Failed to retype structure field '" + newType.getName() + - "' - data type is not allowed."); - return; - } - - replaceType(program, struct, offset, comp, originalType, newType); - } - - private void replaceType(Program program, Structure struct, int offset, DataTypeComponent comp, - DataType originalType, DataType newType) { - - int transaction = program.startTransaction("Retype Structure Field"); - try { - - DataTypeManager dtm = program.getDataTypeManager(); - newType = dtm.resolve(newType, null); - int newDtLength = newType.getLength(); - if (DataTypeComponent.usesZeroLengthComponent(newType)) { - Msg.showError(this, null, "Retype Failed", "Failed to retype structure field '" + - newType.getName() + "' - zero-length component is not allowed."); - return; - } - - String fieldName = null; - String comment = null; - int nextOffset; - if (comp == null) { - nextOffset = offset + 1; // assume padding offset within packed structure - } - else { - fieldName = comp.getFieldName(); - comment = 
comp.getComment(); - nextOffset = comp.getEndOffset() + 1; - } - - // we cannot replace a default type, since it is not a real data type - if (originalType != DataType.DEFAULT && - newDtLength == originalType.getLength()) { - // Perform simple 1-for-1 component replacement. This allows to avoid unpack in - // some cases. Assume component is not null since we have a non-default type. - struct.replace(comp.getOrdinal(), newType, -1, fieldName, comment); - return; - } - - // check for datatype fit - int available = nextOffset - offset; - if (newDtLength > available) { - DataTypeComponent nextComp = struct.getDefinedComponentAtOrAfterOffset(nextOffset); - int endOffset = nextComp == null ? struct.getLength() : nextComp.getOffset(); - available += endOffset - nextOffset; - if (newDtLength > available) { - Msg.showError(this, null, "Retype Failed", - "Failed to retype structure field in '" + struct.getName() + - "' - datatype will not fit"); - return; - } - } - - if (!verifyPacking(struct, offset, comp, newType)) { - return; - } - - // The replaceAtOffset will only replace component containing offset plus any - // subsequent DEFAULT components available. Space check is performed prior to any - // clearing. Zero-length components at offset will be ignored. - struct.replaceAtOffset(offset, newType, -1, fieldName, comment); - } - catch (IllegalArgumentException e) { - Msg.showError(this, null, "Retype Failed", - "Failed to retype structure field in '" + struct.getName() + "':" + e.getMessage(), - e); - } - finally { - program.endTransaction(transaction, true); - } - } - - private boolean verifyPacking(Structure struct, int offset, DataTypeComponent comp, - DataType dataType) { - - if (!struct.isPackingEnabled()) { - return true; - } - - if (isAlignmentMaintained(comp, dataType, offset)) { - return true; - } - - int choice = OptionDialog.showOptionDialogWithCancelAsDefaultButton(null, - "Disable Structure Packing", - "Containing structure currently has packing enabled. 
Packing will be " + - "disabled if you continue.", - "Continue", OptionDialog.WARNING_MESSAGE); - if (choice != OptionDialog.OPTION_ONE) { - return false; // cancelled - } - - // alignment is maintained for struct since we do not know the impact if we change it - int alignment = struct.getAlignment(); - struct.setPackingEnabled(false); - struct.setExplicitMinimumAlignment(alignment); // preserve previous alignment - return true; - } - - private boolean isAlignmentMaintained(DataTypeComponent comp, DataType dataType, int offset) { - if (comp == null) { - return false; - } - int align = comp.getDataType().getAlignment(); - if (align != dataType.getAlignment()) { - return false; - } - return (offset % align) == 0; + retypeTask.runTask(); } } diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RetypeFieldTask.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RetypeFieldTask.java new file mode 100644 index 0000000000..08b6891e4b --- /dev/null +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RetypeFieldTask.java @@ -0,0 +1,99 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.plugin.core.decompile.actions; + +import ghidra.app.decompiler.ClangToken; +import ghidra.app.plugin.core.decompile.DecompilerProvider; +import ghidra.framework.plugintool.PluginTool; +import ghidra.program.model.data.*; +import ghidra.program.model.listing.Program; +import ghidra.util.Msg; + +public abstract class RetypeFieldTask { + protected Composite composite; + protected DataType newType = null; + protected DataType oldType; + protected String errorMsg = null; // Error to return if isValid returns false + protected PluginTool tool; + protected Program program; + protected DecompilerProvider provider; + protected ClangToken tokenAtCursor; + + public RetypeFieldTask(PluginTool tool, Program program, DecompilerProvider provider, + ClangToken token, Composite composite) { + this.tool = tool; + this.program = program; + this.provider = provider; + this.tokenAtCursor = token; + this.composite = composite; + } + + /** + * @return the name to associate with the data-base transaction that actually changes the data-type + */ + public abstract String getTransactionName(); + + /** + * Check if the selected field is valid for retyping. + * If there is a problem, the errorMsg is populated and false is returned. + * @return true if the field is valid + */ + public abstract boolean isValidBefore(); + + /** + * Given a new data-type chosen by the user, check if the retype can proceed. + * If there is a problem, the errorMsg is populated and false is returned. + * @return true if the retype can proceed + */ + public abstract boolean isValidAfter(); + + /** + * Assuming the transaction is started, do the work of changing the data-type. 
+ * @throws IllegalArgumentException if there is a final error committing the data-type + */ + public abstract void commit() throws IllegalArgumentException; + + public void runTask() { + if (!isValidBefore()) { + Msg.showError(this, null, "Retype Failed", errorMsg); + return; + } + newType = AbstractDecompilerAction.chooseDataType(tool, program, oldType); + if (newType == null || newType.isEquivalent(oldType)) { + return; // cancelled + } + + int transaction = program.startTransaction(getTransactionName()); + try { + DataTypeManager dtm = program.getDataTypeManager(); + newType = dtm.resolve(newType, null); + if (!isValidAfter()) { + Msg.showError(this, null, "Retype Failed", + "Cannot retype field in '" + composite.getName() + "': " + errorMsg); + return; + } + commit(); + } + catch (IllegalArgumentException e) { + Msg.showError(this, null, "Retype Failed", + "Failed to retype field in '" + composite.getName() + "': " + e.getMessage(), e); + } + finally { + program.endTransaction(transaction, true); + } + + } +} diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RetypeStructFieldTask.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RetypeStructFieldTask.java new file mode 100644 index 0000000000..01c2abf20c --- /dev/null +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RetypeStructFieldTask.java @@ -0,0 +1,179 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.core.decompile.actions; + +import docking.widgets.OptionDialog; +import ghidra.app.decompiler.ClangFieldToken; +import ghidra.app.decompiler.ClangToken; +import ghidra.app.plugin.core.decompile.DecompilerProvider; +import ghidra.framework.plugintool.PluginTool; +import ghidra.program.model.data.*; +import ghidra.program.model.listing.Program; + +public class RetypeStructFieldTask extends RetypeFieldTask { + + private DataTypeComponent component; + private int offset; + private boolean disablePacking; + + public RetypeStructFieldTask(PluginTool tool, Program program, DecompilerProvider provider, + ClangToken token, Composite composite) { + super(tool, program, provider, token, composite); + disablePacking = false; + } + + @Override + public String getTransactionName() { + return "Retype Structure Field"; + } + + @Override + public boolean isValidBefore() { + if (!(composite instanceof Structure)) { + errorMsg = "Could not identify structure at cursor"; + return false; + } + + Structure struct = (Structure) composite; + offset = ((ClangFieldToken) tokenAtCursor).getOffset(); + if (offset < 0 || offset >= struct.getLength()) { + errorMsg = "Bad offset (" + offset + ") specified"; + return false; + } + + // get original component and datatype - structure may be packed so an offset which + // corresponds to padding byte may return null + component = struct.getComponentContaining(offset); + if (component != null && component.getOffset() != offset) { + errorMsg = "Offset does not correspond to start of field"; + return false; + } + + oldType = component != null ? 
component.getDataType() : DataType.DEFAULT; + if (oldType instanceof BitFieldDataType) { + errorMsg = "Retype of defined bit-field is not supported."; + return false; + } + return true; + } + + @Override + public boolean isValidAfter() { + int newDtLength = newType.getLength(); + // check for permitted datatype + if (newType instanceof FactoryDataType || newDtLength <= 0) { + errorMsg = "Field of type '" + newType.getName() + "' - is not allowed."; + return false; + } + if (DataTypeComponent.usesZeroLengthComponent(newType)) { + errorMsg = "Zero-length component is not allowed."; + return false; + } + if (oldType == DataType.DEFAULT || newDtLength == oldType.getLength()) { + return true; + } + + // check for datatype fit + int nextOffset; + if (component == null) { + nextOffset = offset + 1; // assume padding offset within packed structure + } + else { + nextOffset = component.getEndOffset() + 1; + } + int available = nextOffset - offset; + if (newDtLength > available) { + Structure struct = (Structure) composite; + DataTypeComponent nextComp = struct.getDefinedComponentAtOrAfterOffset(nextOffset); + int endOffset = nextComp == null ? struct.getLength() : nextComp.getOffset(); + available += endOffset - nextOffset; + if (newDtLength > available) { + errorMsg = "Datatype will not fit"; + return false; + } + } + if (!verifyPacking()) { + return false; + } + + return true; + } + + @Override + public void commit() throws IllegalArgumentException { + Structure struct = (Structure) composite; + + String fieldName = null; + String comment = null; + if (component != null) { + fieldName = component.getFieldName(); + comment = component.getComment(); + } + + // we cannot replace a default type, since it is not a real data type + if (oldType != DataType.DEFAULT && newType.getLength() == oldType.getLength()) { + // Perform simple 1-for-1 component replacement. This allows to avoid unpack in + // some cases. Assume component is not null since we have a non-default type. 
+ struct.replace(component.getOrdinal(), newType, -1, fieldName, comment); + return; + } + + if (disablePacking) { // User has decided to disable packing for the structure + // alignment is maintained for struct since we do not know the impact if we change it + int alignment = struct.getAlignment(); + struct.setPackingEnabled(false); + struct.setExplicitMinimumAlignment(alignment); // preserve previous alignment + } + + // The replaceAtOffset will only replace component containing offset plus any + // subsequent DEFAULT components available. Space check is performed prior to any + // clearing. Zero-length components at offset will be ignored. + struct.replaceAtOffset(offset, newType, -1, fieldName, comment); + } + + private boolean verifyPacking() { + Structure struct = (Structure) composite; + if (!struct.isPackingEnabled()) { + return true; + } + + if (isAlignmentMaintained()) { + return true; + } + + int choice = OptionDialog.showOptionDialogWithCancelAsDefaultButton(null, + "Disable Structure Packing", + "Containing structure currently has packing enabled. 
Packing will be " + + "disabled if you continue.", + "Continue", OptionDialog.WARNING_MESSAGE); + if (choice != OptionDialog.OPTION_ONE) { + return false; + } + disablePacking = true; + return true; + } + + private boolean isAlignmentMaintained() { + if (component == null) { + return false; + } + int align = component.getDataType().getAlignment(); + if (align != newType.getAlignment()) { + return false; + } + return (offset % align) == 0; + } +} diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RetypeUnionFieldTask.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RetypeUnionFieldTask.java new file mode 100644 index 0000000000..497dbb0b78 --- /dev/null +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RetypeUnionFieldTask.java @@ -0,0 +1,112 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.plugin.core.decompile.actions; + +import docking.widgets.OptionDialog; +import ghidra.app.decompiler.ClangFieldToken; +import ghidra.app.decompiler.ClangToken; +import ghidra.app.plugin.core.decompile.DecompilerProvider; +import ghidra.framework.plugintool.PluginTool; +import ghidra.program.model.data.*; +import ghidra.program.model.listing.Program; + +public class RetypeUnionFieldTask extends RetypeFieldTask { + + private DataTypeComponent component; + private int ordinal; + + public RetypeUnionFieldTask(PluginTool tool, Program program, DecompilerProvider provider, + ClangToken token, Composite composite) { + super(tool, program, provider, token, composite); + } + + @Override + public String getTransactionName() { + return "Retype Union Field"; + } + + @Override + public boolean isValidBefore() { + if (!(composite instanceof Union)) { + errorMsg = "Could not identify union at cursor"; + return false; + } + + ordinal = ((ClangFieldToken) tokenAtCursor).getOffset(); + component = composite.getComponent(ordinal); + if (component == null) { + errorMsg = "Could not identify component of " + composite.getName(); + return false; + } + oldType = component.getDataType(); + if (oldType instanceof BitFieldDataType) { + errorMsg = "Retype of defined bit-field is not supported."; + return false; + } + return true; + } + + /** + * @return true if the new field data-type will cause the size of the union to change + */ + private boolean hasSizeChange() { + int newTypeLength = newType.getLength(); + if (newTypeLength == composite.getLength()) { + return false; + } + if (newType.getLength() < composite.getLength()) { + DataTypeComponent[] components = composite.getDefinedComponents(); + for (DataTypeComponent dtc : components) { + if (dtc.getOffset() + dtc.getLength() > newTypeLength) { + return false; + } + } + } + return true; + } + + @Override + public boolean isValidAfter() { + // check for permitted datatype + if (newType instanceof FactoryDataType || 
newType.getLength() <= 0) { + errorMsg = "Field of type '" + newType.getName() + "' - is not allowed."; + return false; + } + if (hasSizeChange()) { + int choice = OptionDialog.showOptionDialogWithCancelAsDefaultButton(null, + "Increase the size of the union", + "The size of the containing union will be changed if you continue.", "Continue", + OptionDialog.WARNING_MESSAGE); + if (choice != OptionDialog.OPTION_ONE) { + return false; + } + } + return true; + } + + @Override + public void commit() throws IllegalArgumentException { + String fieldName = null; + String comment = null; + if (component != null) { + fieldName = component.getFieldName(); + comment = component.getComment(); + } + composite.delete(ordinal); + composite.insert(ordinal, newType, -1, fieldName, comment); + } + +} diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/util/DataTypeDependencyOrderer.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/util/DataTypeDependencyOrderer.java index 24ce98c5bd..6549fe5cca 100644 --- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/util/DataTypeDependencyOrderer.java +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/util/DataTypeDependencyOrderer.java @@ -50,7 +50,7 @@ public class DataTypeDependencyOrderer { private HashSet procSet = new HashSet<>(); private HashSet doneSet = new HashSet<>(); - private ArrayList structList = new ArrayList<>(); + private ArrayList compositeList = new ArrayList<>(); private ArrayList orderedDependentsList = new ArrayList<>(); private HashMap> whoIDependOn = new HashMap<>(); @@ -142,34 +142,32 @@ public class DataTypeDependencyOrderer { /** * This method returns two lists: - * 1) is the set of structs. Intended for outputting zero-sized definitions. - * 2) is the acyclic dependency list (broken at structs and pointers to structs) + * 1) is the set of structs/unions. Intended for outputting zero-sized definitions. 
+ * 2) is the acyclic dependency list (broken at composites and pointers to composites) * This works (and the dependency graph is able to be broken of cycles) because - * structures can be given zero size to start with and then later updated with full size. - * @return pair of arrayLists--one of structs and one complete list of dependents + * composites can be given zero size to start with and then later updated with full size. + * @return pair of arrayLists--one of composites and one complete list of dependents */ public Pair, ArrayList> getAcyclicDependencyLists() { if (processed == false) { processDependencyLists(); } - return new Pair<>(structList, orderedDependentsList); + return new Pair<>(compositeList, orderedDependentsList); } /** - * This method returns the ArrayList of structs - * to structs found in the input list, intended - * to be used initially as zero-sized structures. - * @return An arrayList of structs + * This method returns the ArrayList of structs/unions + * @return An arrayList of Composite */ - public ArrayList getStructList() { + public ArrayList getCompositeList() { if (processed == false) { processDependencyLists(); } - return structList; + return compositeList; } /** - * This returns the acyclic dependency list (broken at structs and pointers to structs) + * This returns the acyclic dependency list (broken at composites and pointers to composites) * @return An ArrayList of dependents. */ public ArrayList getDependencyList() { @@ -225,7 +223,7 @@ public class DataTypeDependencyOrderer { } catch (Exception e) { //If exception, return a basic list of inputs. 
- structList.clear(); + compositeList.clear(); orderedDependentsList.clear(); for (Entry entry : inputSet) { orderedDependentsList.add(entry.dataType); @@ -239,7 +237,7 @@ public class DataTypeDependencyOrderer { whoDependsOnMe.clear(); whoIDependOn.clear(); noDependentsQueue.clear(); - structList.clear(); + compositeList.clear(); orderedDependentsList.clear(); procSet.clear(); procSet.addAll(inputSet); @@ -314,9 +312,9 @@ public class DataTypeDependencyOrderer { //Msg.debug(this, "ORDERED_LIST_SIZE: " + orderedDependentsList.size() + " -- TYPE: " + // dataType.getName()); orderedDependentsList.add(entry.dataType); - //dependency stack of struct for which zero-sized structs should first be used. - if (entry.dataType instanceof Structure) { - structList.add(entry.dataType); + //dependency stack of struct/union for which zero-sized placeholders should be emitted first + if (entry.dataType instanceof Composite) { + compositeList.add(entry.dataType); } removeMyDependentsEdgesToMe(entry); } @@ -340,7 +338,7 @@ public class DataTypeDependencyOrderer { procSet.add(subEntry); } if (entry.dataType instanceof Pointer) { //avoid cycles with structures/composites - if (subType instanceof Structure) { + if (subType instanceof Composite) { return; } } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/DynamicHash.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/DynamicHash.java index c8c2bc0a7c..7d75b64ded 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/DynamicHash.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/DynamicHash.java @@ -43,46 +43,32 @@ public class DynamicHash { public final static int transtable[] = { 0, PcodeOp.COPY, PcodeOp.LOAD, PcodeOp.STORE, PcodeOp.BRANCH, PcodeOp.CBRANCH, PcodeOp.BRANCHIND, - PcodeOp.CALL, - PcodeOp.CALLIND, - PcodeOp.CALLOTHER, - PcodeOp.RETURN, + PcodeOp.CALL, PcodeOp.CALLIND, 
PcodeOp.CALLOTHER, PcodeOp.RETURN, - PcodeOp.INT_EQUAL, - PcodeOp.INT_EQUAL, // NOT_EQUAL hashes same as EQUAL - PcodeOp.INT_SLESS, - PcodeOp.INT_SLESS, // SLESSEQUAL hashes same as SLESS - PcodeOp.INT_LESS, - PcodeOp.INT_LESS, // LESSEQUAL hashes same as LESS + PcodeOp.INT_EQUAL, PcodeOp.INT_EQUAL, // NOT_EQUAL hashes same as EQUAL + PcodeOp.INT_SLESS, PcodeOp.INT_SLESS, // SLESSEQUAL hashes same as SLESS + PcodeOp.INT_LESS, PcodeOp.INT_LESS, // LESSEQUAL hashes same as LESS - PcodeOp.INT_ZEXT, - PcodeOp.INT_SEXT, - PcodeOp.INT_ADD, - PcodeOp.INT_ADD, // SUB hashes same as ADD + PcodeOp.INT_ZEXT, PcodeOp.INT_SEXT, PcodeOp.INT_ADD, PcodeOp.INT_ADD, // SUB hashes same as ADD PcodeOp.INT_CARRY, PcodeOp.INT_SCARRY, PcodeOp.INT_SBORROW, PcodeOp.INT_2COMP, PcodeOp.INT_NEGATE, - PcodeOp.INT_XOR, PcodeOp.INT_AND, - PcodeOp.INT_OR, - PcodeOp.INT_MULT, // LEFT hashes same as MULT + PcodeOp.INT_XOR, PcodeOp.INT_AND, PcodeOp.INT_OR, PcodeOp.INT_MULT, // LEFT hashes same as MULT PcodeOp.INT_RIGHT, PcodeOp.INT_SRIGHT, PcodeOp.INT_MULT, PcodeOp.INT_DIV, PcodeOp.INT_SDIV, PcodeOp.INT_REM, PcodeOp.INT_SREM, PcodeOp.BOOL_NEGATE, PcodeOp.BOOL_XOR, PcodeOp.BOOL_AND, PcodeOp.BOOL_OR, - PcodeOp.FLOAT_EQUAL, - PcodeOp.FLOAT_EQUAL, // NOTEQUAL hashes same as EQUAL - PcodeOp.FLOAT_LESS, - PcodeOp.FLOAT_LESS, // LESSEQUAL hashes same as EQUAL + PcodeOp.FLOAT_EQUAL, PcodeOp.FLOAT_EQUAL, // NOTEQUAL hashes same as EQUAL + PcodeOp.FLOAT_LESS, PcodeOp.FLOAT_LESS, // LESSEQUAL hashes same as EQUAL 0, // Unused slot -- skip PcodeOp.FLOAT_NAN, - PcodeOp.FLOAT_ADD, PcodeOp.FLOAT_DIV, PcodeOp.FLOAT_MULT, - PcodeOp.FLOAT_ADD, // SUB hashes same as ADD + PcodeOp.FLOAT_ADD, PcodeOp.FLOAT_DIV, PcodeOp.FLOAT_MULT, PcodeOp.FLOAT_ADD, // SUB hashes same as ADD PcodeOp.FLOAT_NEG, PcodeOp.FLOAT_ABS, PcodeOp.FLOAT_SQRT, - PcodeOp.FLOAT_INT2FLOAT, PcodeOp.FLOAT_FLOAT2FLOAT, PcodeOp.FLOAT_TRUNC, - PcodeOp.FLOAT_CEIL, PcodeOp.FLOAT_FLOOR, PcodeOp.FLOAT_ROUND, + PcodeOp.FLOAT_INT2FLOAT, 
PcodeOp.FLOAT_FLOAT2FLOAT, PcodeOp.FLOAT_TRUNC, PcodeOp.FLOAT_CEIL, + PcodeOp.FLOAT_FLOOR, PcodeOp.FLOAT_ROUND, PcodeOp.MULTIEQUAL, PcodeOp.INDIRECT, PcodeOp.PIECE, PcodeOp.SUBPIECE, @@ -162,10 +148,10 @@ public class DynamicHash { private long hash; private DynamicHash() { - markop = new ArrayList(); - markvn = new ArrayList(); - vnedge = new ArrayList(); - opedge = new ArrayList(); + markop = new ArrayList<>(); + markvn = new ArrayList<>(); + vnedge = new ArrayList<>(); + opedge = new ArrayList<>(); } /** @@ -181,7 +167,7 @@ public class DynamicHash { /** * Construct a unique hash for the given Varnode, which must be in - * a syntax tree. The hash method is cycled into a uniquely identifying one is found. + * a syntax tree. The hash method is cycled until a uniquely identifying one is found. * @param root is the given Varnode * @param fd is the PcodeSyntaxTree containing the Varnode */ @@ -190,6 +176,18 @@ public class DynamicHash { uniqueHash(root, fd); } + /** + * Construct a unique hash that allows recovery of a specific PcodeOp and slot from the + * syntax tree. The hash method is cycled until a uniquely identifying one is found. + * @param op is the specific PcodeOp to hash + * @param slot is the specific slot (-1 is the output, >=0 is an input) + * @param fd is the PcodeSyntaxTree containing the PcodeOp + */ + public DynamicHash(PcodeOp op, int slot, PcodeSyntaxTree fd) { + this(); + uniqueHash(op, slot, fd); + } + /** * Construct a level 0 hash on the input Varnode to the given PcodeOp * @@ -223,11 +221,47 @@ public class DynamicHash { opedge.clear(); } + /** + * Encode a particular PcodeOp and slot + * @param op is the PcodeOp to preserve + * @param slot is the slot to preserve (-1 for output, >=0 for input) + * @param method is the method to use for encoding (4, 5, or 6) + */ + private void calcHash(PcodeOp op, int slot, int method) { + vnproc = 0; + opproc = 0; + opedgeproc = 0; + markset = new HashSet<>(); + Varnode root = (slot < 0) ? 
op.getOutput() : op.getInput(slot); + opedge.add(new ToOpEdge(op, slot)); + switch (method) { + case 4: + break; + case 5: + gatherUnmarkedOp(); + for (; opproc < markop.size(); ++opproc) { + buildOpUp(markop.get(opproc)); + } + gatherUnmarkedVn(); + break; + case 6: + gatherUnmarkedOp(); + for (; opproc < markop.size(); ++opproc) { + buildOpDown(markop.get(opproc)); + } + gatherUnmarkedVn(); + break; + default: + break; + } + pieceTogetherHash(root, method); + } + private void calcHash(Varnode root, int method) { vnproc = 0; opproc = 0; opedgeproc = 0; - markset = new HashSet(); + markset = new HashSet<>(); vnedge.add(root); gatherUnmarkedVn(); @@ -278,6 +312,10 @@ public class DynamicHash { break; } + pieceTogetherHash(root, method); + } + + private void pieceTogetherHash(Varnode root, int method) { if (opedge.size() == 0) { hash = 0; addrresult = null; @@ -296,8 +334,8 @@ public class DynamicHash { } } - for (int i = 0; i < opedge.size(); ++i) { - reg = opedge.get(i).hash(reg); + for (ToOpEdge element : opedge) { + reg = element.hash(reg); } // Build the final 64-bit hash @@ -335,10 +373,74 @@ public class DynamicHash { addrresult = op.getSeqnum().getTarget(); } + private void uniqueHash(PcodeOp op, int slot, PcodeSyntaxTree fd) { + ArrayList oplist = new ArrayList<>(); + ArrayList oplist2 = new ArrayList<>(); + ArrayList champion = new ArrayList<>(); + int method; + long tmphash = 0; + Address tmpaddr = null; + int maxduplicates = 8; + + gatherOpsAtAddress(oplist, fd, op.getSeqnum().getTarget()); + for (method = 4; method < 7; ++method) { + clear(); + calcHash(op, slot, method); + if (hash == 0) { + return; // Can't get a good hash + } + tmphash = hash; + tmpaddr = addrresult; + oplist2.clear(); + for (PcodeOp tmpop : oplist) { + if (slot >= tmpop.getNumInputs()) { + continue; + } + clear(); + calcHash(tmpop, slot, method); + if (hash == tmphash) { // Hash collision + oplist2.add(tmpop); + if (oplist2.size() > maxduplicates) { + break; + } + } + } + if 
(oplist2.size() <= maxduplicates) { + if ((champion.size() == 0) || (oplist2.size() < champion.size())) { + champion = oplist2; + oplist2 = new ArrayList<>(); + if (champion.size() == 1) { + break; // Current hash is unique + } + } + } + } + if (champion.size() == 0) { + hash = 0; + addrresult = Address.NO_ADDRESS; // Couldn't find a unique hash + return; + } + int total = champion.size() - 1; // total is in range [0,maxduplicates-1] + int pos; + for (pos = 0; pos <= total; ++pos) { + if (champion.get(pos) == op) { + break; + } + } + if (pos > total) { + hash = 0; + addrresult = Address.NO_ADDRESS; + return; + } + hash = tmphash | ((long) pos << 49); // Store three bits for position with list of duplicate hashes + hash |= ((long) total << 52); // Store three bits for total number of duplicate hashes + addrresult = tmpaddr; + } + private void uniqueHash(Varnode root, PcodeSyntaxTree fd) { - ArrayList vnlist = new ArrayList(); - ArrayList vnlist2 = new ArrayList(); - ArrayList champion = new ArrayList(); + ArrayList vnlist = new ArrayList<>(); + ArrayList vnlist2 = new ArrayList<>(); + ArrayList champion = new ArrayList<>(); int method; long tmphash = 0; Address tmpaddr = null; @@ -369,7 +471,7 @@ public class DynamicHash { if (vnlist2.size() <= maxduplicates) { if ((champion.size() == 0) || (vnlist2.size() < champion.size())) { champion = vnlist2; - vnlist2 = new ArrayList(); + vnlist2 = new ArrayList<>(); if (champion.size() == 1) { break; // Current hash is unique } @@ -420,7 +522,7 @@ public class DynamicHash { return; // no descendants } - ArrayList newedge = new ArrayList(); + ArrayList newedge = new ArrayList<>(); while (iter.hasNext()) { PcodeOp op = iter.next(); @@ -491,8 +593,8 @@ public class DynamicHash { int total = getTotalFromHash(h); int pos = getPositionFromHash(h); h = clearTotalPosition(h); - ArrayList vnlist = new ArrayList(); - ArrayList vnlist2 = new ArrayList(); + ArrayList vnlist = new ArrayList<>(); + ArrayList vnlist2 = new ArrayList<>(); 
gatherFirstLevelVars(vnlist, fd, addr, h); for (int i = 0; i < vnlist.size(); ++i) { Varnode tmpvn = vnlist.get(i); @@ -508,6 +610,39 @@ public class DynamicHash { return vnlist2.get(pos); } + public static PcodeOp findOp(PcodeSyntaxTree fd, Address addr, long h) { + DynamicHash dhash = new DynamicHash(); + int method = getMethodFromHash(h); + int slot = getSlotFromHash(h); + int total = getTotalFromHash(h); + int pos = getPositionFromHash(h); + h = clearTotalPosition(h); + ArrayList oplist = new ArrayList<>(); + ArrayList oplist2 = new ArrayList<>(); + gatherOpsAtAddress(oplist, fd, addr); + for (PcodeOp tmpop : oplist) { + if (slot >= tmpop.getNumInputs()) + continue; + dhash.clear(); + dhash.calcHash(tmpop, slot, method); + if (dhash.getHash() == h) { + oplist2.add(tmpop); + } + } + if (total != oplist2.size()) { + return null; + } + return oplist2.get(pos); + } + + public static void gatherOpsAtAddress(ArrayList oplist, PcodeSyntaxTree fd, + Address addr) { + Iterator iter = fd.getPcodeOps(addr); + while (iter.hasNext()) { + oplist.add(iter.next()); + } + } + public static void gatherFirstLevelVars(ArrayList varlist, PcodeSyntaxTree fd, Address addr, long h) { int opc = getOpCodeFromHash(h); @@ -615,7 +750,7 @@ public class DynamicHash { * @param value of the constant * @return array of hash values (may be zero length) */ - public static long[] calcConstantHash(Instruction instr,long value) { + public static long[] calcConstantHash(Instruction instr, long value) { long[] tmp = new long[2]; int count = 0; for (PcodeOp op : instr.getPcode(true)) { @@ -625,12 +760,12 @@ public class DynamicHash { matchWithPossibleExtension(inputs[i].getOffset(), inputs[i].getSize(), value)) { if (count >= tmp.length) { long[] newtmp = new long[count + 10]; - for(int j=0;j 3) { + int fieldNum = UnionFacetSymbol.extractFieldNumber(nm); + sym = new UnionFacetSymbol(id, nm, dt, fieldNum, func); + } + else { + sym = new HighSymbol(id, nm, dt, func); + } DynamicEntry entry = new 
DynamicEntry(sym, pcaddr, hash); sym.addMapEntry(entry); insertSymbol(sym); diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java index 07ff801485..861abe0ae7 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java @@ -577,6 +577,9 @@ public class PcodeDataTypeManager { resBuf.append("\n"); } } + else if (type instanceof Union) { + buildUnion(resBuf, (Union) type); + } else if (type instanceof Enum) { appendNameIdAttributes(resBuf, type); Enum enumDt = (Enum) type; @@ -746,18 +749,51 @@ public class PcodeDataTypeManager { * * @return XML string document */ - public StringBuilder buildStructTypeZeroSizeOveride(DataType type) { + public StringBuilder buildCompositeZeroSizePlaceholder(DataType type) { StringBuilder resBuf = new StringBuilder(); - if (!(type instanceof Structure)) { + String metaString; + if (type instanceof Structure) { + metaString = "struct"; + } + else if (type instanceof Union) { + metaString = "union"; + } + else { return resBuf; //empty. Could throw AssertException. 
} resBuf.append(""); + resBuf.append(" id=\"0x" + Long.toHexString(progDataTypes.getID(type)) + "\" metatype=\""); + resBuf.append(metaString); + resBuf.append("\" size=\"0\">"); return resBuf; } + public void buildUnion(StringBuilder buffer, Union unionType) { + appendNameIdAttributes(buffer, unionType); + SpecXmlUtils.encodeStringAttribute(buffer, "metatype", "union"); + SpecXmlUtils.encodeSignedIntegerAttribute(buffer, "size", unionType.getLength()); + buffer.append(">\n"); + DataTypeComponent[] comps = unionType.getDefinedComponents(); + for (DataTypeComponent comp : comps) { + if (comp.getLength() == 0) { + continue; + } + buffer.append("'); + DataType fieldtype = comp.getDataType(); + buildTypeRef(buffer, fieldtype, comp.getLength()); + buffer.append("\n"); + } + } + private void generateCoreTypes() { voidDt = new VoidDataType(progDataTypes); ArrayList typeList = new ArrayList<>(); diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/UnionFacetSymbol.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/UnionFacetSymbol.java new file mode 100644 index 0000000000..7ffb9eae3e --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/UnionFacetSymbol.java @@ -0,0 +1,80 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.program.model.pcode; + +import ghidra.program.model.address.Address; +import ghidra.program.model.data.DataType; +import ghidra.util.xml.SpecXmlUtils; + +/** + * A specialized HighSymbol that directs the decompiler to use a specific field of a union, + * when interpreting a particular PcodeOp that accesses a Varnode whose data-type involves the + * union. The symbol is stored as a dynamic variable annotation. The data-type must either be the + * union itself or a pointer to the union. The firstUseOffset and dynamic hash + * identify the particular PcodeOp and Varnode affected. The field number is the ordinal + * of the desired field (DataTypeComponent) within the union. It is currently stored by + * encoding it in the symbol name. + */ +public class UnionFacetSymbol extends HighSymbol { + public static String BASENAME = "unionfacet"; + private int fieldNumber; // Ordinal of field within union being selected + + public UnionFacetSymbol(long uniqueId, String nm, DataType dt, int fldNum, HighFunction func) { + super(uniqueId, nm, dt, func); + category = 2; + fieldNumber = fldNum; + } + + @Override + public void saveXML(StringBuilder buf) { + buf.append("\n"); + dtmanage.buildTypeRef(buf, type, getSize()); + buf.append("\n"); + } + + /** + * Generate an automatic symbol name, given a field number and address + * @param fldNum is the field number + * @param addr is the Address + * @return the name + */ + public static String buildSymbolName(int fldNum, Address addr) { + StringBuilder buffer = new StringBuilder(); + buffer.append(BASENAME).append(fldNum + 1).append('_'); + buffer.append(Long.toHexString(addr.getOffset())); + return buffer.toString(); + } + + /** + * The actual field number is encoded in the symbol name + * @param nm is the symbol name + * @return the field number or -1 if we cannot parse + */ + public static int extractFieldNumber(String nm) { + int pos = nm.indexOf(BASENAME); + if (pos < 0) { + return -1; + } + int endpos = 
nm.indexOf('_', pos); + if (endpos < 0) { + return -1; + } + return Integer.decode(nm.substring(pos + BASENAME.length(), endpos)) - 1; + } +}