From c9ba3640bf49d58d796dd1eaa16ab9836f83b938 Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Thu, 9 Sep 2021 15:18:06 -0400 Subject: [PATCH] Expanded RulePushPtr and RuleExtensionPush --- .../Decompiler/certification.manifest | 3 +- .../src/decompile/cpp/coreaction.cc | 1 + .../src/decompile/cpp/ruleaction.cc | 315 +++++++++++++----- .../src/decompile/cpp/ruleaction.hh | 16 +- .../src/decompile/datatests/dupptr.xml | 133 ++++++++ 5 files changed, 377 insertions(+), 91 deletions(-) create mode 100644 Ghidra/Features/Decompiler/src/decompile/datatests/dupptr.xml diff --git a/Ghidra/Features/Decompiler/certification.manifest b/Ghidra/Features/Decompiler/certification.manifest index 8aea441f58..5e64a5bf6d 100644 --- a/Ghidra/Features/Decompiler/certification.manifest +++ b/Ghidra/Features/Decompiler/certification.manifest @@ -11,8 +11,9 @@ src/decompile/cpp/Doxyfile||GHIDRA|||Most of this file is autogenerated by doxyg src/decompile/cpp/Makefile||GHIDRA||||END| src/decompile/datatests/convert.xml||GHIDRA||||END| src/decompile/datatests/deadvolatile.xml||GHIDRA||||END| -src/decompile/datatests/elseif.xml||GHIDRA||||END| src/decompile/datatests/deindirect.xml||GHIDRA||||END| +src/decompile/datatests/dupptr.xml||GHIDRA||||END| +src/decompile/datatests/elseif.xml||GHIDRA||||END| src/decompile/datatests/floatprint.xml||GHIDRA||||END| src/decompile/datatests/forloop1.xml||GHIDRA||||END| src/decompile/datatests/forloop_loaditer.xml||GHIDRA||||END| diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc index 1cfaff0947..53314be6e5 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc @@ -5174,6 +5174,7 @@ void ActionDatabase::universalAction(Architecture *conf) actcleanup->addRule( new Rule2Comp2Sub("cleanup") ); actcleanup->addRule( new RuleSubRight("cleanup") ); 
actcleanup->addRule( new RulePtrsubCharConstant("cleanup") ); + actcleanup->addRule( new RuleExtensionPush("cleanup") ); } act->addAction( actcleanup ); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc index 96d7193869..db6a604447 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc @@ -6098,74 +6098,81 @@ void AddTreeState::buildTree(void) data.opDestroy(baseOp); } -/// \brief Verify that given PcodeOp occurs at the bottom of the CPUI_INT_ADD tree -/// -/// The main RulePtrArith algorithm assumes that the pointer Varnode is at the bottom -/// of the expression tree that is adding an offset to the pointer. This routine -/// verifies this condition. -/// \param op is the given PcodeOp which is the putative last operation in the tree -/// \param slot is the slot of the pointer Varnode within the given PcodeOp -/// \return \b true if the pointer is at the bottom of the tree, \b false otherwise -bool RulePtrArith::verifyAddTreeBottom(PcodeOp *op,int4 slot) - -{ - Varnode *vn = op->getOut(); - Varnode *ptrbase = op->getIn(slot); - list::const_iterator iter=vn->beginDescend(); - OpCode opc; - if (iter == vn->endDescend()) return false; // Don't bother if no descendants - PcodeOp *lowerop = *iter++; - opc = lowerop->code(); - if (vn->isSpacebase()) // For the RESULT to be a spacebase pointer - if (iter!=vn->endDescend()) // It must have only 1 descendant - return false; - if (opc == CPUI_INT_ADD) // Check for lone descendant which is an ADD - if (iter==vn->endDescend()) - return false; // this is not bottom of add tree - if (ptrbase->isSpacebase() && (ptrbase->isInput()||(ptrbase->isConstant())) && - (op->getIn(1-slot)->isConstant())) { - // Look for ANY descendant which LOADs or STOREs off of vn - if ((opc==CPUI_LOAD)||(opc==CPUI_STORE)) { - if (lowerop->getIn(1) == vn) - return false; - } - 
while(iter!=vn->endDescend()) { - opc = (*iter)->code(); - if ((opc==CPUI_LOAD)||(opc==CPUI_STORE)) { - if ((*iter)->getIn(1) == vn) - return false; - } - ++iter; - } - } - return true; -} - /// \brief Test for other pointers in the ADD tree above the given op that might be a preferred base /// -/// This tests the condition of RulePushPtr, making sure that the given op isn't the lone descendant -/// of a pointer constructed by INT_ADD on another pointer (which would then be preferred). +/// This tests the condition of RulePushPtr on the node immediately above the given putative base pointer /// \param op is the given op -/// \param slot is the input slot of the pointer +/// \param slot is the input slot of the putative base pointer /// \return \b true if the indicated slot holds the preferred pointer bool RulePtrArith::verifyPreferredPointer(PcodeOp *op,int4 slot) { Varnode *vn = op->getIn(slot); - // Check if RulePushPtr would apply here - if (op->getIn(1-slot)->getType()->getMetatype() != TYPE_PTR && vn->isWritten()) { - PcodeOp *preOp = vn->getDef(); - if (preOp->code() == CPUI_INT_ADD) { - if (vn->loneDescend() == op) { - int ptrCount = 0; - if (preOp->getIn(0)->getType()->getMetatype() == TYPE_PTR) ptrCount += 1; - if (preOp->getIn(1)->getType()->getMetatype() == TYPE_PTR) ptrCount += 1; - if (ptrCount == 1) - return false; // RulePushPtr would apply, so we are not preferred - } + if (!vn->isWritten()) return true; + PcodeOp *preOp = vn->getDef(); + if (preOp->code() != CPUI_INT_ADD) return true; + int preslot = 0; + if (preOp->getIn(preslot)->getType()->getMetatype() != TYPE_PTR) { + preslot = 1; + if (preOp->getIn(preslot)->getType()->getMetatype() != TYPE_PTR) + return true; + } + return (1 != evaluatePointerExpression(preOp, preslot)); // Does earlier varnode look like the base pointer +} + +/// \brief Determine if the expression rooted at the given INT_ADD operation is ready for conversion +/// +/// Converting an expression of INT_ADDs into PTRSUBs and 
PTRADDs requires that the base pointer +/// be at the root of the expression tree. This method evaluates whether given root has the base +/// pointer at the bottom. If not, a \e push transform needs to be performed before RulePtrArith can apply. +/// This method returns a command code: +/// - 0 if no action should be taken, the expression is not fully linked or should not be converted +/// - 1 if a \e push action should be taken, prior to conversion +/// - 2 if the pointer arithmetic conversion can proceed +/// \param op is the given INT_ADD +/// \param slot is the index of the pointer +/// \return the command code +int4 RulePtrArith::evaluatePointerExpression(PcodeOp *op,int4 slot) + +{ + int4 res = 1; // Assume we are going to push + int4 count = 0; // Count descendants + Varnode *ptrBase = op->getIn(slot); + if (ptrBase->isFree() && !ptrBase->isConstant()) + return 0; + if (op->getIn(1 - slot)->getType()->getMetatype() == TYPE_PTR) + res = 2; + Varnode *outVn = op->getOut(); + list::const_iterator iter; + for(iter=outVn->beginDescend();iter!=outVn->endDescend();++iter) { + PcodeOp *decOp = *iter; + count += 1; + OpCode opc = decOp->code(); + if (opc == CPUI_INT_ADD) { + Varnode *otherVn = decOp->getIn(1 - decOp->getSlot(outVn)); + if (otherVn->isFree() && !otherVn->isConstant()) + return 0; // No action if the data-flow isn't fully linked + if (otherVn->getType()->getMetatype() == TYPE_PTR) + res = 2; // Do not push in the presence of other pointers + } + else if ((opc == CPUI_LOAD || opc == CPUI_STORE) && decOp->getIn(1) == outVn) { // If use is as pointer for LOAD or STORE + if (ptrBase->isSpacebase() && (ptrBase->isInput()||(ptrBase->isConstant())) && + (op->getIn(1-slot)->isConstant())) + return 0; + res = 2; + } + else { // Any other op besides ADD, do not push + res = 2; } } - return true; + if (count == 0) + return 0; + if (count > 1) { + if (outVn->isSpacebase()) + return 0; // For the RESULT to be a spacebase pointer it must have only 1 descendent +// 
res = 2; // Uncommenting this line will not let pointers get pushed to multiple descendants + } + return res; } /// \class RulePtrArith @@ -6206,7 +6213,7 @@ int4 RulePtrArith::applyOp(PcodeOp *op,Funcdata &data) if (ct->getMetatype() == TYPE_PTR) break; } if (slot == op->numInput()) return 0; - if (!verifyAddTreeBottom(op, slot)) return 0; + if (evaluatePointerExpression(op, slot) != 2) return 0; if (!verifyPreferredPointer(op, slot)) return 0; const TypePointer *tp = (const TypePointer *) ct; @@ -6298,6 +6305,82 @@ int4 RuleStructOffset0::applyOp(PcodeOp *op,Funcdata &data) return 1; } +/// \brief Build a duplicate of the given Varnode as an output of a PcodeOp, preserving the storage address if possible +/// +/// If the Varnode is already a \e unique or is \e addrtied, the duplicate is placed in new \e unique storage; otherwise it reuses the storage address of the given Varnode. +/// \param vn is the given Varnode +/// \param op is the PcodeOp to which the duplicate should be an output +/// \param data is the function to add the duplicate to +/// \return the duplicate Varnode +Varnode *RulePushPtr::buildVarnodeOut(Varnode *vn,PcodeOp *op,Funcdata &data) + +{ + if (vn->isAddrTied() || vn->getSpace()->getType() == IPTR_INTERNAL) + return data.newUniqueOut(vn->getSize(), op); + return data.newVarnodeOut(vn->getSize(), vn->getAddr(), op); +} + +/// \brief Generate list of PcodeOps that need to be duplicated as part of pushing the pointer +/// +/// If the pointer INT_ADD is duplicated as part of the push, some of the operations building +/// the offset to the pointer may also need to be duplicated. Identify these and add them +/// to the result list. 
+/// \param reslist is the result list to be populated +/// \param vn is the offset Varnode being added to the pointer +void RulePushPtr::collectDuplicateNeeds(vector &reslist,Varnode *vn) + +{ + for(;;) { + if (!vn->isWritten()) return; + if (vn->isAutoLive()) return; + if (vn->loneDescend() == (PcodeOp *)0) return; // Already has multiple descendants + PcodeOp *op = vn->getDef(); + OpCode opc = op->code(); + if (opc == CPUI_INT_ZEXT || opc == CPUI_INT_SEXT || opc == CPUI_INT_2COMP) + reslist.push_back(op); + else if (opc == CPUI_INT_MULT) { + if (op->getIn(1)->isConstant()) + reslist.push_back(op); + } + else + return; + vn = op->getIn(0); + } +} + +/// \brief Duplicate the given PcodeOp so that the outputs have only 1 descendant +/// +/// Run through the descendants of the PcodeOp output and create a duplicate +/// of the PcodeOp right before the descendant. We assume the PcodeOp either has +/// a single input, or has 2 inputs where the second is a constant. +/// The (original) PcodeOp is destroyed. 
+/// \param op is the given PcodeOp to duplicate +/// \param data is function to build duplicates in +void RulePushPtr::duplicateNeed(PcodeOp *op,Funcdata &data) + +{ + Varnode *outVn = op->getOut(); + Varnode *inVn = op->getIn(0); + int num = op->numInput(); + OpCode opc = op->code(); + list::const_iterator iter = outVn->beginDescend(); + do { + PcodeOp *decOp = *iter; + int4 slot = decOp->getSlot(outVn); + PcodeOp *newOp = data.newOp(num, op->getAddr()); // Duplicate op associated with original address + Varnode *newOut = buildVarnodeOut(outVn, newOp, data); // Result contained in original storage + newOut->updateType(outVn->getType(),false,false); + data.opSetOpcode(newOp, opc); + data.opSetInput(newOp, inVn, 0); + if (num > 1) + data.opSetInput(newOp, op->getIn(1), 1); + data.opSetInput(decOp, newOut, slot); + data.opInsertBefore(newOp, decOp); + iter = outVn->beginDescend(); + } while(iter != outVn->endDescend()); + data.opDestroy(op); +} + /// \class RulePushPtr /// \brief Push a Varnode with known pointer data-type to the bottom of its additive expression /// @@ -6312,47 +6395,51 @@ void RulePushPtr::getOpList(vector &oplist) const int4 RulePushPtr::applyOp(PcodeOp *op,Funcdata &data) { - int4 i,j; - PcodeOp *decop,*newop; - Varnode *vn; + int4 slot; Varnode *vni = (Varnode *)0; - const Datatype *ct; if (!data.isTypeRecoveryOn()) return 0; - for(i=0;inumInput();++i) { // Search for pointer type - vni = op->getIn(i); - ct = vni->getType(); - if (ct->getMetatype() == TYPE_PTR) break; + for(slot=0;slotnumInput();++slot) { // Search for pointer type + vni = op->getIn(slot); + if (vni->getType()->getMetatype() == TYPE_PTR) break; } - if (i == op->numInput()) return 0; - if ((i==0)&&(op->getIn(1)->getType()->getMetatype() == TYPE_PTR)) return 0; // Prevent infinite loops - - vn = op->getOut(); - if ((decop=vn->loneDescend()) == (PcodeOp *)0) return 0; - if (decop->code() != CPUI_INT_ADD) return 0; + if (slot == op->numInput()) return 0; - j = decop->getSlot(vn); - 
if (decop->getIn(1-j)->getType()->getMetatype() == TYPE_PTR) return 0; // Prevent infinite loops + if (RulePtrArith::evaluatePointerExpression(op, slot) != 1) return 0; + Varnode *vn = op->getOut(); + Varnode *vnadd2 = op->getIn(1-slot); + vector duplicateList; + if (vn->loneDescend() == (PcodeOp *)0) + collectDuplicateNeeds(duplicateList, vnadd2); - Varnode *vnadd1 = decop->getIn(1-j); - Varnode *vnadd2 = op->getIn(1-i); - Varnode *newout; + for(;;) { + list::const_iterator iter = vn->beginDescend(); + if (iter == vn->endDescend()) break; + PcodeOp *decop = *iter; + int4 j = decop->getSlot(vn); - // vni and vnadd2 are propagated, so they shouldn't be free - if (vnadd2->isFree() && (!vnadd2->isConstant())) return 0; - if (vni->isFree() && (!vni->isConstant())) return 0; + Varnode *vnadd1 = decop->getIn(1-j); + Varnode *newout; - newop = data.newOp(2,decop->getAddr()); - data.opSetOpcode(newop,CPUI_INT_ADD); - newout = data.newUniqueOut(vnadd1->getSize(),newop); + // Create new INT_ADD for the intermediate result that didn't exist in original code. 
+ // We don't associate it with the address of the original INT_ADD + // We don't preserve the Varnode address of the original INT_ADD + PcodeOp *newop = data.newOp(2,decop->getAddr()); // Use the later address + data.opSetOpcode(newop,CPUI_INT_ADD); + newout = data.newUniqueOut(vnadd1->getSize(),newop); // Use a temporary storage address - data.opSetInput(decop,vni,0); - data.opSetInput(decop,newout,1); + data.opSetInput(decop,vni,0); + data.opSetInput(decop,newout,1); - data.opSetInput(newop,vnadd1,0); - data.opSetInput(newop,vnadd2,1); + data.opSetInput(newop,vnadd1,0); + data.opSetInput(newop,vnadd2,1); - data.opInsertBefore(newop,decop); + data.opInsertBefore(newop,decop); + } + if (!vn->isAutoLive()) + data.opDestroy(op); + for(int4 i=0;i &oplist) const + +{ + oplist.push_back(CPUI_INT_ZEXT); + oplist.push_back(CPUI_INT_SEXT); +} + +int4 RuleExtensionPush::applyOp(PcodeOp *op,Funcdata &data) + +{ + Varnode *inVn = op->getIn(0); + if (inVn->isConstant()) return 0; + if (inVn->isAddrForce()) return 0; + if (inVn->isAddrTied()) return 0; + Varnode *outVn = op->getOut(); + if (outVn->isTypeLock() || outVn->isNameLock()) return 0; + if (outVn->isAddrForce() || outVn->isAddrTied()) return 0; + list::const_iterator iter; + int4 addcount = 0; // Number of INT_ADD descendants + int4 ptrcount = 0; // Number of PTRADD descendants + for(iter=outVn->beginDescend();iter!=outVn->endDescend();++iter) { + PcodeOp *decOp = *iter; + OpCode opc = decOp->code(); + if (opc == CPUI_PTRADD) { + // This extension will likely be hidden + ptrcount += 1; + } + else if (opc == CPUI_INT_ADD) { + PcodeOp *subOp = decOp->getOut()->loneDescend(); + if (subOp == (PcodeOp *)0 || subOp->code() != CPUI_PTRADD) + return 0; + addcount += 1; + } + else { + return 0; + } + } + if ((addcount + ptrcount) <= 1) return 0; + if (addcount > 0) { + if (op->getIn(0)->loneDescend() != (PcodeOp *)0) return 0; + } + RulePushPtr::duplicateNeed(op, data); // Duplicate the extension to all result descendants + 
return 1; +} + /// \class RuleSubNormal /// \brief Pull-back SUBPIECE through INT_RIGHT and INT_SRIGHT /// diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh index 732689751a..e6f0fb9da2 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh @@ -1027,7 +1027,6 @@ public: virtual int4 applyOp(PcodeOp *op,Funcdata &data); }; class RulePtrArith : public Rule { - static bool verifyAddTreeBottom(PcodeOp *op,int4 slot); static bool verifyPreferredPointer(PcodeOp *op,int4 slot); public: RulePtrArith(const string &g) : Rule(g, 0, "ptrarith") {} ///< Constructor @@ -1037,6 +1036,7 @@ public: } virtual void getOpList(vector &oplist) const; virtual int4 applyOp(PcodeOp *op,Funcdata &data); + static int4 evaluatePointerExpression(PcodeOp *op,int4 slot); }; class RuleStructOffset0 : public Rule { public: @@ -1049,6 +1049,8 @@ public: virtual int4 applyOp(PcodeOp *op,Funcdata &data); }; class RulePushPtr : public Rule { + static Varnode *buildVarnodeOut(Varnode *vn,PcodeOp *op,Funcdata &data); + static void collectDuplicateNeeds(vector &reslist,Varnode *vn); public: RulePushPtr(const string &g) : Rule(g, 0, "pushptr") {} ///< Constructor virtual Rule *clone(const ActionGroupList &grouplist) const { @@ -1057,6 +1059,7 @@ public: } virtual void getOpList(vector &oplist) const; virtual int4 applyOp(PcodeOp *op,Funcdata &data); + static void duplicateNeed(PcodeOp *op,Funcdata &data); }; class RulePtraddUndo : public Rule { public: @@ -1136,6 +1139,17 @@ public: virtual int4 applyOp(PcodeOp *op,Funcdata &data); }; +class RuleExtensionPush : public Rule { +public: + RuleExtensionPush(const string &g) : Rule( g, 0, "extensionpush") {} ///< Constructor + virtual Rule *clone(const ActionGroupList &grouplist) const { + if (!grouplist.contains(getGroup())) return (Rule *)0; + return new RuleExtensionPush(getGroup()); + } + 
virtual void getOpList(vector &oplist) const; + virtual int4 applyOp(PcodeOp *op,Funcdata &data); +}; + class RuleSubNormal : public Rule { public: RuleSubNormal(const string &g) : Rule( g, 0, "subnormal") {} ///< Constructor diff --git a/Ghidra/Features/Decompiler/src/decompile/datatests/dupptr.xml b/Ghidra/Features/Decompiler/src/decompile/datatests/dupptr.xml new file mode 100644 index 0000000000..a2cbcab1e3 --- /dev/null +++ b/Ghidra/Features/Decompiler/src/decompile/datatests/dupptr.xml @@ -0,0 +1,133 @@ + + + + + 554889e54889 +7de88975e48b45e44898488d14c50000 +0000488b45e84801d0488945f8488b45 +f8488d5008488d057803000048890248 +8b45f8488d5010488d056c0300004889 +02905dc3 + + + 554889e54881 +eca800000089bd5cffffff488d8560ff +ffff4889c7e8aaffffff488d8560ffff +ff8b955cffffff4863d248c1e2024801 +d0488945f8488b45f84883c008488945 +f0488b45f08b00c1f80383e00f89c248 +8b45f0891090c9c3 + + + 554889e54883ec20 +48897de88975e4488b45e84889c7e851 +ffffff8b45e44898488d148500000000 +488b45e84801d0488945f8488b45f848 +83c008488945f0488b45f08b00c1f803 +83e00f89c2488b45f0891090c9c3 + + + 5548 +89e54881ec7805000089bd8cfaffff48 +8d8590faffff4889c7e8f6feffff488d +8590faffff4883c024488945f88b858c +faffff4898488945f0488b55f04889d0 +48c1e0044801d048c1e0034889c2488b +45f84801d0488945e8488b45f04883c0 +014889c24889d048c1e0044801d048c1 +e0034889c2488b45f84801d0488945e0 +488b45e88b008d5025488b45e0891090 +c9c3 + + + 554889e548897dd88975d48b45d4 +4898488d148500000000488b45d84801 +d0488945f8488b45f84883c008488945 +f0488b45f84883c048488945e8488b45 +f0c70007000000488b45e8c7006f0000 +00905dc3 + + + 554889e548897dd88975d48b +45d44863d04889d04801c04801d048c1 +e0024889c2488b45d84801d04883c004 +488945f8488b45f84883e880488945f0 +488b45f84883c008488945e8488b45f0 +c70014000000488b45e8c7001e000000 +905dc3 + + + 554889e548897dd88975d4488b +45d84883c01c488945f8488b45f8488d +50088b45d4489848c1e00a4801d04889 +45f0488b45f8488d50088b45d483c001 +489848c1e00a4801d0488945e8488b45 +e88b008d50fd488b45f08910905dc3 + + + 
48656c6c6f00476f6f6462796500 + + + + + + + + + + + + + +argv\[\(int8\)a \+ 1\] = "Hello"; +argv\[\(int8\)a \+ 2\] = "Goodbye"; +aStack168\.arr1\[a\] = aStack168\.arr1\[a\] >> 3 & 0xf; +ptr->arr1\[a\] = ptr->arr1\[a\] >> 3 & 0xf; +aaStack1400\[\(int8\)a \+ 1\]\.arr1\[7\] = aaStack1400\[a\]\.arr1\[7\] \+ 0x25; +ptr->arr1\[a\] = 7; +ptr->arr2\[a\] = 0x6f; +ptr->arr1\[a\]\.d = 0x1e; +ptr->arr2\[a\]\.d = 0x14; +ptr->arr\[a\]\[7\] = ptr->arr\[a \+ 1\]\[7\] \+ -3; +