diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/block.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/block.cc index 46da66dfa2..f536f78e12 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/block.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/block.cc @@ -2345,7 +2345,7 @@ int4 BlockBasic::flipInPlaceTest(vector &fliplist) const PcodeOp *lastop = op.back(); if (lastop->code() != CPUI_CBRANCH) return 2; - return opFlipInPlaceTest(lastop,fliplist); + return Funcdata::opFlipInPlaceTest(lastop,fliplist); } void BlockBasic::flipInPlaceExecute(void) @@ -2726,6 +2726,29 @@ PcodeOp *BlockBasic::findMultiequal(const vector &varArray) return op; } +/// \brief Get the earliest use/read of a Varnode in \b this basic block +/// +/// \param vn is the Varnode to search for +/// \return the earliest PcodeOp reading the Varnode or NULL +PcodeOp *BlockBasic::earliestUse(Varnode *vn) + +{ + list::const_iterator iter; + PcodeOp *res = (PcodeOp *)0; + + for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) { + PcodeOp *op = *iter; + if (op->getParent() != this) continue; + if (res == (PcodeOp *)0) + res = op; + else { + if (op->getSeqNum().getOrder() < res->getSeqNum().getOrder()) + res = op; + } + } + return res; +} + /// Each Varnode must be defined by a PcodeOp with the same OpCode. The Varnode, within the array, is replaced /// with the input Varnode in the indicated slot. 
/// \param varArray is the given array of Varnodes @@ -3035,7 +3058,7 @@ bool BlockIf::preferComplement(Funcdata &data) if (0 != split->flipInPlaceTest(fliplist)) return false; split->flipInPlaceExecute(); - opFlipInPlaceExecute(data,fliplist); + data.opFlipInPlaceExecute(fliplist); swapBlocks(1,2); return true; } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/block.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/block.hh index d43b07ed3d..1b3146ed2b 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/block.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/block.hh @@ -492,6 +492,7 @@ public: bool emptyOp(void) const { return op.empty(); } ///< Return \b true if \b block contains no operations bool noInterveningStatement(void) const; PcodeOp *findMultiequal(const vector &varArray); ///< Find MULTIEQUAL with given inputs + PcodeOp *earliestUse(Varnode *vn); static bool liftVerifyUnroll(vector &varArray,int4 slot); ///< Verify given Varnodes are defined with same PcodeOp }; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/blockaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/blockaction.cc index 71d7d5c41a..56fcd00b48 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/blockaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/blockaction.cc @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -2135,9 +2135,9 @@ int4 ActionNormalizeBranches::apply(Funcdata &data) if (cbranch == (PcodeOp *)0) continue; if (cbranch->code() != CPUI_CBRANCH) continue; fliplist.clear(); - if (opFlipInPlaceTest(cbranch,fliplist) != 0) + if (Funcdata::opFlipInPlaceTest(cbranch,fliplist) != 0) continue; - opFlipInPlaceExecute(data,fliplist); + data.opFlipInPlaceExecute(fliplist); bb->flipInPlaceExecute(); count += 1; // Indicate a change was made } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.cc index 67b5fdbb80..dccf15f3f3 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.cc @@ -459,7 +459,7 @@ bool StringSequence::transform(void) return true; } -/// From a starting pointer, backtrack through PTRADDs to a putative root Varnode pointer. +/// From a starting pointer, backtrack through PTRADDs and COPYs to a putative root Varnode pointer. /// \param initPtr is pointer Varnode into the root STORE void HeapSequence::findBasePointer(Varnode *initPtr) @@ -467,22 +467,84 @@ void HeapSequence::findBasePointer(Varnode *initPtr) basePointer = initPtr; while(basePointer->isWritten()) { PcodeOp *op = basePointer->getDef(); - if (op->code() != CPUI_PTRADD) break; - int8 sz = op->getIn(2)->getOffset(); - if (sz != charType->getAlignSize()) break; + OpCode opc = op->code(); + if (opc == CPUI_PTRADD) { + int8 sz = op->getIn(2)->getOffset(); + if (sz != charType->getAlignSize()) break; + } + else if (opc != CPUI_COPY) + break; basePointer = op->getIn(0); } } +/// Back-track from \b basePointer through PTRSUBs, PTRADDs, and INT_ADDs to an earlier root, keeping track +/// of any offsets. If an earlier root exists, trace forward, through ops trying to match the offsets. +/// For trace of ops whose offsets match exactly, the resulting Varnode is added to the list of duplicates. 
+/// \param duplist will hold the list of duplicate Varnodes (including \b basePointer) +void HeapSequence::findDuplicateBases(vector<Varnode *> &duplist) + +{ + if (!basePointer->isWritten()) { + duplist.push_back(basePointer); + return; + } + PcodeOp *op = basePointer->getDef(); + OpCode opc = op->code(); + if ((opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRADD) || !op->getIn(1)->isConstant()) { + duplist.push_back(basePointer); + return; + } + Varnode *copyRoot = basePointer; + vector<uintb> offset; + do { + uintb off = op->getIn(1)->getOffset(); + if (opc == CPUI_PTRADD) + off *= op->getIn(2)->getOffset(); + offset.push_back(off); + copyRoot = op->getIn(0); + if (!copyRoot->isWritten()) break; + op = copyRoot->getDef(); + opc = op->code(); + if (opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRADD) // Accept the same op-codes as the initial test + break; + } while(op->getIn(1)->isConstant()); + + duplist.push_back(copyRoot); + vector<Varnode *> midlist; + for(int4 i=offset.size()-1;i>=0;--i) { + duplist.swap(midlist); + duplist.clear(); + for(int4 j=0;j<midlist.size();++j) { + Varnode *vn = midlist[j]; + list<PcodeOp *>::const_iterator iter = vn->beginDescend(); + while(iter != vn->endDescend()) { + op = *iter; + ++iter; + opc = op->code(); + if (opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRADD) // PTRADD must pass so its scaled offset can be matched below + continue; + if (op->getIn(0) != vn || !op->getIn(1)->isConstant()) + continue; + uintb off = op->getIn(1)->getOffset(); + if (opc == CPUI_PTRADD) + off *= op->getIn(2)->getOffset(); + if (off != offset[i]) + continue; + duplist.push_back(op->getOut()); + } + } + } +} + /// Find STOREs with pointers derived from the \b basePointer and that are in the same /// basic block as the root STORE. The root STORE is \e not included in the resulting set. 
/// \param stores holds the collected STOREs void HeapSequence::findInitialStores(vector &stores) { - Datatype *ptrType = rootOp->getIn(1)->getTypeReadFacing(rootOp); vector ptradds; - ptradds.push_back(basePointer); + findDuplicateBases(ptradds); int4 pos = 0; int4 alignSize = charType->getAlignSize(); while(pos < ptradds.size()) { @@ -494,10 +556,14 @@ void HeapSequence::findInitialStores(vector &stores) OpCode opc = op->code(); if (opc == CPUI_PTRADD) { if (op->getIn(0) != vn) continue; - if (op->getOut()->getTypeDefFacing() != ptrType) continue; + // We only check array element size here, if we checked the data-type, we would + // need to take into account different pointer styles to the same element data-type if (op->getIn(2)->getOffset() != alignSize) continue; ptradds.push_back(op->getOut()); } + else if (opc == CPUI_COPY) { + ptradds.push_back(op->getOut()); + } else if (opc == CPUI_STORE && op->getParent() == block && op != rootOp) { if (op->getIn(1) != vn) continue; stores.push_back(op); @@ -530,7 +596,7 @@ uint8 HeapSequence::calcAddElements(Varnode *vn,vector &nonConst,int4 /// \brief Calculate the offset and any non-constant additive elements between the given Varnode and the \b basePointer /// -/// Walk backward from the given Varnode thru PTRADDs and ADDs, summing any offsets encountered. +/// Walk backward from the given Varnode thru PTRADDs and COPYs, summing any offsets encountered. /// Any non-constant Varnodes encountered in the path, that are not themselves a pointer, are passed back in a list. 
/// \param vn is the given Varnode to trace back to the \b basePointer /// \param nonConst will hold the list of non-constant Varnodes being passed back @@ -539,12 +605,23 @@ uint8 HeapSequence::calcPtraddOffset(Varnode *vn,vector &nonConst) { uint8 res = 0; - while(vn != basePointer) { - PcodeOp *ptradd = vn->getDef(); - uint8 off = calcAddElements(ptradd->getIn(1),nonConst,3); - off *= (uint8)ptradd->getIn(2)->getOffset(); - res += off; - vn = ptradd->getIn(0); + while(vn->isWritten()) { + PcodeOp *op = vn->getDef(); + OpCode opc = op->code(); + if (opc == CPUI_PTRADD) { + uint8 mult = op->getIn(2)->getOffset(); + if (mult != charType->getAlignSize()) + break; + uint8 off = calcAddElements(op->getIn(1),nonConst,3); + off *= mult; + res += off; + vn = op->getIn(0); + } + else if (opc == CPUI_COPY) { + vn = op->getIn(0); + } + else + break; } return res; } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.hh index cd77ce2be2..8c17117ddf 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.hh @@ -89,6 +89,7 @@ class HeapSequence : public ArraySequence { uint8 baseOffset; ///< Offset relative to pointer to root STORE vector nonConstAdds; ///< non-constant Varnodes being added into pointer calculation void findBasePointer(Varnode *initPtr); ///< Find the base pointer for the sequence + void findDuplicateBases(vector &duplist); ///< Find any duplicates of \b basePointer void findInitialStores(vector &stores); static uint8 calcAddElements(Varnode *vn,vector &nonConst,int4 maxDepth); uint8 calcPtraddOffset(Varnode *vn,vector &nonConst); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh index 4ff51cf49a..768a9117a0 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh +++ 
b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh @@ -488,6 +488,8 @@ public: Varnode *opStackLoad(AddrSpace *spc,uintb off,uint4 sz,PcodeOp *op,Varnode *stackptr,bool insertafter); PcodeOp *opStackStore(AddrSpace *spc,uintb off,PcodeOp *op,bool insertafter); void opUndoPtradd(PcodeOp *op,bool finalize); ///< Convert a CPUI_PTRADD back into a CPUI_INT_ADD + static int4 opFlipInPlaceTest(PcodeOp *op,vector &fliplist); + void opFlipInPlaceExecute(vector &fliplist); /// \brief Start of PcodeOp objects with the given op-code list::const_iterator beginOp(OpCode opc) const { return obank.begin(opc); } @@ -563,6 +565,11 @@ public: bool replaceLessequal(PcodeOp *op); ///< Replace INT_LESSEQUAL and INT_SLESSEQUAL expressions bool distributeIntMultAdd(PcodeOp *op); ///< Distribute constant coefficient to additive input bool collapseIntMultMult(Varnode *vn); ///< Collapse constant coefficients for two chained CPUI_INT_MULT + Varnode *buildCopyTemp(Varnode *vn,PcodeOp *point); ///< Create a COPY of given Varnode in a temporary register + + static PcodeOp *cseFindInBlock(PcodeOp *op,Varnode *vn,BlockBasic *bl,PcodeOp *earliest); + PcodeOp *cseElimination(PcodeOp *op1,PcodeOp *op2); + void cseEliminateList(vector< pair > &list,vector &outlist); static bool compareCallspecs(const FuncCallSpecs *a,const FuncCallSpecs *b); #ifdef OPACTION_DEBUG @@ -688,14 +695,5 @@ public: bool execute(PcodeOp *op,int4 slot,ParamTrial *t,bool allowFail); }; -extern int4 opFlipInPlaceTest(PcodeOp *op,vector &fliplist); -extern void opFlipInPlaceExecute(Funcdata &data,vector &fliplist); - -extern PcodeOp *earliestUseInBlock(Varnode *vn,BlockBasic *bl); -extern PcodeOp *cseFindInBlock(PcodeOp *op,Varnode *vn,BlockBasic *bl,PcodeOp *earliest); -extern PcodeOp *cseElimination(Funcdata &data,PcodeOp *op1,PcodeOp *op2); -extern void cseEliminateList(Funcdata &data,vector< pair > &list, - vector &outlist); - } // End namespace ghidra #endif diff --git 
a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc index 56400b2b8a..c2dc6be72b 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -1100,6 +1100,66 @@ bool Funcdata::collapseIntMultMult(Varnode *vn) return true; } +/// Return a Varnode in the \e unique space that is defined by a COPY op taking the given Varnode as input. +/// If a COPY op to a \e unique already exists, it may be returned. If the preexisting COPY is not usable +/// at the specified \b point, it is redefined at an earlier point in the control-flow so that it can be used. 
+/// \param vn is the given Varnode to COPY +/// \param point is the PcodeOp where the copy needs to be available +/// \return the \e unique Varnode COPY +Varnode *Funcdata::buildCopyTemp(Varnode *vn,PcodeOp *point) + +{ + PcodeOp *otherOp = (PcodeOp *)0; + PcodeOp *usedCopy = (PcodeOp *)0; + list::const_iterator iter; + for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) { + PcodeOp *op = *iter; + if (op->code() != CPUI_COPY) continue; + Varnode *outvn = op->getOut(); + if (outvn->getSpace()->getType() == IPTR_INTERNAL) { + if (outvn->isTypeLock()) + continue; + otherOp = op; + break; + } + } + if (otherOp != (PcodeOp *)0) { + if (point->getParent() == otherOp->getParent()) { + if (point->getSeqNum().getOrder() < otherOp->getSeqNum().getOrder()) + usedCopy = (PcodeOp *)0; + else + usedCopy = otherOp; + } + else { + BlockBasic *common; + common = (BlockBasic *)FlowBlock::findCommonBlock(point->getParent(),otherOp->getParent()); + if (common == point->getParent()) + usedCopy = (PcodeOp *)0; + else if (common == otherOp->getParent()) + usedCopy = otherOp; + else { // Neither op is ancestor of the other + usedCopy = newOp(1,common->getStop()); + opSetOpcode(usedCopy,CPUI_COPY); + newUniqueOut(vn->getSize(),usedCopy); + opSetInput(usedCopy,vn,0); + opInsertEnd(usedCopy,common); + } + } + } + if (usedCopy == (PcodeOp *)0) { + usedCopy = newOp(1,point->getAddr()); + opSetOpcode(usedCopy, CPUI_COPY); + newUniqueOut(vn->getSize(), usedCopy); + opSetInput(usedCopy, vn, 0); + opInsertBefore(usedCopy, point); + } + if (otherOp != (PcodeOp *)0 && otherOp != usedCopy) { + totalReplace(otherOp->getOut(),usedCopy->getOut()); + opDestroy(otherOp); + } + return usedCopy->getOut(); +} + /// \brief Trace a boolean value to a set of PcodeOps that can be changed to flip the boolean value /// /// The boolean Varnode is either the output of the given PcodeOp or the @@ -1108,7 +1168,7 @@ bool Funcdata::collapseIntMultMult(Varnode *vn) /// \param op is the given PcodeOp /// \param 
fliplist is the array that will hold the ops to flip /// \return 0 if the change normalizes, 1 if the change is ambivalent, 2 if the change does not normalize -int4 opFlipInPlaceTest(PcodeOp *op,vector &fliplist) +int4 Funcdata::opFlipInPlaceTest(PcodeOp *op,vector &fliplist) { Varnode *vn; @@ -1168,7 +1228,7 @@ int4 opFlipInPlaceTest(PcodeOp *op,vector &fliplist) /// facilitate the flip. /// \param data is the function being modified /// \param fliplist is the list of PcodeOps to modify -void opFlipInPlaceExecute(Funcdata &data,vector &fliplist) +void Funcdata::opFlipInPlaceExecute(vector &fliplist) { Varnode *vn; @@ -1180,53 +1240,29 @@ void opFlipInPlaceExecute(Funcdata &data,vector &fliplist) vn = op->getIn(0); PcodeOp *otherop = op->getOut()->loneDescend(); // Must be a lone descendant int4 slot = otherop->getSlot(op->getOut()); - data.opSetInput(otherop,vn,slot); // Propagate -vn- into otherop - data.opDestroy(op); + opSetInput(otherop,vn,slot); // Propagate -vn- into otherop + opDestroy(op); } else if (opc == CPUI_MAX) { if (op->code() == CPUI_BOOL_AND) - data.opSetOpcode(op,CPUI_BOOL_OR); + opSetOpcode(op,CPUI_BOOL_OR); else if (op->code() == CPUI_BOOL_OR) - data.opSetOpcode(op,CPUI_BOOL_AND); + opSetOpcode(op,CPUI_BOOL_AND); else throw LowlevelError("Bad flipInPlace op"); } else { - data.opSetOpcode(op,opc); + opSetOpcode(op,opc); if (flipyes) { - data.opSwapInput(op,0,1); + opSwapInput(op,0,1); if ((opc == CPUI_INT_LESSEQUAL)||(opc == CPUI_INT_SLESSEQUAL)) - data.replaceLessequal(op); + replaceLessequal(op); } } } } -/// \brief Get the earliest use/read of a Varnode in a specified basic block -/// -/// \param vn is the Varnode to search for -/// \param bl is the specified basic block in which to search -/// \return the earliest PcodeOp reading the Varnode or NULL -PcodeOp *earliestUseInBlock(Varnode *vn,BlockBasic *bl) - -{ - list::const_iterator iter; - PcodeOp *res = (PcodeOp *)0; - - for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) { - 
PcodeOp *op = *iter; - if (op->getParent() != bl) continue; - if (res == (PcodeOp *)0) - res = op; - else { - if (op->getSeqNum().getOrder() < res->getSeqNum().getOrder()) - res = op; - } - } - return res; -} - /// \brief Find a duplicate calculation of a given PcodeOp reading a specific Varnode /// /// We only match 1 level of calculation. Additionally the duplicate must occur in the @@ -1236,7 +1272,7 @@ PcodeOp *earliestUseInBlock(Varnode *vn,BlockBasic *bl) /// \param bl is the indicated basic block /// \param earliest is the specified op to be earlier than /// \return the discovered duplicate PcodeOp or NULL -PcodeOp *cseFindInBlock(PcodeOp *op,Varnode *vn,BlockBasic *bl,PcodeOp *earliest) +PcodeOp *Funcdata::cseFindInBlock(PcodeOp *op,Varnode *vn,BlockBasic *bl,PcodeOp *earliest) { list::const_iterator iter; @@ -1265,11 +1301,10 @@ PcodeOp *cseFindInBlock(PcodeOp *op,Varnode *vn,BlockBasic *bl,PcodeOp *earliest /// (depth 1 functional equivalence) eliminate the redundancy. Return the remaining (dominating) /// PcodeOp. If neither op dominates the other, both are eliminated, and a new PcodeOp /// is built at a commonly accessible point. 
-/// \param data is the function being modified /// \param op1 is the first of the given PcodeOps /// \param op2 is the second given PcodeOp /// \return the dominating PcodeOp -PcodeOp *cseElimination(Funcdata &data,PcodeOp *op1,PcodeOp *op2) +PcodeOp *Funcdata::cseElimination(PcodeOp *op1,PcodeOp *op2) { PcodeOp *replace; @@ -1288,25 +1323,25 @@ PcodeOp *cseElimination(Funcdata &data,PcodeOp *op1,PcodeOp *op2) else if (common == op2->getParent()) replace = op2; else { // Neither op is ancestor of the other - replace = data.newOp(op1->numInput(),common->getStop()); - data.opSetOpcode(replace,op1->code()); - data.newVarnodeOut(op1->getOut()->getSize(),op1->getOut()->getAddr(),replace); + replace = newOp(op1->numInput(),common->getStop()); + opSetOpcode(replace,op1->code()); + newVarnodeOut(op1->getOut()->getSize(),op1->getOut()->getAddr(),replace); for(int4 i=0;inumInput();++i) { if (op1->getIn(i)->isConstant()) - data.opSetInput(replace,data.newConstant(op1->getIn(i)->getSize(),op1->getIn(i)->getOffset()),i); + opSetInput(replace,newConstant(op1->getIn(i)->getSize(),op1->getIn(i)->getOffset()),i); else - data.opSetInput(replace,op1->getIn(i),i); + opSetInput(replace,op1->getIn(i),i); } - data.opInsertEnd(replace,common); + opInsertEnd(replace,common); } } if (replace != op1) { - data.totalReplace(op1->getOut(),replace->getOut()); - data.opDestroy(op1); + totalReplace(op1->getOut(),replace->getOut()); + opDestroy(op1); } if (replace != op2) { - data.totalReplace(op2->getOut(),replace->getOut()); - data.opDestroy(op2); + totalReplace(op2->getOut(),replace->getOut()); + opDestroy(op2); } return replace; } @@ -1329,10 +1364,9 @@ static bool compareCseHash(const pair &a,const pair > &list,vector &outlist) +void Funcdata::cseEliminateList(vector< pair > &list,vector &outlist) { PcodeOp *op1,*op2,*resop; @@ -1350,9 +1384,9 @@ void cseEliminateList(Funcdata &data,vector< pair > &list,vecto if ((!op1->isDead())&&(!op2->isDead())&&op1->isCseMatch(op2)) { Varnode *outvn1 = 
op1->getOut(); Varnode *outvn2 = op2->getOut(); - if ((outvn1 == (Varnode *)0)||data.isHeritaged(outvn1)) { - if ((outvn2 == (Varnode *)0)||data.isHeritaged(outvn2)) { - resop = cseElimination(data,op1,op2); + if ((outvn1 == (Varnode *)0)||isHeritaged(outvn1)) { + if ((outvn2 == (Varnode *)0)||isHeritaged(outvn2)) { + resop = cseElimination(op1,op2); outlist.push_back(resop->getOut()); } } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc index bfca3d6ee5..f61254494e 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc @@ -204,7 +204,7 @@ int4 RuleSelectCse::applyOp(PcodeOp *op,Funcdata &data) list.push_back(pair(hash,otherop)); } if (list.size()<=1) return 0; - cseEliminateList(data,list,vlist); + data.cseEliminateList(list,vlist); if (vlist.empty()) return 0; return 1; } @@ -1048,7 +1048,7 @@ PcodeOp *RulePushMulti::findSubstitute(Varnode *in1,Varnode *in2,BlockBasic *bb, Varnode *vn = op1->getIn(i); if (vn->isConstant()) continue; if (vn == op2->getIn(i)) // Find matching inputs to op1 and op2, - return cseFindInBlock(op1,vn,bb,earliest); // search for cse of op1 in bb + return Funcdata::cseFindInBlock(op1,vn,bb,earliest); // search for cse of op1 in bb } return (PcodeOp *)0; @@ -1087,7 +1087,7 @@ int4 RulePushMulti::applyOp(PcodeOp *op,Funcdata &data) if (op1->code() == CPUI_SUBPIECE) return 0; // SUBPIECE is pulled not pushed BlockBasic *bl = op->getParent(); - PcodeOp *earliest = earliestUseInBlock(op->getOut(),bl); + PcodeOp *earliest = bl->earliestUse(op->getOut()); if (op1->code() == CPUI_COPY) { // Special case of MERGE of 2 shadowing varnodes if (res==0) return 0; PcodeOp *substitute = findSubstitute(buf1[0],buf2[0],bl,earliest); @@ -3036,13 +3036,13 @@ int4 RuleMultiCollapse::applyOp(PcodeOp *op,Funcdata &data) copyr->clearMark(); op = copyr->getDef(); if (func_eq) { // We have only 
functional equality - PcodeOp *earliest = earliestUseInBlock(op->getOut(),op->getParent()); + PcodeOp *earliest = op->getParent()->earliestUse(op->getOut()); newop = defcopyr->getDef(); // We must copy newop (defcopyr) PcodeOp *substitute = (PcodeOp *)0; for(int4 i=0;inumInput();++i) { Varnode *invn = newop->getIn(i); if (!invn->isConstant()) { - substitute = cseFindInBlock(newop,invn,op->getParent(),earliest); // Has newop already been copied in this block + substitute = Funcdata::cseFindInBlock(newop,invn,op->getParent(),earliest); // Has newop already been copied in this block break; } } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/subflow.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/subflow.cc index bcdccecfca..cafadf26f8 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/subflow.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/subflow.cc @@ -1797,21 +1797,33 @@ bool SplitFlow::doTrace(void) return true; } -/// If \b pointer Varnode is written by an INT_ADD, PTRSUB, or PTRADD from a another pointer -/// to a structure or array, update \b pointer Varnode, \b baseOffset, and \b ptrType to this. +/// If \b pointer Varnode is written by a COPY, INT_ADD, PTRSUB, or PTRADD from another pointer to a +/// - structure +/// - array OR +/// - to an implied array with the given base type +/// +/// then update \b pointer Varnode, \b baseOffset, and \b ptrType to this. 
+/// \param impliedBase if non-null is the allowed element data-type for an implied array /// \return \b true if \b pointer was successfully updated bool SplitDatatype::RootPointer::backUpPointer(Datatype *impliedBase) { if (!pointer->isWritten()) return false; + int4 off; PcodeOp *addOp = pointer->getDef(); OpCode opc = addOp->code(); - if (opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRADD) - return false; - Varnode *cvn = addOp->getIn(1); - if (!cvn->isConstant()) + if (opc == CPUI_PTRSUB || opc == CPUI_INT_ADD || opc == CPUI_PTRADD) { + Varnode *cvn = addOp->getIn(1); + if (!cvn->isConstant()) + return false; + off = (int4)cvn->getOffset(); + } + else if (opc == CPUI_COPY) + off = 0; + else { return false; + } Varnode *tmpPointer = addOp->getIn(0); Datatype *ct = tmpPointer->getTypeReadFacing(addOp); if (ct->getMetatype() != TYPE_PTR) @@ -1819,11 +1831,10 @@ bool SplitDatatype::RootPointer::backUpPointer(Datatype *impliedBase) Datatype *parent = ((TypePointer *)ct)->getPtrTo(); type_metatype meta = parent->getMetatype(); if (meta != TYPE_STRUCT && meta != TYPE_ARRAY) { - if (opc != CPUI_PTRADD || parent != impliedBase) + if ((opc != CPUI_PTRADD && opc != CPUI_COPY) || parent != impliedBase) return false; } ptrType = (TypePointer *)ct; - int4 off = (int4)cvn->getOffset(); if (opc == CPUI_PTRADD) off *= (int4)addOp->getIn(2)->getOffset(); off = AddrSpace::addressToByteInt(off, ptrType->getWordSize()); @@ -1832,10 +1843,11 @@ bool SplitDatatype::RootPointer::backUpPointer(Datatype *impliedBase) return true; } -/// The LOAD or STORE pointer Varnode is examined. If it is a pointer to the given data-type, the -/// root \b pointer is returned. If not, we try to recursively walk back through either PTRSUB or INT_ADD instructions, -/// until a pointer Varnode matching the data-type is found. Any accumulated offset, relative to the original -/// LOAD or STORE pointer is recorded in the \b baseOffset. If a matching pointer is not found, \b false is returned. 
+/// We search for a pointer to the specified data-type starting with the LOAD/STORE. If we don't immediately +/// find it, we back up one level (through a PTRSUB, PTRADD, or INT_ADD). If it isn't found after 1 hop, +/// \b false is returned. Once this pointer is found, we back up through any single path of nested TYPE_STRUCT +/// and TYPE_ARRAY offsets to establish the final root \b pointer, and \b true is returned. Any accumulated offset, +/// relative to the original LOAD or STORE pointer is recorded in the \b baseOffset. /// \param op is the LOAD or STORE /// \param valueType is the specific data-type to match /// \return \b true if the root pointer is found @@ -1843,11 +1855,11 @@ bool SplitDatatype::RootPointer::find(PcodeOp *op,Datatype *valueType) { Datatype *impliedBase = (Datatype *)0; - if (valueType->getMetatype() == TYPE_PARTIALSTRUCT) + if (valueType->getMetatype() == TYPE_PARTIALSTRUCT) // Strip off partial to get containing struct or array valueType = ((TypePartialStruct *)valueType)->getParent(); - else if (valueType->getMetatype() == TYPE_ARRAY) { + if (valueType->getMetatype() == TYPE_ARRAY) { // If the data-type is an array valueType = ((TypeArray *)valueType)->getBase(); - impliedBase = valueType; + impliedBase = valueType; // we allow an implied array (pointer to element) as a match } loadStore = op; baseOffset = 0; @@ -1864,6 +1876,7 @@ bool SplitDatatype::RootPointer::find(PcodeOp *op,Datatype *valueType) if (ptrType->getPtrTo() != valueType) return false; } + // The required pointer is found. We try to back up to pointers to containing structures or arrays for(int4 i=0;i<3;++i) { if (pointer->isAddrTied() || pointer->loneDescend() == (PcodeOp *)0) break; if (!backUpPointer(impliedBase)) @@ -1872,6 +1885,19 @@ bool SplitDatatype::RootPointer::find(PcodeOp *op,Datatype *valueType) return true; } +/// Add a COPY op from the \b pointer Varnode to temporary register and make it the new root \b pointer. 
+/// This guarantees that the \b pointer Varnode will not be modified by subsequent STOREs and +/// can be implicit in the expressions. +/// \param data is the containing function +/// \param followOp is the point where the COPY should be inserted +void SplitDatatype::RootPointer::duplicateToTemp(Funcdata &data,PcodeOp *followOp) + +{ + Varnode *newRoot = data.buildCopyTemp(pointer, followOp); + newRoot->updateType(ptrType, false, false); + pointer = newRoot; +} + /// If the pointer Varnode is no longer used, recursively check and remove the op producing it, /// which will be either an INT_ADD or PTRSUB, until the root \b pointer is reached or /// a Varnode still being used is encountered. @@ -1920,8 +1946,9 @@ Datatype *SplitDatatype::getComponent(Datatype *ct,int4 offset,bool &isHole) /// For the given data-type, taking into account configuration options, return: /// - -1 for not splittable -/// - 0 for data-type that needs to be split -/// - 1 for data-type that can be split multiple ways +/// - 0 for struct based data-type that needs to be split +/// - 1 for array based data-type that needs to be split +/// - 2 for primitive data-type that can be split multiple ways /// \param ct is the given data-type /// \return the categorization int4 SplitDatatype::categorizeDatatype(Datatype *ct) @@ -1933,18 +1960,18 @@ int4 SplitDatatype::categorizeDatatype(Datatype *ct) if (!splitArrays) break; subType = ((TypeArray *)ct)->getBase(); if (subType->getMetatype() != TYPE_UNKNOWN || subType->getSize() != 1) - return 0; + return 1; else - return 1; // unknown1 array does not need splitting and acts as (large) primitive + return 2; // unknown1 array does not need splitting and acts as (large) primitive case TYPE_PARTIALSTRUCT: subType = ((TypePartialStruct *)ct)->getParent(); if (subType->getMetatype() == TYPE_ARRAY) { if (!splitArrays) break; subType = ((TypeArray *)subType)->getBase(); if (subType->getMetatype() != TYPE_UNKNOWN || subType->getSize() != 1) - return 0; + 
return 1; else - return 1; // unknown1 array does not need splitting and acts as (large) primitive + return 2; // unknown1 array does not need splitting and acts as (large) primitive } else if (subType->getMetatype() == TYPE_STRUCT) { if (!splitStructures) break; @@ -1959,7 +1986,7 @@ int4 SplitDatatype::categorizeDatatype(Datatype *ct) case TYPE_INT: case TYPE_UINT: case TYPE_UNKNOWN: - return 1; + return 2; default: break; } @@ -1985,22 +2012,21 @@ bool SplitDatatype::testDatatypeCompatibility(Datatype *inBase,Datatype *outBase int4 outCategory = categorizeDatatype(outBase); if (outCategory < 0) return false; - if (outCategory != 0 && inCategory != 0) + if (outCategory == 2 && inCategory == 2) return false; if (!inConstant && inBase == outBase && inBase->getMetatype() == TYPE_STRUCT) return false; // Don't split a whole structure unless it is getting initialized from a constant - if (isLoadStore && outCategory == 1 && inBase->getMetatype() == TYPE_ARRAY) + if (isLoadStore && outCategory == 2 && inCategory == 1) return false; // Don't split array pointer writing into primitive - if (isLoadStore && inCategory == 1 && !inConstant && outBase->getMetatype() == TYPE_ARRAY) + if (isLoadStore && inCategory == 2 && !inConstant && outCategory == 1) return false; // Don't split primitive into an array pointer, TODO: We could check if primitive is defined by PIECE - if (isLoadStore && inCategory == 0 && outCategory == 0 && !inConstant && - inBase->getMetatype() == TYPE_ARRAY && outBase->getMetatype() == TYPE_ARRAY) + if (isLoadStore && inCategory == 1 && outCategory == 1 && !inConstant) return false; // Don't split copies between arrays bool inHole; bool outHole; int4 curOff = 0; int4 sizeLeft = inBase->getSize(); - if (inCategory == 1) { + if (inCategory == 2) { // If input is primitive while(sizeLeft > 0) { Datatype *curOut = getComponent(outBase,curOff,outHole); if (curOut == (Datatype *)0) return false; @@ -2017,7 +2043,7 @@ bool 
SplitDatatype::testDatatypeCompatibility(Datatype *inBase,Datatype *outBase } } } - else if (outCategory == 1) { + else if (outCategory == 2) { // If output is primitive while(sizeLeft > 0) { Datatype *curIn = getComponent(inBase,curOff,inHole); if (curIn == (Datatype *)0) return false; @@ -2555,6 +2581,8 @@ bool SplitDatatype::splitStore(PcodeOp *storeOp,Datatype *outType) buildInSubpieces(inVn,storeOp,inVarnodes); vector storePtrs; + if (storeRoot.pointer->isAddrTied()) + storeRoot.duplicateToTemp(data, storeOp); buildPointers(storeRoot.pointer, storeRoot.ptrType, storeRoot.baseOffset, storeOp, storePtrs, false); // Preserve original STORE object, so that INDIRECT references are still valid // but convert it into the first of the smaller STOREs diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/subflow.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/subflow.hh index 0f54a624b2..b7650fd851 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/subflow.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/subflow.hh @@ -178,6 +178,7 @@ class SplitDatatype { bool backUpPointer(Datatype *impliedBase); ///< Follow flow of \b pointer back thru INT_ADD or PTRSUB public: bool find(PcodeOp *op,Datatype *valueType); ///< Locate root pointer for underlying LOAD or STORE + void duplicateToTemp(Funcdata &data,PcodeOp *followOp); ///< COPY the root varnode into a temp register void freePointerChain(Funcdata &data); ///< Remove unused pointer calculations }; Funcdata &data; ///< The containing function diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc index 229d07c0c6..effd94d644 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc @@ -2282,6 +2282,19 @@ TypePartialStruct::TypePartialStruct(Datatype *contain,int4 off,int4 sz,Datatype offset = off; } +/// If the parent is an array, return the element data-type. 
Otherwise return the \b stripped data-type. +/// \return the array element data-type or the \b stripped data-type. +Datatype *TypePartialStruct::getComponentForPtr(void) const + +{ + if (container->getMetatype() == TYPE_ARRAY) { + Datatype *eltype = ((TypeArray *)container)->getBase(); + if (eltype->getMetatype() != TYPE_UNKNOWN && (offset % eltype->getAlignSize()) == 0) + return eltype; + } + return stripped; +} + void TypePartialStruct::printRaw(ostream &s) const { @@ -3780,21 +3793,6 @@ TypePointer *TypeFactory::getTypePointer(int4 s,Datatype *pt,uint4 ws,const stri return res; } -/// Don't create more than a depth of 1, i.e. ptr->ptr -/// \param s is the size of the pointer -/// \param pt is the pointed-to data-type -/// \param ws is the wordsize associated with the pointer -/// \return the TypePointer object -TypePointer *TypeFactory::getTypePointerNoDepth(int4 s,Datatype *pt,uint4 ws) - -{ - if (pt->getMetatype()==TYPE_PTR) { - // Make sure that at least we return a pointer to something the size of -pt- - pt = getBase(pt->getSize(),TYPE_UNKNOWN); // Pass back unknown * - } - return getTypePointer(s,pt,ws); -} - /// \param as is the number of elements in the desired array /// \param ao is the data-type of the array element /// \return the TypeArray object diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh index 0fb1a2891a..31ed62ab0f 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh @@ -564,6 +564,7 @@ public: TypePartialStruct(Datatype *contain,int4 off,int4 sz,Datatype *strip); ///< Constructor int4 getOffset(void) const { return offset; } ///< Get the byte offset into the containing data-type Datatype *getParent(void) const { return container; } ///< Get the data-type containing \b this piece + Datatype *getComponentForPtr(void) const; ///< Get (initial) component of array represented by \b this virtual void 
printRaw(ostream &s) const; virtual Datatype *getSubType(int8 off,int8 *newoff) const; virtual int4 getHoleSize(int4 off) const; @@ -792,7 +793,6 @@ public: TypePointer *getTypePointerStripArray(int4 s,Datatype *pt,uint4 ws); ///< Construct a pointer data-type, stripping an ARRAY level TypePointer *getTypePointer(int4 s,Datatype *pt,uint4 ws); ///< Construct an absolute pointer data-type TypePointer *getTypePointer(int4 s,Datatype *pt,uint4 ws,const string &n); ///< Construct a named pointer data-type - TypePointer *getTypePointerNoDepth(int4 s,Datatype *pt,uint4 ws); ///< Construct a depth limited pointer data-type TypeArray *getTypeArray(int4 as,Datatype *ao); ///< Construct an array data-type TypeStruct *getTypeStruct(const string &n); ///< Create an (empty) structure TypePartialStruct *getTypePartialStruct(Datatype *contain,int4 off,int4 sz); ///< Create a partial structure diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc index b9682339a0..541cb2462d 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc @@ -175,6 +175,27 @@ OpCode TypeOp::floatSignManipulation(PcodeOp *op) return CPUI_MAX; } +/// \brief Propagate a dereferenced data-type up to its pointer data-type +/// +/// Don't create more than a depth of 1, i.e. 
ptr->ptr +/// \param dt is the pointed-to data-type +/// \param sz is the size of the pointer +/// \param wordsz is the wordsize associated with the pointer +/// \return the TypePointer object +Datatype *TypeOp::propagateToPointer(TypeFactory *t,Datatype *dt,int4 sz,int4 wordsz) + +{ + type_metatype meta = dt->getMetatype(); + if (meta==TYPE_PTR) { + // Make sure that at least we return a pointer to something the size of -dt- + dt = t->getBase(dt->getSize(),TYPE_UNKNOWN); // Pass back unknown * + } + else if (meta == TYPE_PARTIALSTRUCT) { + dt = ((TypePartialStruct *)dt)->getComponentForPtr(); + } + return t->getTypePointer(sz,dt,wordsz); +} + /// \param t is the TypeFactory used to construct data-types /// \param opc is the op-code value the new object will represent /// \param n is the display name that will represent the op-code @@ -440,7 +461,7 @@ Datatype *TypeOpLoad::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn, Datatype *newtype; if (inslot == -1) { // Propagating output to input (value to ptr) AddrSpace *spc = op->getIn(0)->getSpaceFromConst(); - newtype = tlst->getTypePointerNoDepth(outvn->getSize(),alttype,spc->getWordSize()); + newtype = propagateToPointer(tlst,alttype,outvn->getSize(),spc->getWordSize()); } else if (alttype->getMetatype()==TYPE_PTR) { newtype = ((TypePointer *)alttype)->getPtrTo(); @@ -515,7 +536,7 @@ Datatype *TypeOpStore::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn Datatype *newtype; if (inslot==2) { // Propagating value to ptr AddrSpace *spc = op->getIn(0)->getSpaceFromConst(); - newtype = tlst->getTypePointerNoDepth(outvn->getSize(),alttype,spc->getWordSize()); + newtype = propagateToPointer(tlst,alttype,outvn->getSize(),spc->getWordSize()); } else if (alttype->getMetatype()==TYPE_PTR) { newtype = ((TypePointer *)alttype)->getPtrTo(); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.hh index f0d667feb5..72517448b5 100644 --- 
a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.hh @@ -180,6 +180,7 @@ public: /// \brief Return the floating-point operation associated with the \e sign bit manipulation by the given PcodeOp static OpCode floatSignManipulation(PcodeOp *op); + static Datatype *propagateToPointer(TypeFactory *t,Datatype *dt,int4 sz,int4 wordsz); }; // Major classes of operations