diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/architecture.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/architecture.cc index 8c193d66fa..3967e6a4d7 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/architecture.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/architecture.cc @@ -1301,6 +1301,7 @@ void Architecture::resetDefaultsInternal(void) flowoptions = FlowInfo::error_toomanyinstructions; max_instructions = 100000; infer_pointers = true; + analyze_for_loops = true; readonlypropagate = false; alias_block_level = 2; // Block structs and arrays by default } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/architecture.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/architecture.hh index 5e9071b561..2e07c92034 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/architecture.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/architecture.hh @@ -128,6 +128,7 @@ public: bool aggressive_ext_trim; ///< Aggressively trim inputs that look like they are sign extended bool readonlypropagate; ///< true if readonly values should be treated as constants bool infer_pointers; ///< True if we should infer pointers from constants that are likely addresses + bool analyze_for_loops; ///< True if we should attempt conversion of \e whiledo loops to \e for loops vector inferPtrSpaces; ///< Set of address spaces in which a pointer constant is inferable int4 funcptr_align; ///< How many bits of alignment a function ptr has uint4 flowoptions; ///< options passed to flow following engine diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/block.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/block.cc index 9b6d8d87e5..2fcb490e10 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/block.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/block.cc @@ -1254,7 +1254,16 @@ FlowBlock *BlockGraph::nextFlowAfter(const FlowBlock *bl) const return nextbl; } -void BlockGraph::finalizePrinting(const Funcdata &data) const +void 
BlockGraph::finalTransform(Funcdata &data) + +{ + // Recurse into all the substructures + vector::const_iterator iter; + for(iter=list.begin();iter!=list.end();++iter) + (*iter)->finalTransform(data); +} + +void BlockGraph::finalizePrinting(Funcdata &data) const { // Recurse into all the substructures @@ -2916,6 +2925,158 @@ void BlockIf::saveXmlBody(ostream &s) const } } +/// Try to find a Varnode that represents the controlling \e loop \e variable for \b this loop. +/// The Varnode must be: +/// - tested by the exit condition +/// - have a MULTIEQUAL in the head block +/// - have a modification coming in from the tail block +/// - the modification must be the last op or moveable to the last op +/// +/// If the loop variable is found, this routine sets the \e iterateOp and the \e loopDef. +/// \param cbranch is the CBRANCH implementing the loop exit +/// \param head is the head basic-block of the loop +/// \param tail is the tail basic-block of the loop +/// \param lastOp is the precomputed last PcodeOp of tail that isn't a BRANCH +void BlockWhileDo::findLoopVariable(PcodeOp *cbranch,BlockBasic *head,BlockBasic *tail,PcodeOp *lastOp) + +{ + Varnode *vn = cbranch->getIn(1); + if (!vn->isWritten()) return; // No loop variable found + PcodeOp *op = vn->getDef(); + int4 slot = tail->getOutRevIndex(0); + + PcodeOpNode path[4]; + int4 count = 0; + if (op->isCall() || op->isMarker()) { + return; + } + path[0].op = op; + path[0].slot = 0; + while(count>=0) { + PcodeOp *curOp = path[count].op; + int4 ind = path[count].slot++; + if (ind >= curOp->numInput()) { + count -= 1; + continue; + } + Varnode *nextVn = curOp->getIn(ind); + if (!nextVn->isWritten()) continue; + PcodeOp *defOp = nextVn->getDef(); + if (defOp->code() == CPUI_MULTIEQUAL) { + if (defOp->getParent() != head) continue; + Varnode *itvn = defOp->getIn(slot); + if (!itvn->isWritten()) continue; + PcodeOp *possibleIterate = itvn->getDef(); + if (possibleIterate->getParent() == tail) { // Found proper head/tail 
configuration + if (possibleIterate->isMarker()) + continue; // No iteration in tail + if (!possibleIterate->isMoveable(lastOp)) + continue; // Not the final statement + loopDef = defOp; + iterateOp = possibleIterate; + return; // Found the loop variable + } + } + else { + if (count == 3) continue; + if (defOp->isCall() || defOp->isMarker()) continue; + count += 1; + path[count].op = defOp; + path[count].slot = 0; + } + } + return; // No loop variable found +} + +/// Given a control flow loop, try to find a putative initializer PcodeOp for the loop variable. +/// The initializer must be read by \e loopDef and be in a block that +/// flows only into the loop. If an initializer is found, then +/// \e initializeOp is set and the lastOp (not including a branch) in the initializer +/// block is returned. Otherwise null is returned. +/// \param head is the head block of the loop +/// \param slot is the block input coming from the loop tail +/// \return the last PcodeOp in the initializer's block +PcodeOp *BlockWhileDo::findInitializer(BlockBasic *head,int4 slot) const + +{ + if (head->sizeIn() != 2) return (PcodeOp *)0; + slot = 1 - slot; + Varnode *initVn = loopDef->getIn(slot); + if (!initVn->isWritten()) return (PcodeOp *)0; + PcodeOp *res = initVn->getDef(); + if (res->isMarker()) return (PcodeOp *)0; + FlowBlock *initialBlock = res->getParent(); + if (initialBlock != head->getIn(slot)) + return (PcodeOp *)0; // Statement must terminate in block flowing to head + PcodeOp *lastOp = initialBlock->lastOp(); + if (lastOp == (PcodeOp *)0) return (PcodeOp *)0; + if (initialBlock->sizeOut() != 1) return (PcodeOp *)0; // Initializer block must flow only to for loop + if (lastOp->isBranch()) { + lastOp = lastOp->previousOp(); + if (lastOp == (PcodeOp *)0) return (PcodeOp *)0; + } + initializeOp = res; + return lastOp; +} + +/// For-loop initializer or iterator statements must be the final statement in +/// their respective basic block.
This method tests that iterateOp/initializeOp (specified +/// by \e slot) is the root of or can be turned into the root of a terminal statement. +/// The root output must be an explicit variable being read by the +/// \e loopDef MULTIEQUAL at the top of the loop. If the root is not the last +/// PcodeOp in the block, an attempt is made to move it. +/// Return the root PcodeOp if all these conditions are met, otherwise return null. +/// \param data is the function containing the while loop +/// \param slot is the slot read by \e loopDef from the output of the statement +/// \return an explicit statement or null +PcodeOp *BlockWhileDo::testTerminal(Funcdata &data,int4 slot) const + +{ + Varnode *vn = loopDef->getIn(slot); + if (!vn->isWritten()) return (PcodeOp *)0; + PcodeOp *finalOp = vn->getDef(); + BlockBasic *parentBlock = (BlockBasic *)loopDef->getParent()->getIn(slot); + PcodeOp *resOp = finalOp; + if (finalOp->code() == CPUI_COPY && finalOp->notPrinted()) { + vn = finalOp->getIn(0); + if (!vn->isWritten()) return (PcodeOp *)0; + resOp = vn->getDef(); + if (resOp->getParent() != parentBlock) return (PcodeOp *)0; + } + + if (!vn->isExplicit()) return (PcodeOp *)0; + if (resOp->notPrinted()) + return (PcodeOp *)0; // Statement MUST be printed + + // finalOp MUST be the last op in the basic block (except for the branch) + PcodeOp *lastOp = finalOp->getParent()->lastOp(); + if (lastOp->isBranch()) + lastOp = lastOp->previousOp(); + if (!data.moveRespectingCover(finalOp, lastOp)) + return (PcodeOp *)0; + + return resOp; +} + +/// This is a final sanity check on the \e iterate statement. If the statement is just a +/// CAST or COPY, we revert to displaying the whole loop using \e while. +/// \return \b true if the statement looks like a suitable for-loop iterator.
+bool BlockWhileDo::testIterateForm(void) const + +{ + PcodeOp *curOp = iterateOp; + OpCode opc = curOp->code(); + while(opc == CPUI_COPY || opc == CPUI_CAST) { + Varnode *vn = curOp->getIn(0); + if (!curOp->notPrinted()) + if (vn->isExplicit()) return false; // End of statement, no substantive op seen + if (!vn->isWritten()) return false; + curOp = vn->getDef(); + opc = curOp->code(); + } + return true; +} + void BlockWhileDo::markLabelBumpUp(bool bump) { @@ -2953,6 +3114,79 @@ FlowBlock *BlockWhileDo::nextFlowAfter(const FlowBlock *bl) const return nextbl; } +/// Determine if \b this block can be printed as a \e for loop, with an \e initializer statement +/// extracted from the previous block, and an \e iterator statement extracted from the body. +/// \param data is the function containing \b this loop +void BlockWhileDo::finalTransform(Funcdata &data) + +{ + BlockGraph::finalTransform(data); + if (!data.getArch()->analyze_for_loops) return; + if (hasOverflowSyntax()) return; + FlowBlock *copyBl = getFrontLeaf(); + if (copyBl == (FlowBlock *)0) return; + BlockBasic *head = (BlockBasic *)copyBl->subBlock(0); + if (head->getType() != t_basic) return; + PcodeOp *lastOp = getBlock(1)->lastOp(); // There must be a last op in body, for there to be an iterator statement + if (lastOp == (PcodeOp *)0) return; + BlockBasic *tail = lastOp->getParent(); + if (tail->sizeOut() != 1) return; + if (tail->getOut(0) != head) return; + PcodeOp *cbranch = getBlock(0)->lastOp(); + if (cbranch == (PcodeOp *)0 || cbranch->code() != CPUI_CBRANCH) return; + if (lastOp->isBranch()) { // Convert lastOp to -point- iterateOp must appear after + lastOp = lastOp->previousOp(); + if (lastOp == (PcodeOp *)0) return; + } + + findLoopVariable(cbranch, head, tail, lastOp); + if (iterateOp == (PcodeOp *)0) return; + + if (iterateOp != lastOp) { + data.opUninsert(iterateOp); + data.opInsertAfter(iterateOp, lastOp); + } + + // Try to set up initializer statement + lastOp = findInitializer(head, 
tail->getOutRevIndex(0)); + if (lastOp == (PcodeOp *)0) return; + if (!initializeOp->isMoveable(lastOp)) { + initializeOp = (PcodeOp *)0; // Turn it off + return; + } + if (initializeOp != lastOp) { + data.opUninsert(initializeOp); + data.opInsertAfter(initializeOp, lastOp); + } +} + +/// Assume that finalTransform() has run and that all HighVariable merging has occurred. +/// Do any final tests checking that the initialization and iteration statements are good. +/// Extract initialization and iteration statements from their basic blocks. +/// \param data is the function containing the loop +void BlockWhileDo::finalizePrinting(Funcdata &data) const + +{ + BlockGraph::finalizePrinting(data); // Continue recursing + if (iterateOp == (PcodeOp *)0) return; // For-loop printing not enabled + // TODO: We can check that iterate statement is not too complex + int4 slot = iterateOp->getParent()->getOutRevIndex(0); + iterateOp = testTerminal(data,slot); // Make sure iterator statement is explicit + if (iterateOp == (PcodeOp *)0) return; + if (!testIterateForm()) { + iterateOp = (PcodeOp *)0; + return; + } + if (initializeOp == (PcodeOp *)0) + findInitializer(loopDef->getParent(), slot); // Last chance initializer + if (initializeOp != (PcodeOp *)0) + initializeOp = testTerminal(data,1-slot); // Make sure initializer statement is explicit + + data.opMarkNonPrinting(iterateOp); + if (initializeOp != (PcodeOp *)0) + data.opMarkNonPrinting(initializeOp); +} + void BlockDoWhile::markLabelBumpUp(bool bump) { @@ -3083,7 +3317,7 @@ void BlockSwitch::grabCaseBasic(FlowBlock *switchbl,const vector &c } } -void BlockSwitch::finalizePrinting(const Funcdata &data) const +void BlockSwitch::finalizePrinting(Funcdata &data) const { BlockGraph::finalizePrinting(data); // Make sure to still recurse diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/block.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/block.hh index 30f867a35e..a6a8125727 100644 --- 
a/Ghidra/Features/Decompiler/src/decompile/cpp/block.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/block.hh @@ -170,7 +170,8 @@ public: virtual void flipInPlaceExecute(void); virtual bool isComplex(void) const { return true; } ///< Is \b this too complex to be a condition (BlockCondition) virtual FlowBlock *nextFlowAfter(const FlowBlock *bl) const; - virtual void finalizePrinting(const Funcdata &data) const {} ///< Make any final configurations necessary to print the block + virtual void finalTransform(Funcdata &data) {} ///< Do any structure driven final transforms + virtual void finalizePrinting(Funcdata &data) const {} ///< Make any final configurations necessary to print the block virtual void saveXmlHeader(ostream &s) const; ///< Save basic information as XML attributes virtual void restoreXmlHeader(const Element *el); ///< Restore basic information for XML attributes virtual void saveXmlBody(ostream &s) const {} ///< Save detail about components to an XML stream @@ -296,7 +297,8 @@ public: virtual void printRaw(ostream &s) const; virtual void emit(PrintLanguage *lng) const { lng->emitBlockGraph(this); } virtual FlowBlock *nextFlowAfter(const FlowBlock *bl) const; - virtual void finalizePrinting(const Funcdata &data) const; + virtual void finalTransform(Funcdata &data); + virtual void finalizePrinting(Funcdata &data) const; virtual void saveXmlBody(ostream &s) const; virtual void restoreXmlBody(List::const_iterator &iter,List::const_iterator enditer,BlockMap &resolver); void restoreXml(const Element *el,const AddrSpaceManager *m); ///< Restore \b this BlockGraph from an XML stream @@ -580,8 +582,23 @@ public: /// Overflow syntax refers to the situation where there is a proper BlockWhileDo structure but /// the conditional block is too long or complicated to emit as a single conditional expression. /// An alternate `while(true) { }` form is used instead. 
+/// +/// If an iterator op is provided, the block will be printed using \e for loop syntax, +/// `for(i=0;i<10;++i)` where an \e initializer statement and \e iterator statement are +/// printed alongside the \e condition statement. Otherwise, \e while loop syntax is used +/// `while(i<10)` class BlockWhileDo : public BlockGraph { + mutable PcodeOp *initializeOp; ///< Statement used as \e for loop initializer + mutable PcodeOp *iterateOp; ///< Statement used as \e for loop iterator + mutable PcodeOp *loopDef; ///< MULTIEQUAL merging loop variable + void findLoopVariable(PcodeOp *cbranch,BlockBasic *head,BlockBasic *tail,PcodeOp *lastOp); ///< Find a \e loop \e variable + PcodeOp *findInitializer(BlockBasic *head,int4 slot) const; ///< Find the for-loop initializer op + PcodeOp *testTerminal(Funcdata &data,int4 slot) const; ///< Test that given statement is terminal and explicit + bool testIterateForm(void) const; ///< Return \b false if the iterate statement is of an unacceptable form public: + BlockWhileDo(void) { initializeOp = (PcodeOp *)0; iterateOp = (PcodeOp *)0; loopDef = (PcodeOp *)0; } ///< Constructor + PcodeOp *getInitializeOp(void) const { return initializeOp; } + PcodeOp *getIterateOp(void) const { return iterateOp; } bool hasOverflowSyntax(void) const { return ((getFlags() & f_whiledo_overflow)!=0); } ///< Does \b this require overflow syntax void setOverflowSyntax(void) { setFlag(f_whiledo_overflow); } ///< Set that \b this requires overflow syntax virtual block_type getType(void) const { return t_whiledo; } @@ -590,6 +607,8 @@ public: virtual void printHeader(ostream &s) const; virtual void emit(PrintLanguage *lng) const { lng->emitBlockWhileDo(this); } virtual FlowBlock *nextFlowAfter(const FlowBlock *bl) const; + virtual void finalTransform(Funcdata &data); + virtual void finalizePrinting(Funcdata &data) const; }; /// \brief A loop structure where the condition is checked at the bottom. 
@@ -674,7 +693,7 @@ public: virtual void printHeader(ostream &s) const; virtual void emit(PrintLanguage *lng) const { lng->emitBlockSwitch(this); } virtual FlowBlock *nextFlowAfter(const FlowBlock *bl) const; - virtual void finalizePrinting(const Funcdata &data) const; + virtual void finalizePrinting(Funcdata &data) const; }; /// \brief Helper class for resolving cross-references while deserializing BlockGraph objects diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/blockaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/blockaction.cc index 86489e4e94..c5c8bc9a5a 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/blockaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/blockaction.cc @@ -2095,6 +2095,13 @@ void ConditionalJoin::clear(void) mergeneed.clear(); } +int4 ActionStructureTransform::apply(Funcdata &data) + +{ + data.getStructure().finalTransform(data); + return 0; +} + int4 ActionNormalizeBranches::apply(Funcdata &data) { diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/blockaction.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/blockaction.hh index abb04b91bb..09b1779dca 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/blockaction.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/blockaction.hh @@ -262,6 +262,19 @@ public: void clear(void); ///< Clear for a new test }; +/// \brief Give each control-flow structure an opportunity to make a final transform +/// +/// This is currently used to set up \e for loops via BlockWhileDo +class ActionStructureTransform : public Action { +public: + ActionStructureTransform(const string &g) : Action(0,"structuretransform",g) {} ///< Constructor + virtual Action *clone(const ActionGroupList &grouplist) const { + if (!grouplist.contains(getGroup())) return (Action *)0; + return new ActionStructureTransform(getGroup()); + } + virtual int4 apply(Funcdata &data); +}; + /// \brief Flip conditional control-flow so that \e preferred comparison operators are used /// /// 
This is used as an alternative to the standard algorithm that structures control-flow, when diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc index 7952042417..d90f8a07c5 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc @@ -5100,6 +5100,7 @@ void ActionDatabase::universalAction(Architecture *conf) act->addAction( actcleanup ); act->addAction( new ActionPreferComplement("blockrecovery") ); + act->addAction( new ActionStructureTransform("blockrecovery") ); act->addAction( new ActionNormalizeBranches("normalizebranches") ); act->addAction( new ActionAssignHigh("merge") ); act->addAction( new ActionMergeRequired("merge") ); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh index f062b47415..32492629b8 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh @@ -482,6 +482,8 @@ public: /// \brief End of all (alive) PcodeOp objects attached to a specific Address PcodeOpTree::const_iterator endOp(const Address &addr) const { return obank.end(addr); } + bool moveRespectingCover(PcodeOp *op,PcodeOp *lastOp); ///< Move given op past \e lastOp respecting covers if possible + // Jumptable routines JumpTable *linkJumpTable(PcodeOp *op); ///< Link jump-table with a given BRANCHIND JumpTable *findJumpTable(const PcodeOp *op) const; ///< Find a jump-table associated with a given BRANCHIND diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc index d44ccf45fd..e4a4c85144 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc @@ -1361,3 +1361,54 @@ void cseEliminateList(Funcdata &data,vector< pair > 
&list,vecto liter2++; } } + +/// This routine should be called only after Varnode merging and explicit/implicit attributes have +/// been calculated. Determine if the given op can be moved (only within its basic block) to +/// after \e lastOp. The output of any PcodeOp moved across must not be involved, directly or +/// indirectly, with any variable in the expression rooted at the given op. +/// If the move is possible, perform the move. +/// \param op is the given PcodeOp +/// \param lastOp is the PcodeOp to move past +/// \return \b true if the move is possible +bool Funcdata::moveRespectingCover(PcodeOp *op,PcodeOp *lastOp) + +{ + if (op == lastOp) return true; // Nothing to move past + if (op->isCall()) return false; + PcodeOp *prevOp = (PcodeOp *)0; + if (op->code() == CPUI_CAST) { + Varnode *vn = op->getIn(0); + if (!vn->isExplicit()) { // If CAST is part of expression, we need to move the previous op as well + if (!vn->isWritten()) return false; + prevOp = vn->getDef(); + if (prevOp->isCall()) return false; + if (op->previousOp() != prevOp) return false; // Previous op must exist and feed into the CAST + } + } + Varnode *rootvn = op->getOut(); + vector highList; + int4 typeVal = HighVariable::markExpression(rootvn, highList); + PcodeOp *curOp = op; + do { + PcodeOp *nextOp = curOp->nextOp(); + OpCode opc = nextOp->code(); + if (opc != CPUI_COPY && opc != CPUI_CAST) break; // Limit ourselves to only crossing COPY and CAST ops + if (rootvn == nextOp->getIn(0)) break; // Data-flow order dependence + Varnode *copyVn = nextOp->getOut(); + if (copyVn->getHigh()->isMark()) break; // Direct interference: COPY writes what original op reads + if (typeVal != 0 && copyVn->isAddrTied()) break; // Possible indirect interference + curOp = nextOp; + } while(curOp != lastOp); + for(int4 i=0;iclearMark(); + if (curOp == lastOp) { // If we are able to cross everything + opUninsert(op); // Move -op- + opInsertAfter(op, lastOp); + if (prevOp != (PcodeOp *)0) { // If there was a 
CAST, move both ops + opUninsert(prevOp); + opInsertAfter(prevOp, lastOp); + } + return true; + } + return false; +} diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc index ac8eeaac86..a42520cee8 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc @@ -731,38 +731,35 @@ void Funcdata::clearDeadVarnodes(void) void Funcdata::calcNZMask(void) { - vector opstack; - vector slotstack; + vector opstack; list::const_iterator oiter; for(oiter=beginOpAlive();oiter!=endOpAlive();++oiter) { PcodeOp *op = *oiter; if (op->isMark()) continue; - opstack.push_back(op); - slotstack.push_back(0); + opstack.push_back(PcodeOpNode(op,0)); op->setMark(); do { // Get next edge - op = opstack.back(); - int4 slot = slotstack.back(); - if (slot >= op->numInput()) { // If no edge left - Varnode *outvn = op->getOut(); + PcodeOpNode &node( opstack.back() ); + if (node.slot >= node.op->numInput()) { // If no edge left + Varnode *outvn = node.op->getOut(); if (outvn != (Varnode *)0) { - outvn->nzm = op->getNZMaskLocal(true); + outvn->nzm = node.op->getNZMaskLocal(true); } opstack.pop_back(); // Pop a level - slotstack.pop_back(); continue; } - slotstack.back() = slot + 1; // Advance to next input + int4 oldslot = node.slot; + node.slot += 1; // Advance to next input // Determine if we want to traverse this edge - if (op->code() == CPUI_MULTIEQUAL) { - if (op->getParent()->isLoopIn(slot)) // Clip looping edges + if (node.op->code() == CPUI_MULTIEQUAL) { + if (node.op->getParent()->isLoopIn(oldslot)) // Clip looping edges continue; } // Traverse edge indicated by slot - Varnode *vn = op->getIn(slot); + Varnode *vn = node.op->getIn(oldslot); if (!vn->isWritten()) { if (vn->isConstant()) vn->nzm = vn->getOffset(); @@ -773,32 +770,32 @@ void Funcdata::calcNZMask(void) } } else if (!vn->getDef()->isMark()) { // If 
haven't traversed before - opstack.push_back(vn->getDef()); - slotstack.push_back(0); + opstack.push_back(PcodeOpNode(vn->getDef(),0)); vn->getDef()->setMark(); } } while(!opstack.empty()); } + vector worklist; // Clear marks and push ops with looping edges onto worklist for(oiter=beginOpAlive();oiter!=endOpAlive();++oiter) { PcodeOp *op = *oiter; op->clearMark(); if (op->code() == CPUI_MULTIEQUAL) - opstack.push_back(op); + worklist.push_back(op); } // Continue to propagate changes along all edges - while(!opstack.empty()) { - PcodeOp *op = opstack.back(); - opstack.pop_back(); + while(!worklist.empty()) { + PcodeOp *op = worklist.back(); + worklist.pop_back(); Varnode *vn = op->getOut(); if (vn == (Varnode *)0) continue; uintb nzmask = op->getNZMaskLocal(false); if (nzmask != vn->nzm) { vn->nzm = nzmask; for(oiter=vn->beginDescend();oiter!=vn->endDescend();++oiter) - opstack.push_back(*oiter); + worklist.push_back(*oiter); } } } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/jumptable.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/jumptable.cc index b78b499afc..ccbb667cf7 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/jumptable.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/jumptable.cc @@ -506,39 +506,33 @@ uintb JumpBasic::backup2Switch(Funcdata *fd,uintb output,Varnode *outvn,Varnode void JumpBasic::findDeterminingVarnodes(PcodeOp *op,int4 slot) { - vector path; - vector slotpath; - PcodeOp *curop; - Varnode *curvn; + vector path; bool firstpoint = false; // Have not seen likely switch variable yet - path.push_back(op); - slotpath.push_back(slot); + path.push_back(PcodeOpNode(op,slot)); do { // Traverse through tree of inputs to final address - curop = path.back(); - curvn = curop->getIn(slotpath.back()); + PcodeOpNode &node(path.back()); + Varnode *curvn = node.op->getIn(node.slot); if (isprune(curvn)) { // Here is a node of the tree if (ispoint(curvn)) { // Is it a possible switch variable if (!firstpoint) { // If it is the 
first possible - pathMeld.set(path,slotpath); // Take the current path as the result + pathMeld.set(path); // Take the current path as the result firstpoint = true; } else // If we have already seen at least one possible - pathMeld.meld(path,slotpath); + pathMeld.meld(path); } - slotpath.back() += 1; - while(slotpath.back() >= path.back()->numInput()) { + path.back().slot += 1; + while(path.back().slot >= path.back().op->numInput()) { path.pop_back(); - slotpath.pop_back(); if (path.empty()) break; - slotpath.back() += 1; + path.back().slot += 1; } } else { // This varnode is not pruned - path.push_back(curvn->getDef()); - slotpath.push_back(0); + path.push_back(PcodeOpNode(curvn->getDef(),0)); } } while(path.size() > 1); if (pathMeld.empty()) { // Never found a likely point, which means that @@ -785,7 +779,7 @@ void PathMeld::internalIntersect(vector &parentMap) /// \param cutOff is the number of PcodeOps with an input in the common path /// \param parentMap is the map from old common Varnodes to the new common Varnodes /// \return the index of the last (earliest) Varnode in the common path or -1 -int4 PathMeld::meldOps(const vector &path,int4 cutOff,const vector &parentMap) +int4 PathMeld::meldOps(const vector &path,int4 cutOff,const vector &parentMap) { // First update opMeld.rootVn with new intersection information @@ -804,7 +798,7 @@ int4 PathMeld::meldOps(const vector &path,int4 cutOff,const vector &path,const vector &slot) +/// \param path is the list of PcodeOpNode edges in the path (in reverse execution order) +void PathMeld::set(const vector &path) { for(int4 i=0;igetIn(slot[i]); - opMeld.push_back(RootedOp(op,i)); + const PcodeOpNode &node(path[i]); + Varnode *vn = node.op->getIn(node.slot); + opMeld.push_back(RootedOp(node.op,i)); commonVn.push_back(vn); } } @@ -921,23 +914,23 @@ void PathMeld::clear(void) /// Add the new path, recalculating the set of Varnodes common to all paths. 
/// Paths are trimmed to ensure that any path that splits from the common intersection /// must eventually rejoin. -/// \param path is the new path of PcodeOps to meld, in reverse execution order -/// \param slot is the set of Varnodes in the new path presented as input slots to the corresponding PcodeOp -void PathMeld::meld(vector &path,vector &slot) +/// \param path is the new path of PcodeOpNode edges to meld, in reverse execution order +void PathMeld::meld(vector &path) { vector parentMap; for(int4 i=0;igetIn(slot[i]); - vn->setMark(); // Mark varnodes in the new path, so its easy to see intersection + PcodeOpNode &node(path[i]); + node.op->getIn(node.slot)->setMark(); // Mark varnodes in the new path, so its easy to see intersection } internalIntersect(parentMap); // Calculate varnode intersection, and map from old intersection -> new int4 cutOff = -1; // Calculate where the cutoff point is in the new path for(int4 i=0;igetIn(slot[i]); + PcodeOpNode &node(path[i]); + Varnode *vn = node.op->getIn(node.slot); if (!vn->isMark()) { // If mark already cleared, we know it is in intersection cutOff = i + 1; // Cut-off must at least be past this -vn- } @@ -948,7 +941,6 @@ void PathMeld::meld(vector &path,vector &slot) if (newCutoff >= 0) // If not all ops could be ordered truncatePaths(newCutoff); // Cut off at the point where we couldn't order path.resize(cutOff); - slot.resize(cutOff); } /// The starting Varnode, common to all paths, is provided as an index. 
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/jumptable.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/jumptable.hh index a7c351685c..fdced4e67a 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/jumptable.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/jumptable.hh @@ -73,15 +73,15 @@ class PathMeld { vector commonVn; ///< Varnodes in common with all paths vector opMeld; ///< All the ops for the melded paths void internalIntersect(vector &parentMap); - int4 meldOps(const vector &path,int4 cutOff,const vector &parentMap); + int4 meldOps(const vector &path,int4 cutOff,const vector &parentMap); void truncatePaths(int4 cutPoint); public: void set(const PathMeld &op2); ///< Copy paths from another container - void set(const vector &path,const vector &slot); ///< Initialize \b this to be a single path + void set(const vector &path); ///< Initialize \b this to be a single path void set(PcodeOp *op,Varnode *vn); ///< Initialize \b this container to a single node "path" void append(const PathMeld &op2); ///< Append a new set of paths to \b this set of paths void clear(void); ///< Clear \b this to be an empty container - void meld(vector &path,vector &slot); ///< Meld a new path into \b this container + void meld(vector &path); ///< Meld a new path into \b this container void markPaths(bool val,int4 startVarnode); ///< Mark PcodeOps paths from the given start int4 numCommonVarnode(void) const { return commonVn.size(); } ///< Return the number of Varnodes common to all paths int4 numOps(void) const { return opMeld.size(); } ///< Return the number of PcodeOps across all paths diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/op.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/op.cc index d0e8d10c49..d80da176cd 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/op.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/op.cc @@ -171,6 +171,106 @@ bool PcodeOp::isCseMatch(const PcodeOp *op) const return true; } +/// Its possible for the 
order of operations to be rearranged in some instances but still keep +/// equivalent data-flow. Test if \b this operation can be moved to occur immediately after +/// a specified \e point operation. This currently only tests for movement within a basic block. +/// \param point is the specified point to move \b this after +/// \return \b true if the move is possible +bool PcodeOp::isMoveable(const PcodeOp *point) const + +{ + if (this == point) return true; // No movement necessary + bool movingLoad = false; + if (getEvalType() == PcodeOp::special) { + if (code() == CPUI_LOAD) + movingLoad = true; // Allow LOAD to be moved with additional restrictions + else + return false; // Don't move special ops + } + if (parent != point->parent) return false; // Not in the same block + if (output != (Varnode *)0) { + // Output cannot be moved past an op that reads it + list::const_iterator iter = output->beginDescend(); + list::const_iterator enditer = output->endDescend(); + while(iter != enditer) { + PcodeOp *readOp = *iter; + ++iter; + if (readOp->parent != parent) continue; + if (readOp->start.getOrder() <= point->start.getOrder()) + return false; // Is in the block and is read before (or at) -point- + } + } + // Only allow this op to be moved across a CALL in very restrictive circumstances + bool crossCalls = false; + if (getEvalType() != PcodeOp::special) { + // Check for a normal op where all inputs and output are not address tied + if (output != (Varnode *)0 && !output->isAddrTied() && !output->isPersist()) { + int4 i; + for(i=0;iisAddrTied() || vn->isPersist()) + break; + } + if (i == numInput()) + crossCalls = true; + } + } + vector tiedList; + for(int4 i=0;iisAddrTied()) + tiedList.push_back(vn); + } + list::iterator biter = basiciter; + do { + ++biter; + PcodeOp *op = *biter; + if (op->getEvalType() == PcodeOp::special) { + switch (op->code()) { + case CPUI_LOAD: + if (output != (Varnode *)0) { + if (output->isAddrTied()) return false; + } + break; + case 
CPUI_STORE: + if (movingLoad) + return false; + else { + if (!tiedList.empty()) return false; + if (output != (Varnode *)0) { + if (output->isAddrTied()) return false; + } + } + break; + case CPUI_INDIRECT: // Let thru, deal with what's INDIRECTed around separately + case CPUI_SEGMENTOP: + case CPUI_CPOOLREF: + break; + case CPUI_CALL: + case CPUI_CALLIND: + case CPUI_NEW: + if (!crossCalls) return false; + break; + default: + return false; + } + } + if (op->output != (Varnode *)0) { + if (movingLoad) { + if (op->output->isAddrTied()) return false; + } + for(int4 i=0;ioverlap(*op->output)>=0) + return false; + if (op->output->overlap(*vn)>=0) + return false; + } + } + } while(biter != point->basiciter); + return true; +} + /// Set the behavioral class (opcode) of this operation. For most applications this should only be called /// by the PcodeOpBank. This is fairly low-level but does cache various boolean flags associated with the opcode /// \param t_op is the behavioural class to set diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/op.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/op.hh index ed915d66c8..6c87300ea8 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/op.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/op.hh @@ -206,6 +206,7 @@ public: bool usesSpacebasePtr(void) const { return ((flags&PcodeOp::spacebase_ptr)!=0); } uintm getCseHash(void) const; ///< Return hash indicating possibility of common subexpression elimination bool isCseMatch(const PcodeOp *op) const; ///< Return \b true if this and \e op represent common subexpressions + bool isMoveable(const PcodeOp *point) const; ///< Can \b this be moved to after \e point, without disturbing data-flow TypeOp *getOpcode(void) const { return opcode; } ///< Get the opcode for this op OpCode code(void) const { return opcode->getOpcode(); } ///< Get the opcode id (enum) for this op bool isCommutative(void) const { return ((flags & PcodeOp::commutative)!=0); } ///< Return \b true if 
inputs commute @@ -229,6 +230,16 @@ public: bool inheritsSign(void) const { return opcode->inheritsSign(); } ///< Does this token inherit its sign from operands }; +/// \brief An edge in a data-flow path or graph +/// +/// A minimal node for traversing expressions in the data-flow +struct PcodeOpNode { + PcodeOp *op; ///< The p-code end-point of the edge + int4 slot; ///< Slot indicating the input Varnode end-point of the edge + PcodeOpNode(void) { op = (PcodeOp *)0; slot = 0; } ///< Unused constructor + PcodeOpNode(PcodeOp *o,int4 s) { op = o; slot = s; } ///< Constructor +}; + /// A map from sequence number (SeqNum) to PcodeOp typedef map PcodeOpTree; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/options.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/options.cc index 4e20a4a032..2ec50db12b 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/options.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/options.cc @@ -57,6 +57,7 @@ OptionDatabase::OptionDatabase(Architecture *g) registerOption(new OptionErrorTooManyInstructions()); registerOption(new OptionDefaultPrototype()); registerOption(new OptionInferConstPtr()); + registerOption(new OptionForLoops()); registerOption(new OptionInline()); registerOption(new OptionNoReturn()); registerOption(new OptionStructAlign()); @@ -245,6 +246,24 @@ string OptionInferConstPtr::apply(Architecture *glb,const string &p1,const strin return res; } +/// \class OptionForLoops +/// \brief Toggle whether the decompiler attempts to recover \e for-loop variables +/// +/// Setting the first parameter to "on" causes the decompiler to search for a suitable loop variable +/// controlling iteration of a \e while-do block. 
The \e for-loop displays the following on a single line: +/// - loop variable initializer (optional) +/// - loop condition +/// - loop variable incrementer +/// +string OptionForLoops::apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const + +{ + glb->analyze_for_loops = onOrOff(p1); + + string res = "Recovery of for-loops is " + p1; + return res; +} + /// \class OptionInline /// \brief Mark/unmark a specific function as \e inline /// diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/options.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/options.hh index 37c692077a..b18e0a1b46 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/options.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/options.hh @@ -96,6 +96,12 @@ public: virtual string apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const; }; +class OptionForLoops : public ArchOption { +public: + OptionForLoops(void) { name = "analyzeforloops"; } ///< Constructor + virtual string apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const; +}; + class OptionInline : public ArchOption { public: OptionInline(void) { name = "inline"; } ///< Constructor diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc index 430c64e324..cad78ea5ee 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc @@ -2629,11 +2629,68 @@ void PrintC::emitBlockIf(const BlockIf *bl) popMod(); } +/// Print the loop using the keyword \e for, followed by a semicolon separated +/// - Initializer statement +/// - Condition statment +/// - Iterate statement +/// +/// Then print the body of the loop +void PrintC::emitForLoop(const BlockWhileDo *bl) + +{ + const PcodeOp *op; + int4 indent; + + pushMod(); + unsetMod(no_branch|only_branch); + emitAnyLabelStatement(bl); + emit->tagLine(); + op = 
bl->getBlock(0)->lastOp(); + emit->tagOp("for",EmitXml::keyword_color,op); + emit->spaces(1); + int4 id1 = emit->openParen('('); + pushMod(); + setMod(comma_separate); + op = bl->getInitializeOp(); // Emit the (optional) initializer statement + if (op != (PcodeOp *)0) { + int4 id3 = emit->beginStatement(op); + emitExpression(op); + emit->endStatement(id3); + } + emit->print(";"); + emit->spaces(1); + bl->getBlock(0)->emit(this); // Emit the conditional statement + emit->print(";"); + emit->spaces(1); + op = bl->getIterateOp(); // Emit the iterator statement + int4 id4 = emit->beginStatement(op); + emitExpression(op); + emit->endStatement(id4); + popMod(); + emit->closeParen(')',id1); + emit->spaces(1); + indent = emit->startIndent(); + emit->print("{"); + setMod(no_branch); // Dont print goto at bottom of clause + int4 id2 = emit->beginBlock(bl->getBlock(1)); + bl->getBlock(1)->emit(this); + emit->endBlock(id2); + emit->stopIndent(indent); + emit->tagLine(); + emit->print("}"); + popMod(); +} + void PrintC::emitBlockWhileDo(const BlockWhileDo *bl) { const PcodeOp *op; int4 indent; + + if (bl->getIterateOp() != (PcodeOp *)0) { + emitForLoop(bl); + return; + } // whiledo block NEVER prints final branch pushMod(); unsetMod(no_branch|only_branch); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh index 54162016f5..e0da6900ba 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh @@ -157,6 +157,7 @@ protected: void emitAnyLabelStatement(const FlowBlock *bl); ///< Emit any required label statement for a given control-flow block void emitCommentGroup(const PcodeOp *inst); ///< Emit comments associated with a given statement void emitCommentFuncHeader(const Funcdata *fd); ///< Emit comments in the given function's header + void emitForLoop(const BlockWhileDo *bl); ///< Emit block as a \e for loop void opFunc(const PcodeOp 
*op); ///< Push a \e functional expression based on the given p-code op to the RPN stack void opTypeCast(const PcodeOp *op); ///< Push the given p-code op using type-cast syntax to the RPN stack void opHiddenFunc(const PcodeOp *op); ///< Push the given p-code op as a hidden token diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc index ef4da743b9..cdbe6fae1d 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc @@ -488,6 +488,62 @@ void HighVariable::saveXml(ostream &s) const s << ""; } +/// Given a Varnode at the root of an expression, we collect all the \e explicit HighVariables +/// involved in the expression. This should only be run after \e explicit and \e implicit +/// properties have been computed on Varnodes. The expression is traced back from the root +/// until explicit Varnodes are encountered; then their HighVariable is marked and added to the list. 
+/// The routine returns a value based on PcodeOps encountered in the expression: +/// - 1 for call instructions +/// - 2 for LOAD instructions +/// - 3 for both call and LOAD +/// - 0 for no calls or LOADS +/// +/// \param vn is the given root Varnode of the expression +/// \param highList will hold the collected HighVariables +/// \return a value based on call and LOAD instructions in the expression +int4 HighVariable::markExpression(Varnode *vn,vector &highList) + +{ + HighVariable *high = vn->getHigh(); + high->setMark(); + highList.push_back(high); + int4 retVal = 0; + if (!vn->isWritten()) return retVal; + + vector path; + PcodeOp *op = vn->getDef(); + if (op->isCall()) + retVal |= 1; + if (op->code() == CPUI_LOAD) + retVal |= 2; + path.push_back(PcodeOpNode(op,0)); + while(!path.empty()) { + PcodeOpNode &node(path.back()); + if (node.op->numInput() <= node.slot) { + path.pop_back(); + continue; + } + Varnode *vn = node.op->getIn(node.slot); + node.slot += 1; + if (vn->isAnnotation()) continue; + if (vn->isExplicit()) { + high = vn->getHigh(); + if (high->isMark()) continue; // Already in the list + high->setMark(); + highList.push_back(high); + continue; // Truncate at explicit + } + if (!vn->isWritten()) continue; + op = vn->getDef(); + if (op->isCall()) + retVal |= 1; + if (op->code() == CPUI_LOAD) + retVal |= 2; + path.push_back(PcodeOpNode(vn->getDef(),0)); + } + return retVal; +} + #ifdef MERGEMULTI_DEBUG /// \brief Check that there are no internal Cover intersections within \b this /// diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.hh index f97573c2be..98be353e1a 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.hh @@ -137,6 +137,7 @@ public: // Varnode *findGlobalRep(void) const; static bool compareName(Varnode *vn1,Varnode *vn2); ///< Determine which given Varnode is most nameable static bool 
compareJustLoc(const Varnode *a,const Varnode *b); ///< Compare based on storage location + static int4 markExpression(Varnode *vn,vector &highList); ///< Mark and collect variables in expression }; #endif diff --git a/Ghidra/Features/Decompiler/src/main/doc/decompileplugin.xml b/Ghidra/Features/Decompiler/src/main/doc/decompileplugin.xml index 75cafef9b1..7ea17934fb 100644 --- a/Ghidra/Features/Decompiler/src/main/doc/decompileplugin.xml +++ b/Ghidra/Features/Decompiler/src/main/doc/decompileplugin.xml @@ -2556,6 +2556,26 @@ + + Recover -for- loops + + + When this is toggle on, the decompiler attempts to pinpoint + variables that control the iteration over specific loops in the function body. + When these loop variables are discovered, the loop is + rendered using a standard for loop header + that contains an initializer statement, condition, and iterating statement. + + for (iVar2 = 10; iVar2 < len; iVar2 = iVar2 + 1) { ... + + + + If the toggle is off, the loop is displayed using + while syntax, with any initializer and + iterating statements mixed in with the loop body or preceding basic blocks. + + + Eliminate unreachable code diff --git a/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerOptions.html b/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerOptions.html index 655b75f59c..357af8cdcc 100644 --- a/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerOptions.html +++ b/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerOptions.html @@ -158,6 +158,28 @@

+Recover -for- loops +
+
+

+ When this is toggled on, the decompiler attempts to pinpoint + variables that control the iteration over specific loops in the function body. + When these loop variables are discovered, the loop is + rendered using a standard for loop header + that contains an initializer statement, condition, and iterating statement. +

+
+ for (iVar2 = 10; iVar2 < len; iVar2 = iVar2 + 1) { ... +
+

+

+

+ If the toggle is off, the loop is displayed using + while syntax, with any initializer and + iterating statements mixed in with the loop body or preceding basic blocks. +

+
+
Eliminate unreachable code
diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileDebug.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileDebug.java index 6887834a89..ae5006a547 100644 --- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileDebug.java +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileDebug.java @@ -442,8 +442,8 @@ public class DecompileDebug { Varnode.appendSpaceOffset(stringBuf, addr); stringBuf.append(">\n"); for (ContextSymbol sym : ctxsymbols) { - int sbit = sym.getLow(); - int ebit = sym.getHigh(); + int sbit = sym.getInternalLow(); + int ebit = sym.getInternalHigh(); int word = sbit / (8 * 4); int startbit = sbit - word * (8 * 4); int endbit = ebit - word * (8 * 4); diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileOptions.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileOptions.java index d20d935874..4b1bc2edb9 100644 --- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileOptions.java +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileOptions.java @@ -85,6 +85,13 @@ public class DecompileOptions { private final static boolean INFERCONSTPTR_OPTIONDEFAULT = true; private boolean inferconstptr; + private final static String ANALYZEFORLOOPS_OPTIONSTRING = "Analysis.Recover -for- loops"; + private final static String ANALYZEFORLOOPS_OPTIONDESCRIPTION = + "If set, the decompiler attempts to recover for-loop variables, including their initializer, condition, " + + "and incrementer statements. 
Loop variable bounds are displayed as a formal -for- loop header"; + private final static boolean ANALYZEFORLOOPS_OPTIONDEFAULT = true; + private boolean analyzeForLoops; + private final static String NULLTOKEN_OPTIONSTRING = "Display.Print 'NULL' for null pointers"; private final static String NULLTOKEN_OPTIONDESCRIPTION = "If set, any zero valued pointer (null pointer) will " + @@ -366,6 +373,7 @@ public class DecompileOptions { simplifyDoublePrecision = SIMPLIFY_DOUBLEPRECISION_OPTIONDEFAULT; ignoreunimpl = IGNOREUNIMPL_OPTIONDEFAULT; inferconstptr = INFERCONSTPTR_OPTIONDEFAULT; + analyzeForLoops = ANALYZEFORLOOPS_OPTIONDEFAULT; nullToken = NULLTOKEN_OPTIONDEFAULT; inplaceTokens = INPLACEOP_OPTIONDEFAULT; aliasBlock = ALIASBLOCK_OPTIONDEFAULT; @@ -426,6 +434,8 @@ public class DecompileOptions { SIMPLIFY_DOUBLEPRECISION_OPTIONDEFAULT); ignoreunimpl = opt.getBoolean(IGNOREUNIMPL_OPTIONSTRING, IGNOREUNIMPL_OPTIONDEFAULT); inferconstptr = opt.getBoolean(INFERCONSTPTR_OPTIONSTRING, INFERCONSTPTR_OPTIONDEFAULT); + analyzeForLoops = + opt.getBoolean(ANALYZEFORLOOPS_OPTIONSTRING, ANALYZEFORLOOPS_OPTIONDEFAULT); nullToken = opt.getBoolean(NULLTOKEN_OPTIONSTRING, NULLTOKEN_OPTIONDEFAULT); inplaceTokens = opt.getBoolean(INPLACEOP_OPTIONSTRING, INPLACEOP_OPTIONDEFAULT); aliasBlock = opt.getEnum(ALIASBLOCK_OPTIONSTRING, ALIASBLOCK_OPTIONDEFAULT); @@ -547,6 +557,10 @@ public class DecompileOptions { INFERCONSTPTR_OPTIONDEFAULT, new HelpLocation(HelpTopics.DECOMPILER, "AnalysisInferConstants"), INFERCONSTPTR_OPTIONDESCRIPTION); + opt.registerOption(ANALYZEFORLOOPS_OPTIONSTRING, + ANALYZEFORLOOPS_OPTIONDEFAULT, + new HelpLocation(HelpTopics.DECOMPILER, "AnalysisForLoops"), + ANALYZEFORLOOPS_OPTIONDESCRIPTION); opt.registerOption(NULLTOKEN_OPTIONSTRING, NULLTOKEN_OPTIONDEFAULT, new HelpLocation(HelpTopics.DECOMPILER, "DisplayNull"), @@ -739,6 +753,7 @@ public class DecompileOptions { appendOption(buf, "ignoreunimplemented", ignoreunimpl ? 
"on" : "off", "", ""); appendOption(buf, "inferconstptr", inferconstptr ? "on" : "off", "", ""); + appendOption(buf, "analyzeforloops", analyzeForLoops ? "on" : "off", "", ""); appendOption(buf, "nullprinting", nullToken ? "on" : "off", "", ""); appendOption(buf, "inplaceops", inplaceTokens ? "on" : "off", "", ""); appendOption(buf, "aliasblock", aliasBlock.getOptionString(), "", ""); diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/processors/sleigh/symbol/ContextSymbol.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/processors/sleigh/symbol/ContextSymbol.java index 32d9c1e035..99c4e5ef5e 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/processors/sleigh/symbol/ContextSymbol.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/processors/sleigh/symbol/ContextSymbol.java @@ -1,6 +1,5 @@ /* ### * IP: GHIDRA - * REVIEWED: YES * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,10 +19,11 @@ */ package ghidra.app.plugin.processors.sleigh.symbol; -import ghidra.app.plugin.processors.sleigh.*; +import ghidra.app.plugin.processors.sleigh.SleighLanguage; import ghidra.app.plugin.processors.sleigh.expression.*; -import ghidra.util.xml.*; -import ghidra.xml.*; +import ghidra.util.xml.SpecXmlUtils; +import ghidra.xml.XmlElement; +import ghidra.xml.XmlPullParser; /** * @@ -40,8 +40,40 @@ public class ContextSymbol extends ValueSymbol { public VarnodeSymbol getVarnode() { return vn; } - public int getLow() { return low; } - public int getHigh() { return high; } + /** + * Get starting bit of context value within its context register. + * @return the starting bit + */ + public int getLow() { + return low; + } + + /** + * Get ending bit of context value within its context register. 
+ * @return the ending bit + */ + public int getHigh() { + return high; + } + + /** + * Get the starting bit of the context value within the "global" buffer, after + * the values have been packed. + * @return the starting bit + */ + public int getInternalLow() { + return ((ContextField) patval).getStartBit(); + } + + /** + * Get the ending bit of the context value within the "global" buffer, after + * the values have been packed. + * @return the ending bit + */ + + public int getInternalHigh() { + return ((ContextField) patval).getEndBit(); + } public boolean followsFlow() { return flow; } @Override