diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc index b6090f8eb5..4f5fc9fee8 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc @@ -3367,6 +3367,9 @@ void ActionDeadCode::markConsumedParameters(FuncCallSpecs *fc,vector consumeVal = ~((uintb)0); else consumeVal = minimalmask(vn->getNZMask()); + int4 bytesConsumed = fc->getInputBytesConsumed(i); + if (bytesConsumed != 0) /* 0 = no estimate, all bytes presumed used, so the mask is left alone */ + consumeVal &= calc_mask(bytesConsumed); pushConsumed(consumeVal,vn,worklist); } } @@ -3394,6 +3397,10 @@ uintb ActionDeadCode::gatherConsumedReturn(Funcdata &data) consumeVal |= minimalmask(vn->getNZMask()); } } + int4 val = data.getFuncProto().getReturnBytesConsumed(); + if (val != 0) { /* 0 = no estimate, all bytes presumed used, so the mask is left alone */ + consumeVal &= calc_mask(val); + } return consumeVal; } @@ -4924,6 +4931,7 @@ void universal_action(Architecture *conf) actprop->addRule( new RulePtraddUndo("typerecovery") ); actprop->addRule( new RulePtrsubUndo("typerecovery") ); actprop->addRule( new RuleSegment("segment") ); + actprop->addRule( new RulePiecePathology("protorecovery") ); actprop->addRule( new RuleDoubleLoad("doubleload") ); actprop->addRule( new RuleDoubleIn("doubleprecis") ); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.cc index 641481e8ca..7129f75b0d 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.cc @@ -2924,6 +2924,7 @@ FuncProto::FuncProto(void) store = (ProtoStore *)0; flags = 0; injectid = -1; + returnBytesConsumed = 0; } /// \param op2 is the other function prototype to copy into \b this @@ -3095,6 +3096,19 @@ void FuncProto::setOutputLock(bool val) store->getOutput()->setTypeLock(val); } +/// This value can be used as a hint as to how much of the return value is important and +/// is used to inform the dead code \e 
consume algorithm.
+/// \param val is the estimated number of bytes or 0
+/// \return \b true if the value was changed
+bool FuncProto::setReturnBytesConsumed(int4 val)
+
+{
+  int4 oldVal = returnBytesConsumed;
+  if (oldVal == 0 || val < oldVal)	// Accept a first estimate, or one smaller than the current estimate
+    returnBytesConsumed = val;
+  return (returnBytesConsumed != oldVal);	// Only report a change if the stored estimate really moved
+}
+
 /// \brief Assuming \b this prototype is locked, calculate the \e extrapop
 ///
 /// If \e extrapop is unknown and \b this prototype is locked, try to directly
@@ -3142,6 +3156,7 @@ void FuncProto::clearUnlockedOutput(void)
   }
   else
     store->clearOutput();
+  returnBytesConsumed = 0;	// Back to "no estimate" (all bytes presumed consumed)
 }
 
 void FuncProto::clearInput(void)
@@ -4859,6 +4874,42 @@ void FuncCallSpecs::buildOutputFromTrials(Funcdata &data,vector &tria
   }
 }
 
+/// \brief Get the estimated number of bytes within the given parameter that are consumed
+///
+/// As a function is decompiled, there may be hints about how many of the bytes, within the
+/// storage location used to pass the parameter, are used by \b this sub-function. A non-zero
+/// value means that that many least significant bytes of the storage location are used. A value
+/// of zero means all bytes are presumed used.
+/// \param slot is the slot of the given input parameter
+/// \return the number of bytes used (or 0)
+int4 FuncCallSpecs::getInputBytesConsumed(int4 slot) const
+
+{
+  if (slot >= inputConsume.size())	// Nothing has been recorded for this slot yet
+    return 0;
+  return inputConsume[slot];
+}
+
+/// \brief Set the estimated number of bytes within the given parameter that are consumed
+///
+/// This provides a hint to the dead code \e consume algorithm, while examining the calling
+/// function, about how the given parameter within the subfunction is used.
+/// A non-zero value means that that many least significant bytes of the storage location
+/// are used. A value of zero means all bytes are presumed used.
+/// \param slot is the slot of the given input parameter
+/// \param val is the number of bytes consumed (or 0)
+/// \return \b true if there was a change in the estimate
+bool FuncCallSpecs::setInputBytesConsumed(int4 slot,int4 val) const
+
+{
+  while(inputConsume.size() <= slot)	// Grow the table so that \e slot is addressable; new entries mean "no estimate"
+    inputConsume.push_back(0);
+  int4 oldVal = inputConsume[slot];
+  if (oldVal == 0 || val < oldVal)	// Accept a first estimate, or one smaller than the current estimate
+    inputConsume[slot] = val;
+  return (inputConsume[slot] != oldVal);	// Only report a change if the stored estimate really moved
+}
+
 /// \brief Prepend any extra parameters if a paramshift is required
 void FuncCallSpecs::paramshiftModifyStart(void)
 
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.hh
index 460f652c90..ee746a3b00 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.hh
@@ -1168,6 +1168,7 @@ class FuncProto {
   vector effectlist;	///< Side-effects associated with non-parameter storage locations
   vector likelytrash;	///< Locations that may contain \e trash values
   int4 injectid;	///< (If non-negative) id of p-code snippet that should replace this function
+  int4 returnBytesConsumed;	///< Number of bytes of return value that are consumed by callers (0 = all bytes)
 protected:
   void paramShift(int4 paramshift);	///< Add parameters to the front of the input parameter list
   bool isParamshiftApplied(void) const { return ((flags&paramshift_applied)!=0); }	///< Has a parameter shift been applied
@@ -1220,6 +1221,14 @@ public:
   /// \return the id value corresponding to the specific call-fixup or -1 if there is no call-fixup
   int4 getInjectId(void) const { return injectid; }
 
+  /// \brief Get an estimate of the number of bytes consumed by callers of \b this prototype.
+  ///
+  /// A value of 0 means \e all possible bytes of the storage location are consumed.
+  /// \return the number of bytes or 0
+  int4 getReturnBytesConsumed(void) const { return returnBytesConsumed; }
+
+  bool setReturnBytesConsumed(int4 val);	///< Set the number of bytes consumed by callers of \b this
+
   /// \brief Does a function with \b this prototype never return
   bool isNoReturn(void) const { return ((flags & no_return)!=0); }
 
@@ -1436,6 +1445,7 @@ class FuncCallSpecs : public FuncProto {
   int4 matchCallCount;	///< Number of calls to this sub-function within the calling function
   ParamActive activeinput;	///< Info for recovering input parameters
   ParamActive activeoutput;	///< Info for recovering output parameters
+  mutable vector<int4> inputConsume;	///< Number of bytes consumed by sub-function, for each input parameter (mutable: written by the const setInputBytesConsumed())
   bool isinputactive;	///< Are we actively trying to recover input parameters
   bool isoutputactive;	///< Are we actively trying to recover output parameters
   bool isbadjumptable;	///< Was the call originally a jump-table we couldn't recover
@@ -1496,6 +1506,8 @@ public:
   void checkOutputTrialUse(Funcdata &data,vector &trialvn);
   void buildInputFromTrials(Funcdata &data);
   void buildOutputFromTrials(Funcdata &data,vector &trialvn);
+  int4 getInputBytesConsumed(int4 slot) const;
+  bool setInputBytesConsumed(int4 slot,int4 val) const;
   void paramshiftModifyStart(void);
   bool paramshiftModifyStop(Funcdata &data);
   uint4 hasEffectTranslate(const Address &addr,int4 size) const;
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.cc
index c7d2300c13..a09574c95b 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.cc
@@ -279,8 +279,8 @@ Varnode *Heritage::normalizeWriteSize(Varnode *vn,const Address &addr,int4 size)
       pieceaddr = addr;
     else
       pieceaddr = addr + (overlap+vn->getSize());
-    if (op->isCall() && callOpIndirectEffect(pieceaddr,mostsigsize,op)) {	// Unless CALL definitely has no effect on piece
-      newop = 
fd->newIndirectCreation(op,pieceaddr,mostsigsize,false);	// Don't create a new big read if write is from a CALL
+    if (op->isCall() && callOpIndirectEffect(pieceaddr,mostsigsize,op)) {	// Does CALL have an effect on piece
+      newop = fd->newIndirectCreation(op,pieceaddr,mostsigsize,false);	// Don't create a new big read if write is from a CALL
       mostvn = newop->getOut();
     }
     else {
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc
index f32a4b314f..1ac3a78556 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc
@@ -8657,3 +8657,194 @@ Varnode *RulePopcountBoolXor::getBooleanResult(Varnode *vn,int4 bitPos,int4 &con
   }
   return (Varnode *)0;	// Never reach here
 }
+
+/// \brief Return \b true if concatenating with a SUBPIECE of the given Varnode is unusual
+///
+/// \param vn is the given Varnode
+/// \param data is the function containing the Varnode
+/// \return \b true if the configuration is a pathology
+bool RulePiecePathology::isPathology(Varnode *vn,Funcdata &data)
+
+{
+  vector<PcodeOp *> worklist;	// MULTIEQUAL ops whose inputs still need to be traced
+  int4 pos = 0;	// Index of the MULTIEQUAL currently being expanded
+  int4 slot = 0;	// Next input slot to trace for that MULTIEQUAL
+  bool res = false;
+  for(;;) {
+    if (vn->isInput() && !vn->isPersist()) {	// A non-persistent function input counts as a pathology
+      res = true;
+      break;
+    }
+    PcodeOp *op = vn->getDef();
+    while(!res && op != (PcodeOp *)0) {	// Walk backward along the defining ops
+      switch(op->code()) {
+      case CPUI_COPY:
+        vn = op->getIn(0);
+        op = vn->getDef();
+        break;
+      case CPUI_MULTIEQUAL:
+        if (!op->isMark()) {	// The mark keeps each MULTIEQUAL from being queued twice
+          op->setMark();
+          worklist.push_back(op);
+        }
+        op = (PcodeOp *)0;
+        break;
+      case CPUI_INDIRECT:
+        if (op->getIn(1)->getSpace()->getType() == IPTR_IOP) {
+          PcodeOp *callOp = PcodeOp::getOpFromConst(op->getIn(1)->getAddr());
+          if (callOp->isCall()) {
+            FuncCallSpecs *fspec = data.getCallSpecs(callOp);
+            if (fspec != (FuncCallSpecs *) 0 && !fspec->isOutputActive()) {
+              res = true;
+            }
+          }
+        }
+        op = (PcodeOp *)0;
+        break;
+      case CPUI_CALL:
+      case CPUI_CALLIND:
+        {
+          FuncCallSpecs *fspec 
= data.getCallSpecs(op);
+          if (fspec != (FuncCallSpecs *)0 && !fspec->isOutputActive())
+            res = true;
+          op = (PcodeOp *)0;	// Stop at the CALL either way; leaving op set would re-enter this case forever
+          break;
+        }
+      default:
+        op = (PcodeOp *)0;
+        break;
+      }
+    }
+    if (res) break;
+    if (pos >= worklist.size()) break;
+    op = worklist[pos];
+    if (slot < op->numInput()) {	// Trace the next untested input of the current MULTIEQUAL
+      vn = op->getIn(slot);
+      slot += 1;
+    }
+    else {	// Current MULTIEQUAL is exhausted, move on to the next queued one
+      pos += 1;
+      if (pos >= worklist.size()) break;
+      vn = worklist[pos]->getIn(0);
+      slot = 1;
+    }
+  }
+  for(int4 i=0;i<worklist.size();++i)
+    worklist[i]->clearMark();
+  return res;
+}
+
+/// \brief Given a known pathological concatenation, trace it forward to CALLs and RETURNs
+///
+/// If the pathology reaches a CALL or RETURN, it is noted, through the FuncProto or FuncCallSpecs
+/// object, that the parameter or return value is only partially consumed. The subvariable flow
+/// rules can then decide whether or not to truncate this part of the data-flow.
+/// \param op is CPUI_PIECE op that is the pathological concatenation
+/// \param data is the function containing the data-flow
+/// \return a non-zero value if new bytes are labeled as unconsumed
+int4 RulePiecePathology::tracePathologyForward(PcodeOp *op,Funcdata &data)
+
+{
+  int4 count = 0;
+  const FuncCallSpecs *fProto;
+  vector<PcodeOp *> worklist;	// Ops whose output still needs its descendants examined
+  int4 pos = 0;
+  op->setMark();
+  worklist.push_back(op);
+  while(pos < worklist.size()) {
+    PcodeOp *curOp = worklist[pos];
+    pos += 1;
+    Varnode *outVn = curOp->getOut();
+    list<PcodeOp *>::const_iterator iter;
+    list<PcodeOp *>::const_iterator enditer = outVn->endDescend();
+    for(iter=outVn->beginDescend();iter!=enditer;++iter) {
+      curOp = *iter;	// From here on curOp is the op reading outVn
+      switch(curOp->code()) {
+      case CPUI_COPY:
+      case CPUI_INDIRECT:
+      case CPUI_MULTIEQUAL:
+        if (!curOp->isMark()) {	// The mark keeps each op from being queued twice
+          curOp->setMark();
+          worklist.push_back(curOp);
+        }
+        break;
+      case CPUI_CALL:
+      case CPUI_CALLIND:
+        fProto = data.getCallSpecs(curOp);
+        if (fProto != (FuncProto *)0 && !fProto->isInputActive() && !fProto->isInputLocked()) {
+          int4 bytesConsumed = op->getIn(1)->getSize();	// Size of the least significant piece of the concatenation
+          for(int4 i=1;i<curOp->numInput();++i) {
+            if (curOp->getIn(i) == outVn) {
+              if 
(fProto->setInputBytesConsumed(i, bytesConsumed))
+                count += 1;
+            }
+          }
+        }
+        break;
+      case CPUI_RETURN:
+        if (!data.getFuncProto().isOutputLocked()) {
+          if (data.getFuncProto().setReturnBytesConsumed(op->getIn(1)->getSize()))
+            count += 1;
+        }
+        break;
+      default:
+        break;
+      }
+    }
+  }
+  for(int4 i=0;i<worklist.size();++i)
+    worklist[i]->clearMark();
+  return count;
+}
+
+/// \class RulePiecePathology
+/// \brief Search for concatenations with unlikely things to inform return/parameter consumption calculation
+///
+/// For architectures that can read/write part of a general purpose register, a small return value can get concatenated
+/// with unrelated data when the function writes directly to part of the return register. This searches
+/// for a characteristic pathology:
+/// \code
+///     retreg = CALL();
+///     ...
+///     retreg = CONCAT(SUBPIECE(retreg,#4),smallval);
+/// \endcode
+void RulePiecePathology::getOpList(vector<uint4> &oplist) const
+
+{
+  oplist.push_back(CPUI_PIECE);
+}
+
+int4 RulePiecePathology::applyOp(PcodeOp *op,Funcdata &data)
+
+{
+  Varnode *vn = op->getIn(0);
+  if (!vn->isWritten()) return 0;
+  PcodeOp *subOp = vn->getDef();
+
+  // Make sure we are concatenating the most significant bytes of a truncation
+  OpCode opc = subOp->code();
+  if (opc == CPUI_SUBPIECE) {
+    if (subOp->getIn(1)->getOffset() == 0) return 0;	// Truncation starting at offset 0 is not a most significant piece
+    if (!isPathology(subOp->getIn(0),data)) return 0;
+  }
+  else if (opc == CPUI_INDIRECT) {
+    if (!subOp->isIndirectCreation()) return 0;
+    Varnode *retVn = op->getIn(1);
+    if (!retVn->isWritten()) return 0;
+    PcodeOp *callOp = retVn->getDef();
+    if (!callOp->isCall()) return 0;
+    FuncCallSpecs *fc = data.getCallSpecs(callOp);
+    if (fc == (FuncCallSpecs *)0) return 0;
+    if (!fc->isOutputLocked()) return 0;
+    Address addr = retVn->getAddr();
+    if (addr.getSpace()->isBigEndian())
+      addr = addr - vn->getSize();
+    else
+      addr = addr + retVn->getSize();
+    if (addr != vn->getAddr()) return 0;	// The created piece must sit directly on the most significant side of the CALL output
+  }
+  else
+    return 0;
+  return tracePathologyForward(op, data);
+}
+
diff --git 
a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh
index 7a12b0cbc3..da2689a4fc 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh
@@ -1432,4 +1432,16 @@ public:
   static Varnode *getBooleanResult(Varnode *vn,int4 bitPos,int4 &constRes);
 };
 
+class RulePiecePathology : public Rule {
+  static bool isPathology(Varnode *vn,Funcdata &data);	///< Is concatenating with a SUBPIECE of the given Varnode unusual
+  static int4 tracePathologyForward(PcodeOp *op,Funcdata &data);	///< Trace a pathological concatenation forward to CALLs and RETURNs
+public:
+  RulePiecePathology(const string &g) : Rule( g, 0, "piecepathology") {}	///< Constructor
+  virtual Rule *clone(const ActionGroupList &grouplist) const {
+    if (!grouplist.contains(getGroup())) return (Rule *)0;
+    return new RulePiecePathology(getGroup());
+  }
+  virtual void getOpList(vector<uint4> &oplist) const;
+  virtual int4 applyOp(PcodeOp *op,Funcdata &data);
+};
 #endif