From 216725f4cd708f82bedf5a8565c9e425db91b2c6 Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Tue, 22 Oct 2019 14:08:57 -0400 Subject: [PATCH 01/10] subvariableflow modifications --- .../Decompiler/src/decompile/cpp/subflow.cc | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/subflow.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/subflow.cc index 477f6f46a0..ce47c37d2b 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/subflow.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/subflow.cc @@ -367,7 +367,7 @@ bool SubvariableFlow::traceForward(ReplaceVarnode *rvn) break; } // Is the small variable getting zero padded into something that is fully consumed - if ((!aggressive)&&(calc_mask(outvn->getSize()) == outvn->getConsume())) { + if ((!aggressive)&&((outvn->getConsume() & rvn->mask) != outvn->getConsume())) { addSuggestedPatch(rvn,op,-1); hcount += 1; // Dealt with this descendant break; @@ -1310,18 +1310,25 @@ void SubvariableFlow::doReplacement(void) // where all the remaining bits are zero int4 sa = (*piter).slot; vector invec; + Varnode *inVn = getReplaceVarnode((*piter).in1); + int4 outSize = pullop->getOut()->getSize(); if (sa == 0) { - invec.push_back( getReplaceVarnode((*piter).in1) ); - fd->opSetOpcode( pullop, CPUI_INT_ZEXT ); + invec.push_back( inVn ); + OpCode opc = (inVn->getSize() == outSize) ? CPUI_COPY : CPUI_INT_ZEXT; + fd->opSetOpcode( pullop, opc ); fd->opSetAllInput(pullop,invec); } else { - PcodeOp *zextop = fd->newOp(1,pullop->getAddr()); - fd->opSetOpcode( zextop, CPUI_INT_ZEXT ); - Varnode *zextout = fd->newUniqueOut(pullop->getOut()->getSize(),zextop); - fd->opSetInput(zextop,getReplaceVarnode((*piter).in1),0); - fd->opInsertBefore(zextop,pullop); - invec.push_back(zextout); + if (inVn->getSize() != outSize) { + PcodeOp *zextop = fd->newOp(1,pullop->getAddr()); + fd->opSetOpcode( zextop, CPUI_INT_ZEXT ); + Varnode *zextout = fd->newUniqueOut(outSize,zextop); + fd->opSetInput(zextop,inVn,0); + fd->opInsertBefore(zextop,pullop); + invec.push_back(zextout); + } + else + invec.push_back(inVn); invec.push_back(fd->newConstant(4,sa)); fd->opSetAllInput(pullop,invec); fd->opSetOpcode( pullop, CPUI_INT_LEFT); From e014cfb3a5697afb7dffb43422bb5fcb345151a9 Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Tue, 22 Oct 2019 15:21:29 -0400 Subject: [PATCH 02/10] another subvariableflow modification --- Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc index b479dc95d5..5ea9bbff05 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc @@ -7230,7 +7230,7 @@ int4 RuleSubvarSubpiece::applyOp(PcodeOp *op,Funcdata &data) mask <<= 8*((int4)op->getIn(1)->getOffset()); bool aggressive = outvn->isPtrFlow(); if (!aggressive) { - if (mask != vn->getConsume()) return 0; + if ((vn->getConsume() & mask) != vn->getConsume()) return 0; if (op->getOut()->hasNoDescend()) return 0; } SubvariableFlow subflow(&data,vn,mask,aggressive,false); From 165377aad78d528c277e21db51d1b384344a0c4e Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Tue, 22 Oct 2019 15:32:24 -0400 Subject: [PATCH 03/10] modification to consume propagation --- .../src/decompile/cpp/coreaction.cc | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc index 2907863e83..19ea6232c7 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc @@ -2877,14 +2877,24 @@ void ActionDeadCode::propagateConsumed(vector &worklist) switch(op->code()) { case CPUI_INT_MULT: + b = coveringmask(outc); + if (op->getIn(1)->isConstant()) { + int4 leastSet = leastsigbit_set(op->getIn(1)->getOffset()); + if (leastSet >= 0) { + a = calc_mask(vn->getSize()) >> leastSet; + a &= b; + } + else + a = 0; + } + else + a = b; + pushConsumed(a,op->getIn(0),worklist); + pushConsumed(b,op->getIn(1),worklist); + break; case CPUI_INT_ADD: case CPUI_INT_SUB: - a = outc | (outc>>1); // Make sure all 1 bits below - a = a | (a>>2); // highest 1 bit are set - a = a | (a>>4); - a = a | (a>>8); - a = a | (a>>16); - a = a | (a>>32); + a = coveringmask(outc); // Make sure value is filled out as a contiguous mask pushConsumed(a,op->getIn(0),worklist); pushConsumed(a,op->getIn(1),worklist); break; From 992d32dd87f216c1dcaf7f267627189a1cba4a84 Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Wed, 23 Oct 2019 16:38:35 -0400 Subject: [PATCH 04/10] added RulePositiveDiv --- .../src/decompile/cpp/coreaction.cc | 1 + .../src/decompile/cpp/ruleaction.cc | 27 +++++++++++++++++++ .../src/decompile/cpp/ruleaction.hh | 11 ++++++++ 3 files changed, 39 insertions(+) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc index 19ea6232c7..3131a73e4a 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc @@ -4542,6 +4542,7 @@ void universal_action(Architecture *conf) actprop->addRule( new RuleHumptyOr("analysis") ); actprop->addRule( new RuleNegateIdentity("analysis") ); actprop->addRule( new RuleSubNormal("analysis") ); + actprop->addRule( new RulePositiveDiv("analysis") ); actprop->addRule( new RuleDivTermAdd("analysis") ); actprop->addRule( new RuleDivTermAdd2("analysis") ); actprop->addRule( new RuleDivOpt("analysis") ); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc index 5ea9bbff05..9cd6697cc5 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc @@ -6448,6 +6448,33 @@ int4 RuleSubNormal::applyOp(PcodeOp *op,Funcdata &data) return 1; } +/// \class RulePositiveDiv +/// \brief Signed division of positive values is unsigned division +/// +/// If the sign bit of both the numerator and denominator of a signed division (or remainder) +/// are zero, then convert to the unsigned form of the operation. +void RulePositiveDiv::getOpList(vector &oplist) const + +{ + oplist.push_back(CPUI_INT_SDIV); + oplist.push_back(CPUI_INT_SREM); +} + +int4 RulePositiveDiv::applyOp(PcodeOp *op,Funcdata &data) + +{ + int4 sa = op->getOut()->getSize(); + if (sa > sizeof(uintb)) return 0; + sa = sa * 8 - 1; + if (((op->getIn(0)->getNZMask() >> sa) & 1) != 0) + return 0; // Input 0 may be negative + if (((op->getIn(1)->getNZMask() >> sa) & 1) != 0) + return 0; // Input 1 may be negative + OpCode opc = (op->code() == CPUI_INT_SDIV) ? CPUI_INT_DIV : CPUI_INT_REM; + data.opSetOpcode(op, opc); + return 1; +} + /// \class RuleDivTermAdd /// \brief Simplify expressions associated with optimized division expressions /// diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh index 004786a369..fe6b160a11 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh @@ -1115,6 +1115,17 @@ public: // virtual int4 applyOp(PcodeOp *op,Funcdata &data); // }; +class RulePositiveDiv : public Rule { +public: + RulePositiveDiv(const string &g) : Rule( g, 0, "positivediv") {} ///< Constructor + virtual Rule *clone(const ActionGroupList &grouplist) const { + if (!grouplist.contains(getGroup())) return (Rule *)0; + return new RulePositiveDiv(getGroup()); + } + virtual void getOpList(vector &oplist) const; + virtual int4 applyOp(PcodeOp *op,Funcdata &data); +}; + class RuleDivTermAdd : public Rule { public: RuleDivTermAdd(const string &g) : Rule( g, 0, "divtermadd") {} ///< Constructor From 40d840085f1e15c272ae550624ae4bfe3127a9c0 Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Tue, 29 Oct 2019 09:27:42 -0400 Subject: [PATCH 05/10] added RuleOrConsume --- .../src/decompile/cpp/coreaction.cc | 1 + .../src/decompile/cpp/ruleaction.cc | 29 +++++++++++++++++++ .../src/decompile/cpp/ruleaction.hh | 10 +++++++ 3 files changed, 40 insertions(+) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc index 3131a73e4a..428ac0763c 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc @@ -4482,6 +4482,7 @@ void universal_action(Architecture *conf) actprop->addRule( new RuleIdentityEl("analysis") ); actprop->addRule( new RuleOrMask("analysis") ); actprop->addRule( new RuleAndMask("analysis") ); + actprop->addRule( new RuleOrConsume("analysis") ); actprop->addRule( new RuleOrCollapse("analysis") ); actprop->addRule( new RuleAndOrLump("analysis") ); actprop->addRule( new RuleShiftBitops("analysis") ); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc index 9cd6697cc5..39c0d3c937 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc @@ -390,6 +390,35 @@ int4 RuleAndMask::applyOp(PcodeOp *op,Funcdata &data) return 1; } +/// \class RuleOrConsume +/// \brief Simply OR with unconsumed input: `V = A | B => V = B if nzm(A) & consume(V) == 0 +void RuleOrConsume::getOpList(vector &oplist) const + +{ + oplist.push_back(CPUI_INT_OR); + oplist.push_back(CPUI_INT_XOR); +} + +int4 RuleOrConsume::applyOp(PcodeOp *op,Funcdata &data) + +{ + Varnode *outvn = op->getOut(); + int4 size = outvn->getSize(); + if (size > sizeof(uintb)) return 0; // FIXME: uintb should be arbitrary precision + uintb consume = outvn->getConsume(); + if ((consume & op->getIn(0)->getNZMask()) == 0) { + data.opRemoveInput(op,0); + data.opSetOpcode(op, CPUI_COPY); + return 1; + } + else if ((consume & op->getIn(1)->getNZMask()) == 0) { + data.opRemoveInput(op,1); + data.opSetOpcode(op, CPUI_COPY); + return 1; + } + return 0; +} + /// \class RuleOrCollapse /// \brief Collapse unnecessary INT_OR /// diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh index fe6b160a11..29113da32d 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh @@ -128,6 +128,16 @@ public: virtual void getOpList(vector &oplist) const; virtual int4 applyOp(PcodeOp *op,Funcdata &data); }; +class RuleOrConsume : public Rule { +public: + RuleOrConsume(const string &g) : Rule(g, 0, "orconsume") {} ///< Constructor + virtual Rule *clone(const ActionGroupList &grouplist) const { + if (!grouplist.contains(getGroup())) return (Rule *)0; + return new RuleOrConsume(getGroup()); + } + virtual void getOpList(vector &oplist) const; + virtual int4 applyOp(PcodeOp *op,Funcdata &data); +}; class RuleOrCollapse : public Rule { public: RuleOrCollapse(const string &g) : Rule(g, 0, "orcollapse") {} ///< Constructor From 5fbbef38ac188b1627cb8c23137405badb38076e Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Thu, 31 Oct 2019 12:20:45 -0400 Subject: [PATCH 06/10] MapState reconcileDatatypes --- .../Decompiler/src/decompile/cpp/varmap.cc | 72 +++++++++++++++---- .../Decompiler/src/decompile/cpp/varmap.hh | 5 +- 2 files changed, 61 insertions(+), 16 deletions(-) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.cc index 422197df71..a7925b4762 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.cc @@ -273,23 +273,26 @@ bool RangeHint::merge(RangeHint *b,AddrSpace *space,TypeFactory *typeFactory) return overlapProblems; } -/// Order the two ranges by the signed version of their offset, then by size, -/// then by data-type -/// \param a is the first range to compare -/// \param b is the second range -/// \return \b true if the first range is ordered before the second -bool RangeHint::compareRanges(const RangeHint *a,const RangeHint *b) +/// Compare (signed) offset, size, RangeType, type lock, and high index, in that order. +/// Datatype is \e not compared. +/// \param op2 is the other RangeHint to compare with \b this +/// \return -1, 0, or 1 depending on if \b this comes before, is equal to, or comes after +int4 RangeHint::compare(const RangeHint &op2) const { - if (a->sstart != b->sstart) - return (a->sstart < b->sstart); - if (a->size != b->size) - return (a->size < b->size); // Small sizes come first - type_metatype ameta = a->type->getMetatype(); - type_metatype bmeta = b->type->getMetatype(); - if (ameta != bmeta) - return (ameta < bmeta); // Order more specific types first - return false; //comp(x, x) must be false for strict weak ordering + if (sstart != op2.sstart) + return (sstart < op2.sstart) ? -1 : 1; + if (size != op2.size) + return (size < op2.size) ? -1 : 1; // Small sizes come first + if (rangeType != op2.rangeType) + return (rangeType < op2.rangeType) ? -1 : 1; + uint4 thisLock = flags & Varnode::typelock; + uint4 op2Lock = op2.flags & Varnode::typelock; + if (thisLock != op2Lock) + return (thisLock < op2Lock) ? -1 : 1; + if (highind != op2.highind) + return (highind < op2.highind) ? -1 : 1; + return 0; } /// \param spc is the (stack) address space associated with this function's local variables @@ -799,6 +802,44 @@ void MapState::addRange(uintb st,Datatype *ct,uint4 fl,RangeHint::RangeType rt,i #endif } +/// Assuming a sorted list, from among a sequence of RangeHints with the same start and size, select +/// the most specific data-type. Set all elements to use this data-type, and eliminate duplicates. +void MapState::reconcileDatatypes(void) + +{ + vector newList; + newList.reserve(maplist.size()); + int4 startPos = 0; + RangeHint *startHint = maplist[0]; + Datatype *startDatatype = startHint->type; + newList.push_back(startHint); + int4 curPos = 1; + while(curPos < maplist.size()) { + RangeHint *curHint = maplist[curPos++]; + if (curHint->start == startHint->start && curHint->size == startHint->size) { + Datatype *curDatatype = curHint->type; + if (curDatatype->typeOrder(*startDatatype) < 0) // Take the most specific variant of data-type + startDatatype = curDatatype; + if (curHint->compare(*startHint) != 0) + newList.push_back(curHint); // Keep the current hint if it is otherwise different + } + else { + while(startPos < newList.size()) { + newList[startPos]->type = startDatatype; + startPos += 1; + } + startHint = curHint; + startDatatype = startHint->type; + newList.push_back(startHint); + } + } + while(startPos < newList.size()) { + newList[startPos]->type = startDatatype; + startPos += 1; + } + maplist.swap(newList); +} + /// The given LoadGuard, which may be a LOAD or STORE is converted into an appropriate /// RangeHint, attempting to make use of any data-type or index information. /// \param guard is the given LoadGuard @@ -879,6 +920,7 @@ bool MapState::initialize(void) maplist.push_back(range); stable_sort(maplist.begin(),maplist.end(),RangeHint::compareRanges); + reconcileDatatypes(); iter = maplist.begin(); return true; } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.hh index 996baadf35..99ab76a80b 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.hh @@ -66,6 +66,7 @@ public: /// where the data-type starts, what data-type it might be, and how far it extends /// from the start point (possibly as an array). class RangeHint { + friend class MapState; friend class ScopeLocal; public: /// \brief The basic categorization of the range @@ -91,7 +92,8 @@ public: bool preferred(const RangeHint *b,bool reconcile) const; bool absorb(RangeHint *b); ///< Try to absorb the other RangeHint into \b this bool merge(RangeHint *b,AddrSpace *space,TypeFactory *typeFactory); ///< Try to form the union of \b this with another RangeHint - static bool compareRanges(const RangeHint *a,const RangeHint *b); ///< Compare to RangeHint pointers + int4 compare(const RangeHint &op2) const; ///< Order \b this with another RangeHint + static bool compareRanges(const RangeHint *a,const RangeHint *b) { return (a->compare(*b) < 0); } ///< Compare two RangeHint pointers }; class ProtoModel; @@ -148,6 +150,7 @@ class MapState { AliasChecker checker; ///< A collection of pointer Varnodes into our address space void addGuard(const LoadGuard &guard,OpCode opc,TypeFactory *typeFactory); ///< Add LoadGuard record as a hint to the collection void addRange(uintb st,Datatype *ct,uint4 fl,RangeHint::RangeType rt,int4 hi); ///< Add a hint to the collection + void reconcileDatatypes(void); ///< Decide on data-type for RangeHints at the same address public: #ifdef OPACTION_DEBUG mutable bool debugon; From cbbfc9ca18cab664dc74126389f3e01d51f5a205 Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Fri, 1 Nov 2019 10:12:31 -0400 Subject: [PATCH 07/10] Performing ActionSetCasts much later --- .../Decompiler/src/decompile/cpp/coreaction.cc | 2 +- .../Decompiler/src/decompile/cpp/varmap.cc | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc index 428ac0763c..82c7544253 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc @@ -4651,12 +4651,12 @@ void universal_action(Architecture *conf) act->addAction( new ActionHideShadow("merge") ); act->addAction( new ActionCopyMarker("merge") ); act->addAction( new ActionOutputPrototype("localrecovery") ); - act->addAction( new ActionSetCasts("casts") ); act->addAction( new ActionInputPrototype("fixateproto") ); act->addAction( new ActionRestructureHigh("localrecovery") ); act->addAction( new ActionMapGlobals("fixateglobals") ); act->addAction( new ActionDynamicSymbols("dynamic") ); act->addAction( new ActionNameVars("merge") ); + act->addAction( new ActionSetCasts("casts") ); act->addAction( new ActionFinalStructure("blockrecovery") ); act->addAction( new ActionPrototypeWarnings("protorecovery") ); act->addAction( new ActionStop("base") ); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.cc index a7925b4762..c770c09921 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/varmap.cc @@ -666,8 +666,21 @@ void AliasChecker::gatherAdditiveBase(Varnode *startvn,vector &addbase) vnqueue.push_back(AddBase(subvn,indexvn)); } break; - case CPUI_INT_ADD: case CPUI_INT_SUB: + if (vn == op->getIn(1)) { // Subtracting the pointer + nonadduse = true; + break; + } + othervn = op->getIn(1); + if (!othervn->isConstant()) + indexvn = othervn; + subvn = op->getOut(); + if (!subvn->isMark()) { + subvn->setMark(); + vnqueue.push_back(AddBase(subvn,indexvn)); + } + break; + case CPUI_INT_ADD: case CPUI_PTRADD: othervn = op->getIn(1); // Check if something else is being added in besides a constant if (othervn == vn) @@ -820,7 +833,7 @@ void MapState::reconcileDatatypes(void) Datatype *curDatatype = curHint->type; if (curDatatype->typeOrder(*startDatatype) < 0) // Take the most specific variant of data-type startDatatype = curDatatype; - if (curHint->compare(*startHint) != 0) + if (curHint->compare(*newList.back()) != 0) newList.push_back(curHint); // Keep the current hint if it is otherwise different } else { From cfc1177ac1219857cf3b4f17210b80139d77705a Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Fri, 1 Nov 2019 12:29:47 -0400 Subject: [PATCH 08/10] finalizing HighVariable from symbols --- .../Decompiler/src/decompile/cpp/coreaction.cc | 4 ++-- .../Decompiler/src/decompile/cpp/funcdata.hh | 4 ++-- .../src/decompile/cpp/funcdata_varnode.cc | 17 ++++++++++------- .../Decompiler/src/decompile/cpp/variable.cc | 13 ++++++++++++- .../Decompiler/src/decompile/cpp/variable.hh | 6 ++++-- 5 files changed, 30 insertions(+), 14 deletions(-) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc index 82c7544253..c34ce04ff7 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc @@ -1874,7 +1874,7 @@ int4 ActionRestructureVarnode::apply(Funcdata &data) bool aliasyes = data.isJumptableRecoveryOn() ? false : (numpass != 0); l1->restructureVarnode(aliasyes); // Note the alias calculation, may not be very good on the first pass - if (data.updateFlags(l1,false)) + if (data.syncVarnodesWithSymbols(l1,false)) count += 1; numpass += 1; @@ -1899,7 +1899,7 @@ int4 ActionRestructureHigh::apply(Funcdata &data) #endif l1->restructureHigh(); - if (data.updateFlags(l1,true)) + if (data.syncVarnodesWithSymbols(l1,true)) count += 1; #ifdef OPACTION_DEBUG diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh index 9ced5d3f61..617c7e7dfe 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh @@ -84,7 +84,7 @@ class Funcdata { // Low level Varnode functions void setVarnodeProperties(Varnode *vn) const; ///< Look-up boolean properties and data-type information HighVariable *assignHigh(Varnode *vn); ///< Assign a new HighVariable to a Varnode - bool updateFlags(VarnodeLocSet::const_iterator &iter,uint4 flags,Datatype *ct); + bool syncVarnodesWithSymbol(VarnodeLocSet::const_iterator &iter,uint4 flags,Datatype *ct); bool descend2Undef(Varnode *vn); ///< Transform all reads of the given Varnode to a special \b undefined constant void splitUses(Varnode *vn); ///< Make all reads of the given Varnode unique @@ -351,7 +351,7 @@ public: bool checkCallDoubleUse(const PcodeOp *opmatch,const PcodeOp *op,const Varnode *vn,const ParamTrial &trial) const; bool onlyOpUse(const Varnode *invn,const PcodeOp *opmatch,const ParamTrial &trial) const; bool ancestorOpUse(int4 maxlevel,const Varnode *invn,const PcodeOp *op,ParamTrial &trial) const; - bool updateFlags(const ScopeLocal *lm,bool typesyes); + bool syncVarnodesWithSymbols(const ScopeLocal *lm,bool typesyes); void splitVarnode(Varnode *vn,int4 lowsize,Varnode *&vnlo,Varnode *& vnhi); bool fillinReadOnly(Varnode *vn); ///< Replace the given Varnode with its (constant) value in the load image bool replaceVolatile(Varnode *vn); ///< Replace accesses of the given Varnode with \e volatile operations diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc index bce59397ab..372952954a 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc @@ -803,15 +803,16 @@ void Funcdata::calcNZMask(void) } } -/// \brief Update Varnode boolean properties based on (new) Symbol information +/// \brief Update Varnode properties based on (new) Symbol information /// /// Boolean properties \b addrtied, \b addrforce, \b auto_live, and \b nolocalalias /// for Varnodes are updated based on new Symbol information they map to. -/// The caller can elect to update data-type information as well. +/// The caller can elect to update data-type information as well, where Varnodes +/// and their associated HighVariables have their data-type finalized based symbols. /// \param lm is the Symbol scope within which to search for mapped Varnodes /// \param typesyes is \b true if the caller wants to update data-types /// \return \b true if any Varnode was updated -bool Funcdata::updateFlags(const ScopeLocal *lm,bool typesyes) +bool Funcdata::syncVarnodesWithSymbols(const ScopeLocal *lm,bool typesyes) { bool updateoccurred = false; @@ -859,13 +860,13 @@ bool Funcdata::updateFlags(const ScopeLocal *lm,bool typesyes) else flags = 0; } - if (updateFlags(iter,flags,ct)) + if (syncVarnodesWithSymbol(iter,flags,ct)) updateoccurred = true; } return updateoccurred; } -/// \brief Update boolean properties (and the data-type) for a set of Varnodes +/// \brief Update properties (and the data-type) for a set of Varnodes associated with one Symbol /// /// The set of Varnodes with the same size and address all have their boolean properties /// updated to the given values. The set is specified by providing an iterator reference @@ -882,7 +883,7 @@ bool Funcdata::updateFlags(const ScopeLocal *lm,bool typesyes) /// \param flags holds the new set of boolean properties /// \param ct is the given data-type to set (or NULL) /// \return \b true if at least one Varnode was modified -bool Funcdata::updateFlags(VarnodeLocSet::const_iterator &iter,uint4 flags,Datatype *ct) +bool Funcdata::syncVarnodesWithSymbol(VarnodeLocSet::const_iterator &iter,uint4 flags,Datatype *ct) { VarnodeLocSet::const_iterator enditer; @@ -914,9 +915,11 @@ bool Funcdata::updateFlags(VarnodeLocSet::const_iterator &iter,uint4 flags,Datat vn->setFlags(flags); vn->clearFlags((~flags)&mask); } - if (ct != (Datatype *)0) + if (ct != (Datatype *)0) { if (vn->updateType(ct,false,false)) updateoccurred = true; + vn->getHigh()->finalizeDatatype(ct); // Permanently set the data-type on the HighVariable + } } while(iter != enditer); return updateoccurred; } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc index 434b88cca1..42a7cf13da 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc @@ -99,6 +99,8 @@ void HighVariable::updateType(void) const Varnode *vn; if ((highflags&HighVariable::typedirty)==0) return; // Type is up to date + highflags &= ~HighVariable::typedirty; // Mark type as clean + if ((highflags & type_finalized)!=0) return; // Type has been finalized vn = getTypeRepresentative(); type = vn->getType(); @@ -106,7 +108,6 @@ void HighVariable::updateType(void) const flags &= ~Varnode::typelock; if (vn->isTypeLock()) flags |= Varnode::typelock; - highflags &= ~HighVariable::typedirty; // Mark type as clean } /// Compare two Varnode objects based just on their storage address @@ -198,6 +199,16 @@ void HighVariable::remove(Varnode *vn) } } +/// The data-type its dirtying mechanism is disabled. The data-type will not change, unless +/// this method is called again. +/// \param tp is the data-type to set +void HighVariable::finalizeDatatype(Datatype *tp) + +{ + type = tp; + highflags |= type_finalized; +} + /// The lists of members are merged and the other HighVariable is deleted. /// \param tv2 is the other HighVariable to merge into \b this /// \param isspeculative is \b true to keep the new members in separate \e merge classes diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.hh index edb3a23508..07f8f3332e 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.hh @@ -47,7 +47,8 @@ public: typedirty = 2, ///< The data-type for the HighVariable is dirty coverdirty = 4, ///< The cover for the HighVariable is dirty copy_in1 = 8, ///< There exists at least 1 COPY into \b this HighVariable from other HighVariables - copy_in2 = 16 ///< There exists at least 2 COPYs into \b this HighVariable from other HighVariables + copy_in2 = 16, ///< There exists at least 2 COPYs into \b this HighVariable from other HighVariables + type_finalized = 32 ///< Set if a final data-type is locked in and dirtying is disabled }; private: friend class Merge; @@ -68,6 +69,7 @@ private: void clearCopyIns(void) const { highflags &= ~(copy_in1 | copy_in2); } ///< Clear marks indicating COPYs into \b this bool hasCopyIn1(void) const { return ((highflags©_in1)!=0); } ///< Is there at least one COPY into \b this bool hasCopyIn2(void) const { return ((highflags©_in2)!=0); } ///< Is there at least two COPYs into \b this + void merge(HighVariable *tv2,bool isspeculative); ///< Merge another HighVariable into \b this public: HighVariable(Varnode *vn); ///< Construct a HighVariable with a single member Varnode Datatype *getType(void) const { updateType(); return type; } ///< Get the data-type @@ -90,6 +92,7 @@ public: void coverDirty(void) const { highflags |= HighVariable::coverdirty; } ///< Mark the cover as \e dirty void typeDirty(void) const { highflags |= HighVariable::typedirty; } ///< Mark the data-type as \e dirty void remove(Varnode *vn); ///< Remove a member Varnode from \b this + void finalizeDatatype(Datatype *tp); ///< Set a final datatype for \b this variable /// \brief Print details of the cover for \b this (for debug purposes) /// @@ -102,7 +105,6 @@ public: Varnode *getInputVarnode(void) const; ///< Find (the) input member Varnode Varnode *getTypeRepresentative(void) const; ///< Get a member Varnode with the strongest data-type Varnode *getNameRepresentative(void) const; ///< Get a member Varnode that dictates the naming of \b this HighVariable - void merge(HighVariable *tv2,bool isspeculative); ///< Merge another HighVariable into \b this bool isMapped(void) const { updateFlags(); return ((flags&Varnode::mapped)!=0); } ///< Return \b true if \b this is mapped bool isPersist(void) const { updateFlags(); return ((flags&Varnode::persist)!=0); } ///< Return \b true if \b this is a global variable bool isAddrTied(void) const { updateFlags(); return ((flags&Varnode::addrtied)!=0); } ///< Return \b true if \b this is \e address \e ties From 9a3ab3863145ce835e77bef5e6e382bbed1ac84b Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Fri, 1 Nov 2019 16:02:36 -0400 Subject: [PATCH 09/10] PTRADD adjustments post ActionRestructureHigh --- .../src/decompile/cpp/coreaction.cc | 13 +++++-- .../Decompiler/src/decompile/cpp/funcdata.hh | 1 + .../src/decompile/cpp/funcdata_op.cc | 36 +++++++++++++++++++ .../src/decompile/cpp/ruleaction.cc | 21 ++--------- 4 files changed, 50 insertions(+), 21 deletions(-) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc index c34ce04ff7..8b6532a23f 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc @@ -2059,17 +2059,24 @@ int4 ActionSetCasts::apply(Funcdata &data) for(iter=bb->beginOp();iter!=bb->endOp();++iter) { op = *iter; if (op->notPrinted()) continue; - if (op->code() == CPUI_CAST) continue; + OpCode opc = op->code(); + if (opc == CPUI_CAST) continue; + if (opc == CPUI_PTRADD) { // Check for PTRADD that no longer fits its pointer + int4 sz = (int4)op->getIn(2)->getOffset(); + TypePointer *ct = (TypePointer *)op->getIn(0)->getHigh()->getType(); + if ((ct->getMetatype() != TYPE_PTR)||(ct->getPtrTo()->getSize() != AddrSpace::addressToByteInt(sz, ct->getWordSize()))) + data.opUndoPtradd(op,true); + } for(int4 i=0;inumInput();++i) // Do input casts first, as output may depend on input count += castInput(op,i,data,castStrategy); - if (op->code() == CPUI_LOAD) { + if (opc == CPUI_LOAD) { TypePointer *ptrtype = (TypePointer *)op->getIn(1)->getHigh()->getType(); int4 valsize = op->getOut()->getSize(); if ((ptrtype->getMetatype()!=TYPE_PTR)|| (ptrtype->getPtrTo()->getSize() != valsize)) data.warning("Load size is inaccurate",op->getAddr()); } - else if (op->code() == CPUI_STORE) { + else if (opc == CPUI_STORE) { TypePointer *ptrtype = (TypePointer *)op->getIn(1)->getHigh()->getType(); int4 valsize = op->getIn(2)->getSize(); if ((ptrtype->getMetatype()!=TYPE_PTR)|| diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh index 617c7e7dfe..137b23c5e7 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh @@ -428,6 +428,7 @@ public: Varnode *createStackRef(AddrSpace *spc,uintb off,PcodeOp *op,Varnode *stackptr,bool insertafter); Varnode *opStackLoad(AddrSpace *spc,uintb off,uint4 sz,PcodeOp *op,Varnode *stackptr,bool insertafter); PcodeOp *opStackStore(AddrSpace *spc,uintb off,PcodeOp *op,bool insertafter); + void opUndoPtradd(PcodeOp *op,bool finalize); ///< Convert a CPUI_PTRADD back into a CPUI_INT_ADD /// \brief Start of PcodeOp objects with the given op-code list::const_iterator beginOp(OpCode opc) const { return obank.begin(opc); } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc index 17b4a0322d..a4ec654f6b 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc @@ -521,6 +521,42 @@ Varnode *Funcdata::opStackLoad(AddrSpace *spc,uintb off,uint4 sz,PcodeOp *op,Var return res; } +/// Convert the given CPUI_PTRADD into the equivalent CPUI_INT_ADD. This may involve inserting a +/// CPUI_INT_MULT PcodeOp. If finalization is requested and a new PcodeOp is needed, the output +/// Varnode is marked as \e implicit and has its data-type set +/// \param op is the given PTRADD +void Funcdata::opUndoPtradd(PcodeOp *op,bool finalize) + +{ + Varnode *multVn = op->getIn(2); + int4 multSize = multVn->getOffset(); // Size the PTRADD thinks we are pointing + + opRemoveInput(op,2); + opSetOpcode(op,CPUI_INT_ADD); + if (multSize == 1) return; // If no multiplier, we are done + Varnode *offVn = op->getIn(1); + if (offVn->isConstant()) { + uintb newVal = multSize * offVn->getOffset(); + newVal &= calc_mask(offVn->getSize()); + Varnode *newOffVn = newConstant(offVn->getSize(), newVal); + if (finalize) + newOffVn->updateType(offVn->getType(), false, false); + opSetInput(op,newOffVn,1); + return; + } + PcodeOp *multOp = newOp(2,op->getAddr()); + opSetOpcode(multOp,CPUI_INT_MULT); + Varnode *addVn = newUniqueOut(offVn->getSize(),multOp); + if (finalize) { + addVn->updateType(multVn->getType(), false, false); + addVn->setImplied(); + } + opSetInput(multOp,offVn,0); + opSetInput(multOp,multVn,1); + opSetInput(op,addVn,1); + opInsertBefore(multOp,op); +} + /// Make a clone of the given PcodeOp, copying control-flow properties as well. The data-type /// is \e not cloned. /// \param op is the PcodeOp to clone diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc index 39c0d3c937..5fbc713fb6 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc @@ -6105,14 +6105,11 @@ void RulePtraddUndo::getOpList(vector &oplist) const int4 RulePtraddUndo::applyOp(PcodeOp *op,Funcdata &data) { - int4 size; - Varnode *basevn,*offvn,*multvn,*addvn; - PcodeOp *multop; + Varnode *basevn; TypePointer *tp; if (!data.isTypeRecoveryOn()) return 0; - multvn = op->getIn(2); - size = multvn->getOffset(); // Size the PTRADD thinks we are pointing + int4 size = (int4)op->getIn(2)->getOffset(); // Size the PTRADD thinks we are pointing basevn = op->getIn(0); tp = (TypePointer *)basevn->getType(); if (tp->getMetatype() == TYPE_PTR) // Make sure we are still a pointer @@ -6122,19 +6119,7 @@ int4 RulePtraddUndo::applyOp(PcodeOp *op,Funcdata &data) return 0; } - // At this point we have a type mismatch to fix - data.opRemoveInput(op,2); - data.opSetOpcode(op,CPUI_INT_ADD); - if (size == 1) return 1; // If no multiplier, we are done - multop = data.newOp(2,op->getAddr()); - data.opSetOpcode(multop,CPUI_INT_MULT); - offvn = op->getIn(1); - addvn = data.newUniqueOut(offvn->getSize(),multop); - data.opSetInput(multop,offvn,0); - data.opSetInput(multop,multvn,1); - data.opSetInput(op,addvn,1); - data.opInsertBefore(multop,op); - + data.opUndoPtradd(op,false); return 1; } From a700c52275315cabf74002034909f83fa9788364 Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Mon, 4 Nov 2019 11:15:52 -0500 Subject: [PATCH 10/10] Adjustments to STORE casting --- .../Decompiler/src/decompile/cpp/typeop.cc | 38 ++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc index 0ceff89a11..3ca75d8eec 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc @@ -427,17 +427,37 @@ TypeOpStore::TypeOpStore(TypeFactory *t) : TypeOp(t,CPUI_STORE,"store") Datatype *TypeOpStore::getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const { - if (slot!=1) return (Datatype *)0; - Datatype *reqtype = op->getIn(2)->getHigh()->getType(); // Cast storage pointer to match what's being stored - Datatype *curtype = op->getIn(1)->getHigh()->getType(); + if (slot==0) return (Datatype *)0; + const Varnode *pointerVn = op->getIn(1); + Datatype *pointerType = pointerVn->getHigh()->getType(); + Datatype *valueType = op->getIn(2)->getHigh()->getType(); AddrSpace *spc = Address::getSpaceFromConst(op->getIn(0)->getAddr()); - if (curtype->getMetatype() == TYPE_PTR) - curtype = ((TypePointer *)curtype)->getPtrTo(); + int4 destSize; + if (pointerType->getMetatype() == TYPE_PTR) { + pointerType = ((TypePointer *)pointerType)->getPtrTo(); + destSize = pointerType->getSize(); + } else - return tlst->getTypePointer(op->getIn(1)->getSize(),reqtype,spc->getWordSize()); - reqtype = castStrategy->castStandard(reqtype,curtype,false,true); - if (reqtype == (Datatype *)0) return reqtype; - return tlst->getTypePointer(op->getIn(1)->getSize(),reqtype,spc->getWordSize()); + destSize = -1; + if (destSize != valueType->getSize()) { + if (slot == 1) + return tlst->getTypePointer(pointerVn->getSize(),valueType,spc->getWordSize()); + else + return (Datatype *)0; + } + if (slot == 1) { + if (pointerVn->isWritten() && pointerVn->getDef()->code() == CPUI_CAST) { + if (pointerVn->isImplied() && pointerVn->loneDescend() == op) { + // CAST is already in place, test if it is casting to the right type + Datatype *newType = tlst->getTypePointer(pointerVn->getSize(), valueType, spc->getWordSize()); + if (pointerVn->getHigh()->getType() != newType) + return newType; + } + } + return (Datatype *)0; + } + // If we reach here, cast the value, not the pointer + return castStrategy->castStandard(pointerType,valueType,false,true); } void TypeOpStore::printRaw(ostream &s,const PcodeOp *op)