From 535ac7c08dce646ee6cb90568f1dae61703cea1e Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Thu, 27 Oct 2022 16:43:58 -0400 Subject: [PATCH] GP-2767 Structured pieces --- .../Decompiler/certification.manifest | 1 + .../src/decompile/cpp/architecture.cc | 2 +- .../src/decompile/cpp/coreaction.cc | 58 ++- .../src/decompile/cpp/coreaction.hh | 2 +- .../Decompiler/src/decompile/cpp/database.hh | 6 + .../Decompiler/src/decompile/cpp/fspec.hh | 3 +- .../Decompiler/src/decompile/cpp/funcdata.cc | 1 + .../Decompiler/src/decompile/cpp/funcdata.hh | 1 + .../src/decompile/cpp/funcdata_varnode.cc | 35 +- .../Decompiler/src/decompile/cpp/merge.cc | 139 ++++++-- .../Decompiler/src/decompile/cpp/merge.hh | 6 + .../Decompiler/src/decompile/cpp/op.cc | 84 +++++ .../Decompiler/src/decompile/cpp/op.hh | 29 +- .../Decompiler/src/decompile/cpp/printc.cc | 37 +- .../src/decompile/cpp/printlanguage.cc | 13 + .../src/decompile/cpp/printlanguage.hh | 1 + .../src/decompile/cpp/ruleaction.cc | 335 +++++++++++++----- .../src/decompile/cpp/ruleaction.hh | 27 +- .../Decompiler/src/decompile/cpp/type.cc | 29 +- .../Decompiler/src/decompile/cpp/type.hh | 18 +- .../Decompiler/src/decompile/cpp/typeop.cc | 38 +- .../Decompiler/src/decompile/cpp/typeop.hh | 1 - .../Decompiler/src/decompile/cpp/userop.hh | 1 + .../Decompiler/src/decompile/cpp/variable.cc | 46 ++- .../Decompiler/src/decompile/cpp/variable.hh | 3 + .../Decompiler/src/decompile/cpp/varnode.cc | 24 ++ .../Decompiler/src/decompile/cpp/varnode.hh | 7 +- .../src/decompile/datatests/concat.xml | 81 +++++ .../program/model/pcode/HighSymbol.java | 16 +- 29 files changed, 834 insertions(+), 210 deletions(-) create mode 100644 Ghidra/Features/Decompiler/src/decompile/datatests/concat.xml diff --git a/Ghidra/Features/Decompiler/certification.manifest b/Ghidra/Features/Decompiler/certification.manifest index 9547fd487f..8e20c08473 100644 --- a/Ghidra/Features/Decompiler/certification.manifest +++ 
b/Ghidra/Features/Decompiler/certification.manifest @@ -12,6 +12,7 @@ src/decompile/.project||GHIDRA||||END| src/decompile/cpp/.gitignore||GHIDRA||||END| src/decompile/cpp/Doxyfile||GHIDRA|||Most of this file is autogenerated by doxygen which falls under the GPL - output from GPL products are NOT GPL! - mjbell4|END| src/decompile/cpp/Makefile||GHIDRA||||END| +src/decompile/datatests/concat.xml||GHIDRA||||END| src/decompile/datatests/convert.xml||GHIDRA||||END| src/decompile/datatests/deadvolatile.xml||GHIDRA||||END| src/decompile/datatests/deindirect.xml||GHIDRA||||END| diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/architecture.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/architecture.cc index 14c4c337c5..2bd660889c 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/architecture.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/architecture.cc @@ -313,7 +313,7 @@ int4 Architecture::getMinimumLanedRegisterSize(void) const /// The default model is used whenever an explicit model is not known /// or can't be determined. -/// \param nm is the name of the model to set +/// \param model is the ProtoModel object to make the default void Architecture::setDefaultModel(ProtoModel *model) { diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc index a4c6b71fc7..17d3e4b550 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc @@ -2830,7 +2830,7 @@ int4 ActionNameVars::apply(Funcdata &data) /// and that it needs special printing. 
/// \param vn is the given Varnode /// \param maxref is the maximum number of references to consider before forcing explicitness -/// \return -1 if given Varnode should be marked explicit, the number of descendants otherwise +/// \return -1 or -2 if given Varnode should be marked explicit, the number of descendants otherwise int4 ActionMarkExplicit::baseExplicit(Varnode *vn,int4 maxref) { @@ -2850,31 +2850,55 @@ int4 ActionMarkExplicit::baseExplicit(Varnode *vn,int4 maxref) if (def->code() == CPUI_SUBPIECE) { Varnode *vin = def->getIn(0); if (vin->isAddrTied()) { - if (vn->overlap(*vin) == def->getIn(1)->getOffset()) - return -1; // Should be explicit, will be a copymarker and not printed + if (vn->overlap(*vin) == def->getIn(1)->getOffset()) + return -1; // Should be explicit, will be a copymarker and not printed } } - // (Part of) an addrtied location into itself is hopefully implicit - bool shouldbeimplicit = true; - for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) { - PcodeOp *op = *iter; - if ((op->code()!=CPUI_INT_ZEXT)&&(op->code()!=CPUI_PIECE)) { - shouldbeimplicit = false; - break; - } - Varnode *vnout = op->getOut(); - if ((!vnout->isAddrTied())||(0!=vnout->contains(*vn))) { - shouldbeimplicit = false; - break; + PcodeOp *useOp = vn->loneDescend(); + if (useOp == (PcodeOp *)0) return -1; + if (useOp->code() == CPUI_INT_ZEXT) { + Varnode *vnout = useOp->getOut(); + if ((!vnout->isAddrTied())||(0!=vnout->contains(*vn))) + return -1; + } + else if (useOp->code() == CPUI_PIECE) { + Varnode *rootVn = PieceNode::findRoot(vn); + if (vn == rootVn) return -1; + Datatype *ct = rootVn->getStructuredType(); + if (ct != (Datatype *)0) { + // Getting PIECEd into a structured thing. 
Unless vn is a leaf, it should be implicit + if (def->code() != CPUI_PIECE) return -1; + if (vn->loneDescend() == (PcodeOp *)0) return -1; + Varnode *vn0 = def->getIn(0); + Varnode *vn1 = def->getIn(1); + Address addr = vn->getAddr(); + if (!addr.getSpace()->isBigEndian()) + addr = addr + vn1->getSize(); + if (addr != vn0->getAddr()) return -1; + addr = vn->getAddr(); + if (addr.getSpace()->isBigEndian()) + addr = addr + vn0->getSize(); + if (addr != vn1->getAddr()) return -1; + // If we reach here vn is a non-leaf in a CONCAT tree and should be implicit } } - if (!shouldbeimplicit) return -1; + else { + return -1; + } } else if (vn->isMapped()) { // If NOT addrtied but is still mapped, there must be either a first use (register) mapping // or a dynamic mapping causing the bit to be set. In either case, it should probably be explicit return -1; } + else if (vn->isProtoPartial() && def->code() != CPUI_PIECE) { + // Varnode is part of structure. Write to structure should be an explicit statement + return -1; + } + else if (def->code() == CPUI_PIECE && def->getIn(0)->isProtoPartial() && !vn->isProtoPartial()) { + // The base of PIECE operations building a structure + return -1; + } if (vn->hasNoDescend()) return -1; // Must have at least one descendant if (def->code() == CPUI_PTRSUB) { // A dereference @@ -5069,7 +5093,6 @@ void ActionDatabase::universalAction(Architecture *conf) actprop->addRule( new RuleShiftAnd("analysis") ); actprop->addRule( new RuleConcatZero("analysis") ); actprop->addRule( new RuleConcatLeftShift("analysis") ); - actprop->addRule( new RuleEmbed("analysis") ); actprop->addRule( new RuleSubZext("analysis") ); actprop->addRule( new RuleSubCancel("analysis") ); actprop->addRule( new RuleShiftSub("analysis") ); @@ -5181,6 +5204,7 @@ void ActionDatabase::universalAction(Architecture *conf) actcleanup->addRule( new RuleSubRight("cleanup") ); actcleanup->addRule( new RulePtrsubCharConstant("cleanup") ); actcleanup->addRule( new 
RuleExtensionPush("cleanup") ); + actcleanup->addRule( new RulePieceStructure("cleanup") ); } act->addAction( actcleanup ); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.hh index 2f4c949219..2b187e7347 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.hh @@ -364,7 +364,7 @@ public: return new ActionMergeRequired(getGroup()); } virtual int4 apply(Funcdata &data) { - data.getMerge().mergeAddrTied(); data.getMerge().mergeMarker(); return 0; } + data.getMerge().mergeAddrTied(); data.getMerge().groupPartials(); data.getMerge().mergeMarker(); return 0; } }; /// \brief Try to merge an op's input Varnode to its output, if they are at the same storage location. diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/database.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/database.hh index 67f42a2655..10eaee7ab5 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/database.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/database.hh @@ -305,6 +305,12 @@ public: virtual void decode(Decoder &decoder); }; +/// \brief A Symbol that forces a particular \e union field at a particular point in the body of a function +/// +/// This is an internal Symbol that users can create if they want to force a particular interpretation of a +/// a \e union data-type. It attaches to data-flow via the DynamicHash mechanism, which also allows it to attach +/// to a specific read or write of the target Varnode. Different reads (or write) of the same Varnode can have +/// different symbols attached. The Symbol's associated data-type will be the desired \e union to force. 
class UnionFacetSymbol : public Symbol { int4 fieldNum; ///< Particular field to associate with Symbol access public: diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.hh index 08487fa4e1..88eb871cdb 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.hh @@ -963,7 +963,8 @@ public: class UnknownProtoModel : public ProtoModel { ProtoModel *placeholderModel; ///< The model whose behavior \b this adopts as a behavior placeholder public: - UnknownProtoModel(const string &nm,ProtoModel *placeHold) : ProtoModel(nm,*placeHold) { placeholderModel = placeHold; } + UnknownProtoModel(const string &nm,ProtoModel *placeHold) : ProtoModel(nm,*placeHold) { + placeholderModel = placeHold; } ///< Constructor ProtoModel *getPlaceholderModel(void) const { return placeholderModel; } ///< Retrieve the placeholder model virtual bool isUnknown(void) const { return true; } }; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.cc index 2412d35bba..24756f0b79 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.cc @@ -101,6 +101,7 @@ void Funcdata::clear(void) clearJumpTables(); // Do not clear overrides heritage.clear(); + covermerge.clear(); #ifdef OPACTION_DEBUG opactdbg_count = 0; #endif diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh index ccdcd8ae38..24a7a634be 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh @@ -414,6 +414,7 @@ public: void clearDeadOps(void) { obank.destroyDead(); } ///< Delete any dead PcodeOps void remapVarnode(Varnode *vn,Symbol *sym,const Address &usepoint); void remapDynamicVarnode(Varnode *vn,Symbol *sym,const 
Address &usepoint,uint8 hash); + void linkProtoPartial(Varnode *vn); ///< Find or create Symbol and a partial mapping Symbol *linkSymbol(Varnode *vn); ///< Find or create Symbol associated with given Varnode Symbol *linkSymbolReference(Varnode *vn); ///< Discover and attach Symbol to a constant reference Varnode *findLinkedVarnode(SymbolEntry *entry) const; ///< Find a Varnode matching the given Symbol mapping diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc index 63784131dd..0a641091b4 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc @@ -813,7 +813,8 @@ void Funcdata::calcNZMask(void) /// The caller can elect to update data-type information as well, where Varnodes /// and their associated HighVariables have their data-type finalized based symbols. /// \param lm is the Symbol scope within which to search for mapped Varnodes -/// \param updataDatatypes is \b true if the caller wants to update data-types +/// \param updateDatatypes is \b true if the caller wants to update data-types +/// \param unmappedAliasCheck is \b true if an alias check should be performed on unmapped Varnodes /// \return \b true if any Varnode was updated bool Funcdata::syncVarnodesWithSymbols(const ScopeLocal *lm,bool updateDatatypes,bool unmappedAliasCheck) @@ -883,16 +884,7 @@ Datatype *Funcdata::checkSymbolType(Varnode *vn) if (curType->getSize() == vn->getSize()) return (Datatype *)0; int4 curOff = (vn->getAddr().getOffset() - entry->getAddr().getOffset()) + entry->getOffset(); - // Drill down until we hit something that isn't a containing structure - while(curType != (Datatype *)0 && curType->getMetatype() == TYPE_STRUCT && curType->getSize() > vn->getSize()) { - uintb newOff; - curType = curType->getSubType(curOff, &newOff); - curOff = newOff; - } - if (curType == (Datatype *)0 || 
curType->getSize() <= vn->getSize() || curType->getMetatype() != TYPE_UNION) - return (Datatype *)0; - // If we hit a containing union - return glb->types->getTypePartialUnion((TypeUnion *)curType, curOff, vn->getSize()); + return glb->types->getExactPiece(curType, curOff, vn->getSize()); } /// A Varnode overlaps the given SymbolEntry. Make sure the Varnode is part of the variable @@ -1033,6 +1025,25 @@ void Funcdata::remapDynamicVarnode(Varnode *vn,Symbol *sym,const Address &usepoi vn->setSymbolEntry(entry); } +/// PIECE operations put the given Varnode into a larger structure. Find the resulting +/// whole Varnode, make sure it has a symbol assigned, and then assign the same symbol +/// to the given Varnode piece. If the given Varnode has been merged with something +/// else or the whole Varnode can't be found, do nothing. +void Funcdata::linkProtoPartial(Varnode *vn) + +{ + HighVariable *high = vn->getHigh(); + if (high->getSymbol() != (Symbol *)0) return; + Varnode *rootVn = PieceNode::findRoot(vn); + if (rootVn == vn) return; + + Varnode *nameRep = rootVn->getHigh()->getNameRepresentative(); + Symbol *sym = linkSymbol(nameRep); + if (sym == (Symbol *)0) return; + SymbolEntry *entry = sym->getFirstWholeMap(); + vn->setSymbolEntry(entry); +} + /// The Symbol is really attached to the Varnode's HighVariable (which must exist). /// The only reason a Symbol doesn't get set is if, the HighVariable /// is global and there is no pre-existing Symbol. 
(see mapGlobals()) @@ -1041,6 +1052,8 @@ void Funcdata::remapDynamicVarnode(Varnode *vn,Symbol *sym,const Address &usepoi Symbol *Funcdata::linkSymbol(Varnode *vn) { + if (vn->isProtoPartial()) + linkProtoPartial(vn); HighVariable *high = vn->getHigh(); SymbolEntry *entry; uint4 fl = 0; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/merge.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/merge.cc index 26458054c9..2ada56f0e4 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/merge.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/merge.cc @@ -112,9 +112,18 @@ bool Merge::mergeTestRequired(HighVariable *high_out,HighVariable *high_in) return false; // Map to different parts of same symbol } - // Currently don't allow merging of variables that are in separate overlapping collections - if (high_out->piece != (VariablePiece *)0 && high_in->piece != (VariablePiece *)0) - return false; + if (high_out->piece != (VariablePiece *)0 || high_in->piece != (VariablePiece *)0) { + // Currently don't allow merging of variables that are in separate overlapping collections + if (high_out->piece != (VariablePiece *)0 && high_in->piece != (VariablePiece *)0) + return false; + if (symbolIn != symbolOut) { // If we know symbols are involved, and not both the same symbol + // Treat piece as if it were a separate symbol + if (symbolIn != (Symbol *)0 && high_out->piece != (VariablePiece *)0) + return false; // effectively different symbols + if (symbolOut != (Symbol *)0 && high_in->piece != (VariablePiece *)0) + return false; // effectively different symbols + } + } return true; } @@ -184,7 +193,19 @@ bool Merge::mergeTestSpeculative(HighVariable *high_out,HighVariable *high_in) return true; } -/// \brief A test if the given Varnode can ever be merged +/// \brief Test if the given Varnode that \e must be merged, \e can be merged. +/// +/// If it cannot be merged, throw an exception. 
+/// \param vn is the given Varnode +void Merge::mergeTestMust(Varnode *vn) + +{ + if (vn->hasCover() && !vn->isImplied()) + return; + throw LowlevelError("Cannot force merge of range"); +} + +/// \brief Test if the given Varnode can ever be merged. /// /// Some Varnodes (constants, annotations, implied, spacebase) are never merged with another /// Varnode. @@ -196,6 +217,7 @@ bool Merge::mergeTestBasic(Varnode *vn) if (vn == (Varnode *)0) return false; if (!vn->hasCover()) return false; if (vn->isImplied()) return false; + if (vn->isProtoPartial()) return false; if (vn->isSpacebase()) return false; return true; } @@ -242,18 +264,12 @@ void Merge::mergeRangeMust(VarnodeLocSet::const_iterator startiter,VarnodeLocSet Varnode *vn; vn = *startiter++; - if (!mergeTestBasic(vn)) { - if (!vn->isSpacebase()) - throw LowlevelError("Cannot force merge of range"); - } + mergeTestMust(vn); high = vn->getHigh(); for(;startiter!=enditer;++startiter) { vn = *startiter; if (vn->getHigh() == high) continue; - if (!mergeTestBasic(vn)) { - if (!vn->isSpacebase()) - throw LowlevelError("Cannot force merge of range"); - } + mergeTestMust(vn); if (!merge(high,vn->getHigh(),false)) throw LowlevelError("Forced merge caused intersection"); } @@ -933,6 +949,19 @@ void Merge::mergeMultiEntry(void) } } +/// \brief Run through CONCAT tree roots and group each tree +/// +void Merge::groupPartials(void) + +{ + for(int4 i=0;i<protoPartial.size();++i) { + PcodeOp *op = protoPartial[i]; + if (op->isDead()) continue; + if (!op->isPartialRoot()) continue; + groupPartialRoot(op->getOut()); + } +} + /// \brief Speculatively merge Varnodes that are input/output to the same p-code op /// /// If a single p-code op has an input and output HighVariable that share the same data-type, @@ -1324,6 +1353,37 @@ void Merge::processHighRedundantCopy(HighVariable *high) } } +/// \brief Group the different nodes of a CONCAT tree into a VariableGroup +/// +/// This formally labels all the Varnodes in the tree as overlapping pieces of the same variable. 
+/// The tree is reconstructed from the root Varnode. +/// \param vn is the root Varnode +void Merge::groupPartialRoot(Varnode *vn) + +{ + HighVariable *high = vn->getHigh(); + if (high->numInstances() != 1) return; + vector<PieceNode> pieces; + + int4 baseOffset = 0; + SymbolEntry *entry = vn->getSymbolEntry(); + if (entry != (SymbolEntry *)0) { + baseOffset = entry->getOffset(); + } + + PieceNode::gatherPieces(pieces, vn, vn->getDef(), baseOffset); + for(int4 i=0;i<pieces.size();++i) { + Varnode *nodeVn = pieces[i].getVarnode(); + // Make sure each node is still marked and hasn't merged with anything else + if (!nodeVn->isProtoPartial()) return; + if (nodeVn->getHigh()->numInstances() != 1) return; + } + for(int4 i=0;i<pieces.size();++i) { + Varnode *nodeVn = pieces[i].getVarnode(); + nodeVn->getHigh()->groupWith(pieces[i].getTypeOffset() - baseOffset,high); + } +} + /// \brief Try to reduce/eliminate COPYs produced by the merge trimming process /// /// In order to force merging of certain Varnodes, extra COPY operations may be inserted @@ -1396,12 +1456,19 @@ void Merge::markInternalCopies(void) h1 = op->getOut()->getHigh(); h2 = op->getIn(0)->getHigh(); h3 = op->getIn(1)->getHigh(); - if (!h1->isAddrTied()) break; - if (!h2->isAddrTied()) break; - if (!h3->isAddrTied()) break; - v1 = h1->getTiedVarnode(); - v2 = h2->getTiedVarnode(); - v3 = h3->getTiedVarnode(); + if (!h2->isPartial()) break; + if (!h3->isPartial()) break; + v2 = h2->getPartial(); + v3 = h3->getPartial(); + if (v2->isAddrTied()) { + if (!h1->isAddrTied()) break; + v1 = h1->getTiedVarnode(); + } + else { + if (op->getIn(0) != v2) break; + if (op->getIn(1) != v3) break; + v1 = op->getOut(); + } if (v3->overlap(*v1) != 0) break; if (v2->overlap(*v1) != v3->getSize()) break; data.opMarkNonPrinting(op); @@ -1409,10 +1476,16 @@ void Merge::markInternalCopies(void) case CPUI_SUBPIECE: h1 = op->getOut()->getHigh(); h2 = op->getIn(0)->getHigh(); - if (!h1->isAddrTied()) break; - if (!h2->isAddrTied()) break; - v1 = h1->getTiedVarnode(); - v2 = h2->getTiedVarnode(); + if (!h1->isPartial()) break; + v1 = h1->getPartial(); + if (v1->isAddrTied()) { + if (!h2->isAddrTied()) break; + v2 = h2->getTiedVarnode(); + } + else { + if (!h1->sameGroup(h2)) 
break; + v2 = op->getIn(0); + } val = op->getIn(1)->getOffset(); if (v1->overlap(*v2) != val) break; data.opMarkNonPrinting(op); @@ -1432,6 +1505,17 @@ void Merge::markInternalCopies(void) #endif } +/// \brief Register an unmapped CONCAT stack with the merge process +/// +/// The given Varnode must be the root of a tree of CPUI_PIECE operations as produced by +/// PieceNode::gatherPieces. These will be grouped together into a single variable. +/// \param vn is the given root Varnode +void Merge::registerProtoPartialRoot(Varnode *vn) + +{ + protoPartial.push_back(vn->getDef()); +} + /// \brief Translate any intersection tests for \e high2 into tests for \e high1 /// /// The two variables will be merged and \e high2, as an object, will be freed. @@ -1548,6 +1632,17 @@ void Merge::purgeHigh(HighVariable *high) highedgemap.erase(iterfirst,iterlast); } +/// \brief Clear any cached data from the last merge process +/// +/// Free up resources used by cached intersection tests etc. +void Merge::clear(void) + +{ + highedgemap.clear(); + copyTrims.clear(); + protoPartial.clear(); +} + /// \brief Test the intersection of two HighVariables and cache the result /// /// If the Covers of the two variables intersect, this routine returns \b true. 
To avoid diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/merge.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/merge.hh index 65349af178..b3919111c9 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/merge.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/merge.hh @@ -81,6 +81,7 @@ class Merge { Funcdata &data; ///< The function containing the Varnodes to be merged map<HighEdge,bool> highedgemap; ///< A cache of intersection tests, sorted by HighVariable pair vector<PcodeOp *> copyTrims; ///< COPY ops inserted to facilitate merges + vector<PcodeOp *> protoPartial; ///< Roots of unmapped CONCAT trees bool updateHigh(HighVariable *a); ///< Make sure given HighVariable's Cover is up-to-date void purgeHigh(HighVariable *high); ///< Remove cached intersection tests for a given HighVariable static void gatherBlockVarnodes(HighVariable *a,int4 blk,const Cover &cover,vector<Varnode *> &res); @@ -89,6 +90,7 @@ class Merge { static bool mergeTestRequired(HighVariable *high_out,HighVariable *high_in); static bool mergeTestAdjacent(HighVariable *high_out,HighVariable *high_in); static bool mergeTestSpeculative(HighVariable *high_out,HighVariable *high_in); + static void mergeTestMust(Varnode *vn); static bool mergeTestBasic(Varnode *vn); static void findSingleCopy(HighVariable *high,vector<Varnode *> &singlelist); static bool compareHighByBlock(const HighVariable *a,const HighVariable *b); @@ -116,8 +118,10 @@ class Merge { void markRedundantCopies(HighVariable *high,vector<PcodeOp *> &copy,int4 pos,int4 size); void processHighDominantCopy(HighVariable *high); void processHighRedundantCopy(HighVariable *high); + void groupPartialRoot(Varnode *vn); public: Merge(Funcdata &fd) : data(fd) {} ///< Construct given a specific function + void clear(void); bool intersection(HighVariable *a,HighVariable *b); bool inflateTest(Varnode *a,HighVariable *high); void inflate(Varnode *a,HighVariable *high); @@ -127,11 +131,13 @@ public: void mergeByDatatype(VarnodeLocSet::const_iterator startiter,VarnodeLocSet::const_iterator enditer); void 
mergeAddrTied(void); void mergeMarker(void); + void groupPartials(void); void mergeAdjacent(void); void mergeMultiEntry(void); bool hideShadows(HighVariable *high); void processCopyTrims(void); void markInternalCopies(void); + void registerProtoPartialRoot(Varnode *vn); #ifdef MERGEMULTI_DEBUG void verifyHighCovers(void); #endif diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/op.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/op.cc index b4f8a3f8ed..697e423723 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/op.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/op.cc @@ -750,6 +750,90 @@ int4 PcodeOp::compareOrder(const PcodeOp *bop) const return 0; } +/// \brief Determine if a Varnode is a leaf within the CONCAT tree rooted at the given Varnode +/// +/// The CONCAT tree is the maximal set of Varnodes that are all inputs to CPUI_PIECE operations, +/// with no other uses, and that all ultimately flow to the root Varnode. This method tests +/// whether a Varnode is a leaf of this tree. +/// \param rootVn is the given root of the CONCAT tree +/// \param vn is the Varnode to test as a leaf +/// \param typeOffset is byte offset of the test Varnode within fully concatenated value +/// \return \b true if the test Varnode is a leaf of the tree +bool PieceNode::isLeaf(Varnode *rootVn,Varnode *vn,int4 typeOffset) + +{ + if (vn->isMapped() && rootVn->getSymbolEntry() != vn->getSymbolEntry()) { + return true; + } + if (!vn->isWritten()) return true; + PcodeOp *def = vn->getDef(); + if (def->code() != CPUI_PIECE) return true; + PcodeOp *op = vn->loneDescend(); + if (op == (PcodeOp *)0) return true; + if (vn->isAddrTied()) { + Address addr = rootVn->getAddr() + typeOffset; + if (vn->getAddr() != addr) return true; + } + return false; +} + +/// Find the root of the CONCAT tree of Varnodes marked either isProtoPartial() or isAddrTied(). 
+/// This will be the maximal Varnode containing the given Varnode (as storage), with a +/// backward path to it through PIECE operations. All Varnodes along the path, except the root, will be +/// marked as isProtoPartial() or isAddrTied(). +/// \return the root of the CONCAT tree +Varnode *PieceNode::findRoot(Varnode *vn) + +{ + while(vn->isProtoPartial() || vn->isAddrTied()) { + list<PcodeOp *>::const_iterator iter = vn->beginDescend(); + PcodeOp *pieceOp = (PcodeOp *)0; + while(iter != vn->endDescend()) { + PcodeOp *op = *iter; + ++iter; + if (op->code() != CPUI_PIECE) continue; + int4 slot = op->getSlot(vn); + Address addr = op->getOut()->getAddr(); + if (addr.getSpace()->isBigEndian() == (slot == 1)) + addr = addr + op->getIn(1-slot)->getSize(); + if (addr == vn->getAddr()) { + if (pieceOp != (PcodeOp *)0) { // If there is more than one valid PIECE + if (op->compareOrder(pieceOp)) // Attach this to earliest one + pieceOp = op; + } + else + pieceOp = op; + } + } + if (pieceOp == (PcodeOp *)0) + break; + vn = pieceOp->getOut(); + } + return vn; +} + +/// \brief Build the CONCAT tree rooted at the given Varnode +/// +/// Recursively walk backwards from the root through CPUI_PIECE operations, stopping if a Varnode +/// is deemed a leaf. Collect all Varnodes involved in the tree in a list. For each Varnode in the tree, +/// record whether it is a leaf and also calculate its offset within the data-type attached to the root. +/// \param stack holds the markup for each node of the tree +/// \param rootVn is the given root of the tree +/// \param op is the current PIECE op to explore as part of the tree +/// \param baseOffset is the offset associated with the output of the current PIECE op +void PieceNode::gatherPieces(vector<PieceNode> &stack,Varnode *rootVn,PcodeOp *op,int4 baseOffset) + +{ + for(int4 i=0;i<2;++i) { + Varnode *vn = op->getIn(i); + int4 offset = (rootVn->getSpace()->isBigEndian() == (i==1)) ? 
baseOffset + op->getIn(1-i)->getSize() : baseOffset; + bool res = isLeaf(rootVn,vn,offset); + stack.emplace_back(op,i,offset,res); + if (!res) + gatherPieces(stack,rootVn,vn->getDef(),offset); + } +} + /// Add the PcodeOp to the list of ops with the same op-code. Currently only certain /// op-codes have a dedicated list. /// \param op is the given PcodeOp diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/op.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/op.hh index d93dcd3c18..da10d3380e 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/op.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/op.hh @@ -111,7 +111,8 @@ public: incidental_copy = 0x10, ///< Treat this as \e incidental for parameter recovery algorithms is_cpool_transformed = 0x20, ///< Have we checked for cpool transforms stop_type_propagation = 0x40, ///< Stop data-type propagation into output from descendants - hold_output = 0x80 ///< Output varnode (of call) should not be removed if it is unread + hold_output = 0x80, ///< Output varnode (of call) should not be removed if it is unread + concat_root = 0x100 ///< Output of \b this is root of a CONCAT tree }; private: TypeOp *opcode; ///< Pointer to class providing behavioral details of the operation @@ -212,6 +213,8 @@ public: void clearStopTypePropagation(void) { addlflags &= ~stop_type_propagation; } ///< Allow data-type propagation from below bool holdOutput(void) const { return ((addlflags&hold_output)!=0); } ///< If \b true, do not remove output as dead code void setHoldOutput(void) { addlflags |= hold_output; } ///< Prevent output from being removed as dead code + bool isPartialRoot(void) const { return ((addlflags&concat_root)!=0); } ///< Output is root of CONCAT tree + void setPartialRoot(void) { addlflags |= concat_root; } ///< Mark \b this as root of CONCAT tree bool stopsCopyPropagation(void) const { return ((flags&no_copy_propagation)!=0); } ///< Does \b this allow COPY propagation void setStopCopyPropagation(void) { 
flags |= no_copy_propagation; } ///< Stop COPY propagation through inputs /// \brief Return \b true if this LOADs or STOREs from a dynamic \e spacebase pointer @@ -251,6 +254,30 @@ struct PcodeOpNode { PcodeOpNode(PcodeOp *o,int4 s) { op = o; slot = s; } ///< Constructor }; +/// \brief A node in a tree structure of CPUI_PIECE operations +/// +/// If a group of Varnodes are concatenated into a larger structure, this object is used to explicitly gather +/// the PcodeOps (and Varnodes) in the data-flow and view them as a unit. In a properly formed tree, for each +/// CPUI_PIECE operation, the addresses of the input Varnodes and the output Varnode align according to the +/// concatenation. Internal Varnodes can have only one descendant, but the leaf and the root Varnodes +/// can each have multiple descendants +class PieceNode { + PcodeOp *pieceOp; ///< CPUI_PIECE operation combining this particular Varnode piece + int4 slot; ///< The particular slot of this Varnode within CPUI_PIECE + int4 typeOffset; ///< Byte offset into structure/array + bool leaf; ///< \b true if this is a leaf of the tree structure +public: + PieceNode(PcodeOp *op,int4 sl,int4 off,bool l) { pieceOp=op; slot=sl; typeOffset=off; leaf = l; } ///< Constructor + bool isLeaf(void) const { return leaf; } ///< Return \b true if \b this node is a leaf of the tree structure + int4 getTypeOffset(void) const { return typeOffset; } ///< Get the byte offset of \b this node into the data-type + int4 getSlot(void) const { return slot; } ///< Get the input slot associated with \b this node + PcodeOp *getOp(void) const { return pieceOp; } ///< Get the PcodeOp reading \b this piece + Varnode *getVarnode(void) const { return pieceOp->getIn(slot); } ///< Get the Varnode representing \b this piece + static bool isLeaf(Varnode *rootVn,Varnode *vn,int4 typeOffset); + static Varnode *findRoot(Varnode *vn); + static void gatherPieces(vector &stack,Varnode *rootVn,PcodeOp *op,int4 baseOffset); +}; + /// A map from sequence 
number (SeqNum) to PcodeOp typedef map PcodeOpTree; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc index 5c90024a2d..d93f044fb3 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc @@ -758,14 +758,29 @@ void PrintC::opSubpiece(const PcodeOp *op) { if (op->doesSpecialPrinting()) { // Special printing means it is a field extraction - int4 offset; - Datatype *ct; - const TypeField *field = TypeOpSubpiece::testExtraction(true, op, ct, offset); - if (field != (const TypeField *)0 && offset == 0) { - pushOp(&object_member,op); - pushVn(op->getIn(0), op, mods); - pushAtom(Atom(field->name,fieldtoken,EmitMarkup::no_color,ct,field->ident,op)); - return; + const Varnode *vn = op->getIn(0); + Datatype *ct = vn->getHighTypeReadFacing(op); + if (ct->isPieceStructured()) { + int4 offset; + int4 byteOff = TypeOpSubpiece::computeByteOffsetForComposite(op); + const TypeField *field = ct->findTruncation(byteOff,op->getOut()->getSize(),op,1,offset); // Use artificial slot + if (field != (const TypeField*)0 && offset == 0) { // A formal structure field + pushOp(&object_member,op); + pushVn(vn,op,mods); + pushAtom(Atom(field->name,fieldtoken,EmitMarkup::no_color,ct,field->ident,op)); + return; + } + else if (vn->isExplicit() && vn->getHigh()->getSymbolOffset() == -1) { // An explicit, entire, structured object + Symbol *sym = vn->getHigh()->getSymbol(); + if (sym != (Symbol *)0) { + int4 sz = op->getOut()->getSize(); + int4 off = (int4)op->getIn(1)->getOffset(); + off = vn->getSpace()->isBigEndian() ? 
vn->getSize() - (sz + off) : off; + pushPartialSymbol(sym, off, sz, vn, op, -1); + return; + } + } + // Fall thru to functional printing } } if (castStrategy->isSubpieceCast(op->getOut()->getHighTypeDefFacing(), @@ -1930,13 +1945,9 @@ void PrintC::pushPartialSymbol(const Symbol *sym,int4 off,int4 sz, stack.emplace_back(); PartialSymbolEntry &entry(stack.back()); entry.token = &object_member; - ostringstream s; if (sz == 0) sz = ct->getSize() - off; - // Special notation for subpiece which is neither - // array entry nor struct field - s << '_' << dec << off << '_' << sz << '_'; - entry.fieldname = s.str(); + entry.fieldname = unnamedField(off, sz); // If nothing else works, generate artificial field name entry.field = (const TypeField *)0; entry.hilite = EmitMarkup::no_color; ct = (Datatype *)0; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc index f43d3e6f34..a693042ded 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc @@ -693,6 +693,19 @@ void PrintLanguage::setIntegerFormat(const string &nm) mods |= mod; // Set any new force } +/// This is used if a value is extracted from a structured data-type, but the natural name is not available. +/// An artificial name is generated given just the offset into the data-type and the size in bytes. +/// \param off is the byte offset into the data-type +/// \param size is the number of bytes in the extracted value +/// \return a string describing the artificial field +string PrintLanguage::unnamedField(int4 off,int4 size) + +{ + ostringstream s; + s << '_' << dec << off << '_' << size << '_'; + return s.str(); +} + /// Count '0' and '9' digits base 10. Count '0' and 'f' digits base 16. /// The highest count is the preferred base. 
/// \param val is the given integer diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh index ea7a1efa55..497e968c70 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh @@ -554,6 +554,7 @@ public: virtual void opInsertOp(const PcodeOp *op)=0; ///< Emit an INSERT operator virtual void opExtractOp(const PcodeOp *op)=0; ///< Emit an EXTRACT operator virtual void opPopcountOp(const PcodeOp *op)=0; ///< Emit a POPCOUNT operator + virtual string unnamedField(int4 off,int4 size); ///< Generate an artificial field name static int4 mostNaturalBase(uintb val); ///< Determine the most natural base for an integer static void formatBinary(ostream &s,uintb val); ///< Print a number in binary form diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc index 973a7a31d9..524eecfce3 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc @@ -4964,89 +4964,6 @@ int4 RuleHumptyOr::applyOp(PcodeOp *op,Funcdata &data) return 1; } -/// \class RuleEmbed -/// \brief Simplify PIECE intended as embedding: `concat(V, sub(W,0)) => W & 0xff | (zext(W) << 8)` -/// -/// There is a complementary form: -/// `concat(sub(V,c),W) => (V & 0xff00) | zext(W)` -void RuleEmbed::getOpList(vector &oplist) const - -{ - oplist.push_back(CPUI_PIECE); -} - -int4 RuleEmbed::applyOp(PcodeOp *op,Funcdata &data) - -{ - // Beware of humpty dumpty - Varnode *a,*subout,*x; - PcodeOp *subop; - int4 i; - - if (op->getOut()->getSize() > sizeof(uintb)) return 0; // FIXME: Can't exceed uintb precision - for(i=0;i<2;++i) { - subout = op->getIn(i); - if (!subout->isWritten()) continue; - subop = subout->getDef(); - if (subop->code() != CPUI_SUBPIECE) continue; - int4 c = 
subop->getIn(1)->getOffset(); - a = subop->getIn(0); - if (a->isFree()) continue; - if (a->getSize() != op->getOut()->getSize()) continue; - x = op->getIn(1-i); - if (x->isFree()) continue; - if (i==0) { - if (subout->getSize()+c != a->getSize()) continue; // Not hi SUB - } - else { - if (c != 0) continue; // Not lo SUB - } - - if (x->isWritten()) { // Check for humptydumpty - PcodeOp *othersub = x->getDef(); - if (othersub->code() == CPUI_SUBPIECE) { - if (othersub->getIn(0)==a) { - int4 d = othersub->getIn(1)->getOffset(); - if ((i==0)&&(d==0)) continue; - if ((i==1)&&(d==subout->getSize())) continue; - } - } - } - - uintb mask = calc_mask(subout->getSize()); - mask <<= 8*c; - - // Construct mask - PcodeOp *andop = data.newOp(2,op->getAddr()); - data.opSetOpcode(andop,CPUI_INT_AND); - data.newUniqueOut(a->getSize(),andop); - data.opSetInput(andop,a,0); - data.opSetInput(andop,data.newConstant(a->getSize(),mask),1); - data.opInsertBefore(andop,op); - // Extend x - PcodeOp *extop = data.newOp(1,op->getAddr()); - data.opSetOpcode(extop,CPUI_INT_ZEXT); - data.newUniqueOut(a->getSize(),extop); - data.opSetInput(extop,x,0); - data.opInsertBefore(extop,op); - x = extop->getOut(); - if (i==1) { // Shift x into position - PcodeOp *shiftop = data.newOp(2,op->getAddr()); - data.opSetOpcode(shiftop,CPUI_INT_LEFT); - data.newUniqueOut(a->getSize(),shiftop); - data.opSetInput(shiftop,x,0); - data.opSetInput(shiftop,data.newConstant(4,8*subout->getSize()),1); - data.opInsertBefore(shiftop,op); - x = shiftop->getOut(); - } - data.opSetOpcode(op,CPUI_INT_OR); - data.opSetInput(op,andop->getOut(),0); - data.opSetInput(op,x,1); - return 1; - } - return 0; -} - /// \class RuleSwitchSingle /// \brief Convert BRANCHIND with only one computed destination to a BRANCH void RuleSwitchSingle::getOpList(vector &oplist) const @@ -6696,12 +6613,9 @@ void RuleSubRight::getOpList(vector &oplist) const int4 RuleSubRight::applyOp(PcodeOp *op,Funcdata &data) { - Datatype *parent; - int4 offset; - 
if (op->doesSpecialPrinting()) return 0; - if (TypeOpSubpiece::testExtraction(false, op, parent, offset) != (const TypeField *)0) { + if (op->getIn(0)->getTypeReadFacing(op)->isPieceStructured()) { data.opMarkSpecialPrint(op); // Print this as a field extraction return 0; } @@ -6899,6 +6813,251 @@ int4 RuleExtensionPush::applyOp(PcodeOp *op,Funcdata &data) return 1; } +/// \brief Find the base structure or array data-type that the given Varnode is part of +/// +/// If the Varnode's data-type is already a structure or array, return that data-type. +/// If the Varnode is part of a known symbol, use that data-type. +/// The starting byte offset of the given Varnode within the structure or array is passed back. +/// \param vn is the given Varnode +/// \param baseOffset is used to pass back the starting offset +/// \return the structure or array data-type, or null otherwise +Datatype *RulePieceStructure::determineDatatype(Varnode *vn,int4 &baseOffset) + +{ + Datatype *ct = vn->getStructuredType(); + if (ct == (Datatype *)0) + return ct; + + if (ct->getSize() != vn->getSize()) { // vn is a partial + SymbolEntry *entry = vn->getSymbolEntry(); + baseOffset = vn->getAddr().overlap(0,entry->getAddr(),ct->getSize()); + if (baseOffset < 0) + return (Datatype*)0; + baseOffset += entry->getOffset(); + // Find concrete sub-type that matches the size of the Varnode + Datatype *subType = ct; + uintb subOffset = baseOffset; + while(subType != (Datatype *)0 && subType->getSize() > vn->getSize()) { + subType = subType->getSubType(subOffset, &subOffset); + } + if (subType != (Datatype *)0 && subType->getSize() == vn->getSize() && subOffset == 0) { + // If there is a concrete sub-type + if (!subType->isPieceStructured()) // and the concrete sub-type is not a structured type itself + return (Datatype *)0; // don't split out CONCAT forming the sub-type + } + } + else { + baseOffset = 0; + } + return ct; +} + +/// \brief For a structured data-type, determine if the given range spans 
multiple elements +/// +/// Return true unless the range falls within a single non-structured element. +/// \param ct is the structured data-type +/// \param offset is the start of the given range +/// \param size is the number of bytes in the range +/// \return \b true if the range spans multiple elements +bool RulePieceStructure::spanningRange(Datatype *ct,int4 offset,int4 size) + +{ + if (offset + size > ct->getSize()) return false; + uintb newOff = offset; + for(;;) { + ct = ct->getSubType(newOff, &newOff); + if (ct == (Datatype *)0) return true; // Don't know what it spans, assume multiple + if ((int4)newOff + size > ct->getSize()) return true; // Spans more than 1 + if (!ct->isPieceStructured()) break; + } + return false; +} + +/// \brief Convert an INT_ZEXT operation to a PIECE with a zero constant as the first parameter +/// +/// The caller provides a parent data-type and an offset into it corresponding to the \e output of the INT_ZEXT. +/// The op is converted to a PIECE with a 0 Varnode, which will be assigned a data-type based on +/// the parent data-type and a computed offset. +/// \param zext is the INT_ZEXT operation +/// \param ct is the parent data-type +/// \param offset is the byte offset of the \e output within the parent data-type +/// \param data is the function containing the operation +/// \return true if the INT_ZEXT was successfully converted +bool RulePieceStructure::convertZextToPiece(PcodeOp *zext,Datatype *ct,int4 offset,Funcdata &data) + +{ + Varnode *outvn = zext->getOut(); + Varnode *invn = zext->getIn(0); + if (invn->isConstant()) return false; + int4 sz = outvn->getSize() - invn->getSize(); + if (sz > sizeof(uintb)) return false; + offset += outvn->getSpace()->isBigEndian() ? 
0 : invn->getSize(); + uintb newOff = offset; + while(ct != (Datatype *)0 && ct->getSize() > sz) { + ct = ct->getSubType(newOff, &newOff); + } + Varnode *zerovn = data.newConstant(sz, 0); + if (ct != (Datatype *)0 && ct->getSize() == sz) + zerovn->updateType(ct, false, false); + data.opSetOpcode(zext, CPUI_PIECE); + data.opInsertInput(zext, zerovn, 0); + if (invn->getType()->needsResolution()) + data.inheritResolution(invn->getType(), zext, 1, zext, 0); // Transfer invn's resolution to slot 1 + return true; +} + +/// \brief Search for leaves in the CONCAT tree defined by an INT_ZEXT operation and convert them to PIECE +/// +/// The CONCAT tree can be extended through an INT_ZEXT, if the extension's output crosses multiple fields of +/// the parent data-type. We check this and replace the INT_ZEXT with PIECE if appropriate. +/// \param stack is the node container for the CONCAT tree +/// \param structuredType is the parent data-type for the tree +/// \param data is the function containing the tree +/// \return \b true if any INT_ZEXT replacement was performed +bool RulePieceStructure::findReplaceZext(vector &stack,Datatype *structuredType,Funcdata &data) + +{ + bool change = false; + for(int4 i=0;iisWritten()) continue; + PcodeOp *op = vn->getDef(); + if (op->code() != CPUI_INT_ZEXT) continue; + if (!spanningRange(structuredType,node.getTypeOffset(),vn->getSize())) continue; + if (convertZextToPiece(op,structuredType,node.getTypeOffset(),data)) + change = true; + } + return change; +} + +/// \brief Return \b true if the two given \b root and \b leaf should be part of different symbols +/// +/// A leaf in a CONCAT tree can be in a separate symbol from the root if it is a parameter or a separate root. 
+/// \param root is the root of the CONCAT tree +/// \param leaf is the given leaf Varnode +/// \return \b true if the two Varnodes should be in different symbols +bool RulePieceStructure::separateSymbol(Varnode *root,Varnode *leaf) + +{ + if (root->getSymbolEntry() != leaf->getSymbolEntry()) return true; // Forced to be different symbols + if (root->isAddrTied()) return false; + if (!leaf->isWritten()) return true; // Assume to be different symbols + if (leaf->isProtoPartial()) return true; // Already in another tree + PcodeOp *op = leaf->getDef(); + if (op->code() != CPUI_PIECE) return false; + if (leaf->getType()->isPieceStructured()) return true; // Would be a separate root + + return false; +} + +/// \class RulePieceStructure +/// \brief Concatenating structure pieces gets printed as explicit write statements +/// +/// Set properties so that a CONCAT expression like `v = CONCAT(CONCAT(v1,v2),CONCAT(v3,v4))` gets +/// rendered as a sequence of separate write statements. `v.field1 = v1; v.field2 = v2; v.field3 = v3; v.field4 = v4;` +void RulePieceStructure::getOpList(vector &oplist) const + +{ + oplist.push_back(CPUI_PIECE); + oplist.push_back(CPUI_INT_ZEXT); +} + +int4 RulePieceStructure::applyOp(PcodeOp *op,Funcdata &data) + +{ + if (op->isPartialRoot()) return 0; // Check if CONCAT tree already been visited + Varnode *outvn = op->getOut(); + int4 baseOffset; + Datatype *ct = determineDatatype(outvn, baseOffset); + if (ct == (Datatype *)0) return 0; + + if (op->code() == CPUI_INT_ZEXT) { + if (convertZextToPiece(op,outvn->getType(),0,data)) + return 1; + return 0; + } + // Check if outvn is really the root of the tree + PcodeOp *zext = outvn->loneDescend(); + if (zext != (PcodeOp*)0) { + if (zext->code() == CPUI_PIECE) + return 0; // More PIECEs below us, not a root + if (zext->code() == CPUI_INT_ZEXT) { + // Extension of a structured data-type, convert extension to PIECE first + if (convertZextToPiece(zext,zext->getOut()->getType(),0,data)) + return 1; + 
return 0; + } + } + + vector stack; + for(;;) { + PieceNode::gatherPieces(stack, outvn, op, baseOffset); + if (!findReplaceZext(stack, ct, data)) // Check for INT_ZEXT leaves that need to be converted to PIECEs + break; + stack.clear(); // If we found some, regenerate the tree + } + + op->setPartialRoot(); + bool anyAddrTied = outvn->isAddrTied(); + Address baseAddr = outvn->getAddr() - baseOffset; + for(int4 i=0;igetAddr() == addr) { + if (!node.isLeaf() || !separateSymbol(outvn, vn)) { + // Varnode already has correct address and will be part of the same symbol as root + // so we don't need to change the storage or insert a COPY + if (!vn->isAddrTied() && !vn->isProtoPartial()) { + vn->setProtoPartial(); + } + anyAddrTied = anyAddrTied || vn->isAddrTied(); + continue; + } + } + if (node.isLeaf()) { + PcodeOp *copyOp = data.newOp(1,node.getOp()->getAddr()); + Varnode *newVn = data.newVarnodeOut(vn->getSize(), addr, copyOp); + anyAddrTied = anyAddrTied || newVn->isAddrTied(); // Its possible newVn is addrtied, even if vn isn't + Datatype *newType = data.getArch()->types->getExactPiece(ct, node.getTypeOffset(), vn->getSize()); + if (newType == (Datatype *)0) + newType = vn->getType(); + newVn->updateType(newType, false, false); + data.opSetOpcode(copyOp, CPUI_COPY); + data.opSetInput(copyOp, vn, 0); + data.opSetInput(node.getOp(),newVn,node.getSlot()); + data.opInsertBefore(copyOp, node.getOp()); + if (vn->getType()->needsResolution()) { + // Inherit PIECE's read resolution for COPY's read + data.inheritResolution(vn->getType(), copyOp, 0, node.getOp(), node.getSlot()); + } + if (newType->needsResolution()) { + newType->resolveInFlow(copyOp, -1); // If the piece represents part of a union, resolve it + } + if (!newVn->isAddrTied()) + newVn->setProtoPartial(); + } + else { + // Reaching here we know vn is NOT addrtied and has a lone descendant + // We completely replace the Varnode with one having the correct storage + PcodeOp *defOp = vn->getDef(); + PcodeOp *loneOp 
= vn->loneDescend(); + int4 slot = loneOp->getSlot(vn); + Varnode *newVn = data.newVarnode(vn->getSize(), addr, vn->getType()); + data.opSetOutput(defOp, newVn); + data.opSetInput(loneOp, newVn, slot); + data.deleteVarnode(vn); + if (!newVn->isAddrTied()) + newVn->setProtoPartial(); + } + } + if (!anyAddrTied) + data.getMerge().registerProtoPartialRoot(outvn); + return 1; +} + /// \class RuleSubNormal /// \brief Pull-back SUBPIECE through INT_RIGHT and INT_SRIGHT /// @@ -8142,7 +8301,7 @@ Varnode *RuleSignMod2nOpt2::checkSignExtForm(PcodeOp *op) /// \brief Verify an \e if block like `V = (V s< 0) ? V + 2^n-1 : V` /// /// \param op is the MULTIEQUAL -/// \param npos is the constant 2^n +/// \param npow is the constant 2^n /// \return the Varnode V in the form, or null if the form doesn't match Varnode *RuleSignMod2nOpt2::checkMultiequalForm(PcodeOp *op,uintb npow) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh index 0412b8b724..325c62987f 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh @@ -940,16 +940,6 @@ public: virtual void getOpList(vector &oplist) const; virtual int4 applyOp(PcodeOp *op,Funcdata &data); }; -class RuleEmbed : public Rule { -public: - RuleEmbed(const string &g) : Rule(g, 0, "embed") {} ///< Constructor - virtual Rule *clone(const ActionGroupList &grouplist) const { - if (!grouplist.contains(getGroup())) return (Rule *)0; - return new RuleEmbed(getGroup()); - } - virtual void getOpList(vector &oplist) const; - virtual int4 applyOp(PcodeOp *op,Funcdata &data); -}; class RuleSwitchSingle : public Rule { public: RuleSwitchSingle(const string &g) : Rule(g,0,"switchsingle") {} ///< Constructor @@ -1155,6 +1145,23 @@ public: virtual int4 applyOp(PcodeOp *op,Funcdata &data); }; +class RulePieceStructure : public Rule { + /// \brief Markup for Varnodes pieced together into 
structure/array + static Datatype *determineDatatype(Varnode *vn,int4 &baseOffset); + static bool spanningRange(Datatype *ct,int4 off,int4 size); + static bool convertZextToPiece(PcodeOp *zext,Datatype *structuredType,int4 offset,Funcdata &data); + static bool findReplaceZext(vector &stack,Datatype *structuredType,Funcdata &data); + static bool separateSymbol(Varnode *root,Varnode *leaf); +public: + RulePieceStructure(const string &g) : Rule( g, 0, "piecestructure") {} ///< Constructor + virtual Rule *clone(const ActionGroupList &grouplist) const { + if (!grouplist.contains(getGroup())) return (Rule *)0; + return new RulePieceStructure(getGroup()); + } + virtual void getOpList(vector &oplist) const; + virtual int4 applyOp(PcodeOp *op,Funcdata &data); +}; + class RuleSubNormal : public Rule { public: RuleSubNormal(const string &g) : Rule( g, 0, "subnormal") {} ///< Constructor diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc index 316074389c..844bb27766 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc @@ -1924,13 +1924,13 @@ const TypeField *TypePartialUnion::findTruncation(int4 off,int4 sz,const PcodeOp return container->findTruncation(off + offset, sz, op, slot, newoff); } -int4 TypePartialUnion::numDepend(void) +int4 TypePartialUnion::numDepend(void) const { return container->numDepend(); } -Datatype *TypePartialUnion::getDepend(int4 index) +Datatype *TypePartialUnion::getDepend(int4 index) const { // Treat dependents as coming from the underlying union @@ -3483,6 +3483,31 @@ TypePointer *TypeFactory::getTypePointerWithSpace(Datatype *ptrTo,AddrSpace *spc return res; } +/// Drill down into nested data-types until we get to a data-type that exactly matches the +/// given offset and size, and return this data-type. 
Any \e union data-type encountered +/// terminates the process and a partial union data-type is constructed and returned. +/// If the range indicated by the offset and size contains only a partial field or crosses +/// field boundaries, null is returned. +/// \param ct is the structured data-type +/// \param offset is the starting byte offset for the piece +/// \param size is the number of bytes in the piece +/// \return the data-type of the piece or null +Datatype *TypeFactory::getExactPiece(Datatype *ct,int4 offset,int4 size) + +{ + uintb newOff = offset; + while(ct != (Datatype *)0 && ct->getSize() > size && ct->getMetatype() != TYPE_UNION) { + ct = ct->getSubType(newOff, &newOff); + } + if (ct == (Datatype *)0 || ct->getSize() < size) + return (Datatype *)0; + if (ct->getSize() == size) + return ct; + if (ct->getMetatype() == TYPE_UNION) // If we hit a containing union + return getTypePartialUnion((TypeUnion *)ct, newOff, size); + return (Datatype *)0; +} + /// The indicated Datatype object is removed from this container. /// Indirect references (via TypeArray TypeStruct etc.) are not affected /// \param ct is the data-type to destroy diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh index 78b6fe4d2f..6703c74b24 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh @@ -222,6 +222,7 @@ public: int4 typeOrder(const Datatype &op) const { if (this==&op) return 0; return compare(op,10); } ///< Order this with -op- datatype int4 typeOrderBool(const Datatype &op) const; ///< Order \b this with -op-, treating \e bool data-type as special void encodeRef(Encoder &encoder) const; ///< Encode a reference of \b this to a stream + bool isPieceStructured(void) const; ///< Does \b this data-type consist of separate pieces? 
static uint4 encodeIntegerFormat(const string &val); static string decodeIntegerFormat(uint4 val); }; @@ -501,8 +502,8 @@ public: TypeUnion *getParentUnion(void) const { return container; } ///< Get the union which \b this is part of virtual void printRaw(ostream &s) const; ///< Print a description of the type to stream virtual const TypeField *findTruncation(int4 off,int4 sz,const PcodeOp *op,int4 slot,int4 &newoff) const; - virtual int4 numDepend(void); - virtual Datatype *getDepend(int4 index); + virtual int4 numDepend(void) const; + virtual Datatype *getDepend(int4 index) const; virtual int4 compare(const Datatype &op,int4 level) const; virtual int4 compareDependency(const Datatype &op) const; virtual Datatype *clone(void) const { return new TypePartialUnion(*this); } @@ -691,6 +692,7 @@ public: TypePointerRel *getTypePointerRel(TypePointer *parentPtr,Datatype *ptrTo,int4 off); ///< Get pointer offset relative to a container TypePointerRel *getTypePointerRel(int4 sz,Datatype *parent,Datatype *ptrTo,int4 ws,int4 off,const string &nm); TypePointer *getTypePointerWithSpace(Datatype *ptrTo,AddrSpace *spc,const string &nm); + Datatype *getExactPiece(Datatype *ct,int4 offset,int4 size); ///< Get the data-type associated with piece of a structured data-type void destroyType(Datatype *ct); ///< Remove a data-type from \b this Datatype *concretize(Datatype *ct); ///< Convert given data-type to concrete form void dependentOrder(vector &deporder) const; ///< Place all data-types in dependency order @@ -745,6 +747,18 @@ inline int4 Datatype::typeOrderBool(const Datatype &op) const return compare(op,10); } +/// If a value with \b this data-type is put together from multiple pieces, is it better to display +/// this construction as a sequence of separate assignments or as a single concatenation. +/// Generally a TYPE_STRUCT or TYPE_ARRAY should be represented with separate assignments. 
+/// \return \b true if the data-type is put together with multiple assignments +inline bool Datatype::isPieceStructured(void) const + +{ +// if (metatype == TYPE_STRUCT || metatype == TYPE_ARRAY || metatype == TYPE_UNION || +// metatype == TYPE_PARTIALUNION || metatype == TYPE_PARTIALSTRUCT) + return (metatype <= TYPE_ARRAY); +} + inline TypeArray::TypeArray(int4 n,Datatype *ao) : Datatype(n*ao->getSize(),TYPE_ARRAY) { diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc index 11000806f1..0a00521f77 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc @@ -1929,18 +1929,20 @@ string TypeOpSubpiece::getOperatorName(const PcodeOp *op) const Datatype *TypeOpSubpiece::getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const { + const Varnode *outvn = op->getOut(); + const TypeField *field; + Datatype *ct = op->getIn(0)->getHighTypeReadFacing(op); int4 offset; - Datatype *parent; - const Varnode *vn = op->getOut(); - const TypeField *field = testExtraction(true, op, parent, offset); + int4 byteOff = computeByteOffsetForComposite(op); + field = ct->findTruncation(byteOff,outvn->getSize(),op,1,offset); // Use artificial slot if (field != (const TypeField *)0) { - if (vn->getSize() == field->type->getSize()) + if (outvn->getSize() == field->type->getSize()) return field->type; } - Datatype *dt = vn->getHighTypeDefFacing(); // SUBPIECE prints as cast to whatever its output is + Datatype *dt = outvn->getHighTypeDefFacing(); // SUBPIECE prints as cast to whatever its output is if (dt->getMetatype() != TYPE_UNKNOWN) return dt; - return tlst->getBase(vn->getSize(),TYPE_INT); // If output is unknown, treat as cast to int + return tlst->getBase(outvn->getSize(),TYPE_INT); // If output is unknown, treat as cast to int } Datatype *TypeOpSubpiece::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, @@ 
-1969,30 +1971,6 @@ Datatype *TypeOpSubpiece::propagateType(Datatype *alttype,PcodeOp *op,Varnode *i return (Datatype *)0; } -/// \brief Test if the given SUBPIECE PcodeOp is acting as a field extraction operator -/// -/// For packed structures with small fields, SUBPIECE may be used to extract the field. -/// Test if the HighVariable being truncated is a structure and if the truncation produces -/// part of a \e single field. If so return the TypeField descriptor, and pass back the parent -/// structure and the number of least significant bytes that have been truncated from the field. -/// \param useHigh is \b true if the HighVariable data-type is checked, otherwise the Varnode data-type is used -/// \param op is the given SUBPIECE PcodeOp -/// \param parent holds the parent Datatype being passed back -/// \param offset holds the LSB offset being passed back -/// \return the TypeField if a field is being extracted or null otherwise -const TypeField *TypeOpSubpiece::testExtraction(bool useHigh,const PcodeOp *op,Datatype *&parent,int4 &offset) - -{ - const Varnode *vn = op->getIn(0); - Datatype *ct = useHigh ? 
vn->getHighTypeReadFacing(op) : vn->getTypeReadFacing(op); - type_metatype meta = ct->getMetatype(); - if (meta != TYPE_STRUCT && meta != TYPE_UNION && meta != TYPE_PARTIALUNION) - return (const TypeField *)0; - parent = ct; - int4 byteOff = computeByteOffsetForComposite(op); - return ct->findTruncation(byteOff,op->getOut()->getSize(),op,1,offset); // Use artificial slot -} - /// \brief Compute the byte offset into an assumed composite data-type produced by the given CPUI_SUBPIECE /// /// If the input Varnode is a composite data-type, the extracted result of the SUBPIECE represent a diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.hh index f8976eb5ee..7c9532cdc2 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.hh @@ -749,7 +749,6 @@ public: int4 inslot,int4 outslot); virtual string getOperatorName(const PcodeOp *op) const; virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opSubpiece(op); } - static const TypeField *testExtraction(bool useHigh,const PcodeOp *op,Datatype *&parent,int4 &offset); static int4 computeByteOffsetForComposite(const PcodeOp *op); }; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/userop.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/userop.hh index a283255cb2..747a22d0f7 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/userop.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/userop.hh @@ -44,6 +44,7 @@ extern ElementId ELEM_SEGMENTOP; ///< Marshaling element \ /// its CALLOTHER index. A facility for reading in implementation details is provided via decode(). 
class UserPcodeOp { public: + /// \brief Enumeration of different boolean properties that can be assigned to a CALLOTHER enum userop_flags { annotation_assignment = 1, ///< Displayed as assignment, `in1 = in2`, where the first parameter is an annotation no_operator = 2 ///< Don't emit special token, just emit the first input parameter as expression diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc index 0155b2a882..72bb50eb90 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc @@ -24,8 +24,9 @@ AttributeId ATTRIB_SYMREF = AttributeId("symref",68); ElementId ELEM_HIGH = ElementId("high",82); /// Compare by offset within the group, then by size. -/// \param op2 is the other piece to compare with \b this -/// \return \b true if \b this should be ordered before the other piece +/// \param a is the first piece to compare +/// \param b is the other piece to compare +/// \return \b true if \b a should be ordered before the \b b bool VariableGroup::PieceCompareByOffset::operator()(const VariablePiece *a,const VariablePiece *b) const { @@ -145,7 +146,7 @@ void VariablePiece::adjustOffset(int4 amt) } /// If there are no remaining references to the old VariableGroup it is deleted. 
-/// \param newGropu is the new VariableGroup to transfer \b this to +/// \param newGroup is the new VariableGroup to transfer \b this to void VariablePiece::transferGroup(VariableGroup *newGroup) { @@ -227,15 +228,26 @@ void HighVariable::setSymbol(Varnode *vn) const } } symbol = entry->getSymbol(); - if (entry->isDynamic()) // Dynamic symbols match whole variable + if (vn->isProtoPartial()) { + Varnode *rootVn = PieceNode::findRoot(vn); + if (rootVn == vn) + throw LowlevelError("Partial varnode does not match symbol"); + + symboloffset = vn->getAddr().overlap(0,rootVn->getAddr(),rootVn->getSize()); + SymbolEntry *entry = rootVn->getSymbolEntry(); + if (entry != (SymbolEntry *)0) + symboloffset += entry->getOffset(); + } + else if (entry->isDynamic()) // Dynamic symbols (that aren't partials) match whole variable symboloffset = -1; else if (symbol->getCategory() == Symbol::equate) symboloffset = -1; // For equates, we don't care about size else if (symbol->getType()->getSize() == vn->getSize() && entry->getAddr() == vn->getAddr() && !entry->isPiece()) symboloffset = -1; // A matching entry - else + else { symboloffset = vn->getAddr().overlap(0,entry->getAddr(),symbol->getType()->getSize()) + entry->getOffset(); + } highflags &= ~((uint4)symboldirty); // We are no longer dirty } @@ -455,6 +467,21 @@ Varnode *HighVariable::getNameRepresentative(void) const return nameRepresentative; } +/// Find the first member that is either address tied or marked as a proto partial. +/// \return a member Varnode acting as partial storage or null if none exist +Varnode *HighVariable::getPartial(void) const + +{ + int4 i; + + for(i=0;iisAddrTied() || vn->isProtoPartial()) + return vn; + } + return (Varnode *)0; +} + /// Search for the given Varnode and cut it out of the list, marking all properties as \e dirty. 
/// \param vn is the given Varnode member to remove void HighVariable::remove(Varnode *vn) @@ -726,6 +753,15 @@ int4 HighVariable::instanceIndex(const Varnode *vn) const return -1; } +/// \param op2 is the other HighVariable to compare with \b this +/// \return \b true if they are in the same group +bool HighVariable::sameGroup(const HighVariable *op2) const + +{ + if (piece == (VariablePiece *)0 || op2->piece == (VariablePiece *)0) return false; + return (piece->getGroup() == op2->piece->getGroup()); +} + /// \param encoder is the stream encoder void HighVariable::encode(Encoder &encoder) const diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.hh index f8fb0e4335..6e8e4caa57 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.hh @@ -179,6 +179,7 @@ public: Varnode *getInputVarnode(void) const; ///< Find (the) input member Varnode Varnode *getTypeRepresentative(void) const; ///< Get a member Varnode with the strongest data-type Varnode *getNameRepresentative(void) const; ///< Get a member Varnode that dictates the naming of \b this HighVariable + Varnode *getPartial(void) const; ///< Find the first member that can act as partial symbol storage int4 getNumMergeClasses(void) const { return numMergeClasses; } ///< Get the number of speculative merges for \b this bool isMapped(void) const { updateFlags(); return ((flags&Varnode::mapped)!=0); } ///< Return \b true if \b this is mapped bool isPersist(void) const { updateFlags(); return ((flags&Varnode::persist)!=0); } ///< Return \b true if \b this is a global variable @@ -189,6 +190,7 @@ public: bool isConstant(void) const { updateFlags(); return ((flags&Varnode::constant)!=0); } ///< Return \b true if \b this is a constant bool isUnaffected(void) const { updateFlags(); return ((flags&Varnode::unaffected)!=0); } ///< Return \b true if \b this is an \e unaffected 
register bool isExtraOut(void) const { updateFlags(); return ((flags&(Varnode::indirect_creation|Varnode::addrtied))==Varnode::indirect_creation); } ///< Return \b true if \b this is an extra output + bool isPartial(void) const { updateFlags(); return ((flags&(Varnode::addrtied|Varnode::proto_partial))!=0); } ///< Return \b true if \b this is potential partial symbol void setMark(void) const { flags |= Varnode::mark; } ///< Set the mark on this variable void clearMark(void) const { flags &= ~Varnode::mark; } ///< Clear the mark on this variable bool isMark(void) const { return ((flags&Varnode::mark)!=0); } ///< Return \b true if \b this is marked @@ -205,6 +207,7 @@ public: bool isUnattached(void) const { return inst.empty(); } ///< Return \b true if \b this has no member Varnode bool isTypeLock(void) const { updateType(); return ((flags & Varnode::typelock)!=0); } ///< Return \b true if \b this is \e typelocked bool isNameLock(void) const { updateFlags(); return ((flags & Varnode::namelock)!=0); } ///< Return \b true if \b this is \e namelocked + bool sameGroup(const HighVariable *op2) const; ///< Return \b true if \b this and the other variable are parts of the same variable void encode(Encoder &encoder) const; ///< Encode \b this variable to stream as a \<high> element #ifdef MERGEMULTI_DEBUG void verifyCover(void) const; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.cc index 7d9acb78fb..b5581a7aa7 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.cc @@ -936,6 +936,13 @@ bool Varnode::findSubpieceShadow(int4 leastByte,const Varnode *whole,int4 recurs return false; } +/// \brief Try to find a PIECE operation that produces \b this from a given Varnode \b piece +/// +/// \param leastByte is the number of least significant bytes being truncated from the +/// putative \b this to get \b piece.
The routine can backtrack through COPY operations and +/// more than one PIECE operation to verify that \b this is formed out of \b piece. +/// \param piece is the given Varnode piece +/// \return \b true if \b this and \b piece have the prescribed PIECE relationship bool Varnode::findPieceShadow(int4 leastByte,const Varnode *piece) const { @@ -1007,6 +1014,23 @@ bool Varnode::partialCopyShadow(const Varnode *op2,int4 relOff) const return false; } +/// If \b this has a data-type built out of separate pieces, return it. +/// If \b this is mapped as a partial to a symbol with one of these data-types, return it. +/// Return null otherwise. +/// \return the associated structured data-type or null +Datatype *Varnode::getStructuredType(void) const + +{ + Datatype *ct; + if (mapentry != (SymbolEntry *)0) + ct = mapentry->getSymbol()->getType(); + else + ct = type; + if (ct->isPieceStructured()) + return ct; + return (Datatype *)0; +} + /// Compare term order of two Varnodes. Used in Term Rewriting strategies to order operands of commutative ops /// \param op is the Varnode to order against \b this /// \return -1 if \b this comes before \b op, 1 if op before this, or 0 diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.hh index 9f25983017..64fc38bbdd 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.hh @@ -109,7 +109,8 @@ public: indirectstorage = 0x8000000, ///< Is this Varnode storing a pointer to the actual symbol hiddenretparm = 0x10000000, ///< Does this varnode point to the return value storage location incidental_copy = 0x20000000, ///< Do copies of this varnode happen as a side-effect - autolive_hold = 0x40000000 ///< Temporarily block dead-code removal of \b this + autolive_hold = 0x40000000, ///< Temporarily block dead-code removal of \b this + proto_partial = 0x80000000 ///< Varnode is getting PIECEd together
into an (unmapped) structure }; /// Additional boolean properties on a Varnode enum addl_flags { @@ -250,6 +251,7 @@ public: bool isUnaffected(void) const { return ((flags&Varnode::unaffected)!=0); } ///< Is \b this a value that is supposed to be preserved across the function? bool isSpacebase(void) const { return ((flags&Varnode::spacebase)!=0); } ///< Is this location used to store the base point for a virtual address space? bool isReturnAddress(void) const { return ((flags&Varnode::return_address)!=0); } ///< Is this storage for a calls return address? + bool isProtoPartial(void) const { return ((flags&Varnode::proto_partial)!=0); } ///< Is \b this getting pieced together into a larger whole bool isPtrCheck(void) const { return ((addlflags&Varnode::ptrcheck)!=0); } ///< Has \b this been checked as a constant pointer to a mapped symbol? bool isPtrFlow(void) const { return ((addlflags&Varnode::ptrflow)!=0); } ///< Does this varnode flow to or from a known pointer bool isSpacebasePlaceholder(void) const { return ((addlflags&Varnode::spacebase_placeholder)!=0); } ///< Is \b this used specifically to track stackpointer values? 
@@ -318,6 +320,8 @@ public: void clearWriteMask(void) { addlflags &= ~Varnode::writemask; } ///< Clear the mark indicating \b this is not a true write void setAutoLiveHold(void) { flags |= Varnode::autolive_hold; } ///< Place temporary hold on dead code removal void clearAutoLiveHold(void) { flags &= ~Varnode::autolive_hold; } ///< Clear temporary hold on dead code removal + void setProtoPartial(void) { flags |= Varnode::proto_partial; } ///< Mark \b this gets pieced into larger structure + void clearProtoPartial(void) { flags &= ~Varnode::proto_partial; } ///< Clear mark indicating \b this gets pieced into larger structure void setUnsignedPrint(void) { addlflags |= Varnode::unsignedprint; } ///< Force \b this to be printed as unsigned void setLongPrint(void) { addlflags |= Varnode::longprint; } ///< Force \b this to be printed as a \e long token void setStopUpPropagation(void) { addlflags |= Varnode::stop_uppropagation; } ///< Stop up-propagation thru \b this @@ -334,6 +338,7 @@ public: bool findSubpieceShadow(int4 leastByte,const Varnode *whole,int4 recurse) const; bool findPieceShadow(int4 leastByte,const Varnode *piece) const; bool partialCopyShadow(const Varnode *op2,int4 relOff) const; ///< Is one of \b this or \b op2 a partial copy of the other? 
+ Datatype *getStructuredType(void) const; ///< Get structure/array/union that \b this is a piece of void encode(Encoder &encoder) const; ///< Encode a description of \b this to a stream static bool comparePointers(const Varnode *a,const Varnode *b) { return (*a < *b); } ///< Compare Varnodes as pointers static void printRaw(ostream &s,const Varnode *vn); ///< Print raw info about a Varnode to stream diff --git a/Ghidra/Features/Decompiler/src/decompile/datatests/concat.xml b/Ghidra/Features/Decompiler/src/decompile/datatests/concat.xml new file mode 100644 index 0000000000..a0658e89b8 --- /dev/null +++ b/Ghidra/Features/Decompiler/src/decompile/datatests/concat.xml @@ -0,0 +1,81 @@ + + + + + 4883ec0889f84889f748c1e720 +4809c7e812ffffff4883c408c348c1e6 +2089f84809f0c30fb7c648c1e0104889 +c60fb7c70fb7d248c1e2204809f048c1 +e1304809d04809c8c35389f366893d4f +082000488b3d46082000e806ffffff66 +891d400820005bc389f8c3 + + + 554889e5897dec8b45 +ec8945f8c745fc00000000488b45f85d +c3 + + + + + + + + + + + +CONCAT +ZEXT +fVar1\.A = regp1; +fVar1\.B = regp2; +fVar1\.A = retp1; +fVar1\.B = retp2; +fVar1\.a = short1; +fVar1\.b = short2; +fVar1\.c = short3; +fVar1\.d = short4; +globshort\.b = replace1; +globshort\.d = replace2; +globshort\.a = +globshort\.c = +fVar1\.A = zero1; +fVar1\.B = 0; +footwo\.A = zero_two1; +footwo\.B = 0; + diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/HighSymbol.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/HighSymbol.java index e59d762362..94255200a2 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/HighSymbol.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/HighSymbol.java @@ -166,15 +166,23 @@ public class HighSymbol { /** * Associate a particular HighVariable with this symbol. This is used to link the symbol * into the decompiler's description of how a function manipulates a particular symbol. 
+ * Multiple partial HighVariables may get associated with the same HighSymbol. The HighSymbol + * keeps a reference to the biggest HighVariable passed to this method. * @param high is the associated HighVariable */ - public void setHighVariable(HighVariable high) { - this.highVariable = high; + void setHighVariable(HighVariable high) { + if (highVariable != null) { + if (highVariable.getSize() >= high.getSize()) { + return; + } + } + highVariable = high; } /** - * Get the HighVariable associate with this symbol if any. This allows the user to go straight - * into the decompiler's function to see how the symbol gets manipulated. + * Get the HighVariable associated with this symbol if any. The symbol may have multiple + * partial HighVariables associated with it. This method returns the biggest one, which + * may not be the same size as the symbol itself. * @return the associated HighVariable or null */ public HighVariable getHighVariable() {