GP-2767 Structured pieces

This commit is contained in:
caheckman 2022-10-27 16:43:58 -04:00
parent b707c2ea6b
commit 535ac7c08d
29 changed files with 834 additions and 210 deletions

View file

@ -12,6 +12,7 @@ src/decompile/.project||GHIDRA||||END|
src/decompile/cpp/.gitignore||GHIDRA||||END| src/decompile/cpp/.gitignore||GHIDRA||||END|
src/decompile/cpp/Doxyfile||GHIDRA|||Most of this file is autogenerated by doxygen which falls under the GPL - output from GPL products are NOT GPL! - mjbell4|END| src/decompile/cpp/Doxyfile||GHIDRA|||Most of this file is autogenerated by doxygen which falls under the GPL - output from GPL products are NOT GPL! - mjbell4|END|
src/decompile/cpp/Makefile||GHIDRA||||END| src/decompile/cpp/Makefile||GHIDRA||||END|
src/decompile/datatests/concat.xml||GHIDRA||||END|
src/decompile/datatests/convert.xml||GHIDRA||||END| src/decompile/datatests/convert.xml||GHIDRA||||END|
src/decompile/datatests/deadvolatile.xml||GHIDRA||||END| src/decompile/datatests/deadvolatile.xml||GHIDRA||||END|
src/decompile/datatests/deindirect.xml||GHIDRA||||END| src/decompile/datatests/deindirect.xml||GHIDRA||||END|

View file

@ -313,7 +313,7 @@ int4 Architecture::getMinimumLanedRegisterSize(void) const
/// The default model is used whenever an explicit model is not known /// The default model is used whenever an explicit model is not known
/// or can't be determined. /// or can't be determined.
/// \param nm is the name of the model to set /// \param model is the ProtoModel object to make the default
void Architecture::setDefaultModel(ProtoModel *model) void Architecture::setDefaultModel(ProtoModel *model)
{ {

View file

@ -2830,7 +2830,7 @@ int4 ActionNameVars::apply(Funcdata &data)
/// and that it needs special printing. /// and that it needs special printing.
/// \param vn is the given Varnode /// \param vn is the given Varnode
/// \param maxref is the maximum number of references to consider before forcing explicitness /// \param maxref is the maximum number of references to consider before forcing explicitness
/// \return -1 if given Varnode should be marked explicit, the number of descendants otherwise /// \return -1 or -2 if given Varnode should be marked explicit, the number of descendants otherwise
int4 ActionMarkExplicit::baseExplicit(Varnode *vn,int4 maxref) int4 ActionMarkExplicit::baseExplicit(Varnode *vn,int4 maxref)
{ {
@ -2850,31 +2850,55 @@ int4 ActionMarkExplicit::baseExplicit(Varnode *vn,int4 maxref)
if (def->code() == CPUI_SUBPIECE) { if (def->code() == CPUI_SUBPIECE) {
Varnode *vin = def->getIn(0); Varnode *vin = def->getIn(0);
if (vin->isAddrTied()) { if (vin->isAddrTied()) {
if (vn->overlap(*vin) == def->getIn(1)->getOffset()) if (vn->overlap(*vin) == def->getIn(1)->getOffset())
return -1; // Should be explicit, will be a copymarker and not printed return -1; // Should be explicit, will be a copymarker and not printed
} }
} }
// (Part of) an addrtied location into itself is hopefully implicit PcodeOp *useOp = vn->loneDescend();
bool shouldbeimplicit = true; if (useOp == (PcodeOp *)0) return -1;
for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) { if (useOp->code() == CPUI_INT_ZEXT) {
PcodeOp *op = *iter; Varnode *vnout = useOp->getOut();
if ((op->code()!=CPUI_INT_ZEXT)&&(op->code()!=CPUI_PIECE)) { if ((!vnout->isAddrTied())||(0!=vnout->contains(*vn)))
shouldbeimplicit = false; return -1;
break; }
} else if (useOp->code() == CPUI_PIECE) {
Varnode *vnout = op->getOut(); Varnode *rootVn = PieceNode::findRoot(vn);
if ((!vnout->isAddrTied())||(0!=vnout->contains(*vn))) { if (vn == rootVn) return -1;
shouldbeimplicit = false; Datatype *ct = rootVn->getStructuredType();
break; if (ct != (Datatype *)0) {
// Getting PIECEd into a structured thing. Unless vn is a leaf, it should be implicit
if (def->code() != CPUI_PIECE) return -1;
if (vn->loneDescend() == (PcodeOp *)0) return -1;
Varnode *vn0 = def->getIn(0);
Varnode *vn1 = def->getIn(1);
Address addr = vn->getAddr();
if (!addr.getSpace()->isBigEndian())
addr = addr + vn1->getSize();
if (addr != vn0->getAddr()) return -1;
addr = vn->getAddr();
if (addr.getSpace()->isBigEndian())
addr = addr + vn0->getSize();
if (addr != vn1->getAddr()) return -1;
// If we reach here vn is a non-leaf in a CONCAT tree and should be implicit
} }
} }
if (!shouldbeimplicit) return -1; else {
return -1;
}
} }
else if (vn->isMapped()) { else if (vn->isMapped()) {
// If NOT addrtied but is still mapped, there must be either a first use (register) mapping // If NOT addrtied but is still mapped, there must be either a first use (register) mapping
// or a dynamic mapping causing the bit to be set. In either case, it should probably be explicit // or a dynamic mapping causing the bit to be set. In either case, it should probably be explicit
return -1; return -1;
} }
else if (vn->isProtoPartial() && def->code() != CPUI_PIECE) {
// Varnode is part of structure. Write to structure should be an explicit statement
return -1;
}
else if (def->code() == CPUI_PIECE && def->getIn(0)->isProtoPartial() && !vn->isProtoPartial()) {
// The base of PIECE operations building a structure
return -1;
}
if (vn->hasNoDescend()) return -1; // Must have at least one descendant if (vn->hasNoDescend()) return -1; // Must have at least one descendant
if (def->code() == CPUI_PTRSUB) { // A dereference if (def->code() == CPUI_PTRSUB) { // A dereference
@ -5069,7 +5093,6 @@ void ActionDatabase::universalAction(Architecture *conf)
actprop->addRule( new RuleShiftAnd("analysis") ); actprop->addRule( new RuleShiftAnd("analysis") );
actprop->addRule( new RuleConcatZero("analysis") ); actprop->addRule( new RuleConcatZero("analysis") );
actprop->addRule( new RuleConcatLeftShift("analysis") ); actprop->addRule( new RuleConcatLeftShift("analysis") );
actprop->addRule( new RuleEmbed("analysis") );
actprop->addRule( new RuleSubZext("analysis") ); actprop->addRule( new RuleSubZext("analysis") );
actprop->addRule( new RuleSubCancel("analysis") ); actprop->addRule( new RuleSubCancel("analysis") );
actprop->addRule( new RuleShiftSub("analysis") ); actprop->addRule( new RuleShiftSub("analysis") );
@ -5181,6 +5204,7 @@ void ActionDatabase::universalAction(Architecture *conf)
actcleanup->addRule( new RuleSubRight("cleanup") ); actcleanup->addRule( new RuleSubRight("cleanup") );
actcleanup->addRule( new RulePtrsubCharConstant("cleanup") ); actcleanup->addRule( new RulePtrsubCharConstant("cleanup") );
actcleanup->addRule( new RuleExtensionPush("cleanup") ); actcleanup->addRule( new RuleExtensionPush("cleanup") );
actcleanup->addRule( new RulePieceStructure("cleanup") );
} }
act->addAction( actcleanup ); act->addAction( actcleanup );

View file

@ -364,7 +364,7 @@ public:
return new ActionMergeRequired(getGroup()); return new ActionMergeRequired(getGroup());
} }
virtual int4 apply(Funcdata &data) { virtual int4 apply(Funcdata &data) {
data.getMerge().mergeAddrTied(); data.getMerge().mergeMarker(); return 0; } data.getMerge().mergeAddrTied(); data.getMerge().groupPartials(); data.getMerge().mergeMarker(); return 0; }
}; };
/// \brief Try to merge an op's input Varnode to its output, if they are at the same storage location. /// \brief Try to merge an op's input Varnode to its output, if they are at the same storage location.

View file

@ -305,6 +305,12 @@ public:
virtual void decode(Decoder &decoder); virtual void decode(Decoder &decoder);
}; };
/// \brief A Symbol that forces a particular \e union field at a particular point in the body of a function
///
/// This is an internal Symbol that users can create if they want to force a particular interpretation of
/// a \e union data-type. It attaches to data-flow via the DynamicHash mechanism, which also allows it to attach
/// to a specific read or write of the target Varnode. Different reads (or writes) of the same Varnode can have
/// different symbols attached. The Symbol's associated data-type will be the desired \e union to force.
class UnionFacetSymbol : public Symbol { class UnionFacetSymbol : public Symbol {
int4 fieldNum; ///< Particular field to associate with Symbol access int4 fieldNum; ///< Particular field to associate with Symbol access
public: public:

View file

@ -963,7 +963,8 @@ public:
class UnknownProtoModel : public ProtoModel { class UnknownProtoModel : public ProtoModel {
ProtoModel *placeholderModel; ///< The model whose behavior \b this adopts as a behavior placeholder ProtoModel *placeholderModel; ///< The model whose behavior \b this adopts as a behavior placeholder
public: public:
UnknownProtoModel(const string &nm,ProtoModel *placeHold) : ProtoModel(nm,*placeHold) { placeholderModel = placeHold; } UnknownProtoModel(const string &nm,ProtoModel *placeHold) : ProtoModel(nm,*placeHold) {
placeholderModel = placeHold; } ///< Constructor
ProtoModel *getPlaceholderModel(void) const { return placeholderModel; } ///< Retrieve the placeholder model ProtoModel *getPlaceholderModel(void) const { return placeholderModel; } ///< Retrieve the placeholder model
virtual bool isUnknown(void) const { return true; } virtual bool isUnknown(void) const { return true; }
}; };

View file

@ -101,6 +101,7 @@ void Funcdata::clear(void)
clearJumpTables(); clearJumpTables();
// Do not clear overrides // Do not clear overrides
heritage.clear(); heritage.clear();
covermerge.clear();
#ifdef OPACTION_DEBUG #ifdef OPACTION_DEBUG
opactdbg_count = 0; opactdbg_count = 0;
#endif #endif

View file

@ -414,6 +414,7 @@ public:
void clearDeadOps(void) { obank.destroyDead(); } ///< Delete any dead PcodeOps void clearDeadOps(void) { obank.destroyDead(); } ///< Delete any dead PcodeOps
void remapVarnode(Varnode *vn,Symbol *sym,const Address &usepoint); void remapVarnode(Varnode *vn,Symbol *sym,const Address &usepoint);
void remapDynamicVarnode(Varnode *vn,Symbol *sym,const Address &usepoint,uint8 hash); void remapDynamicVarnode(Varnode *vn,Symbol *sym,const Address &usepoint,uint8 hash);
void linkProtoPartial(Varnode *vn); ///< Find or create Symbol and a partial mapping
Symbol *linkSymbol(Varnode *vn); ///< Find or create Symbol associated with given Varnode Symbol *linkSymbol(Varnode *vn); ///< Find or create Symbol associated with given Varnode
Symbol *linkSymbolReference(Varnode *vn); ///< Discover and attach Symbol to a constant reference Symbol *linkSymbolReference(Varnode *vn); ///< Discover and attach Symbol to a constant reference
Varnode *findLinkedVarnode(SymbolEntry *entry) const; ///< Find a Varnode matching the given Symbol mapping Varnode *findLinkedVarnode(SymbolEntry *entry) const; ///< Find a Varnode matching the given Symbol mapping

View file

@ -813,7 +813,8 @@ void Funcdata::calcNZMask(void)
/// The caller can elect to update data-type information as well, where Varnodes /// The caller can elect to update data-type information as well, where Varnodes
/// and their associated HighVariables have their data-type finalized based symbols. /// and their associated HighVariables have their data-type finalized based symbols.
/// \param lm is the Symbol scope within which to search for mapped Varnodes /// \param lm is the Symbol scope within which to search for mapped Varnodes
/// \param updataDatatypes is \b true if the caller wants to update data-types /// \param updateDatatypes is \b true if the caller wants to update data-types
/// \param unmappedAliasCheck is \b true if an alias check should be performed on unmapped Varnodes
/// \return \b true if any Varnode was updated /// \return \b true if any Varnode was updated
bool Funcdata::syncVarnodesWithSymbols(const ScopeLocal *lm,bool updateDatatypes,bool unmappedAliasCheck) bool Funcdata::syncVarnodesWithSymbols(const ScopeLocal *lm,bool updateDatatypes,bool unmappedAliasCheck)
@ -883,16 +884,7 @@ Datatype *Funcdata::checkSymbolType(Varnode *vn)
if (curType->getSize() == vn->getSize()) if (curType->getSize() == vn->getSize())
return (Datatype *)0; return (Datatype *)0;
int4 curOff = (vn->getAddr().getOffset() - entry->getAddr().getOffset()) + entry->getOffset(); int4 curOff = (vn->getAddr().getOffset() - entry->getAddr().getOffset()) + entry->getOffset();
// Drill down until we hit something that isn't a containing structure return glb->types->getExactPiece(curType, curOff, vn->getSize());
while(curType != (Datatype *)0 && curType->getMetatype() == TYPE_STRUCT && curType->getSize() > vn->getSize()) {
uintb newOff;
curType = curType->getSubType(curOff, &newOff);
curOff = newOff;
}
if (curType == (Datatype *)0 || curType->getSize() <= vn->getSize() || curType->getMetatype() != TYPE_UNION)
return (Datatype *)0;
// If we hit a containing union
return glb->types->getTypePartialUnion((TypeUnion *)curType, curOff, vn->getSize());
} }
/// A Varnode overlaps the given SymbolEntry. Make sure the Varnode is part of the variable /// A Varnode overlaps the given SymbolEntry. Make sure the Varnode is part of the variable
@ -1033,6 +1025,25 @@ void Funcdata::remapDynamicVarnode(Varnode *vn,Symbol *sym,const Address &usepoi
vn->setSymbolEntry(entry); vn->setSymbolEntry(entry);
} }
/// \brief Give a piece of a CONCAT tree the same Symbol as the whole it is concatenated into
///
/// PIECE operations put the given Varnode into a larger structure. The root of that structure is
/// located with PieceNode::findRoot(), a Symbol is found or created for the name representative of the
/// root's HighVariable, and the first whole mapping of that Symbol is attached to the given Varnode.
/// Nothing is done if the HighVariable of the given Varnode already has a Symbol, if the given
/// Varnode is its own root, or if no Symbol can be linked to the root.
/// \param vn is the given Varnode piece
void Funcdata::linkProtoPartial(Varnode *vn)
{
  if (vn->getHigh()->getSymbol() != (Symbol *)0)
    return;			// The piece already carries a Symbol
  Varnode *wholeVn = PieceNode::findRoot(vn);
  if (wholeVn == vn)
    return;			// No larger Varnode was found
  Symbol *wholeSym = linkSymbol(wholeVn->getHigh()->getNameRepresentative());
  if (wholeSym != (Symbol *)0)
    vn->setSymbolEntry(wholeSym->getFirstWholeMap());
}
/// The Symbol is really attached to the Varnode's HighVariable (which must exist). /// The Symbol is really attached to the Varnode's HighVariable (which must exist).
/// The only reason a Symbol doesn't get set is if, the HighVariable /// The only reason a Symbol doesn't get set is if, the HighVariable
/// is global and there is no pre-existing Symbol. (see mapGlobals()) /// is global and there is no pre-existing Symbol. (see mapGlobals())
@ -1041,6 +1052,8 @@ void Funcdata::remapDynamicVarnode(Varnode *vn,Symbol *sym,const Address &usepoi
Symbol *Funcdata::linkSymbol(Varnode *vn) Symbol *Funcdata::linkSymbol(Varnode *vn)
{ {
if (vn->isProtoPartial())
linkProtoPartial(vn);
HighVariable *high = vn->getHigh(); HighVariable *high = vn->getHigh();
SymbolEntry *entry; SymbolEntry *entry;
uint4 fl = 0; uint4 fl = 0;

View file

@ -112,9 +112,18 @@ bool Merge::mergeTestRequired(HighVariable *high_out,HighVariable *high_in)
return false; // Map to different parts of same symbol return false; // Map to different parts of same symbol
} }
// Currently don't allow merging of variables that are in separate overlapping collections if (high_out->piece != (VariablePiece *)0 || high_in->piece != (VariablePiece *)0) {
if (high_out->piece != (VariablePiece *)0 && high_in->piece != (VariablePiece *)0) // Currently don't allow merging of variables that are in separate overlapping collections
return false; if (high_out->piece != (VariablePiece *)0 && high_in->piece != (VariablePiece *)0)
return false;
if (symbolIn != symbolOut) { // If we know symbols are involved, and not both the same symbol
// Treat piece as if it were a separate symbol
if (symbolIn != (Symbol *)0 && high_out->piece != (VariablePiece *)0)
return false; // effectively different symbols
if (symbolOut != (Symbol *)0 && high_in->piece != (VariablePiece *)0)
return false; // effectively different symbols
}
}
return true; return true;
} }
@ -184,7 +193,19 @@ bool Merge::mergeTestSpeculative(HighVariable *high_out,HighVariable *high_in)
return true; return true;
} }
/// \brief A test if the given Varnode can ever be merged /// \brief Test if the given Varnode that \e must be merged, \e can be merged.
///
/// If it cannot be merged, throw an exception.
/// \param vn is the given Varnode
void Merge::mergeTestMust(Varnode *vn)
{
  // A Varnode without a cover, or one that is implied, cannot take part in a forced merge
  if (!vn->hasCover() || vn->isImplied())
    throw LowlevelError("Cannot force merge of range");
}
/// \brief Test if the given Varnode can ever be merged.
/// ///
/// Some Varnodes (constants, annotations, implied, spacebase) are never merged with another /// Some Varnodes (constants, annotations, implied, spacebase) are never merged with another
/// Varnode. /// Varnode.
@ -196,6 +217,7 @@ bool Merge::mergeTestBasic(Varnode *vn)
if (vn == (Varnode *)0) return false; if (vn == (Varnode *)0) return false;
if (!vn->hasCover()) return false; if (!vn->hasCover()) return false;
if (vn->isImplied()) return false; if (vn->isImplied()) return false;
if (vn->isProtoPartial()) return false;
if (vn->isSpacebase()) return false; if (vn->isSpacebase()) return false;
return true; return true;
} }
@ -242,18 +264,12 @@ void Merge::mergeRangeMust(VarnodeLocSet::const_iterator startiter,VarnodeLocSet
Varnode *vn; Varnode *vn;
vn = *startiter++; vn = *startiter++;
if (!mergeTestBasic(vn)) { mergeTestMust(vn);
if (!vn->isSpacebase())
throw LowlevelError("Cannot force merge of range");
}
high = vn->getHigh(); high = vn->getHigh();
for(;startiter!=enditer;++startiter) { for(;startiter!=enditer;++startiter) {
vn = *startiter; vn = *startiter;
if (vn->getHigh() == high) continue; if (vn->getHigh() == high) continue;
if (!mergeTestBasic(vn)) { mergeTestMust(vn);
if (!vn->isSpacebase())
throw LowlevelError("Cannot force merge of range");
}
if (!merge(high,vn->getHigh(),false)) if (!merge(high,vn->getHigh(),false))
throw LowlevelError("Forced merge caused intersection"); throw LowlevelError("Forced merge caused intersection");
} }
@ -933,6 +949,19 @@ void Merge::mergeMultiEntry(void)
} }
} }
/// \brief Run through CONCAT tree roots and group each tree
///
void Merge::groupPartials(void)
{
for(int4 i=0;i<protoPartial.size();++i) {
PcodeOp *op = protoPartial[i];
if (op->isDead()) continue;
if (!op->isPartialRoot()) continue;
groupPartialRoot(op->getOut());
}
}
/// \brief Speculatively merge Varnodes that are input/output to the same p-code op /// \brief Speculatively merge Varnodes that are input/output to the same p-code op
/// ///
/// If a single p-code op has an input and output HighVariable that share the same data-type, /// If a single p-code op has an input and output HighVariable that share the same data-type,
@ -1324,6 +1353,37 @@ void Merge::processHighRedundantCopy(HighVariable *high)
} }
} }
/// \brief Group the different nodes of a CONCAT tree into a VariableGroup
///
/// The tree is rebuilt from the root Varnode using PieceNode::gatherPieces(). If the root and
/// every node of the tree is still the only instance of its HighVariable, and every node is still
/// marked as isProtoPartial(), each node's HighVariable is grouped with the root's HighVariable,
/// formally labeling the Varnodes as overlapping pieces of the same variable.
/// If any of these checks fail, no grouping is performed at all.
/// \param vn is the root Varnode
void Merge::groupPartialRoot(Varnode *vn)
{
  HighVariable *rootHigh = vn->getHigh();
  if (rootHigh->numInstances() != 1) return;

  // Offsets in the tree are relative to the start of the root's symbol mapping, if it has one
  SymbolEntry *rootEntry = vn->getSymbolEntry();
  int4 baseOffset = (rootEntry == (SymbolEntry *)0) ? 0 : rootEntry->getOffset();

  vector<PieceNode> pieces;
  PieceNode::gatherPieces(pieces, vn, vn->getDef(), baseOffset);

  // First pass: make sure each node is still marked and hasn't merged with anything else
  for(PieceNode &node : pieces) {
    Varnode *pieceVn = node.getVarnode();
    if (!pieceVn->isProtoPartial() || pieceVn->getHigh()->numInstances() != 1)
      return;
  }
  // Second pass: every node passed, so attach each one to the root's group
  for(PieceNode &node : pieces) {
    HighVariable *pieceHigh = node.getVarnode()->getHigh();
    pieceHigh->groupWith(node.getTypeOffset() - baseOffset,rootHigh);
  }
}
/// \brief Try to reduce/eliminate COPYs produced by the merge trimming process /// \brief Try to reduce/eliminate COPYs produced by the merge trimming process
/// ///
/// In order to force merging of certain Varnodes, extra COPY operations may be inserted /// In order to force merging of certain Varnodes, extra COPY operations may be inserted
@ -1396,12 +1456,19 @@ void Merge::markInternalCopies(void)
h1 = op->getOut()->getHigh(); h1 = op->getOut()->getHigh();
h2 = op->getIn(0)->getHigh(); h2 = op->getIn(0)->getHigh();
h3 = op->getIn(1)->getHigh(); h3 = op->getIn(1)->getHigh();
if (!h1->isAddrTied()) break; if (!h2->isPartial()) break;
if (!h2->isAddrTied()) break; if (!h3->isPartial()) break;
if (!h3->isAddrTied()) break; v2 = h2->getPartial();
v1 = h1->getTiedVarnode(); v3 = h3->getPartial();
v2 = h2->getTiedVarnode(); if (v2->isAddrTied()) {
v3 = h3->getTiedVarnode(); if (!h1->isAddrTied()) break;
v1 = h1->getTiedVarnode();
}
else {
if (op->getIn(0) != v2) break;
if (op->getIn(1) != v3) break;
v1 = op->getOut();
}
if (v3->overlap(*v1) != 0) break; if (v3->overlap(*v1) != 0) break;
if (v2->overlap(*v1) != v3->getSize()) break; if (v2->overlap(*v1) != v3->getSize()) break;
data.opMarkNonPrinting(op); data.opMarkNonPrinting(op);
@ -1409,10 +1476,16 @@ void Merge::markInternalCopies(void)
case CPUI_SUBPIECE: case CPUI_SUBPIECE:
h1 = op->getOut()->getHigh(); h1 = op->getOut()->getHigh();
h2 = op->getIn(0)->getHigh(); h2 = op->getIn(0)->getHigh();
if (!h1->isAddrTied()) break; if (!h1->isPartial()) break;
if (!h2->isAddrTied()) break; v1 = h1->getPartial();
v1 = h1->getTiedVarnode(); if (v1->isAddrTied()) {
v2 = h2->getTiedVarnode(); if (!h2->isAddrTied()) break;
v2 = h2->getTiedVarnode();
}
else {
if (!h1->sameGroup(h2)) break;
v2 = op->getIn(0);
}
val = op->getIn(1)->getOffset(); val = op->getIn(1)->getOffset();
if (v1->overlap(*v2) != val) break; if (v1->overlap(*v2) != val) break;
data.opMarkNonPrinting(op); data.opMarkNonPrinting(op);
@ -1432,6 +1505,17 @@ void Merge::markInternalCopies(void)
#endif #endif
} }
/// \brief Register an unmapped CONCAT stack with the merge process
///
/// The given Varnode must be the root of a tree of CPUI_PIECE operations as produced by
/// PieceNode::gatherPieces. The op defining the root is remembered in \b protoPartial so that
/// the tree can later be grouped together into a single variable.
/// \param vn is the given root Varnode
void Merge::registerProtoPartialRoot(Varnode *vn)
{
  PcodeOp *rootOp = vn->getDef();	// The op producing the root Varnode
  protoPartial.push_back(rootOp);
}
/// \brief Translate any intersection tests for \e high2 into tests for \e high1 /// \brief Translate any intersection tests for \e high2 into tests for \e high1
/// ///
/// The two variables will be merged and \e high2, as an object, will be freed. /// The two variables will be merged and \e high2, as an object, will be freed.
@ -1548,6 +1632,17 @@ void Merge::purgeHigh(HighVariable *high)
highedgemap.erase(iterfirst,iterlast); highedgemap.erase(iterfirst,iterlast);
} }
/// \brief Clear any cached data from the last merge process
///
/// Empties the cache of intersection tests, the list of COPY ops inserted to facilitate
/// merges, and the list of registered CONCAT tree roots.
void Merge::clear(void)
{
  protoPartial.clear();
  copyTrims.clear();
  highedgemap.clear();
}
/// \brief Test the intersection of two HighVariables and cache the result /// \brief Test the intersection of two HighVariables and cache the result
/// ///
/// If the Covers of the two variables intersect, this routine returns \b true. To avoid /// If the Covers of the two variables intersect, this routine returns \b true. To avoid

View file

@ -81,6 +81,7 @@ class Merge {
Funcdata &data; ///< The function containing the Varnodes to be merged Funcdata &data; ///< The function containing the Varnodes to be merged
map<HighEdge,bool> highedgemap; ///< A cache of intersection tests, sorted by HighVariable pair map<HighEdge,bool> highedgemap; ///< A cache of intersection tests, sorted by HighVariable pair
vector<PcodeOp *> copyTrims; ///< COPY ops inserted to facilitate merges vector<PcodeOp *> copyTrims; ///< COPY ops inserted to facilitate merges
vector<PcodeOp *> protoPartial; ///< Roots of unmapped CONCAT trees
bool updateHigh(HighVariable *a); ///< Make sure given HighVariable's Cover is up-to-date bool updateHigh(HighVariable *a); ///< Make sure given HighVariable's Cover is up-to-date
void purgeHigh(HighVariable *high); ///< Remove cached intersection tests for a given HighVariable void purgeHigh(HighVariable *high); ///< Remove cached intersection tests for a given HighVariable
static void gatherBlockVarnodes(HighVariable *a,int4 blk,const Cover &cover,vector<Varnode *> &res); static void gatherBlockVarnodes(HighVariable *a,int4 blk,const Cover &cover,vector<Varnode *> &res);
@ -89,6 +90,7 @@ class Merge {
static bool mergeTestRequired(HighVariable *high_out,HighVariable *high_in); static bool mergeTestRequired(HighVariable *high_out,HighVariable *high_in);
static bool mergeTestAdjacent(HighVariable *high_out,HighVariable *high_in); static bool mergeTestAdjacent(HighVariable *high_out,HighVariable *high_in);
static bool mergeTestSpeculative(HighVariable *high_out,HighVariable *high_in); static bool mergeTestSpeculative(HighVariable *high_out,HighVariable *high_in);
static void mergeTestMust(Varnode *vn);
static bool mergeTestBasic(Varnode *vn); static bool mergeTestBasic(Varnode *vn);
static void findSingleCopy(HighVariable *high,vector<Varnode *> &singlelist); static void findSingleCopy(HighVariable *high,vector<Varnode *> &singlelist);
static bool compareHighByBlock(const HighVariable *a,const HighVariable *b); static bool compareHighByBlock(const HighVariable *a,const HighVariable *b);
@ -116,8 +118,10 @@ class Merge {
void markRedundantCopies(HighVariable *high,vector<PcodeOp *> &copy,int4 pos,int4 size); void markRedundantCopies(HighVariable *high,vector<PcodeOp *> &copy,int4 pos,int4 size);
void processHighDominantCopy(HighVariable *high); void processHighDominantCopy(HighVariable *high);
void processHighRedundantCopy(HighVariable *high); void processHighRedundantCopy(HighVariable *high);
void groupPartialRoot(Varnode *vn);
public: public:
Merge(Funcdata &fd) : data(fd) {} ///< Construct given a specific function Merge(Funcdata &fd) : data(fd) {} ///< Construct given a specific function
void clear(void);
bool intersection(HighVariable *a,HighVariable *b); bool intersection(HighVariable *a,HighVariable *b);
bool inflateTest(Varnode *a,HighVariable *high); bool inflateTest(Varnode *a,HighVariable *high);
void inflate(Varnode *a,HighVariable *high); void inflate(Varnode *a,HighVariable *high);
@ -127,11 +131,13 @@ public:
void mergeByDatatype(VarnodeLocSet::const_iterator startiter,VarnodeLocSet::const_iterator enditer); void mergeByDatatype(VarnodeLocSet::const_iterator startiter,VarnodeLocSet::const_iterator enditer);
void mergeAddrTied(void); void mergeAddrTied(void);
void mergeMarker(void); void mergeMarker(void);
void groupPartials(void);
void mergeAdjacent(void); void mergeAdjacent(void);
void mergeMultiEntry(void); void mergeMultiEntry(void);
bool hideShadows(HighVariable *high); bool hideShadows(HighVariable *high);
void processCopyTrims(void); void processCopyTrims(void);
void markInternalCopies(void); void markInternalCopies(void);
void registerProtoPartialRoot(Varnode *vn);
#ifdef MERGEMULTI_DEBUG #ifdef MERGEMULTI_DEBUG
void verifyHighCovers(void); void verifyHighCovers(void);
#endif #endif

View file

@ -750,6 +750,90 @@ int4 PcodeOp::compareOrder(const PcodeOp *bop) const
return 0; return 0;
} }
/// \brief Determine if a Varnode is a leaf within the CONCAT tree rooted at the given Varnode
///
/// The CONCAT tree is the maximal set of Varnodes that are all inputs to CPUI_PIECE operations,
/// with no other uses, and that all ultimately flow to the root Varnode. The test Varnode is
/// considered a leaf if any of the following hold:
///   - it is mapped, but to a different SymbolEntry than the root
///   - it is not written, or is written by something other than a CPUI_PIECE
///   - it does not have exactly one descendant
///   - it is address tied, but its address does not match the root address plus the given offset
///
/// \param rootVn is the given root of the CONCAT tree
/// \param vn is the Varnode to test as a leaf
/// \param typeOffset is byte offset of the test Varnode within fully concatenated value
/// \return \b true if the test Varnode is a leaf of the tree
bool PieceNode::isLeaf(Varnode *rootVn,Varnode *vn,int4 typeOffset)
{
  if (vn->isMapped() && rootVn->getSymbolEntry() != vn->getSymbolEntry())
    return true;
  if (!vn->isWritten() || vn->getDef()->code() != CPUI_PIECE)
    return true;
  if (vn->loneDescend() == (PcodeOp *)0)
    return true;
  if (!vn->isAddrTied())
    return false;
  // An address tied Varnode is only internal to the tree if it sits exactly where the tree expects it
  return (vn->getAddr() != rootVn->getAddr() + typeOffset);
}
/// \brief Find the root of the CONCAT tree containing the given Varnode
///
/// Find the root of the CONCAT tree of Varnodes marked either isProtoPartial() or isAddrTied().
/// Starting at the given Varnode, and for as long as the current Varnode carries one of those marks,
/// follow a descendant CPUI_PIECE whose output address lines up with the Varnode in the slot it
/// occupies. If more than one PIECE lines up, the earliest one in execution order is followed.
/// The walk ends at the first Varnode that is unmarked or has no such PIECE; this is the maximal Varnode
/// containing the given Varnode (as storage), with a backward path to it through PIECE operations.
/// All Varnodes along the path, except possibly the root, are marked as isProtoPartial() or isAddrTied().
/// \param vn is the given Varnode
/// \return the root of the CONCAT tree
Varnode *PieceNode::findRoot(Varnode *vn)
{
  while(vn->isProtoPartial() || vn->isAddrTied()) {
    list<PcodeOp *>::const_iterator iter = vn->beginDescend();
    PcodeOp *pieceOp = (PcodeOp *)0;
    while(iter != vn->endDescend()) {
      PcodeOp *op = *iter;
      ++iter;
      if (op->code() != CPUI_PIECE) continue;
      int4 slot = op->getSlot(vn);
      // Calculate the address the Varnode must have in order to line up as this slot of the output
      Address addr = op->getOut()->getAddr();
      if (addr.getSpace()->isBigEndian() == (slot == 1))
        addr = addr + op->getIn(1-slot)->getSize();
      if (addr == vn->getAddr()) {
        if (pieceOp != (PcodeOp *)0) {		// If there is more than one valid PIECE
          // compareOrder() is a three-way comparison returning an int4, so the sign must be
          // tested explicitly; treating any non-zero result as "earlier" would also replace
          // the current choice with an op that executes later.
          if (op->compareOrder(pieceOp) < 0)	// Attach this to earliest one
            pieceOp = op;
        }
        else
          pieceOp = op;
      }
    }
    if (pieceOp == (PcodeOp *)0)
      break;				// No PIECE lines up, the current Varnode is the root
    vn = pieceOp->getOut();
  }
  return vn;
}
}
/// \brief Build the CONCAT tree rooted at the given Varnode
///
/// Recursively walk backwards from the root through CPUI_PIECE operations, stopping if a Varnode
/// is deemed a leaf by isLeaf(). A PieceNode is appended to the list for both inputs of each PIECE
/// visited, recording the op, the input slot, the byte offset of the input within the data-type
/// attached to the root, and whether the input is a leaf.
/// \param stack holds the markup for each node of the tree
/// \param rootVn is the given root of the tree
/// \param op is the current PIECE op to explore as part of the tree
/// \param baseOffset is the offset associated with the output of the current PIECE op
void PieceNode::gatherPieces(vector<PieceNode> &stack,Varnode *rootVn,PcodeOp *op,int4 baseOffset)
{
  for(int4 slot=0;slot<2;++slot) {
    Varnode *pieceVn = op->getIn(slot);
    int4 pieceOffset = baseOffset;
    // Depending on endianness, one of the two inputs sits after the other input in memory
    if (rootVn->getSpace()->isBigEndian() == (slot==1))
      pieceOffset += op->getIn(1-slot)->getSize();
    bool leafNode = isLeaf(rootVn,pieceVn,pieceOffset);
    stack.emplace_back(op,slot,pieceOffset,leafNode);
    if (leafNode) continue;
    gatherPieces(stack,rootVn,pieceVn->getDef(),pieceOffset);	// Descend into the nested PIECE
  }
}
}
/// Add the PcodeOp to the list of ops with the same op-code. Currently only certain /// Add the PcodeOp to the list of ops with the same op-code. Currently only certain
/// op-codes have a dedicated list. /// op-codes have a dedicated list.
/// \param op is the given PcodeOp /// \param op is the given PcodeOp

View file

@ -111,7 +111,8 @@ public:
incidental_copy = 0x10, ///< Treat this as \e incidental for parameter recovery algorithms incidental_copy = 0x10, ///< Treat this as \e incidental for parameter recovery algorithms
is_cpool_transformed = 0x20, ///< Have we checked for cpool transforms is_cpool_transformed = 0x20, ///< Have we checked for cpool transforms
stop_type_propagation = 0x40, ///< Stop data-type propagation into output from descendants stop_type_propagation = 0x40, ///< Stop data-type propagation into output from descendants
hold_output = 0x80 ///< Output varnode (of call) should not be removed if it is unread hold_output = 0x80, ///< Output varnode (of call) should not be removed if it is unread
concat_root = 0x100 ///< Output of \b this is root of a CONCAT tree
}; };
private: private:
TypeOp *opcode; ///< Pointer to class providing behavioral details of the operation TypeOp *opcode; ///< Pointer to class providing behavioral details of the operation
@ -212,6 +213,8 @@ public:
void clearStopTypePropagation(void) { addlflags &= ~stop_type_propagation; } ///< Allow data-type propagation from below void clearStopTypePropagation(void) { addlflags &= ~stop_type_propagation; } ///< Allow data-type propagation from below
bool holdOutput(void) const { return ((addlflags&hold_output)!=0); } ///< If \b true, do not remove output as dead code bool holdOutput(void) const { return ((addlflags&hold_output)!=0); } ///< If \b true, do not remove output as dead code
void setHoldOutput(void) { addlflags |= hold_output; } ///< Prevent output from being removed as dead code void setHoldOutput(void) { addlflags |= hold_output; } ///< Prevent output from being removed as dead code
bool isPartialRoot(void) const { return (addlflags & concat_root) != 0; } ///< Return \b true if the output of \b this is the root of a CONCAT tree
void setPartialRoot(void) { addlflags = addlflags | concat_root; } ///< Flag \b this as the root of a CONCAT tree
bool stopsCopyPropagation(void) const { return ((flags&no_copy_propagation)!=0); } ///< Does \b this allow COPY propagation bool stopsCopyPropagation(void) const { return ((flags&no_copy_propagation)!=0); } ///< Does \b this allow COPY propagation
void setStopCopyPropagation(void) { flags |= no_copy_propagation; } ///< Stop COPY propagation through inputs void setStopCopyPropagation(void) { flags |= no_copy_propagation; } ///< Stop COPY propagation through inputs
/// \brief Return \b true if this LOADs or STOREs from a dynamic \e spacebase pointer /// \brief Return \b true if this LOADs or STOREs from a dynamic \e spacebase pointer
@ -251,6 +254,30 @@ struct PcodeOpNode {
PcodeOpNode(PcodeOp *o,int4 s) { op = o; slot = s; } ///< Constructor PcodeOpNode(PcodeOp *o,int4 s) { op = o; slot = s; } ///< Constructor
}; };
/// \brief One element of a tree built out of CPUI_PIECE operations
///
/// When a group of Varnodes is concatenated into a larger structured value, the PcodeOps (and Varnodes)
/// taking part in that data-flow are gathered as PieceNode objects so they can be viewed as a unit.
/// In a properly formed tree, the addresses of the input Varnodes and of the output Varnode of each
/// CPUI_PIECE line up according to the concatenation.  An internal Varnode can have only one descendant,
/// whereas the leaf and root Varnodes may each have several.
class PieceNode {
  PcodeOp *pieceOp;	///< The CPUI_PIECE operation that reads this particular piece
  int4 slot;		///< Input slot of the piece within the CPUI_PIECE
  int4 typeOffset;	///< Byte offset of the piece within the structure/array
  bool leaf;		///< \b true if the piece is a leaf of the tree
public:
  /// \brief Construct a node from the reading op, its input slot, the byte offset and the leaf status
  PieceNode(PcodeOp *op,int4 sl,int4 off,bool l)
    : pieceOp(op), slot(sl), typeOffset(off), leaf(l) {}
  PcodeOp *getOp(void) const { return pieceOp; }	///< Get the PcodeOp reading \b this piece
  int4 getSlot(void) const { return slot; }		///< Get the input slot associated with \b this node
  Varnode *getVarnode(void) const { return pieceOp->getIn(slot); }	///< Get the Varnode representing \b this piece
  int4 getTypeOffset(void) const { return typeOffset; }	///< Get the byte offset of \b this node into the data-type
  bool isLeaf(void) const { return leaf; }		///< Return \b true if \b this node is a leaf of the tree structure
  static bool isLeaf(Varnode *rootVn,Varnode *vn,int4 typeOffset);
  static Varnode *findRoot(Varnode *vn);
  static void gatherPieces(vector<PieceNode> &stack,Varnode *rootVn,PcodeOp *op,int4 baseOffset);
};
/// A map from sequence number (SeqNum) to PcodeOp /// A map from sequence number (SeqNum) to PcodeOp
typedef map<SeqNum,PcodeOp *> PcodeOpTree; typedef map<SeqNum,PcodeOp *> PcodeOpTree;

View file

@ -758,14 +758,29 @@ void PrintC::opSubpiece(const PcodeOp *op)
{ {
if (op->doesSpecialPrinting()) { // Special printing means it is a field extraction if (op->doesSpecialPrinting()) { // Special printing means it is a field extraction
int4 offset; const Varnode *vn = op->getIn(0);
Datatype *ct; Datatype *ct = vn->getHighTypeReadFacing(op);
const TypeField *field = TypeOpSubpiece::testExtraction(true, op, ct, offset); if (ct->isPieceStructured()) {
if (field != (const TypeField *)0 && offset == 0) { int4 offset;
pushOp(&object_member,op); int4 byteOff = TypeOpSubpiece::computeByteOffsetForComposite(op);
pushVn(op->getIn(0), op, mods); const TypeField *field = ct->findTruncation(byteOff,op->getOut()->getSize(),op,1,offset); // Use artificial slot
pushAtom(Atom(field->name,fieldtoken,EmitMarkup::no_color,ct,field->ident,op)); if (field != (const TypeField*)0 && offset == 0) { // A formal structure field
return; pushOp(&object_member,op);
pushVn(vn,op,mods);
pushAtom(Atom(field->name,fieldtoken,EmitMarkup::no_color,ct,field->ident,op));
return;
}
else if (vn->isExplicit() && vn->getHigh()->getSymbolOffset() == -1) { // An explicit, entire, structured object
Symbol *sym = vn->getHigh()->getSymbol();
if (sym != (Symbol *)0) {
int4 sz = op->getOut()->getSize();
int4 off = (int4)op->getIn(1)->getOffset();
off = vn->getSpace()->isBigEndian() ? vn->getSize() - (sz + off) : off;
pushPartialSymbol(sym, off, sz, vn, op, -1);
return;
}
}
// Fall thru to functional printing
} }
} }
if (castStrategy->isSubpieceCast(op->getOut()->getHighTypeDefFacing(), if (castStrategy->isSubpieceCast(op->getOut()->getHighTypeDefFacing(),
@ -1930,13 +1945,9 @@ void PrintC::pushPartialSymbol(const Symbol *sym,int4 off,int4 sz,
stack.emplace_back(); stack.emplace_back();
PartialSymbolEntry &entry(stack.back()); PartialSymbolEntry &entry(stack.back());
entry.token = &object_member; entry.token = &object_member;
ostringstream s;
if (sz == 0) if (sz == 0)
sz = ct->getSize() - off; sz = ct->getSize() - off;
// Special notation for subpiece which is neither entry.fieldname = unnamedField(off, sz); // If nothing else works, generate artificial field name
// array entry nor struct field
s << '_' << dec << off << '_' << sz << '_';
entry.fieldname = s.str();
entry.field = (const TypeField *)0; entry.field = (const TypeField *)0;
entry.hilite = EmitMarkup::no_color; entry.hilite = EmitMarkup::no_color;
ct = (Datatype *)0; ct = (Datatype *)0;

View file

@ -693,6 +693,19 @@ void PrintLanguage::setIntegerFormat(const string &nm)
mods |= mod; // Set any new force mods |= mod; // Set any new force
} }
/// Produce a placeholder name for a value extracted from a structured data-type when no natural
/// field name is available.  The name is built only from the byte offset of the value within the
/// data-type and its size in bytes, in the form `_<off>_<size>_`.
/// \param off is the byte offset into the data-type
/// \param size is the number of bytes in the extracted value
/// \return a string describing the artificial field
string PrintLanguage::unnamedField(int4 off,int4 size)

{
  ostringstream nameBuf;
  nameBuf << '_';
  nameBuf << dec << off;	// Both numbers are always written in decimal
  nameBuf << '_';
  nameBuf << size;
  nameBuf << '_';
  return nameBuf.str();
}
/// Count '0' and '9' digits base 10. Count '0' and 'f' digits base 16. /// Count '0' and '9' digits base 10. Count '0' and 'f' digits base 16.
/// The highest count is the preferred base. /// The highest count is the preferred base.
/// \param val is the given integer /// \param val is the given integer

View file

@ -554,6 +554,7 @@ public:
virtual void opInsertOp(const PcodeOp *op)=0; ///< Emit an INSERT operator virtual void opInsertOp(const PcodeOp *op)=0; ///< Emit an INSERT operator
virtual void opExtractOp(const PcodeOp *op)=0; ///< Emit an EXTRACT operator virtual void opExtractOp(const PcodeOp *op)=0; ///< Emit an EXTRACT operator
virtual void opPopcountOp(const PcodeOp *op)=0; ///< Emit a POPCOUNT operator virtual void opPopcountOp(const PcodeOp *op)=0; ///< Emit a POPCOUNT operator
virtual string unnamedField(int4 off,int4 size); ///< Generate an artificial field name
static int4 mostNaturalBase(uintb val); ///< Determine the most natural base for an integer static int4 mostNaturalBase(uintb val); ///< Determine the most natural base for an integer
static void formatBinary(ostream &s,uintb val); ///< Print a number in binary form static void formatBinary(ostream &s,uintb val); ///< Print a number in binary form

View file

@ -4964,89 +4964,6 @@ int4 RuleHumptyOr::applyOp(PcodeOp *op,Funcdata &data)
return 1; return 1;
} }
/// \class RuleEmbed
/// \brief Simplify PIECE intended as embedding: `concat(V, sub(W,0)) => (W & 0xff) | (zext(V) << 8)`
///
/// The truncated value W keeps its low bytes and the other PIECE input V is zero extended and
/// shifted into the high bytes.
///
/// There is a complementary form:
/// `concat(sub(V,c),W) => (V & 0xff00) | zext(W)`
void RuleEmbed::getOpList(vector<uint4> &oplist) const
{
// The rule is only attempted on PIECE operations
oplist.push_back(CPUI_PIECE);
}
/// Look for a PIECE where one input is a SUBPIECE of a Varnode \e a having the same size as the
/// PIECE output.  If found, the PIECE is rewritten in place as an INT_OR of a masked copy of \e a
/// and a zero extension of the other input (shifted left when that input is the high piece).
/// \param op is the PIECE operation
/// \param data is the function being transformed
/// \return 1 if the PIECE was rewritten, 0 otherwise
int4 RuleEmbed::applyOp(PcodeOp *op,Funcdata &data)
{
// Beware of humpty dumpty
Varnode *a,*subout,*x;
PcodeOp *subop;
int4 i;
if (op->getOut()->getSize() > sizeof(uintb)) return 0; // FIXME: Can't exceed uintb precision
// Try each of the two PIECE inputs as the SUBPIECE side of the form
for(i=0;i<2;++i) {
subout = op->getIn(i);
if (!subout->isWritten()) continue;
subop = subout->getDef();
if (subop->code() != CPUI_SUBPIECE) continue;
int4 c = subop->getIn(1)->getOffset(); // Constant truncation parameter of the SUBPIECE
a = subop->getIn(0); // The Varnode being truncated
if (a->isFree()) continue;
if (a->getSize() != op->getOut()->getSize()) continue; // Truncated Varnode must be as big as the PIECE output
x = op->getIn(1-i); // The other input, which is being embedded
if (x->isFree()) continue;
if (i==0) {
if (subout->getSize()+c != a->getSize()) continue; // Not hi SUB
}
else {
if (c != 0) continue; // Not lo SUB
}
if (x->isWritten()) { // Check for humptydumpty
// Skip if the other input is the complementary SUBPIECE of the same Varnode
PcodeOp *othersub = x->getDef();
if (othersub->code() == CPUI_SUBPIECE) {
if (othersub->getIn(0)==a) {
int4 d = othersub->getIn(1)->getOffset();
if ((i==0)&&(d==0)) continue;
if ((i==1)&&(d==subout->getSize())) continue;
}
}
}
// Mask selecting the bytes of -a- that the SUBPIECE kept, moved to their position within -a-
uintb mask = calc_mask(subout->getSize());
mask <<= 8*c;
// Construct mask
PcodeOp *andop = data.newOp(2,op->getAddr());
data.opSetOpcode(andop,CPUI_INT_AND);
data.newUniqueOut(a->getSize(),andop);
data.opSetInput(andop,a,0);
data.opSetInput(andop,data.newConstant(a->getSize(),mask),1);
data.opInsertBefore(andop,op);
// Extend x
PcodeOp *extop = data.newOp(1,op->getAddr());
data.opSetOpcode(extop,CPUI_INT_ZEXT);
data.newUniqueOut(a->getSize(),extop);
data.opSetInput(extop,x,0);
data.opInsertBefore(extop,op);
x = extop->getOut();
if (i==1) { // Shift x into position
// The shift amount is the bit size of the low SUBPIECE
PcodeOp *shiftop = data.newOp(2,op->getAddr());
data.opSetOpcode(shiftop,CPUI_INT_LEFT);
data.newUniqueOut(a->getSize(),shiftop);
data.opSetInput(shiftop,x,0);
data.opSetInput(shiftop,data.newConstant(4,8*subout->getSize()),1);
data.opInsertBefore(shiftop,op);
x = shiftop->getOut();
}
// Reuse the original op: it becomes the OR combining the masked value with the extended piece
data.opSetOpcode(op,CPUI_INT_OR);
data.opSetInput(op,andop->getOut(),0);
data.opSetInput(op,x,1);
return 1;
}
return 0;
}
/// \class RuleSwitchSingle /// \class RuleSwitchSingle
/// \brief Convert BRANCHIND with only one computed destination to a BRANCH /// \brief Convert BRANCHIND with only one computed destination to a BRANCH
void RuleSwitchSingle::getOpList(vector<uint4> &oplist) const void RuleSwitchSingle::getOpList(vector<uint4> &oplist) const
@ -6696,12 +6613,9 @@ void RuleSubRight::getOpList(vector<uint4> &oplist) const
int4 RuleSubRight::applyOp(PcodeOp *op,Funcdata &data) int4 RuleSubRight::applyOp(PcodeOp *op,Funcdata &data)
{ {
Datatype *parent;
int4 offset;
if (op->doesSpecialPrinting()) if (op->doesSpecialPrinting())
return 0; return 0;
if (TypeOpSubpiece::testExtraction(false, op, parent, offset) != (const TypeField *)0) { if (op->getIn(0)->getTypeReadFacing(op)->isPieceStructured()) {
data.opMarkSpecialPrint(op); // Print this as a field extraction data.opMarkSpecialPrint(op); // Print this as a field extraction
return 0; return 0;
} }
@ -6899,6 +6813,251 @@ int4 RuleExtensionPush::applyOp(PcodeOp *op,Funcdata &data)
return 1; return 1;
} }
/// \brief Find the base structure or array data-type that the given Varnode is part of
///
/// If the Varnode's data-type is already a structure or array, return that data-type.
/// If the Varnode is part of a known symbol, use that data-type.
/// The starting byte offset of the given Varnode within the structure or array is passed back.
/// Null is returned if the Varnode has no structured data-type, if it is only part of the
/// structured data-type but has no symbol entry or does not overlap the symbol's storage, or if it
/// exactly matches a concrete sub-type that is not itself structured.  The passed back offset is
/// only meaningful when a non-null data-type is returned.
/// \param vn is the given Varnode
/// \param baseOffset is used to pass back the starting offset
/// \return the structure or array data-type, or null otherwise
Datatype *RulePieceStructure::determineDatatype(Varnode *vn,int4 &baseOffset)

{
  Datatype *ct = vn->getStructuredType();
  if (ct == (Datatype *)0)
    return ct;

  if (ct->getSize() != vn->getSize()) {			// vn is a partial
    SymbolEntry *entry = vn->getSymbolEntry();
    if (entry == (SymbolEntry *)0)
      return (Datatype *)0;				// Without a symbol entry there is no way to locate vn in the data-type
    baseOffset = vn->getAddr().overlap(0,entry->getAddr(),ct->getSize());
    if (baseOffset < 0)
      return (Datatype*)0;
    baseOffset += entry->getOffset();
    // Find concrete sub-type that matches the size of the Varnode
    Datatype *subType = ct;
    uintb subOffset = baseOffset;
    while(subType != (Datatype *)0 && subType->getSize() > vn->getSize()) {
      subType = subType->getSubType(subOffset, &subOffset);
    }
    if (subType != (Datatype *)0 && subType->getSize() == vn->getSize() && subOffset == 0) {
      // If there is a concrete sub-type
      if (!subType->isPieceStructured())	// and the concrete sub-type is not a structured type itself
	return (Datatype *)0;		// don't split out CONCAT forming the sub-type
    }
  }
  else {
    baseOffset = 0;
  }
  return ct;
}
/// \brief Decide whether a byte range within a structured data-type covers more than one element
///
/// The data-type is descended one component at a time, following the start of the range.
/// The range is considered to span multiple elements if a component cannot be determined or if
/// the range extends past the end of the component containing its start.  The answer is \b false
/// if the range ends up inside a single non-structured element, or if it extends beyond the end of
/// the original data-type.
/// \param ct is the structured data-type
/// \param offset is the start of the given range
/// \param size is the number of bytes in the range
/// \return \b true if the range spans multiple elements
bool RulePieceStructure::spanningRange(Datatype *ct,int4 offset,int4 size)

{
  if (offset + size > ct->getSize())
    return false;
  uintb curOff = offset;
  do {
    ct = ct->getSubType(curOff, &curOff);
    if (ct == (Datatype *)0)
      return true;				// Don't know what it spans, assume multiple
    if ((int4)curOff + size > ct->getSize())
      return true;				// Spans more than 1
  } while(ct->isPieceStructured());
  return false;
}
/// \brief Turn an INT_ZEXT into a PIECE whose first input is a zero constant
///
/// The caller supplies a parent data-type together with the byte offset, within that data-type, of the
/// \e output of the INT_ZEXT.  The extension is rewritten as a PIECE with a constant 0 Varnode in slot 0.
/// If a component of the parent data-type with exactly the size of the zero constant is found at the
/// computed offset, the constant is given that data-type.  Nothing is changed if the value being
/// extended is a constant or if the number of extension bytes exceeds the size of a \b uintb.
/// \param zext is the INT_ZEXT operation
/// \param ct is the parent data-type
/// \param offset is the byte offset of the \e output within the parent data-type
/// \param data is the function containing the operation
/// \return true if the INT_ZEXT was successfully converted
bool RulePieceStructure::convertZextToPiece(PcodeOp *zext,Datatype *ct,int4 offset,Funcdata &data)

{
  Varnode *lowVn = zext->getIn(0);
  if (lowVn->isConstant())
    return false;
  Varnode *resVn = zext->getOut();
  int4 sz = resVn->getSize() - lowVn->getSize();	// Number of zero bytes added by the extension
  if (sz > sizeof(uintb))
    return false;
  if (!resVn->getSpace()->isBigEndian())
    offset += lowVn->getSize();			// Little endian: the zero bytes come after the extended value
  uintb curOff = offset;
  // Descend to a component that is no bigger than the zero piece
  while(ct != (Datatype *)0 && ct->getSize() > sz)
    ct = ct->getSubType(curOff, &curOff);
  Varnode *zeroVn = data.newConstant(sz, 0);
  bool exactType = (ct != (Datatype *)0) && (ct->getSize() == sz);
  if (exactType)
    zeroVn->updateType(ct, false, false);
  data.opSetOpcode(zext, CPUI_PIECE);
  data.opInsertInput(zext, zeroVn, 0);
  Datatype *lowType = lowVn->getType();
  if (lowType->needsResolution())
    data.inheritResolution(lowType, zext, 1, zext, 0);	// Transfer invn's resolution to slot 1
  return true;
}
/// \brief Convert INT_ZEXT operations that define leaves of a CONCAT tree into PIECE operations
///
/// A CONCAT tree can be extended through an INT_ZEXT if the extension's output crosses multiple fields
/// of the parent data-type.  Every leaf of the tree that is written by an INT_ZEXT is checked for this
/// and, where appropriate, the INT_ZEXT is replaced with a PIECE.
/// \param stack is the node container for the CONCAT tree
/// \param structuredType is the parent data-type for the tree
/// \param data is the function containing the tree
/// \return \b true if any INT_ZEXT replacement was performed
bool RulePieceStructure::findReplaceZext(vector<PieceNode> &stack,Datatype *structuredType,Funcdata &data)

{
  bool converted = false;
  vector<PieceNode>::iterator iter;
  for(iter=stack.begin();iter!=stack.end();++iter) {
    PieceNode &cur(*iter);
    if (!cur.isLeaf()) continue;
    Varnode *leafVn = cur.getVarnode();
    if (!leafVn->isWritten()) continue;
    PcodeOp *defOp = leafVn->getDef();
    // Only an extension whose output covers several elements of the data-type is a candidate
    bool candidate = (defOp->code() == CPUI_INT_ZEXT) &&
		     spanningRange(structuredType,cur.getTypeOffset(),leafVn->getSize());
    if (candidate && convertZextToPiece(defOp,structuredType,cur.getTypeOffset(),data))
      converted = true;
  }
  return converted;
}
/// \brief Decide whether the given \b root and \b leaf should belong to different symbols
///
/// A leaf of a CONCAT tree can be in a separate symbol from the root, for instance if it is not
/// written within the function, if it is already part of another tree, or if it would itself be
/// the root of a separate tree.
/// \param root is the root of the CONCAT tree
/// \param leaf is the given leaf Varnode
/// \return \b true if the two Varnodes should be in different symbols
bool RulePieceStructure::separateSymbol(Varnode *root,Varnode *leaf)

{
  if (root->getSymbolEntry() != leaf->getSymbolEntry())
    return true;				// Forced to be different symbols
  if (root->isAddrTied())
    return false;
  if (!leaf->isWritten() || leaf->isProtoPartial())
    return true;				// Assumed different if unwritten, or already in another tree
  // A leaf built by a PIECE and having a structured data-type would be a separate root
  return (leaf->getDef()->code() == CPUI_PIECE) && leaf->getType()->isPieceStructured();
}
/// \class RulePieceStructure
/// \brief Print the concatenation of structure pieces as explicit write statements
///
/// Properties are set so that a CONCAT expression like `v = CONCAT(CONCAT(v1,v2),CONCAT(v3,v4))` is
/// rendered as a sequence of separate write statements: `v.field1 = v1; v.field2 = v2; v.field3 = v3; v.field4 = v4;`
void RulePieceStructure::getOpList(vector<uint4> &oplist) const

{
  uint4 list[] = { CPUI_PIECE, CPUI_INT_ZEXT };
  oplist.insert(oplist.end(),list,list+2);
}
/// Process a PIECE (or INT_ZEXT) whose output is, or is part of, a structured data-type.
/// An INT_ZEXT is converted to a PIECE if possible.  For a PIECE that is the root of a CONCAT tree,
/// the tree is gathered and each piece is given storage at the address matching its offset within the
/// structured data-type, either by replacing the Varnode or, for leaves, by inserting a COPY.
/// \param op is the PIECE or INT_ZEXT operation
/// \param data is the function containing the operation
/// \return 1 if a conversion was performed or the tree was processed, 0 if nothing was done
int4 RulePieceStructure::applyOp(PcodeOp *op,Funcdata &data)
{
if (op->isPartialRoot()) return 0; // Check if CONCAT tree already been visited
Varnode *outvn = op->getOut();
int4 baseOffset;
Datatype *ct = determineDatatype(outvn, baseOffset);
if (ct == (Datatype *)0) return 0; // Output is not (part of) a structured data-type
if (op->code() == CPUI_INT_ZEXT) {
// Extensions are only converted to PIECE here, the resulting tree is handled on a later visit
if (convertZextToPiece(op,outvn->getType(),0,data))
return 1;
return 0;
}
// Check if outvn is really the root of the tree
PcodeOp *zext = outvn->loneDescend();
if (zext != (PcodeOp*)0) {
if (zext->code() == CPUI_PIECE)
return 0; // More PIECEs below us, not a root
if (zext->code() == CPUI_INT_ZEXT) {
// Extension of a structured data-type, convert extension to PIECE first
if (convertZextToPiece(zext,zext->getOut()->getType(),0,data))
return 1;
return 0;
}
}
vector<PieceNode> stack;
// Gather the tree, repeating as long as INT_ZEXT leaves keep getting converted into new PIECEs
for(;;) {
PieceNode::gatherPieces(stack, outvn, op, baseOffset);
if (!findReplaceZext(stack, ct, data)) // Check for INT_ZEXT leaves that need to be converted to PIECEs
break;
stack.clear(); // If we found some, regenerate the tree
}
op->setPartialRoot(); // Mark the root so the tree is not processed again
bool anyAddrTied = outvn->isAddrTied();
Address baseAddr = outvn->getAddr() - baseOffset; // Address corresponding to offset 0 of the structured data-type
for(int4 i=0;i<stack.size();++i) {
PieceNode &node(stack[i]);
Varnode *vn = node.getVarnode();
Address addr = baseAddr + node.getTypeOffset(); // Storage the piece should have within the structured object
if (vn->getAddr() == addr) {
if (!node.isLeaf() || !separateSymbol(outvn, vn)) {
// Varnode already has correct address and will be part of the same symbol as root
// so we don't need to change the storage or insert a COPY
if (!vn->isAddrTied() && !vn->isProtoPartial()) {
vn->setProtoPartial();
}
anyAddrTied = anyAddrTied || vn->isAddrTied();
continue;
}
}
if (node.isLeaf()) {
// Insert a COPY of the leaf into a new Varnode with the correct storage, and have the PIECE read the new Varnode
PcodeOp *copyOp = data.newOp(1,node.getOp()->getAddr());
Varnode *newVn = data.newVarnodeOut(vn->getSize(), addr, copyOp);
anyAddrTied = anyAddrTied || newVn->isAddrTied(); // Its possible newVn is addrtied, even if vn isn't
Datatype *newType = data.getArch()->types->getExactPiece(ct, node.getTypeOffset(), vn->getSize());
if (newType == (Datatype *)0)
newType = vn->getType(); // No exact component data-type, keep the data-type of the original leaf
newVn->updateType(newType, false, false);
data.opSetOpcode(copyOp, CPUI_COPY);
data.opSetInput(copyOp, vn, 0);
data.opSetInput(node.getOp(),newVn,node.getSlot());
data.opInsertBefore(copyOp, node.getOp());
if (vn->getType()->needsResolution()) {
// Inherit PIECE's read resolution for COPY's read
data.inheritResolution(vn->getType(), copyOp, 0, node.getOp(), node.getSlot());
}
if (newType->needsResolution()) {
newType->resolveInFlow(copyOp, -1); // If the piece represents part of a union, resolve it
}
if (!newVn->isAddrTied())
newVn->setProtoPartial();
}
else {
// Reaching here we know vn is NOT addrtied and has a lone descendant
// We completely replace the Varnode with one having the correct storage
PcodeOp *defOp = vn->getDef();
PcodeOp *loneOp = vn->loneDescend();
int4 slot = loneOp->getSlot(vn);
Varnode *newVn = data.newVarnode(vn->getSize(), addr, vn->getType());
data.opSetOutput(defOp, newVn);
data.opSetInput(loneOp, newVn, slot);
data.deleteVarnode(vn);
if (!newVn->isAddrTied())
newVn->setProtoPartial();
}
}
// The root is only registered if neither it nor any of the pieces is address tied
if (!anyAddrTied)
data.getMerge().registerProtoPartialRoot(outvn);
return 1;
}
/// \class RuleSubNormal /// \class RuleSubNormal
/// \brief Pull-back SUBPIECE through INT_RIGHT and INT_SRIGHT /// \brief Pull-back SUBPIECE through INT_RIGHT and INT_SRIGHT
/// ///
@ -8142,7 +8301,7 @@ Varnode *RuleSignMod2nOpt2::checkSignExtForm(PcodeOp *op)
/// \brief Verify an \e if block like `V = (V s< 0) ? V + 2^n-1 : V` /// \brief Verify an \e if block like `V = (V s< 0) ? V + 2^n-1 : V`
/// ///
/// \param op is the MULTIEQUAL /// \param op is the MULTIEQUAL
/// \param npos is the constant 2^n /// \param npow is the constant 2^n
/// \return the Varnode V in the form, or null if the form doesn't match /// \return the Varnode V in the form, or null if the form doesn't match
Varnode *RuleSignMod2nOpt2::checkMultiequalForm(PcodeOp *op,uintb npow) Varnode *RuleSignMod2nOpt2::checkMultiequalForm(PcodeOp *op,uintb npow)

View file

@ -940,16 +940,6 @@ public:
virtual void getOpList(vector<uint4> &oplist) const; virtual void getOpList(vector<uint4> &oplist) const;
virtual int4 applyOp(PcodeOp *op,Funcdata &data); virtual int4 applyOp(PcodeOp *op,Funcdata &data);
}; };
// Declaration of the PIECE embedding Rule, constructed with the label "embed"
class RuleEmbed : public Rule {
public:
RuleEmbed(const string &g) : Rule(g, 0, "embed") {} ///< Constructor
virtual Rule *clone(const ActionGroupList &grouplist) const {
// A copy is only produced if this rule's group is contained in the given group list
if (!grouplist.contains(getGroup())) return (Rule *)0;
return new RuleEmbed(getGroup());
}
virtual void getOpList(vector<uint4> &oplist) const;
virtual int4 applyOp(PcodeOp *op,Funcdata &data);
};
class RuleSwitchSingle : public Rule { class RuleSwitchSingle : public Rule {
public: public:
RuleSwitchSingle(const string &g) : Rule(g,0,"switchsingle") {} ///< Constructor RuleSwitchSingle(const string &g) : Rule(g,0,"switchsingle") {} ///< Constructor
@ -1155,6 +1145,23 @@ public:
virtual int4 applyOp(PcodeOp *op,Funcdata &data); virtual int4 applyOp(PcodeOp *op,Funcdata &data);
}; };
// Declaration of the Rule acting on Varnodes pieced together into a structure/array,
// constructed with the label "piecestructure".  The static methods are private helpers of the rule.
class RulePieceStructure : public Rule {
/// \brief Markup for Varnodes pieced together into structure/array
static Datatype *determineDatatype(Varnode *vn,int4 &baseOffset);
static bool spanningRange(Datatype *ct,int4 off,int4 size);
static bool convertZextToPiece(PcodeOp *zext,Datatype *structuredType,int4 offset,Funcdata &data);
static bool findReplaceZext(vector<PieceNode> &stack,Datatype *structuredType,Funcdata &data);
static bool separateSymbol(Varnode *root,Varnode *leaf);
public:
RulePieceStructure(const string &g) : Rule( g, 0, "piecestructure") {} ///< Constructor
virtual Rule *clone(const ActionGroupList &grouplist) const {
// A copy is only produced if this rule's group is contained in the given group list
if (!grouplist.contains(getGroup())) return (Rule *)0;
return new RulePieceStructure(getGroup());
}
virtual void getOpList(vector<uint4> &oplist) const;
virtual int4 applyOp(PcodeOp *op,Funcdata &data);
};
class RuleSubNormal : public Rule { class RuleSubNormal : public Rule {
public: public:
RuleSubNormal(const string &g) : Rule( g, 0, "subnormal") {} ///< Constructor RuleSubNormal(const string &g) : Rule( g, 0, "subnormal") {} ///< Constructor

View file

@ -1924,13 +1924,13 @@ const TypeField *TypePartialUnion::findTruncation(int4 off,int4 sz,const PcodeOp
return container->findTruncation(off + offset, sz, op, slot, newoff); return container->findTruncation(off + offset, sz, op, slot, newoff);
} }
int4 TypePartialUnion::numDepend(void) int4 TypePartialUnion::numDepend(void) const
{ {
return container->numDepend(); return container->numDepend();
} }
Datatype *TypePartialUnion::getDepend(int4 index) Datatype *TypePartialUnion::getDepend(int4 index) const
{ {
// Treat dependents as coming from the underlying union // Treat dependents as coming from the underlying union
@ -3483,6 +3483,31 @@ TypePointer *TypeFactory::getTypePointerWithSpace(Datatype *ptrTo,AddrSpace *spc
return res; return res;
} }
/// Drill down into nested data-types until we get to a data-type that exactly matches the
/// given offset and size, and return this data-type. Any \e union data-type encountered
/// terminates the process and a partial union data-type is constructed and returned.
/// If the range indicated by the offset and size contains only a partial field or crosses
/// field boundaries, null is returned.  In particular, a component with the right size that does
/// not \e start at the given offset is not a match, as the range would run past the end of it.
/// \param ct is the structured data-type
/// \param offset is the starting byte offset for the piece
/// \param size is the number of bytes in the piece
/// \return the data-type of the piece or null
Datatype *TypeFactory::getExactPiece(Datatype *ct,int4 offset,int4 size)

{
  uintb newOff = offset;
  while(ct != (Datatype *)0 && ct->getSize() > size && ct->getMetatype() != TYPE_UNION) {
    ct = ct->getSubType(newOff, &newOff);
  }
  if (ct == (Datatype *)0 || ct->getSize() < size)
    return (Datatype *)0;
  if (ct->getSize() == size) {
    // The sizes agree, but the piece only matches if it starts at the very beginning of the component.
    // A non-zero residual offset means the range crosses out of the component.
    if (newOff != 0)
      return (Datatype *)0;
    return ct;
  }
  if (ct->getMetatype() == TYPE_UNION)		// If we hit a containing union
    return getTypePartialUnion((TypeUnion *)ct, newOff, size);
  return (Datatype *)0;
}
/// The indicated Datatype object is removed from this container. /// The indicated Datatype object is removed from this container.
/// Indirect references (via TypeArray TypeStruct etc.) are not affected /// Indirect references (via TypeArray TypeStruct etc.) are not affected
/// \param ct is the data-type to destroy /// \param ct is the data-type to destroy

View file

@ -222,6 +222,7 @@ public:
int4 typeOrder(const Datatype &op) const { if (this==&op) return 0; return compare(op,10); } ///< Order this with -op- datatype int4 typeOrder(const Datatype &op) const { if (this==&op) return 0; return compare(op,10); } ///< Order this with -op- datatype
int4 typeOrderBool(const Datatype &op) const; ///< Order \b this with -op-, treating \e bool data-type as special int4 typeOrderBool(const Datatype &op) const; ///< Order \b this with -op-, treating \e bool data-type as special
void encodeRef(Encoder &encoder) const; ///< Encode a reference of \b this to a stream void encodeRef(Encoder &encoder) const; ///< Encode a reference of \b this to a stream
bool isPieceStructured(void) const; ///< Does \b this data-type consist of separate pieces?
static uint4 encodeIntegerFormat(const string &val); static uint4 encodeIntegerFormat(const string &val);
static string decodeIntegerFormat(uint4 val); static string decodeIntegerFormat(uint4 val);
}; };
@ -501,8 +502,8 @@ public:
TypeUnion *getParentUnion(void) const { return container; } ///< Get the union which \b this is part of TypeUnion *getParentUnion(void) const { return container; } ///< Get the union which \b this is part of
virtual void printRaw(ostream &s) const; ///< Print a description of the type to stream virtual void printRaw(ostream &s) const; ///< Print a description of the type to stream
virtual const TypeField *findTruncation(int4 off,int4 sz,const PcodeOp *op,int4 slot,int4 &newoff) const; virtual const TypeField *findTruncation(int4 off,int4 sz,const PcodeOp *op,int4 slot,int4 &newoff) const;
virtual int4 numDepend(void); virtual int4 numDepend(void) const;
virtual Datatype *getDepend(int4 index); virtual Datatype *getDepend(int4 index) const;
virtual int4 compare(const Datatype &op,int4 level) const; virtual int4 compare(const Datatype &op,int4 level) const;
virtual int4 compareDependency(const Datatype &op) const; virtual int4 compareDependency(const Datatype &op) const;
virtual Datatype *clone(void) const { return new TypePartialUnion(*this); } virtual Datatype *clone(void) const { return new TypePartialUnion(*this); }
@ -691,6 +692,7 @@ public:
TypePointerRel *getTypePointerRel(TypePointer *parentPtr,Datatype *ptrTo,int4 off); ///< Get pointer offset relative to a container TypePointerRel *getTypePointerRel(TypePointer *parentPtr,Datatype *ptrTo,int4 off); ///< Get pointer offset relative to a container
TypePointerRel *getTypePointerRel(int4 sz,Datatype *parent,Datatype *ptrTo,int4 ws,int4 off,const string &nm); TypePointerRel *getTypePointerRel(int4 sz,Datatype *parent,Datatype *ptrTo,int4 ws,int4 off,const string &nm);
TypePointer *getTypePointerWithSpace(Datatype *ptrTo,AddrSpace *spc,const string &nm); TypePointer *getTypePointerWithSpace(Datatype *ptrTo,AddrSpace *spc,const string &nm);
Datatype *getExactPiece(Datatype *ct,int4 offset,int4 size); ///< Get the data-type associated with piece of a structured data-type
void destroyType(Datatype *ct); ///< Remove a data-type from \b this void destroyType(Datatype *ct); ///< Remove a data-type from \b this
Datatype *concretize(Datatype *ct); ///< Convert given data-type to concrete form Datatype *concretize(Datatype *ct); ///< Convert given data-type to concrete form
void dependentOrder(vector<Datatype *> &deporder) const; ///< Place all data-types in dependency order void dependentOrder(vector<Datatype *> &deporder) const; ///< Place all data-types in dependency order
@ -745,6 +747,18 @@ inline int4 Datatype::typeOrderBool(const Datatype &op) const
return compare(op,10); return compare(op,10);
} }
/// If a value with \b this data-type is put together from multiple pieces, is it better to display
/// this construction as a sequence of separate assignments or as a single concatenation.
/// Generally a TYPE_STRUCT or TYPE_ARRAY should be represented with separate assignments.
/// \return \b true if the data-type is put together with multiple assignments
inline bool Datatype::isPieceStructured(void) const
{
// The single comparison below stands in for the explicit test that is commented out here:
// if (metatype == TYPE_STRUCT || metatype == TYPE_ARRAY || metatype == TYPE_UNION ||
// metatype == TYPE_PARTIALUNION || metatype == TYPE_PARTIALSTRUCT)
// NOTE(review): presumably this relies on exactly those meta-types being ordered at or below
// TYPE_ARRAY in the type_metatype enumeration -- confirm against the enum definition
return (metatype <= TYPE_ARRAY);
}
inline TypeArray::TypeArray(int4 n,Datatype *ao) : Datatype(n*ao->getSize(),TYPE_ARRAY) inline TypeArray::TypeArray(int4 n,Datatype *ao) : Datatype(n*ao->getSize(),TYPE_ARRAY)
{ {

View file

@ -1929,18 +1929,20 @@ string TypeOpSubpiece::getOperatorName(const PcodeOp *op) const
Datatype *TypeOpSubpiece::getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const Datatype *TypeOpSubpiece::getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const
{ {
const Varnode *outvn = op->getOut();
const TypeField *field;
Datatype *ct = op->getIn(0)->getHighTypeReadFacing(op);
int4 offset; int4 offset;
Datatype *parent; int4 byteOff = computeByteOffsetForComposite(op);
const Varnode *vn = op->getOut(); field = ct->findTruncation(byteOff,outvn->getSize(),op,1,offset); // Use artificial slot
const TypeField *field = testExtraction(true, op, parent, offset);
if (field != (const TypeField *)0) { if (field != (const TypeField *)0) {
if (vn->getSize() == field->type->getSize()) if (outvn->getSize() == field->type->getSize())
return field->type; return field->type;
} }
Datatype *dt = vn->getHighTypeDefFacing(); // SUBPIECE prints as cast to whatever its output is Datatype *dt = outvn->getHighTypeDefFacing(); // SUBPIECE prints as cast to whatever its output is
if (dt->getMetatype() != TYPE_UNKNOWN) if (dt->getMetatype() != TYPE_UNKNOWN)
return dt; return dt;
return tlst->getBase(vn->getSize(),TYPE_INT); // If output is unknown, treat as cast to int return tlst->getBase(outvn->getSize(),TYPE_INT); // If output is unknown, treat as cast to int
} }
Datatype *TypeOpSubpiece::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, Datatype *TypeOpSubpiece::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn,
@ -1969,30 +1971,6 @@ Datatype *TypeOpSubpiece::propagateType(Datatype *alttype,PcodeOp *op,Varnode *i
return (Datatype *)0; return (Datatype *)0;
} }
/// \brief Check whether the given SUBPIECE PcodeOp acts as a field extraction operator
///
/// For packed structures with small fields, SUBPIECE may be used to extract the field.
/// The data-type of the value being truncated must be a structure, a union or a partial union.
/// If the truncation produces part of a \e single field, the TypeField descriptor is returned, and
/// the parent data-type and the number of least significant bytes truncated from the field
/// are passed back.
/// \param useHigh is \b true if the HighVariable data-type is checked, otherwise the Varnode data-type is used
/// \param op is the given SUBPIECE PcodeOp
/// \param parent holds the parent Datatype being passed back
/// \param offset holds the LSB offset being passed back
/// \return the TypeField if a field is being extracted or null otherwise
const TypeField *TypeOpSubpiece::testExtraction(bool useHigh,const PcodeOp *op,Datatype *&parent,int4 &offset)

{
  const Varnode *inVn = op->getIn(0);
  Datatype *inType;
  if (useHigh)
    inType = inVn->getHighTypeReadFacing(op);
  else
    inType = inVn->getTypeReadFacing(op);
  switch(inType->getMetatype()) {
    case TYPE_STRUCT:
    case TYPE_UNION:
    case TYPE_PARTIALUNION:
      break;				// Composite data-type, keep going
    default:
      return (const TypeField *)0;	// Anything else cannot be a field extraction
  }
  parent = inType;
  int4 compositeOff = computeByteOffsetForComposite(op);
  int4 outSize = op->getOut()->getSize();
  return inType->findTruncation(compositeOff,outSize,op,1,offset);	// Use artificial slot
}
/// \brief Compute the byte offset into an assumed composite data-type produced by the given CPUI_SUBPIECE /// \brief Compute the byte offset into an assumed composite data-type produced by the given CPUI_SUBPIECE
/// ///
/// If the input Varnode is a composite data-type, the extracted result of the SUBPIECE represent a /// If the input Varnode is a composite data-type, the extracted result of the SUBPIECE represent a

View file

@ -749,7 +749,6 @@ public:
int4 inslot,int4 outslot); int4 inslot,int4 outslot);
virtual string getOperatorName(const PcodeOp *op) const; virtual string getOperatorName(const PcodeOp *op) const;
virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opSubpiece(op); } virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opSubpiece(op); }
static const TypeField *testExtraction(bool useHigh,const PcodeOp *op,Datatype *&parent,int4 &offset);
static int4 computeByteOffsetForComposite(const PcodeOp *op); static int4 computeByteOffsetForComposite(const PcodeOp *op);
}; };

View file

@ -44,6 +44,7 @@ extern ElementId ELEM_SEGMENTOP; ///< Marshaling element \<segmentop>
/// its CALLOTHER index. A facility for reading in implementation details is provided via decode(). /// its CALLOTHER index. A facility for reading in implementation details is provided via decode().
class UserPcodeOp { class UserPcodeOp {
public: public:
/// \brief Enumeration of different boolean properties that can be assigned to a CALLOTHER
enum userop_flags { enum userop_flags {
annotation_assignment = 1, ///< Displayed as assignment, `in1 = in2`, where the first parameter is an annotation annotation_assignment = 1, ///< Displayed as assignment, `in1 = in2`, where the first parameter is an annotation
no_operator = 2 ///< Don't emit special token, just emit the first input parameter as expression no_operator = 2 ///< Don't emit special token, just emit the first input parameter as expression

View file

@ -24,8 +24,9 @@ AttributeId ATTRIB_SYMREF = AttributeId("symref",68);
ElementId ELEM_HIGH = ElementId("high",82); ElementId ELEM_HIGH = ElementId("high",82);
/// Compare by offset within the group, then by size. /// Compare by offset within the group, then by size.
/// \param op2 is the other piece to compare with \b this /// \param a is the first piece to compare
/// \return \b true if \b this should be ordered before the other piece /// \param b is the other piece to compare
/// \return \b true if \b a should be ordered before \b b
bool VariableGroup::PieceCompareByOffset::operator()(const VariablePiece *a,const VariablePiece *b) const bool VariableGroup::PieceCompareByOffset::operator()(const VariablePiece *a,const VariablePiece *b) const
{ {
@ -145,7 +146,7 @@ void VariablePiece::adjustOffset(int4 amt)
} }
/// If there are no remaining references to the old VariableGroup it is deleted. /// If there are no remaining references to the old VariableGroup it is deleted.
/// \param newGropu is the new VariableGroup to transfer \b this to /// \param newGroup is the new VariableGroup to transfer \b this to
void VariablePiece::transferGroup(VariableGroup *newGroup) void VariablePiece::transferGroup(VariableGroup *newGroup)
{ {
@ -227,15 +228,26 @@ void HighVariable::setSymbol(Varnode *vn) const
} }
} }
symbol = entry->getSymbol(); symbol = entry->getSymbol();
if (entry->isDynamic()) // Dynamic symbols match whole variable if (vn->isProtoPartial()) {
Varnode *rootVn = PieceNode::findRoot(vn);
if (rootVn == vn)
throw LowlevelError("Partial varnode does not match symbol");
symboloffset = vn->getAddr().overlap(0,rootVn->getAddr(),rootVn->getSize());
SymbolEntry *entry = rootVn->getSymbolEntry();
if (entry != (SymbolEntry *)0)
symboloffset += entry->getOffset();
}
else if (entry->isDynamic()) // Dynamic symbols (that aren't partials) match whole variable
symboloffset = -1; symboloffset = -1;
else if (symbol->getCategory() == Symbol::equate) else if (symbol->getCategory() == Symbol::equate)
symboloffset = -1; // For equates, we don't care about size symboloffset = -1; // For equates, we don't care about size
else if (symbol->getType()->getSize() == vn->getSize() && else if (symbol->getType()->getSize() == vn->getSize() &&
entry->getAddr() == vn->getAddr() && !entry->isPiece()) entry->getAddr() == vn->getAddr() && !entry->isPiece())
symboloffset = -1; // A matching entry symboloffset = -1; // A matching entry
else else {
symboloffset = vn->getAddr().overlap(0,entry->getAddr(),symbol->getType()->getSize()) + entry->getOffset(); symboloffset = vn->getAddr().overlap(0,entry->getAddr(),symbol->getType()->getSize()) + entry->getOffset();
}
highflags &= ~((uint4)symboldirty); // We are no longer dirty highflags &= ~((uint4)symboldirty); // We are no longer dirty
} }
@ -455,6 +467,21 @@ Varnode *HighVariable::getNameRepresentative(void) const
return nameRepresentative; return nameRepresentative;
} }
/// Scan the member Varnodes in order and return the first one that is either
/// address tied or flagged as a proto partial.
/// \return the first qualifying member Varnode, or null if no member qualifies
Varnode *HighVariable::getPartial(void) const

{
  for(int4 slot=0;slot<inst.size();++slot) {
    Varnode *member = inst[slot];
    if (member->isAddrTied())
      return member;
    if (member->isProtoPartial())
      return member;
  }
  return (Varnode *)0;
}
/// Search for the given Varnode and cut it out of the list, marking all properties as \e dirty. /// Search for the given Varnode and cut it out of the list, marking all properties as \e dirty.
/// \param vn is the given Varnode member to remove /// \param vn is the given Varnode member to remove
void HighVariable::remove(Varnode *vn) void HighVariable::remove(Varnode *vn)
@ -726,6 +753,15 @@ int4 HighVariable::instanceIndex(const Varnode *vn) const
return -1; return -1;
} }
/// Two HighVariables are in the same group only if both have a VariablePiece
/// and those pieces report the same group.
/// \param op2 is the other HighVariable to compare with \b this
/// \return \b true if both variables have a piece and the pieces belong to the same group
bool HighVariable::sameGroup(const HighVariable *op2) const

{
  if (piece != (VariablePiece *)0 && op2->piece != (VariablePiece *)0)
    return (piece->getGroup() == op2->piece->getGroup());
  return false;		// At least one of the variables has no piece
}
/// \param encoder is the stream encoder /// \param encoder is the stream encoder
void HighVariable::encode(Encoder &encoder) const void HighVariable::encode(Encoder &encoder) const

View file

@ -179,6 +179,7 @@ public:
Varnode *getInputVarnode(void) const; ///< Find (the) input member Varnode Varnode *getInputVarnode(void) const; ///< Find (the) input member Varnode
Varnode *getTypeRepresentative(void) const; ///< Get a member Varnode with the strongest data-type Varnode *getTypeRepresentative(void) const; ///< Get a member Varnode with the strongest data-type
Varnode *getNameRepresentative(void) const; ///< Get a member Varnode that dictates the naming of \b this HighVariable Varnode *getNameRepresentative(void) const; ///< Get a member Varnode that dictates the naming of \b this HighVariable
Varnode *getPartial(void) const; ///< Find the first member that can act as partial symbol storage
int4 getNumMergeClasses(void) const { return numMergeClasses; } ///< Get the number of speculative merges for \b this int4 getNumMergeClasses(void) const { return numMergeClasses; } ///< Get the number of speculative merges for \b this
bool isMapped(void) const { updateFlags(); return ((flags&Varnode::mapped)!=0); } ///< Return \b true if \b this is mapped bool isMapped(void) const { updateFlags(); return ((flags&Varnode::mapped)!=0); } ///< Return \b true if \b this is mapped
bool isPersist(void) const { updateFlags(); return ((flags&Varnode::persist)!=0); } ///< Return \b true if \b this is a global variable bool isPersist(void) const { updateFlags(); return ((flags&Varnode::persist)!=0); } ///< Return \b true if \b this is a global variable
@ -189,6 +190,7 @@ public:
bool isConstant(void) const { updateFlags(); return ((flags&Varnode::constant)!=0); } ///< Return \b true if \b this is a constant bool isConstant(void) const { updateFlags(); return ((flags&Varnode::constant)!=0); } ///< Return \b true if \b this is a constant
bool isUnaffected(void) const { updateFlags(); return ((flags&Varnode::unaffected)!=0); } ///< Return \b true if \b this is an \e unaffected register bool isUnaffected(void) const { updateFlags(); return ((flags&Varnode::unaffected)!=0); } ///< Return \b true if \b this is an \e unaffected register
bool isExtraOut(void) const { updateFlags(); return ((flags&(Varnode::indirect_creation|Varnode::addrtied))==Varnode::indirect_creation); } ///< Return \b true if \b this is an extra output bool isExtraOut(void) const { updateFlags(); return ((flags&(Varnode::indirect_creation|Varnode::addrtied))==Varnode::indirect_creation); } ///< Return \b true if \b this is an extra output
bool isPartial(void) const { updateFlags(); return ((flags&(Varnode::addrtied|Varnode::proto_partial))!=0); } ///< Return \b true if \b this is a potential partial symbol (address tied or proto partial)
void setMark(void) const { flags |= Varnode::mark; } ///< Set the mark on this variable void setMark(void) const { flags |= Varnode::mark; } ///< Set the mark on this variable
void clearMark(void) const { flags &= ~Varnode::mark; } ///< Clear the mark on this variable void clearMark(void) const { flags &= ~Varnode::mark; } ///< Clear the mark on this variable
bool isMark(void) const { return ((flags&Varnode::mark)!=0); } ///< Return \b true if \b this is marked bool isMark(void) const { return ((flags&Varnode::mark)!=0); } ///< Return \b true if \b this is marked
@ -205,6 +207,7 @@ public:
bool isUnattached(void) const { return inst.empty(); } ///< Return \b true if \b this has no member Varnode bool isUnattached(void) const { return inst.empty(); } ///< Return \b true if \b this has no member Varnode
bool isTypeLock(void) const { updateType(); return ((flags & Varnode::typelock)!=0); } ///< Return \b true if \b this is \e typelocked bool isTypeLock(void) const { updateType(); return ((flags & Varnode::typelock)!=0); } ///< Return \b true if \b this is \e typelocked
bool isNameLock(void) const { updateFlags(); return ((flags & Varnode::namelock)!=0); } ///< Return \b true if \b this is \e namelocked bool isNameLock(void) const { updateFlags(); return ((flags & Varnode::namelock)!=0); } ///< Return \b true if \b this is \e namelocked
bool sameGroup(const HighVariable *op2) const; ///< Return \b true if \b this and the other variable are parts of the same variable
void encode(Encoder &encoder) const; ///< Encode \b this variable to stream as a \<high> element void encode(Encoder &encoder) const; ///< Encode \b this variable to stream as a \<high> element
#ifdef MERGEMULTI_DEBUG #ifdef MERGEMULTI_DEBUG
void verifyCover(void) const; void verifyCover(void) const;

View file

@ -936,6 +936,13 @@ bool Varnode::findSubpieceShadow(int4 leastByte,const Varnode *whole,int4 recurs
return false; return false;
} }
/// \brief Try to find a PIECE operation that produces \b this from a given Varnode \b piece
///
/// \param leastByte is the number of least significant bytes being truncated from the
/// putative \b this to get \b piece. The routine can backtrack through COPY operations and
/// more than one PIECE operations to verify that \b this is formed out of \b piece.
/// \param piece is the given Varnode piece
/// \return \b true if \b this and \b piece have the prescribed PIECE relationship
bool Varnode::findPieceShadow(int4 leastByte,const Varnode *piece) const bool Varnode::findPieceShadow(int4 leastByte,const Varnode *piece) const
{ {
@ -1007,6 +1014,23 @@ bool Varnode::partialCopyShadow(const Varnode *op2,int4 relOff) const
return false; return false;
} }
/// Select the data-type to test: if \b this is mapped to a symbol entry, the data-type of
/// that symbol is used, otherwise the data-type of \b this Varnode itself is used.
/// The selected data-type is returned only if it reports being built out of separate pieces.
/// \return the associated structured data-type or null
Datatype *Varnode::getStructuredType(void) const

{
  Datatype *candidate = type;
  if (mapentry != (SymbolEntry *)0)
    candidate = mapentry->getSymbol()->getType();	// A mapped symbol's data-type takes precedence
  if (!candidate->isPieceStructured())
    return (Datatype *)0;
  return candidate;
}
/// Compare term order of two Varnodes. Used in Term Rewriting strategies to order operands of commutative ops /// Compare term order of two Varnodes. Used in Term Rewriting strategies to order operands of commutative ops
/// \param op is the Varnode to order against \b this /// \param op is the Varnode to order against \b this
/// \return -1 if \b this comes before \b op, 1 if op before this, or 0 /// \return -1 if \b this comes before \b op, 1 if op before this, or 0

View file

@ -109,7 +109,8 @@ public:
indirectstorage = 0x8000000, ///< Is this Varnode storing a pointer to the actual symbol indirectstorage = 0x8000000, ///< Is this Varnode storing a pointer to the actual symbol
hiddenretparm = 0x10000000, ///< Does this varnode point to the return value storage location hiddenretparm = 0x10000000, ///< Does this varnode point to the return value storage location
incidental_copy = 0x20000000, ///< Do copies of this varnode happen as a side-effect incidental_copy = 0x20000000, ///< Do copies of this varnode happen as a side-effect
autolive_hold = 0x40000000 ///< Temporarily block dead-code removal of \b this autolive_hold = 0x40000000, ///< Temporarily block dead-code removal of \b this
proto_partial = 0x80000000 ///< Varnode is getting PIECEd together into an (unmapped) structure
}; };
/// Additional boolean properties on a Varnode /// Additional boolean properties on a Varnode
enum addl_flags { enum addl_flags {
@ -250,6 +251,7 @@ public:
bool isUnaffected(void) const { return ((flags&Varnode::unaffected)!=0); } ///< Is \b this a value that is supposed to be preserved across the function? bool isUnaffected(void) const { return ((flags&Varnode::unaffected)!=0); } ///< Is \b this a value that is supposed to be preserved across the function?
bool isSpacebase(void) const { return ((flags&Varnode::spacebase)!=0); } ///< Is this location used to store the base point for a virtual address space? bool isSpacebase(void) const { return ((flags&Varnode::spacebase)!=0); } ///< Is this location used to store the base point for a virtual address space?
bool isReturnAddress(void) const { return ((flags&Varnode::return_address)!=0); } ///< Is this storage for a calls return address? bool isReturnAddress(void) const { return ((flags&Varnode::return_address)!=0); } ///< Is this storage for a calls return address?
bool isProtoPartial(void) const { return ((flags&Varnode::proto_partial)!=0); } ///< Is \b this getting pieced together into a larger whole
bool isPtrCheck(void) const { return ((addlflags&Varnode::ptrcheck)!=0); } ///< Has \b this been checked as a constant pointer to a mapped symbol? bool isPtrCheck(void) const { return ((addlflags&Varnode::ptrcheck)!=0); } ///< Has \b this been checked as a constant pointer to a mapped symbol?
bool isPtrFlow(void) const { return ((addlflags&Varnode::ptrflow)!=0); } ///< Does this varnode flow to or from a known pointer bool isPtrFlow(void) const { return ((addlflags&Varnode::ptrflow)!=0); } ///< Does this varnode flow to or from a known pointer
bool isSpacebasePlaceholder(void) const { return ((addlflags&Varnode::spacebase_placeholder)!=0); } ///< Is \b this used specifically to track stackpointer values? bool isSpacebasePlaceholder(void) const { return ((addlflags&Varnode::spacebase_placeholder)!=0); } ///< Is \b this used specifically to track stackpointer values?
@ -318,6 +320,8 @@ public:
void clearWriteMask(void) { addlflags &= ~Varnode::writemask; } ///< Clear the mark indicating \b this is not a true write void clearWriteMask(void) { addlflags &= ~Varnode::writemask; } ///< Clear the mark indicating \b this is not a true write
void setAutoLiveHold(void) { flags |= Varnode::autolive_hold; } ///< Place temporary hold on dead code removal void setAutoLiveHold(void) { flags |= Varnode::autolive_hold; } ///< Place temporary hold on dead code removal
void clearAutoLiveHold(void) { flags &= ~Varnode::autolive_hold; } ///< Clear temporary hold on dead code removal void clearAutoLiveHold(void) { flags &= ~Varnode::autolive_hold; } ///< Clear temporary hold on dead code removal
void setProtoPartial(void) { flags |= Varnode::proto_partial; } ///< Mark that \b this gets pieced into a larger structure
void clearProtoPartial(void) { flags &= ~Varnode::proto_partial; } ///< Clear the mark indicating \b this gets pieced into a larger structure
void setUnsignedPrint(void) { addlflags |= Varnode::unsignedprint; } ///< Force \b this to be printed as unsigned void setUnsignedPrint(void) { addlflags |= Varnode::unsignedprint; } ///< Force \b this to be printed as unsigned
void setLongPrint(void) { addlflags |= Varnode::longprint; } ///< Force \b this to be printed as a \e long token void setLongPrint(void) { addlflags |= Varnode::longprint; } ///< Force \b this to be printed as a \e long token
void setStopUpPropagation(void) { addlflags |= Varnode::stop_uppropagation; } ///< Stop up-propagation thru \b this void setStopUpPropagation(void) { addlflags |= Varnode::stop_uppropagation; } ///< Stop up-propagation thru \b this
@ -334,6 +338,7 @@ public:
bool findSubpieceShadow(int4 leastByte,const Varnode *whole,int4 recurse) const; bool findSubpieceShadow(int4 leastByte,const Varnode *whole,int4 recurse) const;
bool findPieceShadow(int4 leastByte,const Varnode *piece) const; bool findPieceShadow(int4 leastByte,const Varnode *piece) const;
bool partialCopyShadow(const Varnode *op2,int4 relOff) const; ///< Is one of \b this or \b op2 a partial copy of the other? bool partialCopyShadow(const Varnode *op2,int4 relOff) const; ///< Is one of \b this or \b op2 a partial copy of the other?
Datatype *getStructuredType(void) const; ///< Get structure/array/union that \b this is a piece of
void encode(Encoder &encoder) const; ///< Encode a description of \b this to a stream void encode(Encoder &encoder) const; ///< Encode a description of \b this to a stream
static bool comparePointers(const Varnode *a,const Varnode *b) { return (*a < *b); } ///< Compare Varnodes as pointers static bool comparePointers(const Varnode *a,const Varnode *b) { return (*a < *b); } ///< Compare Varnodes as pointers
static void printRaw(ostream &s,const Varnode *vn); ///< Print raw info about a Varnode to stream static void printRaw(ostream &s,const Varnode *vn); ///< Print raw info about a Varnode to stream

View file

@ -0,0 +1,81 @@
<decompilertest>
<binaryimage arch="x86:LE:64:default:gcc">
<!--
Examples of entire structures built out of PIECE and ZEXT operations.
We should see the individual fields being read/assigned and not CONCAT or ZEXT inlines.
-->
<bytechunk space="ram" offset="0x100783" readonly="true">
4883ec0889f84889f748c1e720
4809c7e812ffffff4883c408c348c1e6
2089f84809f0c30fb7c648c1e0104889
c60fb7c70fb7d248c1e2204809f048c1
e1304809d04809c8c35389f366893d4f
082000488b3d46082000e806ffffff66
891d400820005bc389f8c3
</bytechunk>
<bytechunk space="ram" offset="0x100847" readonly="true">
554889e5897dec8b45
ec8945f8c745fc00000000488b45f85d
c3
</bytechunk>
<symbol space="ram" offset="0x1006aa" name="structparam"/>
<symbol space="ram" offset="0x1006e5" name="structshort"/>
<symbol space="ram" offset="0x100783" name="concatregparam"/>
<symbol space="ram" offset="0x10079d" name="concatreturn"/>
<symbol space="ram" offset="0x1007a7" name="concatreturnshort"/>
<symbol space="ram" offset="0x1007c9" name="replaceshort"/>
<symbol space="ram" offset="0x1007e8" name="zerofield"/>
<symbol space="ram" offset="0x100847" name="zerofield2"/>
</binaryimage>
<script>
<com>parse line struct foo { int4 A; int4 B; };</com>
<com>parse line struct fooshort { int2 a; int2 b; int2 c; int2 d; };</com>
<com>map addr r0x301020 fooshort globshort</com>
<com>parse line extern void structparam(foo param_1);</com>
<com>parse line extern void structshort(fooshort param_1);</com>
<com>parse line extern void concatregparam(int4 regp1,int4 regp2);</com>
<com>parse line extern foo concatreturn(int4 retp1,int4 retp2);</com>
<com>parse line extern fooshort concatreturnshort(int2 short1,int2 short2,int2 short3,int2 short4);</com>
<com>parse line extern void replaceshort(int2 replace1,int2 replace2);</com>
<com>parse line extern foo zerofield(int4 zero1);</com>
<com>parse line extern foo zerofield2(int4 zero_two1);</com>
<com>lo fu concatregparam</com>
<com>decompile</com>
<com>print C</com>
<com>lo fu concatreturn</com>
<com>decompile</com>
<com>print C</com>
<com>lo fu concatreturnshort</com>
<com>decompile</com>
<com>print C</com>
<com>lo fu replaceshort</com>
<com>decompile</com>
<com>print C</com>
<com>lo fu zerofield</com>
<com>decompile</com>
<com>print C</com>
<com>lo fu zerofield2</com>
<com>map hash r0x10085b 7df6374cc45 foo footwo</com>
<com>decompile</com>
<com>print C</com>
<com>quit</com>
</script>
<stringmatch name="Concat #1" min="0" max="0">CONCAT</stringmatch>
<stringmatch name="Concat #2" min="0" max="0">ZEXT</stringmatch>
<stringmatch name="Concat #3" min="1" max="1">fVar1\.A = regp1;</stringmatch>
<stringmatch name="Concat #4" min="1" max="1">fVar1\.B = regp2;</stringmatch>
<stringmatch name="Concat #5" min="1" max="1">fVar1\.A = retp1;</stringmatch>
<stringmatch name="Concat #6" min="1" max="1">fVar1\.B = retp2;</stringmatch>
<stringmatch name="Concat #7" min="1" max="1">fVar1\.a = short1;</stringmatch>
<stringmatch name="Concat #8" min="1" max="1">fVar1\.b = short2;</stringmatch>
<stringmatch name="Concat #9" min="1" max="1">fVar1\.c = short3;</stringmatch>
<stringmatch name="Concat #10" min="1" max="1">fVar1\.d = short4;</stringmatch>
<stringmatch name="Concat #11" min="1" max="1">globshort\.b = replace1;</stringmatch>
<stringmatch name="Concat #12" min="1" max="1">globshort\.d = replace2;</stringmatch>
<stringmatch name="Concat #13" min="0" max="0">globshort\.a =</stringmatch>
<stringmatch name="Concat #14" min="0" max="0">globshort\.c =</stringmatch>
<stringmatch name="Concat #15" min="1" max="1">fVar1\.A = zero1;</stringmatch>
<stringmatch name="Concat #16" min="1" max="1">fVar1\.B = 0;</stringmatch>
<stringmatch name="Concat #17" min="1" max="1">footwo\.A = zero_two1;</stringmatch>
<stringmatch name="Concat #18" min="1" max="1">footwo\.B = 0;</stringmatch>
</decompilertest>

View file

@ -166,15 +166,23 @@ public class HighSymbol {
/** /**
* Associate a particular HighVariable with this symbol. This is used to link the symbol * Associate a particular HighVariable with this symbol. This is used to link the symbol
* into the decompiler's description of how a function manipulates a particular symbol. * into the decompiler's description of how a function manipulates a particular symbol.
* Multiple partial HighVariables may get associated with the same HighSymbol. The HighSymbol
* keeps a reference to the biggest HighVariable passed to this method.
* @param high is the associated HighVariable * @param high is the associated HighVariable
*/ */
public void setHighVariable(HighVariable high) { void setHighVariable(HighVariable high) {
this.highVariable = high; if (highVariable != null) {
if (highVariable.getSize() >= high.getSize()) {
return;
}
}
highVariable = high;
} }
/** /**
* Get the HighVariable associate with this symbol if any. This allows the user to go straight * Get the HighVariable associate with this symbol if any. The symbol may have multiple
* into the decompiler's function to see how the symbol gets manipulated. * partial HighVariables associated with it. This method returns the biggest one, which
* may not be the same size as the symbol itself.
* @return the associated HighVariable or null * @return the associated HighVariable or null
*/ */
public HighVariable getHighVariable() { public HighVariable getHighVariable() {