GP-2563 SplitDatatype transformer

This commit is contained in:
caheckman 2023-04-03 19:47:17 -04:00
parent 269ea1ae7a
commit bdf1314b4f
28 changed files with 1699 additions and 161 deletions

View file

@ -1401,7 +1401,9 @@ void Architecture::resetDefaultsInternal(void)
infer_pointers = true;
analyze_for_loops = true;
readonlypropagate = false;
alias_block_level = 2; // Block structs and arrays by default
alias_block_level = 2; // Block structs and arrays by default, but not more primitive data-types
split_datatype_config = OptionSplitDatatypes::option_struct | OptionSplitDatatypes::option_array
| OptionSplitDatatypes::option_pointer;
}
/// Reset options that can be modified by the OptionDatabase. This includes

View file

@ -181,6 +181,7 @@ public:
uint4 flowoptions; ///< options passed to flow following engine
uint4 max_instructions; ///< Maximum instructions that can be processed in one function
int4 alias_block_level; ///< Aliases blocked by 0=none, 1=struct, 2=array, 3=all
uint4 split_datatype_config; ///< Toggle for data-types splitting: Bit 0=structs, 1=arrays, 2=pointers
vector<Rule *> extra_pool_rules; ///< Extra rules that go in the main pool (cpu specific, experimental)
Database *symboltab; ///< Memory map of global variables and functions

View file

@ -2868,8 +2868,7 @@ int4 ActionMarkExplicit::baseExplicit(Varnode *vn,int4 maxref)
else if (useOp->code() == CPUI_PIECE) {
Varnode *rootVn = PieceNode::findRoot(vn);
if (vn == rootVn) return -1;
Datatype *ct = rootVn->getStructuredType();
if (ct != (Datatype *)0) {
if (rootVn->getDef()->isPartialRoot()) {
// Getting PIECEd into a structured thing. Unless vn is a leaf, it should be implicit
if (def->code() != CPUI_PIECE) return -1;
if (vn->loneDescend() == (PcodeOp *)0) return -1;
@ -5205,7 +5204,7 @@ void ActionDatabase::buildDefaultGroups(void)
const char *members[] = { "base", "protorecovery", "protorecovery_a", "deindirect", "localrecovery",
"deadcode", "typerecovery", "stackptrflow",
"blockrecovery", "stackvars", "deadcontrolflow", "switchnorm",
"cleanup", "merge", "dynamic", "casts", "analysis",
"cleanup", "splitcopy", "splitpointer", "merge", "dynamic", "casts", "analysis",
"fixateglobals", "fixateproto",
"segment", "returnsplit", "nodejoin", "doubleload", "doubleprecis",
"unreachable", "subvar", "floatprecision",
@ -5472,6 +5471,9 @@ void ActionDatabase::universalAction(Architecture *conf)
actcleanup->addRule( new RulePtrsubCharConstant("cleanup") );
actcleanup->addRule( new RuleExtensionPush("cleanup") );
actcleanup->addRule( new RulePieceStructure("cleanup") );
actcleanup->addRule( new RuleSplitCopy("splitcopy") );
actcleanup->addRule( new RuleSplitLoad("splitpointer") );
actcleanup->addRule( new RuleSplitStore("splitpointer") );
}
act->addAction( actcleanup );

View file

@ -151,25 +151,14 @@ bool SymbolEntry::updateType(Varnode *vn) const
Datatype *SymbolEntry::getSizedType(const Address &inaddr,int4 sz) const
{
uintb off;
int4 off;
if (isDynamic())
off = offset;
else
off = (inaddr.getOffset() - addr.getOffset()) + offset;
off = (int4)(inaddr.getOffset() - addr.getOffset()) + offset;
Datatype *cur = symbol->getType();
do {
if (offset == 0 && cur->getSize() == sz)
return cur;
cur = cur->getSubType(off,&off);
} while(cur != (Datatype *)0);
// else {
// This case occurs if the varnode is a "partial type" of some sort
// This PROBABLY means the varnode shouldn't be considered addrtied
// I.e. it shouldn't be considered part of the same variable as symbol
// }
return (Datatype *)0;
return symbol->getScope()->getArch()->types->getExactPiece(cur, off, sz);
}
/// Give a contained one-line description of \b this storage, suitable for a debug console

View file

@ -888,6 +888,21 @@ bool Funcdata::setUnionField(const Datatype *parent,const PcodeOp *op,int4 slot,
}
(*res.first).second = resolve;
}
if (op->code() == CPUI_MULTIEQUAL && slot >= 0) {
// Data-type propagation doesn't happen between MULTIEQUAL input slots holding the same Varnode
// So if this is a MULTIEQUAL, copy resolution to any other input slots holding the same Varnode
const Varnode *vn = op->getIn(slot); // The Varnode being directly set
for(int4 i=0;i<op->numInput();++i) {
if (i == slot) continue;
if (op->getIn(i) != vn) continue; // Check that different input slot holds same Varnode
ResolveEdge dupedge(parent,op,i);
res = unionMap.emplace(dupedge,resolve);
if (!res.second) {
if (!(*res.first).second.isLocked())
(*res.first).second = resolve;
}
}
}
return true;
}

View file

@ -1160,6 +1160,6 @@ ElementId ELEM_VAL = ElementId("val",8);
ElementId ELEM_VALUE = ElementId("value",9);
ElementId ELEM_VOID = ElementId("void",10);
ElementId ELEM_UNKNOWN = ElementId("XMLunknown",270); // Number serves as next open index
ElementId ELEM_UNKNOWN = ElementId("XMLunknown",271); // Number serves as next open index
} // End namespace ghidra

View file

@ -639,6 +639,7 @@ void Merge::trimOpOutput(PcodeOp *op)
vn = op->getOut();
Datatype *ct = vn->getType();
copyop = data.newOp(1,op->getAddr());
data.opSetOpcode(copyop,CPUI_COPY);
if (ct->needsResolution()) {
int4 fieldNum = data.inheritResolution(ct, copyop, -1, op, -1);
data.forceFacingType(ct, fieldNum, copyop, 0);
@ -647,7 +648,6 @@ void Merge::trimOpOutput(PcodeOp *op)
}
uniq = data.newUnique(vn->getSize(),ct);
data.opSetOutput(op,uniq); // Output of op is now stubby uniq
data.opSetOpcode(copyop,CPUI_COPY);
data.opSetOutput(copyop,vn); // Original output is bumped forward slightly
data.opSetInput(copyop,uniq,0);
data.opInsertAfter(copyop,afterop);
@ -1385,15 +1385,24 @@ void Merge::groupPartialRoot(Varnode *vn)
}
PieceNode::gatherPieces(pieces, vn, vn->getDef(), baseOffset);
bool throwOut = false;
for(int4 i=0;i<pieces.size();++i) {
Varnode *nodeVn = pieces[i].getVarnode();
// Make sure each node is still marked and hasn't merged with anything else
if (!nodeVn->isProtoPartial()) return;
if (nodeVn->getHigh()->numInstances() != 1) return;
if (!nodeVn->isProtoPartial() || nodeVn->getHigh()->numInstances() != 1) {
throwOut = true;
break;
}
}
for(int4 i=0;i<pieces.size();++i) {
Varnode *nodeVn = pieces[i].getVarnode();
nodeVn->getHigh()->groupWith(pieces[i].getTypeOffset() - baseOffset,high);
if (throwOut) {
for(int4 i=0;i<pieces.size();++i)
pieces[i].getVarnode()->clearProtoPartial();
}
else {
for(int4 i=0;i<pieces.size();++i) {
Varnode *nodeVn = pieces[i].getVarnode();
nodeVn->getHigh()->groupWith(pieces[i].getTypeOffset() - baseOffset,high);
}
}
}

View file

@ -54,6 +54,7 @@ ElementId ELEM_PARAM3 = ElementId("param3",204);
ElementId ELEM_PROTOEVAL = ElementId("protoeval",205);
ElementId ELEM_SETACTION = ElementId("setaction",206);
ElementId ELEM_SETLANGUAGE = ElementId("setlanguage",207);
ElementId ELEM_SPLITDATATYPE = ElementId("splitdatatype",270);
ElementId ELEM_STRUCTALIGN = ElementId("structalign",208);
ElementId ELEM_TOGGLERULE = ElementId("togglerule",209);
ElementId ELEM_WARNING = ElementId("warning",210);
@ -124,6 +125,7 @@ OptionDatabase::OptionDatabase(Architecture *g)
registerOption(new OptionAliasBlock());
registerOption(new OptionMaxInstruction());
registerOption(new OptionNamespaceStrategy());
registerOption(new OptionSplitDatatypes());
}
OptionDatabase::~OptionDatabase(void)
@ -920,4 +922,45 @@ string OptionNamespaceStrategy::apply(Architecture *glb,const string &p1,const s
return "Namespace strategy set";
}
/// \brief Translate one option string into its configuration bit
///
/// Possible values are:
///   - (empty string) = 0
///   - "struct"  = 1
///   - "array"   = 2
///   - "pointer" = 4
///
/// Any other string causes a LowlevelError to be thrown.
/// \param val is the option string
/// \return the corresponding configuration bit
uint4 OptionSplitDatatypes::getOptionBit(const string &val)

{
  if (val.empty())
    return 0;			// An omitted parameter contributes no bits
  const struct {
    const char *label;		// Keyword accepted on the command/option line
    uint4 bit;			// Configuration bit the keyword maps to
  } known[] = {
    { "struct", option_struct },
    { "array", option_array },
    { "pointer", option_pointer }
  };
  for(const auto &entry : known) {
    if (val == entry.label)
      return entry.bit;
  }
  throw LowlevelError("Unknown data-type split option: "+val);
}
/// \brief Apply the split data-type option to the given Architecture
///
/// Each of the three parameters is translated with getOptionBit() and the resulting
/// bits are OR'ed together to form the new \b split_datatype_config.  The "splitcopy"
/// and "splitpointer" action groups of the current root action are then toggled to match:
///   - if neither the struct nor the array bit is set, both groups are disabled
///   - otherwise "splitcopy" is enabled and "splitpointer" follows the pointer bit
///
/// All parameters are parsed before the Architecture is modified, so an unknown
/// option string (which makes getOptionBit() throw) leaves the configuration untouched.
/// \param glb is the Architecture being configured
/// \param p1 is the first option string (may be empty)
/// \param p2 is the second option string (may be empty)
/// \param p3 is the third option string (may be empty)
/// \return a message indicating whether the configuration changed
string OptionSplitDatatypes::apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const

{
  // Parse first, sequentially, so the first bad parameter is the one reported
  uint4 newConfig = getOptionBit(p1);
  newConfig |= getOptionBit(p2);
  newConfig |= getOptionBit(p3);

  uint4 oldConfig = glb->split_datatype_config;
  glb->split_datatype_config = newConfig;

  if ((newConfig & (option_struct | option_array)) == 0) {
    // Nothing to split on: turn off both rule groups
    glb->allacts.toggleAction(glb->allacts.getCurrentName(),"splitcopy",false);
    glb->allacts.toggleAction(glb->allacts.getCurrentName(),"splitpointer",false);
  }
  else {
    bool pointers = (newConfig & option_pointer) != 0;
    glb->allacts.toggleAction(glb->allacts.getCurrentName(),"splitcopy",true);
    glb->allacts.toggleAction(glb->allacts.getCurrentName(),"splitpointer",pointers);
  }

  if (oldConfig == newConfig)
    return "Split data-type configuration unchanged";
  return "Split data-type configuration set";
}
} // End namespace ghidra

View file

@ -60,6 +60,7 @@ extern ElementId ELEM_PARAM3; ///< Marshaling element \<param3>
extern ElementId ELEM_PROTOEVAL; ///< Marshaling element \<protoeval>
extern ElementId ELEM_SETACTION; ///< Marshaling element \<setaction>
extern ElementId ELEM_SETLANGUAGE; ///< Marshaling element \<setlanguage>
extern ElementId ELEM_SPLITDATATYPE; ///< Marshaling element \<splitdatatype>
extern ElementId ELEM_STRUCTALIGN; ///< Marshaling element \<structalign>
extern ElementId ELEM_TOGGLERULE; ///< Marshaling element \<togglerule>
extern ElementId ELEM_WARNING; ///< Marshaling element \<warning>
@ -322,5 +323,18 @@ public:
virtual string apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const;
};
/// \brief Option selecting which combined accesses are split into logical pieces
///
/// The option is registered under the name "splitdatatype".  Its parameters are
/// keyword strings that getOptionBit() maps onto the bits of the enumeration below.
class OptionSplitDatatypes : public ArchOption {
public:
  /// Configuration bits; these may be OR'ed together
  enum {
    option_struct = 1,		///< Split combined structure fields
    option_array = 2,		///< Split combined array elements
    option_pointer = 4		///< Split combined LOAD and STORE operations
  };

  OptionSplitDatatypes(void) { name = "splitdatatype"; }	///< Constructor

  /// Apply the option to the given Architecture
  virtual string apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const;

  static uint4 getOptionBit(const string &val);		///< Translate option string to a configuration bit
};
} // End namespace ghidra
#endif

View file

@ -7119,6 +7119,86 @@ int4 RulePieceStructure::applyOp(PcodeOp *op,Funcdata &data)
return 1;
}
/// \class RuleSplitCopy
/// \brief Split COPY ops based on TypePartialStruct
///
/// If more than one logical component of a structure or array is copied at once,
/// rewrite the COPY operator as multiple COPYs.
void RuleSplitCopy::getOpList(vector<uint4> &oplist) const

{
  oplist.push_back(CPUI_COPY);
}

/// \brief Try to split the given COPY
///
/// The COPY is only considered if its input or its output data-type is a structure,
/// an array, or a partial structure.  The actual test and transformation are
/// delegated to SplitDatatype::splitCopy().
/// \param op is the COPY operation
/// \param data is the function containing the operation
/// \return 1 if the COPY was split, 0 otherwise
int4 RuleSplitCopy::applyOp(PcodeOp *op,Funcdata &data)

{
  Datatype *inType = op->getIn(0)->getTypeReadFacing(op);
  Datatype *outType = op->getOut()->getTypeDefFacing();
  type_metatype metain = inType->getMetatype();
  type_metatype metaout = outType->getMetatype();
  if (metain != TYPE_PARTIALSTRUCT && metaout != TYPE_PARTIALSTRUCT &&
      metain != TYPE_ARRAY && metaout != TYPE_ARRAY &&
      metain != TYPE_STRUCT && metaout != TYPE_STRUCT)
    return 0;		// Neither side is a composite: nothing to split (int4 result, not bool)
  SplitDatatype splitter(data);
  if (splitter.splitCopy(op, inType, outType))
    return 1;
  return 0;
}
/// \class RuleSplitLoad
/// \brief Split LOAD ops based on TypePartialStruct
///
/// When a single LOAD reads more than one logical component of a structure or array,
/// the LOAD is rewritten as multiple LOADs.
void RuleSplitLoad::getOpList(vector<uint4> &oplist) const

{
  oplist.push_back(CPUI_LOAD);
}

/// \brief Try to split the given LOAD
///
/// The data-type of the loaded value is recovered with SplitDatatype::getValueDatatype().
/// Only structures, arrays and partial structures are passed on to SplitDatatype::splitLoad().
/// \param op is the LOAD operation
/// \param data is the function containing the operation
/// \return 1 if the LOAD was split, 0 otherwise
int4 RuleSplitLoad::applyOp(PcodeOp *op,Funcdata &data)

{
  Datatype *valueType = SplitDatatype::getValueDatatype(op, op->getOut()->getSize(), data.getArch()->types);
  if (valueType == (Datatype *)0)
    return 0;
  switch(valueType->getMetatype()) {
    case TYPE_STRUCT:
    case TYPE_ARRAY:
    case TYPE_PARTIALSTRUCT:
      break;			// Composite value: candidate for splitting
    default:
      return 0;
  }
  SplitDatatype splitter(data);
  return splitter.splitLoad(op, valueType) ? 1 : 0;
}
/// \class RuleSplitStore
/// \brief Split STORE ops based on TypePartialStruct
///
/// When a single STORE writes more than one logical component of a structure or array,
/// the STORE is rewritten as multiple STOREs.
void RuleSplitStore::getOpList(vector<uint4> &oplist) const

{
  oplist.push_back(CPUI_STORE);
}

/// \brief Try to split the given STORE
///
/// The data-type of the stored value (input slot 2) is recovered with
/// SplitDatatype::getValueDatatype().  Only structures, arrays and partial structures
/// are passed on to SplitDatatype::splitStore().
/// \param op is the STORE operation
/// \param data is the function containing the operation
/// \return 1 if the STORE was split, 0 otherwise
int4 RuleSplitStore::applyOp(PcodeOp *op,Funcdata &data)

{
  Datatype *valueType = SplitDatatype::getValueDatatype(op, op->getIn(2)->getSize(), data.getArch()->types);
  if (valueType == (Datatype *)0)
    return 0;
  switch(valueType->getMetatype()) {
    case TYPE_STRUCT:
    case TYPE_ARRAY:
    case TYPE_PARTIALSTRUCT:
      break;			// Composite value: candidate for splitting
    default:
      return 0;
  }
  SplitDatatype splitter(data);
  return splitter.splitStore(op, valueType) ? 1 : 0;
}
/// \class RuleSubNormal
/// \brief Pull-back SUBPIECE through INT_RIGHT and INT_SRIGHT
///

View file

@ -1174,6 +1174,39 @@ public:
virtual int4 applyOp(PcodeOp *op,Funcdata &data);
};
/// Rule declaration: split COPY operations (rule name "splitcopy")
class RuleSplitCopy : public Rule {
public:
  RuleSplitCopy(const string &g) : Rule( g, 0, "splitcopy") {}	///< Constructor

  /// Produce a copy of \b this rule, or null if its group is not in the given list
  virtual Rule *clone(const ActionGroupList &grouplist) const {
    Rule *copy = (Rule *)0;
    if (grouplist.contains(getGroup()))
      copy = new RuleSplitCopy(getGroup());
    return copy;
  }

  virtual void getOpList(vector<uint4> &oplist) const;
  virtual int4 applyOp(PcodeOp *op,Funcdata &data);
};
/// Rule declaration: split LOAD operations (rule name "splitload")
class RuleSplitLoad : public Rule {
public:
  RuleSplitLoad(const string &g) : Rule( g, 0, "splitload") {}	///< Constructor

  /// Produce a copy of \b this rule, or null if its group is not in the given list
  virtual Rule *clone(const ActionGroupList &grouplist) const {
    Rule *copy = (Rule *)0;
    if (grouplist.contains(getGroup()))
      copy = new RuleSplitLoad(getGroup());
    return copy;
  }

  virtual void getOpList(vector<uint4> &oplist) const;
  virtual int4 applyOp(PcodeOp *op,Funcdata &data);
};
/// Rule declaration: split STORE operations (rule name "splitstore")
class RuleSplitStore : public Rule {
public:
  RuleSplitStore(const string &g) : Rule( g, 0, "splitstore") {}	///< Constructor

  /// Produce a copy of \b this rule, or null if its group is not in the given list
  virtual Rule *clone(const ActionGroupList &grouplist) const {
    Rule *copy = (Rule *)0;
    if (grouplist.contains(getGroup()))
      copy = new RuleSplitStore(getGroup());
    return copy;
  }

  virtual void getOpList(vector<uint4> &oplist) const;
  virtual int4 applyOp(PcodeOp *op,Funcdata &data);
};
class RuleSubNormal : public Rule {
public:
RuleSubNormal(const string &g) : Rule( g, 0, "subnormal") {} ///< Constructor

View file

@ -99,7 +99,7 @@ SubvariableFlow::ReplaceVarnode *SubvariableFlow::setReplacement(Varnode *vn,uin
if ((!aggressive)&& vn->isInput()) return (ReplaceVarnode *)0; // Cannot assume input is sign extended
if (vn->isPersist()) return (ReplaceVarnode *)0;
}
if (vn->isTypeLock()) {
if (vn->isTypeLock() && vn->getType()->getMetatype() != TYPE_PARTIALSTRUCT) {
if (vn->getType()->getSize() != flowsize)
return (ReplaceVarnode *)0;
}
@ -110,7 +110,7 @@ SubvariableFlow::ReplaceVarnode *SubvariableFlow::setReplacement(Varnode *vn,uin
// are packed into a single location, i.e. always consider it a single variable
if ((!aggressive)&&((vn->getConsume()&~mask)!=0)) // If there is any use of value outside of the logical variable
return (ReplaceVarnode *)0; // This probably means the whole thing is a variable, i.e. quit
if (vn->isTypeLock()) {
if (vn->isTypeLock() && vn->getType()->getMetatype() != TYPE_PARTIALSTRUCT) {
int4 sz = vn->getType()->getSize();
if (sz != flowsize)
return (ReplaceVarnode *)0;
@ -1470,7 +1470,7 @@ TransformVar *SplitFlow::setReplacement(Varnode *vn)
return res;
}
if (vn->isTypeLock())
if (vn->isTypeLock() && vn->getType()->getMetatype() != TYPE_PARTIALSTRUCT)
return (TransformVar *)0;
if (vn->isInput())
return (TransformVar *)0; // Right now we can't split inputs
@ -1744,6 +1744,805 @@ bool SplitFlow::doTrace(void)
return true;
}
/// If \b pointer Varnode is written by an INT_ADD, PTRSUB, or PTRADD from a another pointer
/// to a structure or array, update \b pointer Varnode, \b baseOffset, and \b ptrType to this.
/// \return \b true if \b pointer was successfully updated
bool SplitDatatype::RootPointer::backUpPointer(void)
{
if (!pointer->isWritten())
return false;
PcodeOp *addOp = pointer->getDef();
OpCode opc = addOp->code();
if (opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRADD)
return false;
Varnode *cvn = addOp->getIn(1);
if (!cvn->isConstant())
return false;
Varnode *tmpPointer = addOp->getIn(0);
Datatype *ct = tmpPointer->getTypeReadFacing(addOp);
if (ct->getMetatype() != TYPE_PTR)
return false;
Datatype *parent = ((TypePointer *)ct)->getPtrTo();
type_metatype meta = parent->getMetatype();
if (meta != TYPE_STRUCT && meta != TYPE_ARRAY)
return false;
ptrType = (TypePointer *)ct;
int4 off = (int4)cvn->getOffset();
if (opc == CPUI_PTRADD)
off *= (int4)addOp->getIn(2)->getOffset();
off = AddrSpace::addressToByteInt(off, ptrType->getWordSize());
baseOffset += off;
pointer = tmpPointer;
return true;
}
/// Examine the pointer Varnode (input slot 1) of the LOAD or STORE.  If it is a pointer to the
/// given data-type it becomes the root \b pointer.  Otherwise one backUpPointer() step is tried
/// and the result must point to the data-type.  After a match, up to two further backUpPointer()
/// steps are taken, as long as the current \b pointer is not address tied and has a single
/// descendant.  The offset accumulated relative to the original pointer is kept in \b baseOffset.
/// A partial structure is matched through its parent data-type.
/// \param op is the LOAD or STORE
/// \param valueType is the specific data-type to match
/// \return \b true if the root pointer is found
bool SplitDatatype::RootPointer::find(PcodeOp *op,Datatype *valueType)

{
  Datatype *target = valueType;
  if (target->getMetatype() == TYPE_PARTIALSTRUCT)
    target = ((TypePartialStruct *)target)->getParent();
  loadStore = op;
  baseOffset = 0;
  pointer = op->getIn(1);
  firstPointer = pointer;
  Datatype *firstType = pointer->getTypeReadFacing(op);
  if (firstType->getMetatype() != TYPE_PTR)
    return false;
  ptrType = (TypePointer *)firstType;
  if (ptrType->getPtrTo() != target) {
    // Not a direct match, allow exactly one step back
    if (!backUpPointer() || ptrType->getPtrTo() != target)
      return false;
  }
  int4 remaining = 2;
  while(remaining-- > 0) {
    if (pointer->isAddrTied()) break;
    if (pointer->loneDescend() == (PcodeOp *)0) break;
    if (!backUpPointer()) break;
  }
  return true;
}
/// Starting at the original pointer Varnode, destroy the op defining it if the Varnode has no
/// remaining descendants and is not address tied, then continue with that op's first input.
/// The walk stops once the root \b pointer is reached or a Varnode still in use is encountered.
/// \param data is the containing function
void SplitDatatype::RootPointer::freePointerChain(Funcdata &data)

{
  for(;;) {
    if (firstPointer == pointer) return;		// Reached the root
    if (firstPointer->isAddrTied()) return;
    if (!firstPointer->hasNoDescend()) return;	// Still read by something else
    PcodeOp *deadOp = firstPointer->getDef();
    firstPointer = deadOp->getIn(0);
    data.opDestroy(deadOp);
  }
}
/// \brief Obtain the component of the given data-type at the specified offset
///
/// Descend through sub-types of the given data-type until one is found that starts exactly
/// at the offset and is not itself an array; the component may be nested more than one level.
/// If the descent runs out of sub-types, the original data-type is asked for the size of the
/// \e hole at the offset.  For a positive hole size, an undefined data-type of that size
/// (capped at 8 bytes) is returned and \b isHole is set to \b true.
/// \param ct is the given data-type
/// \param offset is the specified offset
/// \param isHole passes back whether a hole in the composite was encountered
/// \return the component data-type at the offset or null, if no such component exists
Datatype *SplitDatatype::getComponent(Datatype *ct,int4 offset,bool &isHole)

{
  isHole = false;
  Datatype *sub = ct;
  uintb remain = offset;
  for(;;) {
    sub = sub->getSubType(remain,&remain);
    if (sub == (Datatype *)0)
      break;				// No component here, check for a hole below
    if (remain == 0 && sub->getMetatype() != TYPE_ARRAY)
      return sub;			// Component starts exactly at the offset
  }
  int4 holeSize = ct->getHoleSize(offset);
  if (holeSize <= 0)
    return (Datatype *)0;
  isHole = true;
  return types->getBase((holeSize > 8) ? 8 : holeSize, TYPE_UNKNOWN);
}
/// Categorize the given data-type, taking the \b splitStructures and \b splitArrays
/// configuration into account.  The result is:
///   - -1 for not splittable
///   - 0 for data-type that needs to be split
///   - 1 for data-type that can be split multiple ways
///
/// Integers and undefined data-types are category 1, as is an array of 1-byte undefined
/// elements.  A partial structure is categorized through its parent data-type.
/// \param ct is the given data-type
/// \return the categorization
int4 SplitDatatype::categorizeDatatype(Datatype *ct)

{
  const type_metatype meta = ct->getMetatype();
  if (meta == TYPE_INT || meta == TYPE_UINT || meta == TYPE_UNKNOWN)
    return 1;
  if (meta == TYPE_STRUCT)
    return (splitStructures && ct->numDepend() > 1) ? 0 : -1;

  Datatype *arrayType = (Datatype *)0;		// Array to examine, either direct or as parent of a partial
  if (meta == TYPE_ARRAY)
    arrayType = ct;
  else if (meta == TYPE_PARTIALSTRUCT) {
    Datatype *parent = ((TypePartialStruct *)ct)->getParent();
    const type_metatype parentMeta = parent->getMetatype();
    if (parentMeta == TYPE_STRUCT)
      return splitStructures ? 0 : -1;
    if (parentMeta == TYPE_ARRAY)
      arrayType = parent;
  }
  if (arrayType == (Datatype *)0 || !splitArrays)
    return -1;

  Datatype *elem = ((TypeArray *)arrayType)->getBase();
  // unknown1 array does not need splitting and acts as (large) primitive
  const bool isByteBlob = (elem->getMetatype() == TYPE_UNKNOWN && elem->getSize() == 1);
  return isByteBlob ? 1 : 0;
}
/// \brief Can the two given data-types be mutually split into matching logical components
///
/// Test if the data-types have components with matching size and offset. If so, the component
/// data-types and offsets are saved to the \b pieces array and \b true is returned.
/// At least one of the data-types must be a partial data-type, but the other may be a
/// TYPE_UNKNOWN, which this method assumes can be split into components of arbitrary size.
///
/// Both data-types are first classified with categorizeDatatype(). The walk then covers
/// the size of \b inBase, advancing by the size of each component found, and appends one
/// entry to \b dataTypePieces per component.
/// NOTE(review): entries appended before a failing return are not removed here; presumably
/// each SplitDatatype object is used for a single attempt -- confirm against callers.
/// \param inBase is the data-type coming into the operation
/// \param outBase is the data-type coming out of the operation
/// \param inConstant is \b true if the incoming data-type labels a constant
/// \return \b true if the data-types have compatible components, \b false otherwise
bool SplitDatatype::testDatatypeCompatibility(Datatype *inBase,Datatype *outBase,bool inConstant)
{
// Either side being unsplittable (-1) rules out the transform
int4 inCategory = categorizeDatatype(inBase);
if (inCategory < 0)
return false;
int4 outCategory = categorizeDatatype(outBase);
if (outCategory < 0)
return false;
// At least one side must be category 0, i.e. dictate how the value is divided
if (outCategory != 0 && inCategory != 0)
return false;
if (!inConstant && inBase == outBase && inBase->getMetatype() == TYPE_STRUCT)
return false; // Don't split a whole structure unless it is getting initialized from a constant
bool inHole;
bool outHole;
int4 curOff = 0;
int4 sizeLeft = inBase->getSize();
if (inCategory == 1) {
// Input can be divided arbitrarily: the output data-type determines the components
while(sizeLeft > 0) {
Datatype *curOut = getComponent(outBase,curOff,outHole);
if (curOut == (Datatype *)0) return false;
// Throw away primitive data-type if it is a constant
Datatype *curIn = inConstant ? curOut : types->getBase(curOut->getSize(), TYPE_UNKNOWN);
dataTypePieces.emplace_back(curIn,curOut,curOff);
sizeLeft -= curOut->getSize();
curOff += curOut->getSize();
if (outHole) {
if (dataTypePieces.size() == 1)
return false; // Initial offset into structure is at a hole
if (sizeLeft == 0 && dataTypePieces.size() == 2)
return false; // Two pieces, one is a hole. Likely padding.
}
}
}
else if (outCategory == 1) {
// Output can be divided arbitrarily: the input data-type determines the components
while(sizeLeft > 0) {
Datatype *curIn = getComponent(inBase,curOff,inHole);
if (curIn == (Datatype *)0) return false;
Datatype *curOut = types->getBase(curIn->getSize(), TYPE_UNKNOWN);
dataTypePieces.emplace_back(curIn,curOut,curOff);
sizeLeft -= curIn->getSize();
curOff += curIn->getSize();
if (inHole) {
if (dataTypePieces.size() == 1)
return false; // Initial offset into structure is at a hole
if (sizeLeft == 0 && dataTypePieces.size() == 2)
return false; // Two pieces, one is a hole. Likely padding.
}
}
}
else { // Both in and out data-types have components
while(sizeLeft > 0) {
Datatype *curIn = getComponent(inBase,curOff,inHole);
if (curIn == (Datatype *)0) return false;
Datatype *curOut = getComponent(outBase,curOff,outHole);
if (curOut == (Datatype *)0) return false;
// Refine whichever side is larger until both components have the same size.
// A hole is simply resized to match; a real component is replaced by its
// own first sub-component (offset 0). Failure to refine rejects the split.
while(curIn->getSize() != curOut->getSize()) {
if (curIn->getSize() > curOut->getSize()) {
if (inHole)
curIn = types->getBase(curOut->getSize(), TYPE_UNKNOWN);
else
curIn = getComponent(curIn,0,inHole);
if (curIn == (Datatype *)0) return false;
}
else {
if (outHole)
curOut = types->getBase(curIn->getSize(), TYPE_UNKNOWN);
else
curOut = getComponent(curOut,0,outHole);
if (curOut == (Datatype *)0) return false;
}
}
dataTypePieces.emplace_back(curIn,curOut,curOff);
sizeLeft -= curIn->getSize();
curOff += curIn->getSize();
}
}
// A single piece means there is nothing to split
return dataTypePieces.size() > 1;
}
/// \brief Test specific constraints for splitting the given COPY operation into pieces
///
/// The split is rejected if:
///   - the COPY input is a function input
///   - input and output are both address tied and share the same address
///   - the input is produced by a LOAD and the COPY is its only reader
///
/// \param copyOp is the COPY operation to test
/// \return \b true if the split can proceed
bool SplitDatatype::testCopyConstraints(PcodeOp *copyOp)

{
  Varnode *src = copyOp->getIn(0);
  if (src->isInput())
    return false;
  if (src->isAddrTied()) {
    Varnode *dest = copyOp->getOut();
    const bool sameStorage = dest->isAddrTied() && dest->getAddr() == src->getAddr();
    return !sameStorage;
  }
  if (!src->isWritten())
    return true;
  if (src->getDef()->code() != CPUI_LOAD)
    return true;
  // Original note: this situation (LOAD feeding only this COPY) is handled by splitCopy()
  return (src->loneDescend() != copyOp);
}
/// \brief If the given Varnode is an extended precision constant, create split constants
///
/// Look for ZEXT(#c) and CONCAT(#c1,#c2) forms. Try to split into single precision Varnodes.
/// The Varnode must have a single descendant.  The value is treated as a \e high part and
/// a \e low part (for ZEXT the high part is zero), and one constant is extracted per entry
/// of \b dataTypePieces using the entry's offset and input data-type.  On success the
/// defining ZEXT or PIECE op is destroyed.
///
/// If any piece is wider than a \b uintb the split is abandoned; in that case \b inVarnodes
/// is cleared and no constants are created.
/// \param vn is the given Varnode
/// \param inVarnodes will contain the split constant Varnodes
/// \return \b true if the Varnode is an extended precision constant and the split is successful
bool SplitDatatype::generateConstants(Varnode *vn,vector<Varnode *> &inVarnodes)

{
  if (vn->loneDescend() == (PcodeOp *)0) return false;
  if (!vn->isWritten()) return false;
  PcodeOp *op = vn->getDef();
  OpCode opc = op->code();
  if (opc == CPUI_INT_ZEXT) {
    if (!op->getIn(0)->isConstant()) return false;
  }
  else if (opc == CPUI_PIECE) {
    if (!op->getIn(0)->isConstant() || !op->getIn(1)->isConstant())
      return false;
  }
  else
    return false;

  // Validate every piece up front so that we never create some constants and then bail out
  for(size_t i=0;i<dataTypePieces.size();++i) {
    if (dataTypePieces[i].inType->getSize() > (int4)sizeof(uintb)) {
      inVarnodes.clear();
      return false;
    }
  }

  uintb lo,hi;
  int4 losize;				// Size of the low part in bytes
  int4 fullsize = vn->getSize();
  bool isBigEndian = vn->getSpace()->isBigEndian();
  if (opc == CPUI_INT_ZEXT) {
    hi = 0;
    lo = op->getIn(0)->getOffset();
    losize = op->getIn(0)->getSize();
  }
  else {
    hi = op->getIn(0)->getOffset();
    lo = op->getIn(1)->getOffset();
    losize = op->getIn(1)->getSize();
  }
  for(size_t i=0;i<dataTypePieces.size();++i) {
    Datatype *dt = dataTypePieces[i].inType;
    int4 sa;				// Significance of the piece, as a byte offset from the least significant byte
    if (isBigEndian)
      sa = fullsize - (dataTypePieces[i].offset + dt->getSize());
    else
      sa = dataTypePieces[i].offset;
    uintb val;
    if (sa >= losize)
      val = hi >> ((sa - losize) * 8);	// Piece lies entirely in the high part (byte offset -> bit shift)
    else {
      val = lo >> (sa * 8);
      if (sa + dt->getSize() > losize)
	val |= hi << ((losize - sa) * 8);	// Piece straddles the low/high boundary
    }
    val &= calc_mask(dt->getSize());
    Varnode *outVn = data.newConstant(dt->getSize(), val);
    inVarnodes.push_back(outVn);
    outVn->updateType(dt, false, false);
  }
  data.opDestroy(op);
  return true;
}
/// \brief Assuming the input is a constant, build split constants
///
/// For each entry of \b dataTypePieces, extract the bytes of the root constant's value
/// at the entry's offset (mirrored for big endian) and create a new constant Varnode of the
/// entry's input data-type.
/// \param rootVn is the given root constant
/// \param inVarnodes is the container for the new Varnodes
void SplitDatatype::buildInConstants(Varnode *rootVn,vector<Varnode *> &inVarnodes)

{
  const uintb fullValue = rootVn->getOffset();
  const bool isBig = rootVn->getSpace()->isBigEndian();
  for(const auto &piece : dataTypePieces) {
    Datatype *pieceType = piece.inType;
    const int4 pieceSize = pieceType->getSize();
    int4 byteShift = piece.offset;
    if (isBig)
      byteShift = rootVn->getSize() - byteShift - pieceSize;	// Offset counts from the most significant end
    const uintb pieceVal = (fullValue >> (8*byteShift)) & calc_mask(pieceSize);
    Varnode *constVn = data.newConstant(pieceSize, pieceVal);
    inVarnodes.push_back(constVn);
    constVn->updateType(pieceType, false, false);
  }
}
/// \brief Build input Varnodes by extracting SUBPIECEs from the root
///
/// If the root turns out to be an extended precision constant, generateConstants() supplies
/// the inputs directly.  Otherwise one SUBPIECE is created per entry of \b dataTypePieces,
/// truncating the root at the entry's offset and producing a Varnode of the entry's input data-type.
/// \param rootVn is the given root Varnode
/// \param followOp is the point at which the SUBPIECEs should be inserted (before)
/// \param inVarnodes is the container for the new Varnodes
void SplitDatatype::buildInSubpieces(Varnode *rootVn,PcodeOp *followOp,vector<Varnode *> &inVarnodes)

{
  if (generateConstants(rootVn, inVarnodes))
    return;
  Address rootAddr = rootVn->getAddr();
  for(const auto &piece : dataTypePieces) {
    Datatype *pieceType = piece.inType;
    const int4 pieceSize = pieceType->getSize();
    Address pieceAddr = rootAddr + piece.offset;
    pieceAddr.renormalize(pieceSize);
    int4 truncation = piece.offset;
    if (pieceAddr.isBigEndian())
      truncation = rootVn->getSize() - truncation - pieceSize;	// SUBPIECE counts from the least significant byte
    PcodeOp *subOp = data.newOp(2, followOp->getAddr());
    data.opSetOpcode(subOp, CPUI_SUBPIECE);
    data.opSetInput(subOp,rootVn,0);
    data.opSetInput(subOp,data.newConstant(4, truncation), 1);
    Varnode *pieceVn = data.newVarnodeOut(pieceSize, pieceAddr, subOp);
    inVarnodes.push_back(pieceVn);
    pieceVn->updateType(pieceType, false, false);
    data.opInsertBefore(subOp, followOp);
  }
}
/// \brief Build output Varnodes with storage based on the given root
///
/// For each entry of \b dataTypePieces, create a Varnode located at the root's address
/// plus the entry's offset, with the entry's output data-type.
/// \param rootVn is the given root Varnode
/// \param outVarnodes is the container for the new Varnodes
void SplitDatatype::buildOutVarnodes(Varnode *rootVn,vector<Varnode *> &outVarnodes)

{
  Address rootAddr = rootVn->getAddr();
  for(const auto &piece : dataTypePieces) {
    Datatype *pieceType = piece.outType;
    const int4 pieceSize = pieceType->getSize();
    Address pieceAddr = rootAddr + piece.offset;
    pieceAddr.renormalize(pieceSize);
    outVarnodes.push_back(data.newVarnode(pieceSize, pieceAddr, pieceType));
  }
}
/// \brief Concatenate output Varnodes into given root Varnode
///
/// Insert PIECE operators concatenating all output Varnodes from most significant to least significant
/// producing the root Varnode as the final result.
///
/// Nothing is built if the root has no descendants. Unless the root is address tied, every
/// output Varnode and every intermediate concatenation result is marked as a proto-partial,
/// and the root is registered with the Merge object as a proto-partial root. The final PIECE
/// is always marked as a partial root.
/// NOTE(review): the loops read a second element unconditionally, so this assumes
/// \b outVarnodes holds at least two Varnodes -- confirm against callers.
/// \param rootVn is the given root Varnode
/// \param previousOp is the point at which to insert (after)
/// \param outVarnodes is the list of output Varnodes
void SplitDatatype::buildOutConcats(Varnode *rootVn,PcodeOp *previousOp,vector<Varnode *> &outVarnodes)
{
if (rootVn->hasNoDescend())
return; // Don't need to produce concatenation if its unused
Address baseAddr = rootVn->getAddr();
Varnode *vn; // Running (most significant) accumulation
PcodeOp *concatOp;
PcodeOp *preOp = previousOp; // Each new PIECE is inserted after the previous one
bool addressTied = rootVn->isAddrTied();
// We are creating a CONCAT stack, mark varnodes appropriately
for(int4 i=0;i<outVarnodes.size();++i) {
if (!addressTied)
outVarnodes[i]->setProtoPartial();
}
if (baseAddr.isBigEndian()) {
// Big endian: the first piece is the most significant, walk forward through the list
vn = outVarnodes[0];
for(int4 i=1;;++i) { // Traverse most to least significant
concatOp = data.newOp(2,previousOp->getAddr());
data.opSetOpcode(concatOp,CPUI_PIECE);
data.opSetInput(concatOp,vn,0); // Most significant
data.opSetInput(concatOp,outVarnodes[i],1); // Least significant
data.opInsertAfter(concatOp, preOp);
if (i + 1 >= outVarnodes.size()) break; // Last PIECE gets rootVn as output, below
preOp = concatOp;
// Intermediate result starts at the root's address and grows by the piece just added
int4 sz = vn->getSize() + outVarnodes[i]->getSize();
Address addr = baseAddr;
addr.renormalize(sz);
vn = data.newVarnodeOut(sz,addr,concatOp);
if (!addressTied)
vn->setProtoPartial();
}
}
else {
// Little endian: the last piece is the most significant, walk backward through the list
vn = outVarnodes[outVarnodes.size()-1];
for(int4 i=outVarnodes.size()-2;;--i) { // Traverse most to least significant
concatOp = data.newOp(2,previousOp->getAddr());
data.opSetOpcode(concatOp,CPUI_PIECE);
data.opSetInput(concatOp,vn,0); // Most significant
data.opSetInput(concatOp,outVarnodes[i],1); // Least significant
data.opInsertAfter(concatOp, preOp);
if (i<=0) break; // Last PIECE gets rootVn as output, below
preOp = concatOp;
// Intermediate result starts at the address of the piece just added
int4 sz = vn->getSize() + outVarnodes[i]->getSize();
Address addr = outVarnodes[i]->getAddr();
addr.renormalize(sz);
vn = data.newVarnodeOut(sz,addr,concatOp);
if (!addressTied)
vn->setProtoPartial();
}
}
// The final PIECE produces the root itself
concatOp->setPartialRoot();
data.opSetOutput(concatOp, rootVn);
if (!addressTied)
data.getMerge().registerProtoPartialRoot(rootVn);
}
/// \brief Build a series of PTRSUB ops at different offsets, given a root pointer
///
/// Offsets and data-types are based on \b dataTypePieces, taking input data-types if \b isInput is \b true,
/// output data-types otherwise. The data-types, relative to the root pointer, are assumed to start at
/// the given base offset.
/// \param rootVn is the root pointer
/// \param ptrType is the pointer data-type associated with the root
/// \param baseOffset is the given base offset
/// \param followOp is the point at which the new PTRSUB ops are inserted (before)
/// \param ptrVarnodes is the container for the new pointer Varnodes
/// \param isInput specifies either input (\b true) or output (\b false) data-types
void SplitDatatype::buildPointers(Varnode *rootVn,TypePointer *ptrType,int4 baseOffset,PcodeOp *followOp,
vector<Varnode *> &ptrVarnodes,bool isInput)
{
Datatype *baseType = ptrType->getPtrTo();
for(int4 i=0;i<dataTypePieces.size();++i) {
Datatype *matchType = isInput ? dataTypePieces[i].inType : dataTypePieces[i].outType;
int4 byteOffset = baseOffset + dataTypePieces[i].offset;
Datatype *tmpType = baseType;
uintb curOff = byteOffset;
Varnode *inPtr = rootVn;
do {
uintb newOff;
PcodeOp *newOp;
Datatype *newType;
if (curOff >= tmpType->getSize()) { // An offset bigger than current data-type indicates an array
newType = tmpType; // The new data-type will be the same as current data-type
intb sNewOff = (intb)curOff % tmpType->getSize(); // But new offset will be old offset modulo data-type size
newOff = (sNewOff < 0) ? (sNewOff + tmpType->getSize()) : sNewOff;
}
else {
newType = tmpType->getSubType(curOff, &newOff);
if (newType == (Datatype *)0) {
// Null should only be returned for a hole in a structure, in which case use precomputed data-type
newType = matchType;
newOff = 0;
}
}
if (tmpType == newType || tmpType->getMetatype() == TYPE_ARRAY) {
int4 finalOffset = (int4)curOff - (int4)newOff;
int4 sz = newType->getSize(); // Element size in bytes
finalOffset = finalOffset / sz; // Number of elements
sz = AddrSpace::byteToAddressInt(sz, ptrType->getWordSize());
newOp = data.newOp(3,followOp->getAddr());
data.opSetOpcode(newOp, CPUI_PTRADD);
data.opSetInput(newOp, inPtr, 0);
Varnode *indexVn = data.newConstant(inPtr->getSize(), finalOffset);
data.opSetInput(newOp, indexVn, 1);
data.opSetInput(newOp, data.newConstant(inPtr->getSize(), sz), 2);
Datatype *indexType = types->getBase(indexVn->getSize(),TYPE_INT);
indexVn->updateType(indexType, false, false);
}
else {
int4 finalOffset = AddrSpace::byteToAddressInt((int4)curOff - (int4)newOff,ptrType->getWordSize());
newOp = data.newOp(2,followOp->getAddr());
data.opSetOpcode(newOp, CPUI_PTRSUB);
data.opSetInput(newOp, inPtr, 0);
data.opSetInput(newOp, data.newConstant(inPtr->getSize(), finalOffset), 1);
}
inPtr = data.newUniqueOut(inPtr->getSize(), newOp);
Datatype *tmpPtr = types->getTypePointerStripArray(ptrType->getSize(), newType, ptrType->getWordSize());
inPtr->updateType(tmpPtr, false, false);
data.opInsertBefore(newOp, followOp);
tmpType = newType;
curOff = newOff;
} while(tmpType->getSize() > matchType->getSize());
ptrVarnodes.push_back(inPtr);
}
}
/// Scan every PcodeOp that reads the given Varnode and report whether any of them
/// has an op-code flagged as arithmetic.
/// \param vn is the given Varnode
/// \return \b true if at least one reading op is arithmetic
bool SplitDatatype::isArithmeticInput(Varnode *vn)
{
  for(list<PcodeOp *>::const_iterator iter = vn->beginDescend(); iter != vn->endDescend(); ++iter) {
    if ((*iter)->getOpcode()->isArithmeticOp())
      return true;			// Found an arithmetic reader
  }
  return false;
}
/// Test whether the given Varnode is written by a PcodeOp whose op-code is flagged as arithmetic.
/// A Varnode that is not written by any op never qualifies.
/// \param vn is the given Varnode
/// \return \b true if the defining op is arithmetic
bool SplitDatatype::isArithmeticOutput(Varnode *vn)
{
  return vn->isWritten() && vn->getDef()->getOpcode()->isArithmeticOp();
}
/// Bind the splitter to a function, cache the data-type factory of its Architecture, and
/// read the structure and array toggles out of the Architecture's split configuration.
/// \param func is the function whose ops will be split
SplitDatatype::SplitDatatype(Funcdata &func)
  : data(func)
{
  Architecture *arch = func.getArch();
  // Each toggle is a single bit in split_datatype_config
  splitArrays = ((arch->split_datatype_config & OptionSplitDatatypes::option_array) != 0);
  splitStructures = ((arch->split_datatype_config & OptionSplitDatatypes::option_struct) != 0);
  types = arch->types;
}
/// Decide, from the data-types on both sides of the given COPY, whether it can be replaced by
/// a sequence of smaller COPYs, one per logical piece, and perform the replacement if so.
/// The original COPY is destroyed when the split succeeds.
/// \param copyOp is the given COPY
/// \param inType is the data-type of the COPY input
/// \param outType is the data-type of the COPY output
/// \return \b true if the split was performed
bool SplitDatatype::splitCopy(PcodeOp *copyOp,Datatype *inType,Datatype *outType)
{
  if (!testCopyConstraints(copyOp))
    return false;
  Varnode *srcVn = copyOp->getIn(0);
  Varnode *destVn = copyOp->getOut();
  if (!testDatatypeCompatibility(inType, outType, srcVn->isConstant()))
    return false;
  // Sanity checks: the value must not come out of, or flow into, arithmetic
  if (isArithmeticOutput(srcVn) || isArithmeticInput(destVn))
    return false;

  // Break the source into pieces
  vector<Varnode *> srcPieces;
  if (srcVn->isConstant())
    buildInConstants(srcVn,srcPieces);
  else
    buildInSubpieces(srcVn,copyOp,srcPieces);

  // Create the destination pieces and the concatenation that rebuilds the whole
  vector<Varnode *> destPieces;
  buildOutVarnodes(destVn,destPieces);
  buildOutConcats(destVn,copyOp,destPieces);

  // One small COPY per piece, placed in front of the original
  int4 numPieces = srcPieces.size();
  for(int4 piece=0;piece<numPieces;++piece) {
    PcodeOp *pieceCopy = data.newOp(1,copyOp->getAddr());
    data.opSetOpcode(pieceCopy,CPUI_COPY);
    data.opSetInput(pieceCopy,srcPieces[piece],0);
    data.opSetOutput(pieceCopy,destPieces[piece]);
    data.opInsertBefore(pieceCopy, copyOp);
  }
  data.opDestroy(copyOp);
  return true;
}
/// Decide, from the data-type of the loaded value, whether the given LOAD can be replaced by
/// several smaller LOADs, and perform the replacement if so. The input data-type describes the
/// size and composition of the value being loaded. As a special case, if the LOAD output is
/// read only by a single COPY, the outputs of that COPY are split instead and the COPY is removed too.
/// A LOAD feeding only a STORE is left alone.
/// \param loadOp is the given LOAD to split
/// \param inType is the data-type associated with the value being loaded
/// \return \b true if the split was performed
bool SplitDatatype::splitLoad(PcodeOp *loadOp,Datatype *inType)
{
  Varnode *destVn = loadOp->getOut();
  PcodeOp *trailingCopy = (PcodeOp *)0;
  if (!destVn->isAddrTied()) {
    PcodeOp *reader = destVn->loneDescend();
    if (reader != (PcodeOp *)0) {
      switch(reader->code()) {
	case CPUI_STORE:
	  return false;			// Handled by RuleSplitStore
	case CPUI_COPY:
	  trailingCopy = reader;	// Split through the COPY
	  destVn = reader->getOut();
	  break;
	default:
	  break;
      }
    }
  }
  Datatype *outType = destVn->getTypeDefFacing();
  if (!testDatatypeCompatibility(inType, outType, false))
    return false;
  if (isArithmeticInput(destVn))	// Sanity check on output
    return false;
  RootPointer root;
  if (!root.find(loadOp,inType))
    return false;

  // New LOADs go in front of the COPY if there is one, otherwise in front of the LOAD
  PcodeOp *insertPoint = loadOp;
  if (trailingCopy != (PcodeOp *)0)
    insertPoint = trailingCopy;

  vector<Varnode *> ptrVarnodes;
  vector<Varnode *> outVarnodes;
  buildPointers(root.pointer, root.ptrType, root.baseOffset, loadOp, ptrVarnodes, true);
  buildOutVarnodes(destVn, outVarnodes);
  buildOutConcats(destVn, insertPoint, outVarnodes);

  AddrSpace *spc = loadOp->getIn(0)->getSpaceFromConst();
  int4 numPieces = ptrVarnodes.size();
  for(int4 piece=0;piece<numPieces;++piece) {
    PcodeOp *pieceLoad = data.newOp(2,insertPoint->getAddr());
    data.opSetOpcode(pieceLoad,CPUI_LOAD);
    data.opSetInput(pieceLoad,data.newVarnodeSpace(spc),0);
    data.opSetInput(pieceLoad,ptrVarnodes[piece],1);
    data.opSetOutput(pieceLoad,outVarnodes[piece]);
    data.opInsertBefore(pieceLoad, insertPoint);
  }

  // The original ops are no longer needed
  if (trailingCopy != (PcodeOp *)0)
    data.opDestroy(trailingCopy);
  data.opDestroy(loadOp);
  root.freePointerChain(data);
  return true;
}
/// Based on the STORE data-type, determine if the given STORE can be split into smaller STOREs.
/// Then, if possible, perform the split. The output data-type describes the size and composition of
/// the value being stored. If the stored value comes directly from a LOAD that is read only by this
/// STORE, the LOAD is split into matching smaller LOADs as well; otherwise the stored value is
/// broken up either as constants or as sub-pieces. The original STORE object is reused as the
/// first of the smaller STOREs.
/// \param storeOp is the given STORE to split
/// \param outType is the data-type associated with the value being stored
/// \return \b true if the split was performed
bool SplitDatatype::splitStore(PcodeOp *storeOp,Datatype *outType)
{
Varnode *inVn = storeOp->getIn(2); // The value being stored
PcodeOp *loadOp = (PcodeOp *)0;
Datatype *inType = (Datatype *)0;
// If the value is produced by a LOAD whose only reader is this STORE, try to describe it via the LOAD pointer
if (inVn->isWritten() && inVn->getDef()->code() == CPUI_LOAD && inVn->loneDescend() == storeOp) {
loadOp = inVn->getDef();
inType = getValueDatatype(loadOp, inVn->getSize(), data.getArch()->types);
if (inType == (Datatype *)0)
loadOp = (PcodeOp *)0; // No splittable description, treat as if there is no LOAD
}
if (inType == (Datatype *)0) {
// Fall back to the data-type of the value as read by the STORE
inType = inVn->getTypeReadFacing(storeOp);
}
if (!testDatatypeCompatibility(inType, outType, inVn->isConstant())) {
if (loadOp != (PcodeOp *)0) {
// If not compatible while considering the LOAD, check again, but without the LOAD
loadOp = (PcodeOp *)0;
inType = inVn->getTypeReadFacing(storeOp);
dataTypePieces.clear(); // Discard pieces left over from the failed attempt (presumably filled in by the test)
if (!testDatatypeCompatibility(inType, outType, inVn->isConstant()))
return false;
}
else
return false;
}
if (isArithmeticOutput(inVn)) // Sanity check
return false;
// Both root pointers must be found before any op is created
RootPointer storeRoot;
if (!storeRoot.find(storeOp,outType))
return false;
RootPointer loadRoot;
if (loadOp != (PcodeOp *)0) {
if (!loadRoot.find(loadOp,inType))
return false;
}
// Build the pieces of the value being stored
vector<Varnode *> inVarnodes;
if (inVn->isConstant())
buildInConstants(inVn,inVarnodes);
else if (loadOp != (PcodeOp *)0) {
// Replace the single LOAD with one smaller LOAD per piece, inserted before the original LOAD
vector<Varnode *> loadPtrs;
buildPointers(loadRoot.pointer, loadRoot.ptrType, loadRoot.baseOffset, loadOp, loadPtrs, true);
AddrSpace *loadSpace = loadOp->getIn(0)->getSpaceFromConst();
for(int4 i=0;i<loadPtrs.size();++i) {
PcodeOp *newLoadOp = data.newOp(2,loadOp->getAddr());
data.opSetOpcode(newLoadOp,CPUI_LOAD);
data.opSetInput(newLoadOp,data.newVarnodeSpace(loadSpace),0);
data.opSetInput(newLoadOp,loadPtrs[i],1);
Datatype *dt = dataTypePieces[i].inType;
Varnode *vn = data.newUniqueOut(dt->getSize(), newLoadOp);
vn->updateType(dt, false, false);
inVarnodes.push_back(vn);
data.opInsertBefore(newLoadOp, loadOp);
}
}
else
buildInSubpieces(inVn,storeOp,inVarnodes);
// Build one pointer per piece for the destination, using the output data-types
vector<Varnode *> storePtrs;
buildPointers(storeRoot.pointer, storeRoot.ptrType, storeRoot.baseOffset, storeOp, storePtrs, false);
AddrSpace *storeSpace = storeOp->getIn(0)->getSpaceFromConst();
// Preserve original STORE object, so that INDIRECT references are still valid
// but convert it into the first of the smaller STOREs
data.opSetInput(storeOp,storePtrs[0],1);
data.opSetInput(storeOp,inVarnodes[0],2);
PcodeOp *lastStore = storeOp;
// Remaining STOREs are chained after the original, in piece order
for(int4 i=1;i<storePtrs.size();++i) {
PcodeOp *newStoreOp = data.newOp(3,storeOp->getAddr());
data.opSetOpcode(newStoreOp,CPUI_STORE);
data.opSetInput(newStoreOp,data.newVarnodeSpace(storeSpace),0);
data.opSetInput(newStoreOp,storePtrs[i],1);
data.opSetInput(newStoreOp,inVarnodes[i],2);
data.opInsertAfter(newStoreOp, lastStore);
lastStore = newStoreOp;
}
// Clean up the replaced LOAD and the pointer calculations
if (loadOp != (PcodeOp *)0) {
data.opDestroy(loadOp);
loadRoot.freePointerChain(data);
}
storeRoot.freePointerChain(data);
return true;
}
/// \brief Get a data-type description of the value being pointed at by the given LOAD or STORE
///
/// Take the data-type of the pointer and construct the data-type of the thing being pointed at
/// so that it matches a specific size. This takes into account TypePointerRel and can produce
/// TypePartialStruct in order to match the size. If no interpretation of the value as a
/// splittable data-type is possible, null is returned. Only structures and arrays are
/// considered splittable; any other pointed-to data-type yields null.
/// \param loadStore is the given LOAD or STORE
/// \param size is the number of bytes in the value being pointed at
/// \param tlst is the TypeFactory for constructing partial data-types if necessary
/// \return the data-type description of the value or null
Datatype *SplitDatatype::getValueDatatype(PcodeOp *loadStore,int4 size,TypeFactory *tlst)
{
  Datatype *resType;
  Datatype *ptrType = loadStore->getIn(1)->getTypeReadFacing(loadStore);
  if (ptrType->getMetatype() != TYPE_PTR)
    return (Datatype *)0;
  int4 baseOffset;
  if (ptrType->isPointerRel()) {
    // A relative pointer refers to a position inside its parent container
    TypePointerRel *ptrRel = (TypePointerRel *)ptrType;
    resType = ptrRel->getParent();
    baseOffset = ptrRel->getPointerOffset();
    baseOffset = AddrSpace::addressToByteInt(baseOffset, ptrRel->getWordSize());
  }
  else {
    resType = ((TypePointer *)ptrType)->getPtrTo();
    baseOffset = 0;
  }
  type_metatype metain = resType->getMetatype();
  // Reject anything that is neither a structure nor an array
  if (metain != TYPE_STRUCT && metain != TYPE_ARRAY)
    return (Datatype *)0;
  return tlst->getExactPiece(resType, baseOffset, size);
}
/// \brief Create and return a placeholder associated with the given Varnode
///
/// Add the placeholder to the worklist if it hasn't been visited before
@ -1769,7 +2568,7 @@ TransformVar *SubfloatFlow::setReplacement(Varnode *vn)
if (vn->isAddrForce() && (vn->getSize() != precision))
return (TransformVar *)0;
if (vn->isTypeLock()) {
if (vn->isTypeLock() && vn->getType()->getMetatype() != TYPE_PARTIALSTRUCT) {
int4 sz = vn->getType()->getSize();
if (sz != precision)
return (TransformVar *)0;
@ -2047,8 +2846,9 @@ TransformVar *LaneDivide::setReplacement(Varnode *vn,int4 numLanes,int4 skipLane
// if (vn->isFree())
// return (TransformVar *)0;
if (vn->isTypeLock())
return (TransformVar *)0;
if (vn->isTypeLock() && vn->getType()->getMetatype() != TYPE_PARTIALSTRUCT) {
return (TransformVar *)0;
}
vn->setMark();
TransformVar *res = newSplit(vn, description, numLanes, skipLanes);

View file

@ -145,6 +145,65 @@ public:
bool doTrace(void); ///< Trace split through data-flow, constructing transform
};
/// \brief Split a p-code COPY, LOAD, or STORE op based on underlying composite data-type
///
/// During the cleanup phase, if a COPY, LOAD, or STORE occurs on a partial structure or array
/// (TypePartialStruct), try to break it up into multiple operations that each act on a logical
/// component of the structure or array.
class SplitDatatype {
  /// \brief A matched pair of data-types for one logical piece of the split
  ///
  /// The data-types being copied simultaneously are broken up into these matching pairs.
  class Component {
    friend class SplitDatatype;
    Datatype *inType;		///< Data-type coming into the logical COPY operation
    Datatype *outType;		///< Data-type coming out of the logical COPY operation
    int4 offset;		///< Offset of this logical piece within the whole
  public:
    Component(Datatype *in,Datatype *out,int4 off) : inType(in), outType(out), offset(off) {}	///< Constructor
  };

  /// \brief Description of the pointer being passed to a LOAD or STORE
  ///
  /// Distinguishes the immediate pointer used by the LOAD or STORE from a \e root pointer
  /// to the main structure or array, which the immediate pointer may be at an offset from.
  class RootPointer {
    friend class SplitDatatype;
    PcodeOp *loadStore;		///< LOAD or STORE op
    TypePointer *ptrType;	///< Base pointer data-type of LOAD or STORE
    Varnode *firstPointer;	///< Direct pointer input for LOAD or STORE
    Varnode *pointer;		///< The root pointer
    int4 baseOffset;		///< Offset of the LOAD or STORE relative to root pointer
    bool backUpPointer(void);	///< Follow flow of \b pointer back thru INT_ADD or PTRSUB
  public:
    bool find(PcodeOp *op,Datatype *valueType);	///< Locate root pointer for underlying LOAD or STORE
    void freePointerChain(Funcdata &data);	///< Remove unused pointer calculations
  };

  Funcdata &data;			///< The containing function
  TypeFactory *types;			///< The data-type container
  vector<Component> dataTypePieces;	///< Sequence of all data-type pairs being copied
  bool splitStructures;			///< Whether or not structures should be split
  bool splitArrays;			///< Whether or not arrays should be split

  Datatype *getComponent(Datatype *ct,int4 offset,bool &isHole);
  int4 categorizeDatatype(Datatype *ct);	///< Categorize if and how data-type should be split
  bool testDatatypeCompatibility(Datatype *inBase,Datatype *outBase,bool inConstant);
  bool testCopyConstraints(PcodeOp *copyOp);
  bool generateConstants(Varnode *vn,vector<Varnode *> &inVarnodes);
  void buildInConstants(Varnode *rootVn,vector<Varnode *> &inVarnodes);
  void buildInSubpieces(Varnode *rootVn,PcodeOp *followOp,vector<Varnode *> &inVarnodes);
  void buildOutVarnodes(Varnode *rootVn,vector<Varnode *> &outVarnodes);
  void buildOutConcats(Varnode *rootVn,PcodeOp *previousOp,vector<Varnode *> &outVarnodes);
  void buildPointers(Varnode *rootVn,TypePointer *ptrType,int4 baseOffset,PcodeOp *followOp,
		     vector<Varnode *> &ptrVarnodes,bool isInput);
  static bool isArithmeticInput(Varnode *vn);	///< Is \b this the input to an arithmetic operation
  static bool isArithmeticOutput(Varnode *vn);	///< Is \b this defined by an arithmetic operation
public:
  SplitDatatype(Funcdata &func);		///< Constructor
  bool splitCopy(PcodeOp *copyOp,Datatype *inType,Datatype *outType);	///< Split a COPY operation
  bool splitLoad(PcodeOp *loadOp,Datatype *inType);			///< Split a LOAD operation
  bool splitStore(PcodeOp *storeOp,Datatype *outType);			///< Split a STORE operation
  static Datatype *getValueDatatype(PcodeOp *loadStore,int4 size,TypeFactory *tlst);
};
/// \brief Class for tracing changes of precision in floating point variables
///
/// It follows the flow of a logical lower precision value stored in higher precision locations

View file

@ -1011,6 +1011,13 @@ Datatype *TypeArray::getSubType(uintb off,uintb *newoff) const
return arrayof;
}
/// Reduce the offset to a position within a single element and defer to the element data-type.
/// \param off is the byte offset into the array
/// \return the hole size reported by the element data-type at the reduced offset
int4 TypeArray::getHoleSize(int4 off) const
{
  return arrayof->getHoleSize(off % arrayof->getSize());
}
/// Given some contiguous piece of the array, figure out which element overlaps
/// the piece, and pass back the element index and the renormalized offset
/// \param off is the offset into the array
@ -1403,6 +1410,23 @@ Datatype *TypeStruct::getSubType(uintb off,uintb *newoff) const
return curfield.type;
}
/// If the offset lands inside a field, defer to that field's data-type. Otherwise the
/// offset is in a gap, and the distance to the next field (or to the end of the structure
/// if there is no following field) is returned.
/// \param off is the byte offset into the structure
/// \return the hole size at the given offset
int4 TypeStruct::getHoleSize(int4 off) const
{
  int4 index = getLowerBoundField(off);
  if (index >= 0) {
    const TypeField &containing( field[index] );
    int4 relOff = off - containing.offset;
    if (relOff < containing.type->getSize())
      return containing.type->getHoleSize(relOff);	// Offset is inside this field
  }
  int4 following = index + 1;		// First field after the offset
  if (following >= field.size())
    return getSize() - off;		// No more fields, measure to end of structure
  return field[following].offset - off;	// Measure to the start of the next field
}
Datatype *TypeStruct::nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const
{
@ -1827,12 +1851,14 @@ const TypeField *TypeUnion::resolveTruncation(int4 offset,PcodeOp *op,int4 slot,
{
Funcdata *fd = op->getParent()->getFuncdata();
const ResolvedUnion *res = fd->getUnionField(this, op, slot);
if (res != (ResolvedUnion *)0 && res->getFieldNum() >= 0) {
const TypeField *field = getField(res->getFieldNum());
newoff = offset - field->offset;
return field;
if (res != (ResolvedUnion *)0) {
if (res->getFieldNum() >= 0) {
const TypeField *field = getField(res->getFieldNum());
newoff = offset - field->offset;
return field;
}
}
if (op->code() == CPUI_SUBPIECE && slot == 1) { // The slot is artificial in this case
else if (op->code() == CPUI_SUBPIECE && slot == 1) { // The slot is artificial in this case
ScoreUnionFields scoreFields(*fd->getArch()->types,this,offset,op);
fd->setUnionField(this, op, slot, scoreFields.getResult());
if (scoreFields.getResult().getFieldNum() >= 0) {
@ -1927,8 +1953,28 @@ void TypePartialStruct::printRaw(ostream &s) const
/// Find the component of the container that lies at the given offset within \b this partial.
/// The offset is first converted to an offset within the container. If the component found
/// extends beyond the bytes remaining in \b this partial, the search descends another level
/// into that component, repeating until the component fits or no component is found.
/// \param off is the byte offset relative to the start of \b this partial
/// \param newoff is used to pass back the offset within the returned component
/// \return the component data-type, or null if the container reports none
Datatype *TypePartialStruct::getSubType(uintb off,uintb *newoff) const
{
  int4 sizeLeft = (size - (int4)off);	// Bytes of this partial remaining from the requested offset
  off += offset;			// Convert to an offset within the container
  Datatype *ct = container;
  do {
    ct = ct->getSubType(off, newoff);
    if (ct == (Datatype *)0)
      break;
    off = *newoff;
    // Component can extend beyond range of this partial, in which case we go down another level
  } while(ct->getSize() - (int4)off > sizeLeft);
  return ct;
}
/// Ask the container for the hole size at the corresponding container offset, then
/// limit the answer to the number of bytes remaining in \b this partial.
/// \param off is the byte offset relative to the start of \b this partial
/// \return the hole size, never more than the bytes left in the partial
int4 TypePartialStruct::getHoleSize(int4 off) const
{
  int4 remaining = size - off;
  int4 hole = container->getHoleSize(off + offset);
  return (hole > remaining) ? remaining : hole;
}
int4 TypePartialStruct::compare(const Datatype &op,int4 level) const
@ -3566,7 +3612,10 @@ TypePointer *TypeFactory::getTypePointerWithSpace(Datatype *ptrTo,AddrSpace *spc
Datatype *TypeFactory::getExactPiece(Datatype *ct,int4 offset,int4 size)
{
if (offset + size > ct->getSize())
return (Datatype *)0;
Datatype *lastType = (Datatype *)0;
uintb lastOff = 0;
uintb curOff = offset;
do {
if (ct->getSize() <= size) {
@ -3578,11 +3627,12 @@ Datatype *TypeFactory::getExactPiece(Datatype *ct,int4 offset,int4 size)
return getTypePartialUnion((TypeUnion *)ct, curOff, size);
}
lastType = ct;
lastOff = curOff;
ct = ct->getSubType(curOff,&curOff);
} while(ct != (Datatype *)0);
// If we reach here, lastType is bigger than size
if (lastType->getMetatype() == TYPE_STRUCT || lastType->getMetatype() == TYPE_ARRAY)
return getTypePartialStruct(lastType, curOff, size);
return getTypePartialStruct(lastType, lastOff, size);
return (Datatype *)0;
}

View file

@ -209,6 +209,7 @@ public:
virtual Datatype *getSubType(uintb off,uintb *newoff) const; ///< Recover component data-type one-level down
virtual Datatype *nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const;
virtual Datatype *nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const;
virtual int4 getHoleSize(int4 off) const { return 0; } ///< Get number of bytes at the given offset that are padding
virtual int4 numDepend(void) const { return 0; } ///< Return number of component sub-types
virtual Datatype *getDepend(int4 index) const { return (Datatype *)0; } ///< Return the i-th component sub-type
virtual void printNameBase(ostream &s) const { if (!name.empty()) s<<name[0]; } ///< Print name as short prefix
@ -386,6 +387,7 @@ public:
Datatype *getSubEntry(int4 off,int4 sz,int4 *newoff,int4 *el) const; ///< Figure out what a byte range overlaps
virtual void printRaw(ostream &s) const;
virtual Datatype *getSubType(uintb off,uintb *newoff) const;
virtual int4 getHoleSize(int4 off) const;
virtual int4 numDepend(void) const { return 1; }
virtual Datatype *getDepend(int4 index) const { return arrayof; }
virtual void printNameBase(ostream &s) const { s << 'a'; arrayof->printNameBase(s); }
@ -446,6 +448,7 @@ public:
virtual Datatype *getSubType(uintb off,uintb *newoff) const;
virtual Datatype *nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const;
virtual Datatype *nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const;
virtual int4 getHoleSize(int4 off) const;
virtual int4 numDepend(void) const { return field.size(); }
virtual Datatype *getDepend(int4 index) const { return field[index].type; }
virtual int4 compare(const Datatype &op,int4 level) const; // For tree structure
@ -498,6 +501,7 @@ public:
Datatype *getParent(void) const { return container; } ///< Get the data-type containing \b this piece
virtual void printRaw(ostream &s) const;
virtual Datatype *getSubType(uintb off,uintb *newoff) const;
virtual int4 getHoleSize(int4 off) const;
virtual int4 compare(const Datatype &op,int4 level) const;
virtual int4 compareDependency(const Datatype &op) const;
virtual Datatype *clone(void) const { return new TypePartialStruct(*this); }

View file

@ -1102,7 +1102,7 @@ TypeOpIntAdd::TypeOpIntAdd(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_ADD,"+",TYPE_INT,TYPE_INT)
{
opflags = PcodeOp::binary | PcodeOp::commutative;
addlflags = inherits_sign;
addlflags = arithmetic_op | inherits_sign;
behave = new OpBehaviorIntAdd();
}
@ -1253,7 +1253,7 @@ TypeOpIntSub::TypeOpIntSub(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_SUB,"-",TYPE_INT,TYPE_INT)
{
opflags = PcodeOp::binary;
addlflags = inherits_sign;
addlflags = arithmetic_op | inherits_sign;
behave = new OpBehaviorIntSub();
}
@ -1267,6 +1267,7 @@ TypeOpIntCarry::TypeOpIntCarry(TypeFactory *t)
: TypeOpFunc(t,CPUI_INT_CARRY,"CARRY",TYPE_BOOL,TYPE_UINT)
{
opflags = PcodeOp::binary;
addlflags = arithmetic_op;
behave = new OpBehaviorIntCarry();
}
@ -1297,6 +1298,7 @@ TypeOpIntSborrow::TypeOpIntSborrow(TypeFactory *t)
: TypeOpFunc(t,CPUI_INT_SBORROW,"SBORROW",TYPE_BOOL,TYPE_INT)
{
opflags = PcodeOp::binary;
addlflags = arithmetic_op;
behave = new OpBehaviorIntSborrow();
}
@ -1312,7 +1314,7 @@ TypeOpInt2Comp::TypeOpInt2Comp(TypeFactory *t)
: TypeOpUnary(t,CPUI_INT_2COMP,"-",TYPE_INT,TYPE_INT)
{
opflags = PcodeOp::unary;
addlflags = inherits_sign;
addlflags = arithmetic_op | inherits_sign;
behave = new OpBehaviorInt2Comp();
}
@ -1326,7 +1328,7 @@ TypeOpIntNegate::TypeOpIntNegate(TypeFactory *t)
: TypeOpUnary(t,CPUI_INT_NEGATE,"~",TYPE_UINT,TYPE_UINT)
{
opflags = PcodeOp::unary;
addlflags = inherits_sign;
addlflags = logical_op | inherits_sign;
behave = new OpBehaviorIntNegate();
}
@ -1340,7 +1342,7 @@ TypeOpIntXor::TypeOpIntXor(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_XOR,"^",TYPE_UINT,TYPE_UINT)
{
opflags = PcodeOp::binary | PcodeOp::commutative;
addlflags = inherits_sign;
addlflags = logical_op | inherits_sign;
behave = new OpBehaviorIntXor();
}
@ -1368,7 +1370,7 @@ TypeOpIntAnd::TypeOpIntAnd(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_AND,"&",TYPE_UINT,TYPE_UINT)
{
opflags = PcodeOp::binary | PcodeOp::commutative;
addlflags = inherits_sign;
addlflags = logical_op | inherits_sign;
behave = new OpBehaviorIntAnd();
}
@ -1396,7 +1398,7 @@ TypeOpIntOr::TypeOpIntOr(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_OR,"|",TYPE_UINT,TYPE_UINT)
{
opflags = PcodeOp::binary | PcodeOp::commutative;
addlflags = inherits_sign;
addlflags = logical_op | inherits_sign;
behave = new OpBehaviorIntOr();
}
@ -1539,7 +1541,7 @@ TypeOpIntMult::TypeOpIntMult(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_MULT,"*",TYPE_INT,TYPE_INT)
{
opflags = PcodeOp::binary | PcodeOp::commutative;
addlflags = inherits_sign;
addlflags = arithmetic_op | inherits_sign;
behave = new OpBehaviorIntMult();
}
@ -1553,7 +1555,7 @@ TypeOpIntDiv::TypeOpIntDiv(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_DIV,"/",TYPE_UINT,TYPE_UINT)
{
opflags = PcodeOp::binary;
addlflags = inherits_sign;
addlflags = arithmetic_op | inherits_sign;
behave = new OpBehaviorIntDiv();
}
@ -1573,7 +1575,7 @@ TypeOpIntSdiv::TypeOpIntSdiv(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_SDIV,"/",TYPE_INT,TYPE_INT)
{
opflags = PcodeOp::binary;
addlflags = inherits_sign;
addlflags = arithmetic_op | inherits_sign;
behave = new OpBehaviorIntSdiv();
}
@ -1593,7 +1595,7 @@ TypeOpIntRem::TypeOpIntRem(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_REM,"%",TYPE_UINT,TYPE_UINT)
{
opflags = PcodeOp::binary;
addlflags = inherits_sign | inherits_sign_zero;
addlflags = arithmetic_op | inherits_sign | inherits_sign_zero;
behave = new OpBehaviorIntRem();
}
@ -1613,7 +1615,7 @@ TypeOpIntSrem::TypeOpIntSrem(TypeFactory *t)
: TypeOpBinary(t,CPUI_INT_SREM,"%",TYPE_INT,TYPE_INT)
{
opflags = PcodeOp::binary;
addlflags = inherits_sign | inherits_sign_zero;
addlflags = arithmetic_op | inherits_sign | inherits_sign_zero;
behave = new OpBehaviorIntSrem();
}
@ -1633,6 +1635,7 @@ TypeOpBoolNegate::TypeOpBoolNegate(TypeFactory *t)
: TypeOpUnary(t,CPUI_BOOL_NEGATE,"!",TYPE_BOOL,TYPE_BOOL)
{
opflags = PcodeOp::unary | PcodeOp::booloutput;
addlflags = logical_op;
behave = new OpBehaviorBoolNegate();
}
@ -1640,6 +1643,7 @@ TypeOpBoolXor::TypeOpBoolXor(TypeFactory *t)
: TypeOpBinary(t,CPUI_BOOL_XOR,"^^",TYPE_BOOL,TYPE_BOOL)
{
opflags = PcodeOp::binary | PcodeOp::commutative | PcodeOp::booloutput;
addlflags = logical_op;
behave = new OpBehaviorBoolXor();
}
@ -1647,6 +1651,7 @@ TypeOpBoolAnd::TypeOpBoolAnd(TypeFactory *t)
: TypeOpBinary(t,CPUI_BOOL_AND,"&&",TYPE_BOOL,TYPE_BOOL)
{
opflags = PcodeOp::binary | PcodeOp::commutative | PcodeOp::booloutput;
addlflags = logical_op;
behave = new OpBehaviorBoolAnd();
}
@ -1654,6 +1659,7 @@ TypeOpBoolOr::TypeOpBoolOr(TypeFactory *t)
: TypeOpBinary(t,CPUI_BOOL_OR,"||",TYPE_BOOL,TYPE_BOOL)
{
opflags = PcodeOp::binary | PcodeOp::commutative | PcodeOp::booloutput;
addlflags = logical_op;
behave = new OpBehaviorBoolOr();
}
@ -1661,6 +1667,7 @@ TypeOpFloatEqual::TypeOpFloatEqual(TypeFactory *t,const Translate *trans)
: TypeOpBinary(t,CPUI_FLOAT_EQUAL,"==",TYPE_BOOL,TYPE_FLOAT)
{
opflags = PcodeOp::binary | PcodeOp::booloutput | PcodeOp::commutative;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatEqual(trans);
}
@ -1668,6 +1675,7 @@ TypeOpFloatNotEqual::TypeOpFloatNotEqual(TypeFactory *t,const Translate *trans)
: TypeOpBinary(t,CPUI_FLOAT_NOTEQUAL,"!=",TYPE_BOOL,TYPE_FLOAT)
{
opflags = PcodeOp::binary | PcodeOp::booloutput | PcodeOp::commutative;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatNotEqual(trans);
}
@ -1675,6 +1683,7 @@ TypeOpFloatLess::TypeOpFloatLess(TypeFactory *t,const Translate *trans)
: TypeOpBinary(t,CPUI_FLOAT_LESS,"<",TYPE_BOOL,TYPE_FLOAT)
{
opflags = PcodeOp::binary | PcodeOp::booloutput;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatLess(trans);
}
@ -1682,6 +1691,7 @@ TypeOpFloatLessEqual::TypeOpFloatLessEqual(TypeFactory *t,const Translate *trans
: TypeOpBinary(t,CPUI_FLOAT_LESSEQUAL,"<=",TYPE_BOOL,TYPE_FLOAT)
{
opflags = PcodeOp::binary | PcodeOp::booloutput;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatLessEqual(trans);
}
@ -1689,6 +1699,7 @@ TypeOpFloatNan::TypeOpFloatNan(TypeFactory *t,const Translate *trans)
: TypeOpFunc(t,CPUI_FLOAT_NAN,"NAN",TYPE_BOOL,TYPE_FLOAT)
{
opflags = PcodeOp::unary | PcodeOp::booloutput;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatNan(trans);
}
@ -1696,6 +1707,7 @@ TypeOpFloatAdd::TypeOpFloatAdd(TypeFactory *t,const Translate *trans)
: TypeOpBinary(t,CPUI_FLOAT_ADD,"+",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::binary | PcodeOp::commutative;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatAdd(trans);
}
@ -1703,6 +1715,7 @@ TypeOpFloatDiv::TypeOpFloatDiv(TypeFactory *t,const Translate *trans)
: TypeOpBinary(t,CPUI_FLOAT_DIV,"/",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::binary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatDiv(trans);
}
@ -1710,6 +1723,7 @@ TypeOpFloatMult::TypeOpFloatMult(TypeFactory *t,const Translate *trans)
: TypeOpBinary(t,CPUI_FLOAT_MULT,"*",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::binary | PcodeOp::commutative;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatMult(trans);
}
@ -1717,6 +1731,7 @@ TypeOpFloatSub::TypeOpFloatSub(TypeFactory *t,const Translate *trans)
: TypeOpBinary(t,CPUI_FLOAT_SUB,"-",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::binary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatSub(trans);
}
@ -1724,6 +1739,7 @@ TypeOpFloatNeg::TypeOpFloatNeg(TypeFactory *t,const Translate *trans)
: TypeOpUnary(t,CPUI_FLOAT_NEG,"-",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatNeg(trans);
}
@ -1731,6 +1747,7 @@ TypeOpFloatAbs::TypeOpFloatAbs(TypeFactory *t,const Translate *trans)
: TypeOpFunc(t,CPUI_FLOAT_ABS,"ABS",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatAbs(trans);
}
@ -1738,6 +1755,7 @@ TypeOpFloatSqrt::TypeOpFloatSqrt(TypeFactory *t,const Translate *trans)
: TypeOpFunc(t,CPUI_FLOAT_SQRT,"SQRT",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatSqrt(trans);
}
@ -1745,6 +1763,7 @@ TypeOpFloatInt2Float::TypeOpFloatInt2Float(TypeFactory *t,const Translate *trans
: TypeOpFunc(t,CPUI_FLOAT_INT2FLOAT,"INT2FLOAT",TYPE_FLOAT,TYPE_INT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatInt2Float(trans);
}
@ -1752,6 +1771,7 @@ TypeOpFloatFloat2Float::TypeOpFloatFloat2Float(TypeFactory *t,const Translate *t
: TypeOpFunc(t,CPUI_FLOAT_FLOAT2FLOAT,"FLOAT2FLOAT",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatFloat2Float(trans);
}
@ -1759,6 +1779,7 @@ TypeOpFloatTrunc::TypeOpFloatTrunc(TypeFactory *t,const Translate *trans)
: TypeOpFunc(t,CPUI_FLOAT_TRUNC,"TRUNC",TYPE_INT,TYPE_FLOAT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatTrunc(trans);
}
@ -1766,6 +1787,7 @@ TypeOpFloatCeil::TypeOpFloatCeil(TypeFactory *t,const Translate *trans)
: TypeOpFunc(t,CPUI_FLOAT_CEIL,"CEIL",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatCeil(trans);
}
@ -1773,6 +1795,7 @@ TypeOpFloatFloor::TypeOpFloatFloor(TypeFactory *t,const Translate *trans)
: TypeOpFunc(t,CPUI_FLOAT_FLOOR,"FLOOR",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatFloor(trans);
}
@ -1780,6 +1803,7 @@ TypeOpFloatRound::TypeOpFloatRound(TypeFactory *t,const Translate *trans)
: TypeOpFunc(t,CPUI_FLOAT_ROUND,"ROUND",TYPE_FLOAT,TYPE_FLOAT)
{
opflags = PcodeOp::unary;
addlflags = floatingpoint_op;
behave = new OpBehaviorFloatRound(trans);
}
@ -1794,15 +1818,6 @@ Datatype *TypeOpMulti::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn
int4 inslot,int4 outslot)
{
if ((inslot!=-1)&&(outslot!=-1)) {
if (invn == outvn && outvn->getTempType()->needsResolution()) {
// If same Varnode occupies two input slots of the MULTIEQUAL
// the second input slot should inherit the resolution of the first
Funcdata *fd = op->getParent()->getFuncdata();
Datatype *unionType = outvn->getTempType();
const ResolvedUnion *res = fd->getUnionField(unionType, op, inslot);
if (res != (const ResolvedUnion *)0)
fd->setUnionField(unionType, op, outslot, *res);
}
return (Datatype *)0; // Must propagate input <-> output
}
Datatype *newtype;
@ -2025,6 +2040,7 @@ TypeOpPtradd::TypeOpPtradd(TypeFactory *t) : TypeOp(t,CPUI_PTRADD,"+")
{
opflags = PcodeOp::ternary | PcodeOp::nocollapse;
addlflags = arithmetic_op;
behave = new OpBehavior(CPUI_PTRADD,false); // Dummy behavior
}
@ -2094,6 +2110,7 @@ TypeOpPtrsub::TypeOpPtrsub(TypeFactory *t) : TypeOp(t,CPUI_PTRSUB,"->")
// But the typing information doesn't really
// allow this to be commutative.
opflags = PcodeOp::binary|PcodeOp::nocollapse;
addlflags = arithmetic_op;
behave = new OpBehavior(CPUI_PTRSUB,false); // Dummy behavior
}

View file

@ -41,7 +41,10 @@ public:
enum {
inherits_sign = 1, ///< Operator token inherits signedness from its inputs
inherits_sign_zero = 2, ///< Only inherits sign from first operand, not the second
shift_op = 4 ///< Shift operation
shift_op = 4, ///< Shift operation
arithmetic_op = 8, ///< Operation involving addition, multiplication, or division
logical_op = 0x10, ///< Logical operation
floatingpoint_op = 0x20 ///< Floating-point operation
};
protected:
TypeFactory *tlst; ///< Pointer to data-type factory
@ -114,6 +117,15 @@ public:
/// \brief Check whether the op-code is a shift (INT_LEFT, INT_RIGHT, or INT_SRIGHT)
///
/// \return \b true if the \e shift_op bit is set in the additional flags of \b this
bool isShiftOp(void) const {
  return (addlflags & shift_op) != 0;
}
/// \brief Check whether the op-code is INT_ADD, INT_MULT, INT_DIV, INT_REM, or other arithmetic op
///
/// \return \b true if the \e arithmetic_op bit is set in the additional flags of \b this
bool isArithmeticOp(void) const {
  return (addlflags & arithmetic_op) != 0;
}
/// \brief Check whether the op-code is INT_AND, INT_OR, INT_XOR, or other logical op
///
/// \return \b true if the \e logical_op bit is set in the additional flags of \b this
bool isLogicalOp(void) const {
  return (addlflags & logical_op) != 0;
}
/// \brief Check whether the op-code is FLOAT_ADD, FLOAT_MULT, or other floating-point operation
///
/// \return \b true if the \e floatingpoint_op bit is set in the additional flags of \b this
bool isFloatingPointOp(void) const {
  return (addlflags & floatingpoint_op) != 0;
}
/// \brief Find the minimal (or suggested) data-type of an output to \b this op-code
virtual Datatype *getOutputLocal(const PcodeOp *op) const;

View file

@ -70,6 +70,23 @@ void VariableGroup::removePiece(VariablePiece *piece)
// We currently don't adjust size here as removePiece is currently only called during clean up
}
/// Every VariablePiece in the given group is moved into \b this and the VariableGroup object is deleted.
/// There must be no matching VariablePieces with the same size and offset between the two groups
/// or a LowlevelError exception is thrown.
///
/// The pieces are copied into a temporary list before any of them is transferred.  Because the
/// given group is deleted by the time its last piece has been moved, no iterator into its piece
/// set may be used, or even compared, after the final transfer.
/// \param op2 is the given VariableGroup to merge into \b this
void VariableGroup::combineGroups(VariableGroup *op2)
{
  // Snapshot first: op2 (and its pieceSet) must not be touched once the transfers are done
  vector<VariablePiece *> pending(op2->pieceSet.begin(),op2->pieceSet.end());
  vector<VariablePiece *>::const_iterator iter;
  for(iter=pending.begin();iter!=pending.end();++iter)
    (*iter)->transferGroup(this);
}
/// Construct piece given a HighVariable and its position within the whole.
/// If \b this is the first piece in the group, allocate a new VariableGroup object.
/// \param h is the given HighVariable to treat as a piece
@ -164,15 +181,15 @@ void VariablePiece::transferGroup(VariableGroup *newGroup)
newGroup->addPiece(this);
}
/// Combine the VariableGroup associated with the given other VariablePiece and the VariableGroup of \b this
/// into one group. Combining in this way requires pieces of the same size and offset to be merged. This
/// Combine the VariableGroup associated with \b this and the given other VariablePiece into one group.
/// Offsets are adjusted so that \b this and the other VariablePiece have the same offset.
/// Combining in this way requires pieces of the same size and offset to be merged. This
/// method does not do the merging but passes back a list of HighVariable pairs that need to be merged.
/// The first element in the pair will have its VariablePiece in the new group, and the second element
/// will have its VariablePiece freed in preparation for the merge.
/// Offsets are adjusted so that \b this and the given other piece have the same offset;
/// \param op2 is the given other VariablePiece
/// \param mergePairs passes back the collection of HighVariable pairs that must be merged
void VariablePiece::combineOtherGroup(VariablePiece *op2,vector<HighVariable *> &mergePairs)
void VariablePiece::mergeGroups(VariablePiece *op2,vector<HighVariable *> &mergePairs)
{
int4 diff = groupOffset - op2->groupOffset; // Add to op2, or subtract from this
@ -523,6 +540,17 @@ void HighVariable::finalizeDatatype(Datatype *tp)
{
type = tp;
if (type->hasStripped()) {
if (type->getMetatype() == TYPE_PARTIALUNION) {
if (symbol != (Symbol *)0 && symboloffset != -1) {
type_metatype meta = symbol->getType()->getMetatype();
if (meta != TYPE_STRUCT && meta != TYPE_UNION) // If partial union does not have a bigger backing symbol
type = type->getStripped(); // strip the partial union
}
}
else
type = type->getStripped();
}
highflags |= type_finalized;
}
@ -557,7 +585,11 @@ void HighVariable::groupWith(int4 off,HighVariable *hi2)
hi2->piece = new VariablePiece(hi2,hi2Off,this);
}
else {
throw LowlevelError("Cannot group HighVariables that are already grouped");
int4 offDiff = hi2->piece->getOffset() + off - piece->getOffset();
if (offDiff != 0)
piece->getGroup()->adjustOffsets(offDiff);
hi2->piece->getGroup()->combineGroups(piece->getGroup());
hi2->piece->markIntersectionDirty();
}
}
@ -657,7 +689,7 @@ void HighVariable::merge(HighVariable *tv2,HighIntersectTest *testCache,bool iss
if (isspeculative)
throw LowlevelError("Trying speculatively merge variables in separate groups");
vector<HighVariable *> mergePairs;
piece->combineOtherGroup(tv2->piece, mergePairs);
piece->mergeGroups(tv2->piece, mergePairs);
for(int4 i=0;i<mergePairs.size();i+=2) {
HighVariable *high1 = mergePairs[i];
HighVariable *high2 = mergePairs[i+1];

View file

@ -61,6 +61,7 @@ public:
/// \brief Get the number of bytes \b this group covers
int4 getSize(void) const {
  return size;
}
/// \brief Cache the symbol offset for the group
///
/// \param val is the offset value to store
void setSymbolOffset(int4 val) {
  symbolOffset = val;
}
/// \brief Get offset of \b this group within its Symbol
int4 getSymbolOffset(void) const {
  return symbolOffset;
}
void combineGroups(VariableGroup *op2); ///< Combine given VariableGroup into \b this
};
/// \brief Information about how a HighVariable fits into a larger group or Symbol
@ -91,7 +92,7 @@ public:
void updateCover(void) const; ///< Calculate extended cover based on intersections
void transferGroup(VariableGroup *newGroup); ///< Transfer \b this piece to another VariableGroup
/// \brief Move ownership of \b this to another HighVariable
///
/// \param newHigh is the HighVariable that \b this piece will refer to from now on
void setHigh(HighVariable *newHigh) {
  high = newHigh;
}
void combineOtherGroup(VariablePiece *op2,vector<HighVariable *> &mergePairs); ///< Combine two VariableGroups
void mergeGroups(VariablePiece *op2,vector<HighVariable *> &mergePairs); ///< Combine two VariableGroups
};
class HighIntersectTest;

View file

@ -89,7 +89,7 @@ bool RangeHint::contain(const RangeHint *b) const
/// Otherwise data-type ordering is used.
/// \param b is the other given range
/// \param reconcile is \b true if the two ranges have \e reconciled data-types
/// \return \b true if the \b this ranges's data-type is preferred
/// \return \b true if \b this range's data-type is preferred
bool RangeHint::preferred(const RangeHint *b,bool reconcile) const
{
@ -104,16 +104,16 @@ bool RangeHint::preferred(const RangeHint *b,bool reconcile) const
return true;
if (!reconcile) { // If the ranges don't reconcile
if ((rangeType == RangeHint::open)&&(b->rangeType != RangeHint::open)) // Throw out the open range
if (rangeType == open && b->rangeType != open) // Throw out the open range
return false;
if ((b->rangeType == RangeHint::open)&&(rangeType != RangeHint::open))
if (b->rangeType == open && rangeType != open)
return true;
}
return (0>type->typeOrder(*b->type)); // Prefer the more specific
}
/// If \b this RangeHint is an array and the following details line up, adjust \b this
/// If \b this RangeHint is an array and the following RangeHint lines up, adjust \b this
/// so that it \e absorbs the other given RangeHint and return \b true.
/// The second RangeHint:
/// - must have the same element size
@ -124,12 +124,12 @@ bool RangeHint::preferred(const RangeHint *b,bool reconcile) const
///
/// \param b is the other RangeHint to absorb
/// \return \b true if the other RangeHint was successfully absorbed
bool RangeHint::absorb(RangeHint *b)
bool RangeHint::attemptJoin(RangeHint *b)
{
if (rangeType != RangeHint::open) return false;
if (rangeType != open) return false;
if (highind < 0) return false;
if (b->rangeType == RangeHint::endpoint) return false; // Don't merge with bounding range
if (b->rangeType == endpoint) return false; // Don't merge with bounding range
Datatype *settype = type; // Assume we will keep this data-type
if (settype->getSize() != b->type->getSize()) return false;
if (settype != b->type) {
@ -160,14 +160,29 @@ bool RangeHint::absorb(RangeHint *b)
diffsz /= settype->getSize();
if (diffsz > highind) return false;
type = settype;
if (b->rangeType == RangeHint::open && (0 <= b->highind)) { // If b has array indexing
int4 trialhi = b->highind + diffsz;
if (highind < trialhi)
highind = trialhi;
}
absorb(b);
return true;
}
/// Fold the range details of the other RangeHint into \b this, leaving the data-type of \b this alone.
/// Nothing happens unless the other range is \e open and its data-type has the same size as the
/// data-type of \b this.  In that case \b this becomes \e open as well, and if the other range
/// carries array indexing, its high index (shifted by the element distance between the two
/// starting offsets) replaces the high index of \b this when it is larger.
/// The data-type of \b this is assumed to be compatible with, and preferred over, the other data-type.
/// \param b is the other RangeHint to absorb
void RangeHint::absorb(RangeHint *b)
{
  if (b->rangeType != open) return;			// Only an open range has anything to inherit
  if (type->getSize() != b->type->getSize()) return;	// Element sizes must agree
  rangeType = open;
  if (b->highind < 0) return;				// b has no array indexing
  intb byteDelta = b->sstart - sstart;
  intb indexDelta = byteDelta / type->getSize();	// Distance between starts, in elements
  int4 candidate = b->highind + indexDelta;
  if (candidate > highind)
    highind = candidate;
}
/// Given that \b this and the other RangeHint intersect, redefine \b this so that it
/// becomes the union of the two original ranges. The union must succeed in some form.
/// An attempt is made to preserve the data-type information of both the original ranges,
@ -180,84 +195,60 @@ bool RangeHint::absorb(RangeHint *b)
bool RangeHint::merge(RangeHint *b,AddrSpace *space,TypeFactory *typeFactory)
{
uintb aend,bend;
uintb end;
Datatype *resType;
uint4 resFlags;
bool didReconcile;
int4 resHighIndex;
bool overlapProblems = false;
aend = space->wrapOffset(start+size);
bend = space->wrapOffset(b->start+b->size);
RangeHint::RangeType resRangeType = RangeHint::fixed;
resHighIndex = -1;
if ((aend==0)||(bend==0))
end = 0;
else
end = (aend > bend) ? aend : bend;
int4 resType; // 0=this, 1=b, 2=confuse
if (contain(b)) { // Does one range contain the other
didReconcile = reconcile(b); // Can the data-type layout be reconciled
if (preferred(b,didReconcile)) { // If a's data-type is preferred over b
resType = type;
resFlags = flags;
resRangeType = rangeType;
resHighIndex = highind;
}
else {
resType = b->type;
resFlags = b->flags;
resRangeType = b->rangeType;
resHighIndex = b->highind;
}
if ((start==b->start)&&(size==b->size)) {
resRangeType = (rangeType==RangeHint::open || b->rangeType==RangeHint::open) ? RangeHint::open : RangeHint::fixed;
if (resRangeType == RangeHint::open)
resHighIndex = (highind < b->highind) ? b->highind : highind;
}
if (!didReconcile) { // See if two types match up
if ((b->rangeType != RangeHint::open)&&(rangeType != RangeHint::open))
overlapProblems = true;
}
if (!didReconcile && start != b->start)
resType = 2;
else
resType = preferred(b,didReconcile) ? 0 : 1;
}
else {
didReconcile = false;
resType = (Datatype *)0; // Unable to resolve the type
resFlags = 0;
resType = ((flags & Varnode::typelock) != 0) ? 0 : 2;
}
// Check for really problematic cases
if (!didReconcile) {
if ((b->flags & Varnode::typelock)!=0) {
if ((flags & Varnode::typelock)!=0)
if ((flags & Varnode::typelock)!=0) {
if ((b->flags & Varnode::typelock)!=0)
throw LowlevelError("Overlapping forced variable types : " + type->getName() + " " + b->type->getName());
if (start != b->start)
return false; // Discard b entirely
}
}
if (resType == (Datatype *)0) // If all else fails
resType = typeFactory->getBase(1,TYPE_UNKNOWN); // Do unknown array (size 1)
type = resType;
flags = resFlags;
rangeType = resRangeType;
highind = resHighIndex;
if ((!didReconcile)&&(start != b->start)) { // Truncation is forced
if ((flags & Varnode::typelock)!=0) { // If a is locked
return overlapProblems; // Discard b entirely in favor of a
}
// Concede confusion about types, set unknown type rather than a or b's type
rangeType = RangeHint::fixed;
size = space->wrapOffset(end-start);
if (resType == 0) {
if (didReconcile)
absorb(b);
}
else if (resType == 1) {
RangeHint copyRange = *this;
type = b->type;
flags = b->flags;
rangeType = b->rangeType;
highind = b->highind;
size = b->size;
absorb(&copyRange);
}
else if (resType == 2) {
// Concede confusion about types, set unknown type rather than this or b's type
flags = 0;
rangeType = fixed;
int4 diff = (int4)(b->sstart - sstart);
if (diff + b->size > size)
size = diff + b->size;
if (size != 1 && size != 2 && size != 4 && size != 8) {
size = 1;
rangeType = RangeHint::open;
rangeType = open;
}
type = typeFactory->getBase(size,TYPE_UNKNOWN);
flags = 0;
highind = -1;
return overlapProblems;
return false;
}
size = resType->getSize();
return overlapProblems;
return false;
}
/// Compare (signed) offset, size, RangeType, type lock, and high index, in that order.
@ -1146,7 +1137,7 @@ bool ScopeLocal::restructure(MapState &state)
overlapProblems = true;
}
else {
if (!cur.absorb(next)) {
if (!cur.attemptJoin(next)) {
if (cur.rangeType == RangeHint::open)
cur.size = next->sstart-cur.sstart;
if (adjustFit(cur))

View file

@ -112,7 +112,8 @@ public:
bool reconcile(const RangeHint *b) const;
bool contain(const RangeHint *b) const;
bool preferred(const RangeHint *b,bool reconcile) const;
bool absorb(RangeHint *b); ///< Try to absorb the other RangeHint into \b this
bool attemptJoin(RangeHint *b); ///< Try to concatenate another RangeHint onto \b this
void absorb(RangeHint *b); ///< Absorb the other RangeHint into \b this
bool merge(RangeHint *b,AddrSpace *space,TypeFactory *typeFactory); ///< Try to form the union of \b this with another RangeHint
int4 compare(const RangeHint &op2) const; ///< Order \b this with another RangeHint
static bool compareRanges(const RangeHint *a,const RangeHint *b) { return (a->compare(*b) < 0); } ///< Compare two RangeHint pointers