Search for arrays through nested structures

This commit is contained in:
caheckman 2020-05-04 11:34:05 -04:00
parent 5332624de8
commit 14d7bd2b1f
5 changed files with 302 additions and 17 deletions

View file

@ -5639,6 +5639,93 @@ AddTreeState::AddTreeState(Funcdata &d,PcodeOp *op,int4 slot)
distributeOp = (PcodeOp *)0;
}
/// The base data-type need not itself be an array for the pointer expression to involve
/// an array access: the access may be into one of its sub-components. The evidence for this
/// is a non-constant term among the non-multiple terms of the tree. Such a term counts with
/// a default element size of 1. If the term is defined by a CPUI_INT_MULT whose second input
/// is a constant, the magnitude of that constant is taken as the likely element size instead.
/// The largest element size seen over all the terms is returned.
/// \return a non-zero value indicating likely element size, or zero if there is no evidence
uint4 AddTreeState::findArrayHint(void) const

{
  uint4 bestHint = 0;
  for(int4 slot=0;slot<nonmult.size();++slot) {
    Varnode *term = nonmult[slot];
    if (term->isConstant())
      continue;				// A constant term is not evidence of indexing
    uint4 elemSize = 1;			// Non-constant term with no visible scaling
    if (term->isWritten()) {
      PcodeOp *defOp = term->getDef();
      if (defOp->code() == CPUI_INT_MULT) {
	Varnode *scale = defOp->getIn(1);
	if (scale->isConstant()) {
	  // Interpret the multiplier as signed and keep only its magnitude
	  intb signedScale = scale->getOffset();
	  sign_extend(signedScale,scale->getSize()*8-1);
	  if (signedScale < 0)
	    elemSize = (uint4)-signedScale;
	  else
	    elemSize = (uint4)signedScale;
	}
      }
    }
    bestHint = (elemSize > bestHint) ? elemSize : bestHint;
  }
  return bestHint;
}
/// \brief Select the sub-component of the base data-type referenced by an offset, using array hints
///
/// With no array hint, the component is simply whatever the base data-type reports for
/// the offset. With a hint, the nearest arrayed components before and after the offset are
/// considered. An arrayed component that actually contains the offset and is compatible with
/// the hint wins outright. Otherwise the candidate with the smaller distance to the offset is
/// chosen, where a candidate whose element size disagrees with the hint is penalized.
/// \param off is the given offset into the data-type
/// \param arrayHint if non-zero indicates array access, where the value is the element size
/// \param newoff is used to pass back the actual offset of the selected component
/// \return \b true if a good component match was found
bool AddTreeState::hasMatchingSubType(uintb off,uint4 arrayHint,uintb *newoff) const

{
  if (arrayHint == 0)
    return (baseType->getSubType(off,newoff) != (Datatype *)0);

  int4 elSizeBefore;
  uintb offBefore;
  Datatype *typeBefore = baseType->nearestArrayedComponentBackward(off, &offBefore, &elSizeBefore);
  const bool haveBefore = (typeBefore != (Datatype *)0);
  if (haveBefore && (arrayHint == 1 || elSizeBefore == arrayHint)) {
    int4 sizeAddr = AddrSpace::byteToAddressInt(typeBefore->getSize(),ct->getWordSize());
    if (offBefore < sizeAddr) {
      // The offset is \e inside a component with a compatible array, so take it.
      *newoff = offBefore;
      return true;
    }
  }

  int4 elSizeAfter;
  uintb offAfter;
  Datatype *typeAfter = baseType->nearestArrayedComponentForward(off, &offAfter, &elSizeAfter);
  const bool haveAfter = (typeAfter != (Datatype *)0);

  if (!haveBefore) {
    if (!haveAfter)			// No arrays nearby at all, fall back to a plain lookup
      return (baseType->getSubType(off,newoff) != (Datatype *)0);
    *newoff = offAfter;
    return true;
  }
  if (!haveAfter) {
    *newoff = offBefore;
    return true;
  }

  // Both candidates exist: compare distances, penalizing a mismatched element size
  uintb distBefore = offBefore;
  uintb distAfter = -offAfter;		// Forward offsets are passed back negated
  if (arrayHint != 1) {
    if (elSizeBefore != arrayHint)
      distBefore += 0x1000;
    if (elSizeAfter != arrayHint)
      distAfter += 0x1000;
  }
  if (distAfter < distBefore)
    *newoff = offAfter;
  else
    *newoff = offBefore;
  return true;
}
/// Examine a CPUI_INT_MULT element in the middle of the add tree. Determine if we treat
/// the output simply as a leaf, or if the multiply needs to be distributed to an
/// additive subtree. If the Varnode is a leaf of the tree, return \b true if
@ -5774,13 +5861,27 @@ void AddTreeState::calcSubtype(void)
{
nonmultsum &= ptrmask; // Make sure we are modulo ptr's space
multsum &= ptrmask;
if (size == 0)
if (size == 0 || nonmultsum < size)
offset = nonmultsum;
else {
// For a sum that falls completely outside the data-type, there is presumably some
// type of constant term added to an array index either at the current level or lower.
// If we knew here whether an array of the baseType was possible we could make a slightly
// better decision.
intb snonmult = (intb)nonmultsum;
sign_extend(snonmult,ptrsize*8-1);
snonmult = snonmult % size;
offset = (snonmult < 0) ? (uintb)(snonmult + size) : (uintb)snonmult;
if (snonmult >= 0)
// We assume the sum is big enough it represents an array index at this level
offset = (uintb)snonmult;
else {
// For a negative sum, if the baseType is a structure and there is array hints,
// we assume the sum is an array index at a lower level
if (baseType->getMetatype() == TYPE_STRUCT && findArrayHint() != 0)
offset = nonmultsum;
else
offset = (uintb)(snonmult + size);
}
}
correct = nonmultsum - offset;
nonmultsum = offset;
@ -5795,8 +5896,9 @@ void AddTreeState::calcSubtype(void)
else if (baseType->getMetatype() == TYPE_SPACEBASE) {
uintb nonmultbytes = AddrSpace::addressToByte(nonmultsum,ct->getWordSize()); // Convert to bytes
uintb extra;
uint4 arrayHint = findArrayHint();
// Get offset into mapped variable
if (baseType->getSubType(nonmultbytes, &extra) == (Datatype*)0) {
if (!hasMatchingSubType(nonmultbytes, arrayHint, &extra)) {
valid = false; // Cannot find mapped variable but nonmult is non-empty
return;
}
@ -5807,9 +5909,10 @@ void AddTreeState::calcSubtype(void)
else if (baseType->getMetatype() == TYPE_STRUCT) {
uintb nonmultbytes = AddrSpace::addressToByte(nonmultsum,ct->getWordSize()); // Convert to bytes
uintb extra;
uint4 arrayHint = findArrayHint();
// Get offset into field in structure
if (baseType->getSubType(nonmultbytes, &extra) == (Datatype*) 0) {
if (nonmultbytes >= size) {
if (!hasMatchingSubType(nonmultbytes, arrayHint, &extra)) {
if (nonmultbytes >= baseType->getSize()) { // Compare as bytes! not address units
valid = false; // Out of structure's bounds
return;
}

View file

@ -46,7 +46,7 @@ class AddTreeState {
const TypePointer *ct; ///< The pointer data-type
const Datatype *baseType; ///< The base data-type being pointed at
int4 ptrsize; ///< Size of the pointer
int4 size; ///< Size of data-type being pointed to
int4 size; ///< Size of data-type being pointed to (in address units)
uintb ptrmask; ///< Mask for modulo calculations in ptr space
uintb offset; ///< Number of bytes we dig into the base data-type
uintb correct; ///< Number of bytes being double counted
@ -60,6 +60,8 @@ class AddTreeState {
bool isDistributeUsed; ///< Are terms produced by distributing used
bool isSubtype; ///< Is there a sub-type (using CPUI_PTRSUB)
bool valid; ///< Set to \b true if the whole expression can be transformed
uint4 findArrayHint(void) const; ///< Look for evidence of an array in a sub-component
bool hasMatchingSubType(uintb off,uint4 arrayHint,uintb *newoff) const; ///< Find the sub-component matching an offset and array hint
bool checkMultTerm(Varnode *vn,PcodeOp *op, uintb treeCoeff); ///< Accumulate details of INT_MULT term and continue traversal if appropriate
bool checkTerm(Varnode *vn, uintb treeCoeff); ///< Accumulate details of given term and continue tree traversal
bool spanAddTree(PcodeOp *op, uintb treeCoeff); ///< Walk the given sub-tree accumulating details

View file

@ -46,7 +46,7 @@ void print_data(ostream &s,uint1 *buffer,int4 size,const Address &baseaddr)
s << " ";
else
s << setfill('0') << setw(2) << hex << (uint4) buffer[start+i-addr] << ' ';
}
s << " ";
for(i=0;i<16;++i)
@ -104,7 +104,33 @@ Datatype *Datatype::getSubType(uintb off,uintb *newoff) const
return (Datatype *)0;
}
/// Find the first component data-type after the given offset that is (or contains)
/// an array, and pass back the difference between the component's start and the given offset.
/// This base implementation has no components to search, so it never finds an array
/// and always returns null without touching the passed back values.
/// \param off is the given offset into \b this data-type
/// \param newoff is used to pass back the offset difference
/// \param elSize is used to pass back the array element size
/// \return the component data-type or null
Datatype *Datatype::nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const

{
  return (Datatype *)0;
}
/// Find the first component data-type before the given offset that is (or contains)
/// an array, and pass back the difference between the component's start and the given offset.
/// This base implementation has no components to search, so it never finds an array
/// and always returns null without touching the passed back values.
/// \param off is the given offset into \b this data-type
/// \param newoff is used to pass back the offset difference
/// \param elSize is used to pass back the array element size
/// \return the component data-type or null
Datatype *Datatype::nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const

{
  return (Datatype *)0;
}
/// Compare \b this with another data-type.
/// 0 (equality) means the data-types are functionally equivalent (even if names differ)
/// Smaller types come earlier. More specific types come earlier.
/// \param op is the data-type to compare with \b this
@ -561,7 +587,7 @@ void TypeArray::saveXml(ostream &s) const
s << "<type";
saveXmlBasic(s);
a_v_i(s,"arraysize",arraysize);
s << '>';
s << '>';
arrayof->saveXmlRef(s);
s << "</type>";
}
@ -614,7 +640,7 @@ void TypeEnum::setNameMap(const map<uintb,string> &nmap)
fieldisempty = true;
while(curmask != lastmask) { // Repeat until there is no change in the current mask
lastmask = curmask; // Note changes from last time through
for(iter=namemap.begin();iter!=namemap.end();++iter) { // For every named enumeration value
uintb val = (*iter).first;
if ((val & curmask) != 0) { // If the value shares ANY bits in common with the current mask
@ -628,7 +654,7 @@ void TypeEnum::setNameMap(const map<uintb,string> &nmap)
int4 msb = mostsigbit_set(curmask);
if (msb > curmaxbit)
curmaxbit = msb;
uintb mask1 = 1;
mask1 = (mask1 << lsb) - 1; // every bit below lsb is set to 1
uintb mask2 = 1;
@ -791,7 +817,7 @@ void TypeStruct::setFields(const vector<TypeField> &fd)
/// \return the index into the field list or -1
int4 TypeStruct::getFieldIter(int4 off) const
{ // Find subfield of given offset
{
int4 min = 0;
int4 max = field.size()-1;
@ -809,6 +835,30 @@ int4 TypeStruct::getFieldIter(int4 off) const
return -1;
}
/// Find the last field whose starting offset is less than or equal to the given offset.
/// The field is selected by its start only, so it may or may not actually contain the offset.
/// If every field starts after the offset (or there are no fields), -1 is returned.
/// \param off is the given offset
/// \return the index of the nearest field or -1
int4 TypeStruct::getLowerBoundField(int4 off) const

{
  if (field.empty()) return -1;
  int4 lo = 0;
  int4 hi = field.size()-1;

  // Binary search, probing the upper middle so that "lo = probe" always makes progress
  while(lo < hi) {
    int4 probe = lo + (hi - lo + 1)/2;
    if (field[probe].offset <= off)
      lo = probe;			// Candidate, but a later field may also qualify
    else
      hi = probe - 1;			// Starts after the offset, discard it and everything later
  }
  // The range has collapsed to a single field, which still needs to be checked
  return (field[lo].offset <= off) ? lo : -1;
}
/// Given a byte range within \b this data-type, determine the field it is contained in
/// and pass back the renormalized offset.
/// \param off is the byte offset into \b this
@ -835,7 +885,7 @@ Datatype *TypeStruct::getSubType(uintb off,uintb *newoff) const
{ // Go down one level to field that contains offset
int4 i;
i = getFieldIter(off);
if (i < 0) return Datatype::getSubType(off,newoff);
const TypeField &curfield( field[i] );
@ -843,6 +893,61 @@ Datatype *TypeStruct::getSubType(uintb off,uintb *newoff) const
return curfield.type;
}
/// Walk backward through the fields, starting with the last field that begins at or before
/// the given offset, looking for a field that is an array or that (recursively) contains one.
/// The walk stops once a field begins more than 128 bytes before the offset.
/// \param off is the given offset into \b this structure
/// \param newoff is used to pass back the distance from the start of the selected field to the offset
/// \param elSize is used to pass back the array element size
/// \return the data-type of the selected field, or null if no arrayed field is nearby
Datatype *TypeStruct::nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const

{
  int4 firstIndex = getLowerBoundField(off);
  for(int4 i=firstIndex;i>=0;--i) {
    const TypeField &subfield( field[i] );
    int4 diff = (int4)off - subfield.offset;
    if (diff > 128) break;		// Too far before the offset to be relevant
    Datatype *subtype = subfield.type;
    if (subtype->getMetatype() == TYPE_ARRAY) {
      *newoff = (intb)diff;
      *elSize = ((TypeArray *)subtype)->getBase()->getSize();
      return subtype;
    }
    // Not an array itself, check for an array nested inside the field.
    // For the field selected by the lower bound search, the backward search must begin at
    // the given offset relative to that field. Beginning at the end of the field would measure
    // distance from the wrong place: an array near the offset inside a large field is then
    // missed, or an array far beyond the offset is reported.
    // Earlier fields lie wholly before the offset, so they are searched back from their end.
    uintb remain = (i == firstIndex) ? (uintb)diff : (uintb)subtype->getSize();
    uintb suboff;
    Datatype *res = subtype->nearestArrayedComponentBackward(remain, &suboff, elSize);
    if (res != (Datatype *)0) {
      *newoff = (intb)diff;		// Offset is relative to the field, not the nested array
      return subtype;
    }
  }
  return (Datatype *)0;
}
/// Walk forward through the fields that begin after the given offset, looking for a field
/// that is an array or that (recursively) contains one. The field found by the lower bound
/// search, which begins at or before the offset, is not examined. The walk stops once a
/// field begins more than 128 bytes after the offset.
/// \param off is the given offset into \b this structure
/// \param newoff is used to pass back the negated distance from the offset to the start of the selected field
/// \param elSize is used to pass back the array element size
/// \return the data-type of the selected field, or null if no arrayed field is nearby
Datatype *TypeStruct::nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const

{
  for(int4 i=getLowerBoundField(off)+1;i<field.size();++i) {
    const TypeField &candidate( field[i] );
    int4 gap = candidate.offset - off;
    if (gap > 128) break;		// Too far past the offset to be relevant
    Datatype *candType = candidate.type;
    bool isArrayed;
    if (candType->getMetatype() == TYPE_ARRAY) {
      *elSize = ((TypeArray *)candType)->getBase()->getSize();
      isArrayed = true;
    }
    else {
      // Search the field from its first byte for a nested array
      uintb ignored;
      isArrayed = (candType->nearestArrayedComponentForward(0, &ignored, elSize) != (Datatype *)0);
    }
    if (isArrayed) {
      *newoff = (intb)-gap;
      return candType;
    }
  }
  return (Datatype *)0;
}
int4 TypeStruct::compare(const Datatype &op,int4 level) const
{
if (size != op.getSize()) return (op.getSize()-size);
@ -1186,7 +1291,7 @@ Datatype *TypeSpacebase::getSubType(uintb off,uintb *newoff) const
// Assume symbol being referenced is address tied so we use a null point of context
// FIXME: A valid point of context may be necessary in the future
smallest = scope->queryContainer(addr,1,nullPoint);
if (smallest == (SymbolEntry *)0) {
*newoff = 0;
return glb->types->getBase(1,TYPE_UNKNOWN);
@ -1195,6 +1300,74 @@ Datatype *TypeSpacebase::getSubType(uintb off,uintb *newoff) const
return smallest->getSymbol()->getType();
}
/// Look forward from the given offset for a mapped symbol that is (or contains) an array.
///
/// The symbol entry covering the resolved address is examined first: if its symbol is a
/// structure, the search is delegated to the structure, starting at the offset within it.
/// If that yields nothing, the entry mapped at the first address following that symbol is
/// examined. When no usable entry covers the resolved address, the entry mapped at the
/// resolved address plus 32 is examined instead.
/// \param off is the given byte offset into \b this spacebase
/// \param newoff is used to pass back the resolved address offset minus the starting offset of the selected symbol
/// \param elSize is used to pass back the array element size
/// \return the data-type of the selected symbol, or null if no array is found
Datatype *TypeSpacebase::nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const
{
Scope *scope = getMap();
off = AddrSpace::byteToAddress(off, spaceid->getWordSize()); // Convert from byte offset to address unit
// It should always be the case that the given offset represents a full encoding of the
// pointer, so the point of context is unused and the size is given as -1
Address nullPoint;
uintb fullEncoding;
Address addr = glb->resolveConstant(spaceid, off, -1, nullPoint, fullEncoding);
SymbolEntry *smallest = scope->queryContainer(addr,1,nullPoint);
Address nextAddr;
Datatype *symbolType;
// No entry covers the address, or the entry has a non-zero getOffset() (presumably it maps
// only an interior piece of its symbol -- TODO confirm): probe a fixed distance ahead
if (smallest == (SymbolEntry *)0 || smallest->getOffset() != 0)
nextAddr = addr + 32;
else {
symbolType = smallest->getSymbol()->getType();
// A structure covering the address may hold an array after the offset within it
if (symbolType->getMetatype() == TYPE_STRUCT) {
uintb structOff = addr.getOffset() - smallest->getAddr().getOffset();
uintb dummyOff;
Datatype *res = symbolType->nearestArrayedComponentForward(structOff, &dummyOff, elSize);
if (res != (Datatype *)0) {
// Pass back the offset within the structure; the nested offset is discarded
*newoff = structOff;
return symbolType;
}
}
// Otherwise move the probe to the first address following this symbol entry
int4 size = AddrSpace::byteToAddressInt(smallest->getSize(), spaceid->getWordSize());
nextAddr = smallest->getAddr() + size;
}
if (nextAddr < addr)
return (Datatype *)0; // Don't let the address wrap
// Examine whatever is mapped at the probe address
smallest = scope->queryContainer(nextAddr,1,nullPoint);
if (smallest == (SymbolEntry *)0 || smallest->getOffset() != 0)
return (Datatype *)0;
symbolType = smallest->getSymbol()->getType();
// Stored as an unsigned value, so a symbol starting after addr yields a wrapped (negated) distance
*newoff = addr.getOffset() - smallest->getAddr().getOffset();
if (symbolType->getMetatype() == TYPE_ARRAY) {
*elSize = ((TypeArray *)symbolType)->getBase()->getSize();
return symbolType;
}
if (symbolType->getMetatype() == TYPE_STRUCT) {
// Search the structure forward from its first byte for a nested array
uintb dummyOff;
Datatype *res = symbolType->nearestArrayedComponentForward(0, &dummyOff, elSize);
if (res != (Datatype *)0)
return symbolType;
}
return (Datatype *)0;
}
/// Determine if the symbol mapped at the given offset is (or contains) an array.
/// The symbol is looked up with getSubType(), which also passes back the offset within
/// the symbol. An array symbol is returned directly. A structure symbol is returned only
/// if a backward search from that offset within the structure finds an arrayed field.
/// \param off is the given offset into \b this spacebase
/// \param newoff is used to pass back the offset within the selected symbol
/// \param elSize is used to pass back the array element size
/// \return the data-type of the symbol, or null if it has no array
Datatype *TypeSpacebase::nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const

{
  Datatype *container = getSubType(off, newoff);
  if (container == (Datatype *)0)
    return (Datatype *)0;
  switch(container->getMetatype()) {
  case TYPE_ARRAY:
    *elSize = ((TypeArray *)container)->getBase()->getSize();
    return container;
  case TYPE_STRUCT:
    {
      // Only the existence of a nested array matters here, its own offset is discarded
      uintb ignored;
      if (container->nearestArrayedComponentBackward(*newoff,&ignored,elSize) != (Datatype *)0)
	return container;
    }
    break;
  default:
    break;
  }
  return (Datatype *)0;
}
int4 TypeSpacebase::compare(const Datatype &op,int4 level) const
{
@ -1478,7 +1651,7 @@ Datatype *TypeFactory::findAdd(Datatype &ct)
{
Datatype *newtype,*res;
if (ct.name.size()!=0) { // If there is a name
if (ct.id == 0) // There must be an id
throw LowlevelError("Datatype must have a valid id");
@ -1509,7 +1682,7 @@ Datatype *TypeFactory::findAdd(Datatype &ct)
nametree.insert(newtype);
return newtype;
}
/// This routine renames a Datatype object and fixes up cross-referencing
/// \param ct is the data-type to rename
/// \param n is the new name

View file

@ -113,6 +113,8 @@ public:
const string &getName(void) const { return name; } ///< Get the type name
virtual void printRaw(ostream &s) const; ///< Print a description of the type to stream
virtual Datatype *getSubType(uintb off,uintb *newoff) const; ///< Recover component data-type one-level down
virtual Datatype *nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const; ///< Find first component after an offset that is (or contains) an array
virtual Datatype *nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const; ///< Find first component before an offset that is (or contains) an array
virtual int4 numDepend(void) const { return 0; } ///< Return number of component sub-types
virtual Datatype *getDepend(int4 index) const { return (Datatype *)0; } ///< Return the i-th component sub-type
virtual void printNameBase(ostream &s) const { if (!name.empty()) s<<name[0]; } ///< Print name as short prefix
@ -310,6 +312,7 @@ protected:
vector<TypeField> field; ///< The list of fields
void setFields(const vector<TypeField> &fd); ///< Establish fields for \b this
int4 getFieldIter(int4 off) const; ///< Get index into field list
int4 getLowerBoundField(int4 off) const; ///< Get index of last field before or equal to given offset
virtual void restoreXml(const Element *el,TypeFactory &typegrp);
public:
TypeStruct(const TypeStruct &op); ///< Construct from another TypeStruct
@ -318,6 +321,8 @@ public:
vector<TypeField>::const_iterator endField(void) const { return field.end(); } ///< End of fields
const TypeField *getField(int4 off,int4 sz,int4 *newoff) const; ///< Get field based on offset
virtual Datatype *getSubType(uintb off,uintb *newoff) const;
virtual Datatype *nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const;
virtual Datatype *nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const;
virtual int4 numDepend(void) const { return field.size(); }
virtual Datatype *getDepend(int4 index) const { return field[index].type; }
virtual int4 compare(const Datatype &op,int4 level) const; // For tree structure
@ -376,6 +381,8 @@ public:
Scope *getMap(void) const; ///< Get the symbol table indexed by \b this
Address getAddress(uintb off,int4 sz,const Address &point) const; ///< Construct an Address given an offset
virtual Datatype *getSubType(uintb off,uintb *newoff) const;
virtual Datatype *nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const;
virtual Datatype *nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const;
virtual int4 compare(const Datatype &op,int4 level) const;
virtual int4 compareDependency(const Datatype &op) const; // For tree structure
virtual Datatype *clone(void) const { return new TypeSpacebase(*this); }

View file

@ -119,7 +119,7 @@ public class SpecifyCPrototypeAction extends AbstractDecompilerAction {
ParameterDefinition[] args = new ParameterDefinitionImpl[np];
for (int i = 0; i < np; i++) {
HighParam parm = hf.getLocalSymbolMap().getParam(i);
HighSymbol parm = hf.getLocalSymbolMap().getParamSymbol(i);
args[i] = new ParameterDefinitionImpl(parm.getName(), parm.getDataType(), null);
}
fsig.setArguments(args);