diff --git a/Ghidra/Features/Decompiler/certification.manifest b/Ghidra/Features/Decompiler/certification.manifest index 1d94dfc743..1d9e67040b 100644 --- a/Ghidra/Features/Decompiler/certification.manifest +++ b/Ghidra/Features/Decompiler/certification.manifest @@ -35,6 +35,7 @@ src/decompile/datatests/noforloop_globcall.xml||GHIDRA||||END| src/decompile/datatests/noforloop_iterused.xml||GHIDRA||||END| src/decompile/datatests/offsetarray.xml||GHIDRA||||END| src/decompile/datatests/packstructaccess.xml||GHIDRA||||END| +src/decompile/datatests/partialunion.xml||GHIDRA||||END| src/decompile/datatests/pointercmp.xml||GHIDRA||||END| src/decompile/datatests/pointerrel.xml||GHIDRA||||END| src/decompile/datatests/pointersub.xml||GHIDRA||||END| diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc index e61204390b..54a3cefde9 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc @@ -2334,6 +2334,8 @@ int4 ActionSetCasts::resolveUnion(PcodeOp *op,int4 slot,Funcdata &data) Datatype *dt = vn->getHigh()->getType(); if (!dt->needsResolution()) return 0; + if (dt != vn->getType()) + dt->resolveInFlow(op, slot); // Last chance to resolve data-type based on flow const ResolvedUnion *resUnion = data.getUnionField(dt, op,slot); if (resUnion != (ResolvedUnion*)0 && resUnion->getFieldNum() >= 0) { // Insert specific placeholder indicating which field is accessed @@ -2383,8 +2385,12 @@ int4 ActionSetCasts::castOutput(PcodeOp *op,Funcdata &data,CastStrategy *castStr // Short circuit more sophisticated casting tests. 
If they are the same type, there is no cast return 0; } - if (outHighType->needsResolution()) - outHighType = outHighType->findResolve(op, -1); // Finish fetching DefFacing data-type + Datatype *outHighResolve = outHighType; + if (outHighType->needsResolution()) { + if (outHighType != outvn->getType()) + outHighType->resolveInFlow(op, -1); // Last chance to resolve data-type based on flow + outHighResolve = outHighType->findResolve(op, -1); // Finish fetching DefFacing data-type + } if (outvn->isImplied()) { // implied varnode must have parse type if (outvn->isTypeLock()) { @@ -2392,25 +2398,25 @@ int4 ActionSetCasts::castOutput(PcodeOp *op,Funcdata &data,CastStrategy *castStr // The Varnode input to a CPUI_RETURN is marked as implied but // casting should act as if it were explicit if (outOp == (PcodeOp *)0 || outOp->code() != CPUI_RETURN) { - force = !isOpIdentical(outHighType, tokenct); + force = !isOpIdentical(outHighResolve, tokenct); } } - else if (outHighType->getMetatype() != TYPE_PTR) { // If implied varnode has an atomic (non-pointer) type + else if (outHighResolve->getMetatype() != TYPE_PTR) { // If implied varnode has an atomic (non-pointer) type outvn->updateType(tokenct,false,false); // Ignore it in favor of the token type - outHighType = outvn->getHighTypeDefFacing(); + outHighResolve = outvn->getHighTypeDefFacing(); } else if (tokenct->getMetatype() == TYPE_PTR) { // If the token is a pointer AND implied varnode is pointer - outct = ((TypePointer *)outHighType)->getPtrTo(); + outct = ((TypePointer *)outHighResolve)->getPtrTo(); type_metatype meta = outct->getMetatype(); // Preserve implied pointer if it points to a composite if ((meta!=TYPE_ARRAY)&&(meta!=TYPE_STRUCT)&&(meta!=TYPE_UNION)) { outvn->updateType(tokenct,false,false); // Otherwise ignore it in favor of the token type - outHighType = outvn->getHighTypeDefFacing(); + outHighResolve = outvn->getHighTypeDefFacing(); } } } if (!force) { - outct = outHighType; // Type of result + outct = 
outHighResolve; // Type of result ct = castStrategy->castStandard(outct,tokenct,false,true); if (ct == (Datatype *)0) return 0; } @@ -2427,8 +2433,10 @@ int4 ActionSetCasts::castOutput(PcodeOp *op,Funcdata &data,CastStrategy *castStr data.opSetInput(newop,vn,0); data.opSetOutput(op,vn); data.opInsertAfter(newop,op); // Cast comes AFTER this operation + if (tokenct->needsResolution()) + data.forceFacingType(tokenct, -1, newop, 0); if (outHighType->needsResolution()) - data.forceFacingType(outHighType, -1, newop, -1); + data.inheritWriteResolution(outHighType, newop, op); return 1; } @@ -4483,6 +4491,12 @@ bool ActionInferTypes::propagateTypeEdge(TypeFactory *typegrp,PcodeOp *op,int4 i { Varnode *invn,*outvn; + invn = (inslot==-1) ? op->getOut() : op->getIn(inslot); + Datatype *alttype = invn->getTempType(); + if (alttype->needsResolution()) { + // Always give incoming data-type a chance to resolve, even if it would not otherwise propagate + alttype = alttype->resolveInFlow(op, inslot); + } if (inslot == outslot) return false; // don't backtrack if (outslot < 0) outvn = op->getOut(); @@ -4490,11 +4504,6 @@ bool ActionInferTypes::propagateTypeEdge(TypeFactory *typegrp,PcodeOp *op,int4 i outvn = op->getIn(outslot); if (outvn->isAnnotation()) return false; } - invn = (inslot==-1) ? 
op->getOut() : op->getIn(inslot); - Datatype *alttype = invn->getTempType(); - if (alttype->needsResolution()) { - alttype = alttype->resolveInFlow(op, inslot); - } if (outvn->isTypeLock()) return false; // Can't propagate through typelock if (outvn->stopsUpPropagation() && outslot >= 0) return false; // Propagation is blocked diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc index 28c7dbf36b..4704eb1d34 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc @@ -1525,6 +1525,8 @@ SymbolEntry *Scope::addSymbol(const string &nm,Datatype *ct, { Symbol *sym; + if (ct->hasStripped()) + ct = ct->getStripped(); sym = new Symbol(owner,nm,ct); addSymbolInternal(sym); return addMapPoint(sym,addr,usepoint); @@ -1713,6 +1715,29 @@ Symbol *Scope::addEquateSymbol(const string &nm,uint4 format,uintb value,const A return sym; } +/// \brief Create a symbol forcing a field interpretation for a specific access to a variable with \e union data-type +/// +/// The symbol is attached to a specific Varnode and a PcodeOp that reads or writes to it. The Varnode, +/// in the context of the PcodeOp, is forced to have the data-type of the selected field, and field's name is used +/// to represent the Varnode in output. 
+/// \param nm is the name of the symbol +/// \param dt is the union data-type containing the field to force +/// \param fieldNum is the index of the desired field, or -1 if the whole union should be forced +/// \param addr is the address of the p-code op reading/writing the Varnode +/// \param hash is the dynamic hash identifying the Varnode +/// \return the new UnionFacetSymbol +Symbol *Scope::addUnionFacetSymbol(const string &nm,Datatype *dt,int4 fieldNum,const Address &addr,uint8 hash) + +{ + Symbol *sym = new UnionFacetSymbol(owner,nm,dt,fieldNum); + addSymbolInternal(sym); + RangeList rnglist; + if (!addr.isInvalid()) + rnglist.insertRange(addr.getSpace(),addr.getOffset(),addr.getOffset()); + addDynamicMapInternal(sym,Varnode::mapped,hash,0,1,rnglist); + return sym; +} + /// Create default name given information in the Symbol and possibly a representative Varnode. /// This method extracts the crucial properties and then uses the buildVariableName method to /// construct the actual name. 
@@ -2129,6 +2154,8 @@ void ScopeInternal::renameSymbol(Symbol *sym,const string &newname) void ScopeInternal::retypeSymbol(Symbol *sym,Datatype *ct) { + if (ct->hasStripped()) + ct = ct->getStripped(); if ((sym->type->getSize() == ct->getSize())||(sym->mapentry.empty())) { // If size is the same, or no mappings nothing special to do sym->type = ct; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/database.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/database.hh index df0dfdec00..f2ea83a151 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/database.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/database.hh @@ -762,6 +762,7 @@ public: LabSymbol *addCodeLabel(const Address &addr,const string &nm); Symbol *addDynamicSymbol(const string &nm,Datatype *ct,const Address &caddr,uint8 hash); Symbol *addEquateSymbol(const string &nm,uint4 format,uintb value,const Address &addr,uint8 hash); + Symbol *addUnionFacetSymbol(const string &nm,Datatype *dt,int4 fieldNum,const Address &addr,uint8 hash); string buildDefaultName(Symbol *sym,int4 &base,Varnode *vn) const; ///< Create a default name for the given Symbol bool isReadOnly(const Address &addr,int4 size,const Address &usepoint) const; void printBounds(ostream &s) const { rangetree.printBounds(s); } ///< Print a description of \b this Scope's \e owned memory ranges diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc index e6b282ac74..6ec15c4863 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc @@ -858,50 +858,31 @@ bool Funcdata::syncVarnodesWithSymbols(const ScopeLocal *lm,bool typesyes) return updateoccurred; } -/// If the Varnode is a partial Symbol with \e union data-type, the best description of the Varnode's -/// data-type is delayed until data-type propagation is started. 
-/// We attempt to resolve this description and also lay down any facing resolutions for the Varnode +/// If the Varnode is a partial of a Symbol with a \e union data-type component, we assign +/// a partial union data-type (TypePartialUnion) to the Varnode, so that facing resolutions +/// can be provided. /// \param vn is the given Varnode -/// \return the best data-type or null +/// \return the partial data-type or null Datatype *Funcdata::checkSymbolType(Varnode *vn) { if (vn->isTypeLock()) return vn->getType(); SymbolEntry *entry = vn->getSymbolEntry(); Symbol *sym = entry->getSymbol(); - if (sym->getType()->getMetatype() != TYPE_UNION) + Datatype *curType = sym->getType(); + if (curType->getSize() == vn->getSize()) return (Datatype *)0; - TypeUnion *unionType = (TypeUnion *)sym->getType(); - int4 off = (int4)(vn->getOffset() - entry->getAddr().getOffset()) + entry->getOffset(); - if (off == 0 && unionType->getSize() == vn->getSize()) + int4 curOff = (vn->getAddr().getOffset() - entry->getAddr().getOffset()) + entry->getOffset(); + // Drill down until we hit something that isn't a containing structure + while(curType != (Datatype *)0 && curType->getMetatype() == TYPE_STRUCT && curType->getSize() > vn->getSize()) { + uintb newOff; + curType = curType->getSubType(curOff, &newOff); + curOff = newOff; + } + if (curType == (Datatype *)0 || curType->getSize() <= vn->getSize() || curType->getMetatype() != TYPE_UNION) return (Datatype *)0; - const TypeField *finalField = (const TypeField *)0; - uintb finalOff = 0; - list::const_iterator iter; - for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) { - PcodeOp *op = *iter; - const TypeField *field = unionType->resolveTruncation(off, op, op->getSlot(vn),off); - if (field != (const TypeField *)0) { - finalField = field; - finalOff = off; - } - } - if (vn->isWritten()) { - const TypeField *field = unionType->resolveTruncation(off, vn->getDef(), -1, off); - if (field != (const TypeField *)0) { - finalField = field; - 
finalOff = off; - } - } - if (finalField != (const TypeField *)0) { // If any use of the Varnode resolves to a specific field - // Try to truncate down to a final data-type to assign to the Varnode - Datatype *ct = finalField->type; - while(ct != (Datatype *)0 && (finalOff != 0 || ct->getSize() != vn->getSize())) { - ct = ct->getSubType(finalOff, &finalOff); - } - return ct; - } - return (Datatype *)0; + // If we hit a containing union + return glb->types->getTypePartialUnion((TypeUnion *)curType, curOff, vn->getSize()); } /// A Varnode overlaps the given SymbolEntry. Make sure the Varnode is part of the variable diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.cc index 9c73542903..b8d8afee03 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.cc @@ -57,6 +57,7 @@ void IfaceDecompCapability::registerCommands(IfaceStatus *status) status->registerCom(new IfcMapexternalref(),"map","externalref"); status->registerCom(new IfcMaplabel(),"map","label"); status->registerCom(new IfcMapconvert(),"map","convert"); + status->registerCom(new IfcMapunionfacet(), "map", "unionfacet"); status->registerCom(new IfcPrintdisasm(),"disassemble"); status->registerCom(new IfcDecompile(),"decompile"); status->registerCom(new IfcDump(),"dump"); @@ -711,6 +712,39 @@ void IfcMapconvert::execute(istream &s) dcp->fd->getScopeLocal()->addEquateSymbol("", format, value, addr, hash); } +/// \class IfcMapunionfacet +/// \brief Create a union field forcing directive: `map facet
` +/// +/// Creates a \e facet directive that associates a given field of a \e union data-type with +/// a varnode in the context of a specific p-code op accessing it. The varnode and p-code op +/// are specified by dynamic hash. +void IfcMapunionfacet::execute(istream &s) + +{ + Datatype *ct; + string unionName; + int4 fieldNum; + int4 size; + uint8 hash; + + if (dcp->fd == (Funcdata *)0) + throw IfaceExecutionError("No function loaded"); + s >> ws >> unionName; + ct = dcp->conf->types->findByName(unionName); + if (ct == (Datatype *)0 || ct->getMetatype() != TYPE_UNION) + throw IfaceParseError("Bad union data-type: " + unionName); + s >> ws >> dec >> fieldNum; + if (fieldNum < -1 || fieldNum >= ct->numDepend()) + throw IfaceParseError("Bad field index"); + Address addr = parse_machaddr(s,size,*dcp->conf->types); // Read pc address of hash + + s >> hex >> hash; // Parse the hash value + ostringstream s2; + s2 << "unionfacet" << dec << (fieldNum + 1) << '_' << hex << addr.getOffset(); + Symbol *sym = dcp->fd->getScopeLocal()->addUnionFacetSymbol(s2.str(), ct, fieldNum, addr, hash); + dcp->fd->getScopeLocal()->setAttribute(sym, Varnode::typelock | Varnode::namelock); +} + /// \class IfcPrintdisasm /// \brief Print disassembly of a memory range: `disassemble [ ]` /// diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.hh index 37264d20c3..8c3386c5f8 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ifacedecomp.hh @@ -181,6 +181,11 @@ public: virtual void execute(istream &s); }; +class IfcMapunionfacet : public IfaceDecompCommand { +public: + virtual void execute(istream &s); +}; + class IfcPrintdisasm : public IfaceDecompCommand { public: virtual void execute(istream &s); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/merge.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/merge.cc index 
903a567fd4..20fe5c18b0 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/merge.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/merge.cc @@ -345,23 +345,30 @@ void Merge::mergeByDatatype(VarnodeLocSet::const_iterator startiter,VarnodeLocSe /// output is created. /// \param inVn is the given input Varnode for the new COPY /// \param addr is the address associated with the new COPY +/// \param trimOp is an exemplar PcodeOp whose read is being trimmed /// \return the newly allocated COPY -PcodeOp *Merge::allocateCopyTrim(Varnode *inVn,const Address &addr) +PcodeOp *Merge::allocateCopyTrim(Varnode *inVn,const Address &addr,PcodeOp *trimOp) { PcodeOp *copyOp = data.newOp(1,addr); data.opSetOpcode(copyOp,CPUI_COPY); Datatype *ct = inVn->getType(); - Varnode *outVn = data.newUnique(inVn->getSize(),ct); - data.opSetOutput(copyOp,outVn); - data.opSetInput(copyOp,inVn,0); - copyTrims.push_back(copyOp); if (ct->needsResolution()) { // If the data-type needs resolution if (inVn->isWritten()) { int4 fieldNum = data.inheritWriteResolution(ct, copyOp, inVn->getDef()); data.forceFacingType(ct, fieldNum, copyOp, 0); } + else { + int4 slot = trimOp->getSlot(inVn); + const ResolvedUnion *resUnion = data.getUnionField(ct, trimOp, slot); + int4 fieldNum = (resUnion == (const ResolvedUnion *)0) ? 
-1 : resUnion->getFieldNum(); + data.forceFacingType(ct, fieldNum, copyOp, 0); + } } + Varnode *outVn = data.newUnique(inVn->getSize(),ct); + data.opSetOutput(copyOp,outVn); + data.opSetInput(copyOp,inVn,0); + copyTrims.push_back(copyOp); return copyOp; } @@ -397,7 +404,7 @@ void Merge::snipReads(Varnode *vn,list &markedop) else afterop = vn->getDef(); } - copyop = allocateCopyTrim(vn, pc); + copyop = allocateCopyTrim(vn, pc, markedop.front()); if (afterop == (PcodeOp *)0) data.opInsertBegin(copyop,bl); else @@ -565,8 +572,15 @@ void Merge::trimOpOutput(PcodeOp *op) else afterop = op; vn = op->getOut(); - uniq = data.newUnique(vn->getSize(),vn->getTypeDefFacing()); + Datatype *ct = vn->getType(); copyop = data.newOp(1,op->getAddr()); + if (ct->needsResolution()) { + int4 fieldNum = data.inheritWriteResolution(ct, copyop, op); + data.forceFacingType(ct, fieldNum, copyop, 0); + if (ct->getMetatype() == TYPE_PARTIALUNION) + ct = vn->getTypeDefFacing(); + } + uniq = data.newUnique(vn->getSize(),ct); data.opSetOutput(op,uniq); // Output of op is now stubby uniq data.opSetOpcode(copyop,CPUI_COPY); data.opSetOutput(copyop,vn); // Original output is bumped forward slightly @@ -596,7 +610,7 @@ void Merge::trimOpInput(PcodeOp *op,int4 slot) else pc = op->getAddr(); vn = op->getIn(slot); - copyop = allocateCopyTrim(vn, pc); + copyop = allocateCopyTrim(vn, pc, op); data.opSetInput(op,copyop->getOut(),slot); if (op->code() == CPUI_MULTIEQUAL) data.opInsertEnd(copyop,(BlockBasic *)op->getParent()->getIn(slot)); @@ -752,7 +766,7 @@ void Merge::snipIndirect(PcodeOp *indop) // an instance of the output high must // all intersect so the varnodes must all be // traceable via COPY to the same root - snipop = allocateCopyTrim(refvn, op->getAddr()); + snipop = allocateCopyTrim(refvn, op->getAddr(), correctable.front()); data.opInsertBefore(snipop,op); list::iterator oiter; int4 i,slot; @@ -789,7 +803,7 @@ void Merge::mergeIndirect(PcodeOp *indop) PcodeOp *newop; - newop = 
allocateCopyTrim(invn0, indop->getAddr()); + newop = allocateCopyTrim(invn0, indop->getAddr(), indop); SymbolEntry *entry = outvn->getSymbolEntry(); if (entry != (SymbolEntry *)0 && entry->getSymbol()->getType()->needsResolution()) { data.inheritWriteResolution(entry->getSymbol()->getType(), newop, indop); @@ -1063,20 +1077,28 @@ void Merge::buildDominantCopy(HighVariable *high,vector ©,int4 po for(int4 i=0;igetParent()); BlockBasic *domBl = (BlockBasic *)FlowBlock::findCommonBlock(blockSet); - Varnode *rootVn = copy[pos]->getIn(0); + PcodeOp *domCopy = copy[pos]; + Varnode *rootVn = domCopy->getIn(0); + Varnode *domVn = domCopy->getOut(); bool domCopyIsNew; - PcodeOp *domCopy; - Varnode *domVn; - if (domBl == copy[pos]->getParent()) { + if (domBl == domCopy->getParent()) { domCopyIsNew = false; - domCopy = copy[pos]; - domVn = domCopy->getOut(); } else { domCopyIsNew = true; + PcodeOp *oldCopy = domCopy; domCopy = data.newOp(1,domBl->getStop()); data.opSetOpcode(domCopy, CPUI_COPY); - domVn = data.newUnique(rootVn->getSize(), rootVn->getType()); + Datatype *ct = rootVn->getType(); + if (ct->needsResolution()) { + const ResolvedUnion *resUnion = data.getUnionField(ct, oldCopy, 0); + int4 fieldNum = (resUnion == (const ResolvedUnion *)0) ? 
-1 : resUnion->getFieldNum(); + data.forceFacingType(ct, fieldNum, domCopy, 0); + data.forceFacingType(ct, fieldNum, domCopy, -1); + if (ct->getMetatype() == TYPE_PARTIALUNION) + ct = rootVn->getTypeReadFacing(oldCopy); + } + domVn = data.newUnique(rootVn->getSize(), ct); data.opSetOutput(domCopy,domVn); data.opSetInput(domCopy,rootVn,0); data.opInsertEnd(domCopy, domBl); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/merge.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/merge.hh index 0eb7ddde49..18c9e0b158 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/merge.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/merge.hh @@ -96,7 +96,7 @@ class Merge { void collectCovering(vector &vlist,HighVariable *high,PcodeOp *op); bool collectCorrectable(const vector &vlist,list &oplist,vector &slotlist, PcodeOp *op); - PcodeOp *allocateCopyTrim(Varnode *inVn,const Address &addr); + PcodeOp *allocateCopyTrim(Varnode *inVn,const Address &addr,PcodeOp *trimOp); void snipReads(Varnode *vn,list &markedop); void snipIndirect(PcodeOp *indop); void eliminateIntersect(Varnode *vn,const vector &blocksort); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc index c58ba80e1c..75a21fb886 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc @@ -876,7 +876,7 @@ void PrintC::opPtrsub(const PcodeOp *op) fieldtype = fld->type; } else { // TYPE_STRUCT - const TypeField *fld = ((TypeStruct*)ct)->resolveTruncation((int4)suboff,0,&newoff); + const TypeField *fld = ct->findTruncation((int4)suboff,0,op,0,newoff); if (fld == (const TypeField*)0) { if (ct->getSize() <= suboff) { clear(); @@ -1691,6 +1691,7 @@ void PrintC::pushConstant(uintb val,const Datatype *ct, case TYPE_STRUCT: case TYPE_UNION: case TYPE_PARTIALSTRUCT: + case TYPE_PARTIALUNION: break; } // Default printing @@ -1863,7 +1864,7 @@ void 
PrintC::pushPartialSymbol(const Symbol *sym,int4 off,int4 sz, break; // Turns out we don't resolve to the field } const TypeField *field; - field = ((TypeStruct *)ct)->resolveTruncation(off,sz,&off); + field = ct->findTruncation(off,sz,op,inslot,off); if (field != (const TypeField *)0) { stack.emplace_back(); PartialSymbolEntry &entry( stack.back() ); @@ -1894,7 +1895,7 @@ void PrintC::pushPartialSymbol(const Symbol *sym,int4 off,int4 sz, } else if (ct->getMetatype() == TYPE_UNION) { const TypeField *field; - field = ((TypeUnion *)ct)->findTruncation(off,op,inslot,off); + field = ct->findTruncation(off,sz,op,inslot,off); if (field != (const TypeField*)0) { stack.emplace_back(); PartialSymbolEntry &entry(stack.back()); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc index b8dbcd55bb..316074389c 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc @@ -18,9 +18,9 @@ /// The base propagation ordering associated with each meta-type. /// The array elements correspond to the ordering of #type_metatype. -sub_metatype Datatype::base2sub[14] = { - SUB_UNION, SUB_STRUCT, SUB_PARTIALSTRUCT, SUB_ARRAY, SUB_PTRREL, SUB_PTR, SUB_FLOAT, SUB_CODE, SUB_BOOL, - SUB_UINT_PLAIN, SUB_INT_PLAIN, SUB_UNKNOWN, SUB_SPACEBASE, SUB_VOID +sub_metatype Datatype::base2sub[15] = { + SUB_PARTIALUNION, SUB_PARTIALSTRUCT, SUB_UNION, SUB_STRUCT, SUB_ARRAY, SUB_PTRREL, SUB_PTR, SUB_FLOAT, SUB_CODE, + SUB_BOOL, SUB_UINT_PLAIN, SUB_INT_PLAIN, SUB_UNKNOWN, SUB_SPACEBASE, SUB_VOID }; AttributeId ATTRIB_ALIGNMENT = AttributeId("alignment",47); @@ -142,6 +142,24 @@ void Datatype::printRaw(ostream &s) const s << "unkbyte" << dec << size; } +/// \brief Find an immediate subfield of \b this data-type +/// +/// Given a byte range within \b this data-type, determine the field it is contained in +/// and pass back the renormalized offset. 
This method applies to TYPE_STRUCT, TYPE_UNION, and +/// TYPE_PARTIALUNION, data-types that have field components. For TYPE_UNION and TYPE_PARTIALUNION, the +/// field may depend on the p-code op extracting or writing the value. +/// \param off is the byte offset into \b this +/// \param sz is the size of the byte range +/// \param op is the PcodeOp reading/writing the data-type +/// \param slot is the index of the Varnode being accessed, -1 for the output, >=0 for an input +/// \param newoff points to the renormalized offset to pass back +/// \return the containing field or NULL if the range is not contained +const TypeField *Datatype::findTruncation(int4 off,int4 sz,const PcodeOp *op,int4 slot,int4 &newoff) const + +{ + return (const TypeField *)0; +} + /// Given an offset into \b this data-type, return the component data-type at that offset. /// Also, pass back a "renormalized" offset suitable for recursize getSubType() calls: /// i.e. if the original offset hits the exact start of the sub-type, 0 is passed back. 
@@ -231,7 +249,10 @@ void metatype2string(type_metatype metatype,string &res) res = "array"; break; case TYPE_PARTIALSTRUCT: - res = "part"; + res = "partstruct"; + break; + case TYPE_PARTIALUNION: + res = "partunion"; break; case TYPE_STRUCT: res = "struct"; @@ -275,10 +296,12 @@ type_metatype string2metatype(const string &metastring) case 'p': if (metastring=="ptr") return TYPE_PTR; - else if (metastring=="part") - return TYPE_PARTIALSTRUCT; else if (metastring=="ptrrel") return TYPE_PTRREL; + else if (metastring=="partunion") + return TYPE_PARTIALUNION; + else if (metastring=="partstruct") + return TYPE_PARTIALSTRUCT; break; case 'a': if (metastring=="array") @@ -461,6 +484,24 @@ int4 Datatype::findCompatibleResolve(Datatype *ct) const return -1; } +/// \brief Resolve which union field is being used for a given PcodeOp when a truncation is involved +/// +/// This method applies to the TYPE_UNION and TYPE_PARTIALUNION data-types, when a Varnode is backed +/// by a larger Symbol with a union data-type, or if the Varnode is produced by a CPUI_SUBPIECE where +/// the input Varnode has a union data-type. +/// Scoring is done to compute the best field and the result is cached with the function. 
+/// The record of the best field is returned or null if there is no appropriate field +/// \param offset is the byte offset into the union we are truncating to +/// \param op is either the PcodeOp reading the truncated Varnode or the CPUI_SUBPIECE doing the truncation +/// \param slot is either the input slot of the reading PcodeOp or the artificial SUBPIECE slot: 1 +/// \param newoff is used to pass back how much offset is left to resolve +/// \return the field of the union best associated with the truncation or null +const TypeField *Datatype::resolveTruncation(int4 offset,PcodeOp *op,int4 slot,int4 &newoff) + +{ + return (const TypeField *)0; +} + /// Restore the basic properties (name,size,id) of a data-type from an XML element /// Properties are read from the attributes of the element /// \param decoder is the stream decoder @@ -1332,13 +1373,7 @@ int4 TypeStruct::getLowerBoundField(int4 off) const return -1; } -/// Given a byte range within \b this data-type, determine the field it is contained in -/// and pass back the renormalized offset. 
-/// \param off is the byte offset into \b this -/// \param sz is the size of the byte range -/// \param newoff points to the renormalized offset to pass back -/// \return the containing field or NULL if the range is not contained -const TypeField *TypeStruct::resolveTruncation(int4 off,int4 sz,int4 *newoff) const +const TypeField *TypeStruct::findTruncation(int4 off,int4 sz,const PcodeOp *op,int4 slot,int4 &newoff) const { int4 i; @@ -1350,7 +1385,7 @@ const TypeField *TypeStruct::resolveTruncation(int4 off,int4 sz,int4 *newoff) co noff = off - curfield.offset; if (noff+sz > curfield.type->getSize()) // Requested piece spans more than one field return (const TypeField *)0; - *newoff = noff; + newoff = noff; return &curfield; } @@ -1785,17 +1820,6 @@ Datatype* TypeUnion::findResolve(const PcodeOp *op,int4 slot) return this; } -/// \brief Resolve which union field is being used for a given PcodeOp when a truncation is involved -/// -/// This is used either when a Varnode is backed by a larger Symbol with a union data-type, -/// or if the Varnode is produced by a CPUI_SUBPIECE where the input Varnode has a union data-type. -/// Scoring is done to compute the best field and the result is cached with the function. 
-/// The record of the best field is returned or null if there is no appropriate field -/// \param offset is the byte offset into the union we are truncating to -/// \param op is either the PcodeOp reading the truncated Varnode or the CPUI_SUBPIECE doing the truncation -/// \param slot is either the input slot of the reading PcodeOp or the artificial SUBPIECE slot: 1 -/// \param newoff is used to pass back how much offset is left to resolve -/// \return the field of the union best associated with the truncation or null const TypeField *TypeUnion::resolveTruncation(int4 offset,PcodeOp *op,int4 slot,int4 &newoff) { @@ -1826,23 +1850,23 @@ const TypeField *TypeUnion::resolveTruncation(int4 offset,PcodeOp *op,int4 slot, return (const TypeField *)0; } -/// \brief Return a precalculated field associated with a truncation -/// -/// This is the \e const version of resolveTruncation(). No new scoring is done, but if a cached result -/// is available, return it. /// \param offset is the byte offset of the truncation +/// \param sz is the number of bytes in the resulting truncation /// \param op is the PcodeOp reading the truncated value /// \param slot is the input slot being read /// \param newoff is used to pass back any remaining offset into the field which still must be resolved /// \return the field to use with truncation or null if there is no appropriate field -const TypeField *TypeUnion::findTruncation(int4 offset,const PcodeOp *op,int4 slot,int4 &newoff) const +const TypeField *TypeUnion::findTruncation(int4 offset,int4 sz,const PcodeOp *op,int4 slot,int4 &newoff) const { + // No new scoring is done, but if a cached result is available, return it. 
const Funcdata *fd = op->getParent()->getFuncdata(); const ResolvedUnion *res = fd->getUnionField(this, op, slot); if (res != (ResolvedUnion *)0 && res->getFieldNum() >= 0) { const TypeField *field = getField(res->getFieldNum()); newoff = offset - field->offset; + if (newoff + sz > field->type->getSize()) + return (const TypeField *)0; // Truncation spans more than one field return field; } return (const TypeField *)0; @@ -1870,6 +1894,142 @@ int4 TypeUnion::findCompatibleResolve(Datatype *ct) const return -1; } +TypePartialUnion::TypePartialUnion(const TypePartialUnion &op) + : Datatype(op) +{ + stripped = op.stripped; + container = op.container; + offset = op.offset; +} + +TypePartialUnion::TypePartialUnion(TypeUnion *contain,int4 off,int4 sz,Datatype *strip) + : Datatype(sz,TYPE_PARTIALUNION) +{ + flags |= (needs_resolution | has_stripped); + stripped = strip; + container = contain; + offset = off; +} + +void TypePartialUnion::printRaw(ostream &s) const + +{ + container->printRaw(s); + s << "[off=" << dec << offset << ",sz=" << size << ']'; +} + +const TypeField *TypePartialUnion::findTruncation(int4 off,int4 sz,const PcodeOp *op,int4 slot,int4 &newoff) const + +{ + return container->findTruncation(off + offset, sz, op, slot, newoff); +} + +int4 TypePartialUnion::numDepend(void) + +{ + return container->numDepend(); +} + +Datatype *TypePartialUnion::getDepend(int4 index) + +{ + // Treat dependents as coming from the underlying union + Datatype *res = container->getDepend(index); + if (res->getSize() != size) // But if the size doesn't match + return stripped; // Return the stripped data-type + return res; +} + +int4 TypePartialUnion::compare(const Datatype &op,int4 level) const + +{ + int4 res = Datatype::compare(op,level); + if (res != 0) return res; + // Both must be partial unions + TypePartialUnion *tp = (TypePartialUnion *) &op; + if (offset != tp->offset) return (offset < tp->offset) ? 
-1 : 1; + level -= 1; + if (level < 0) { + if (id == op.getId()) return 0; + return (id < op.getId()) ? -1 : 1; + } + return container->compare(*tp->container,level); // Compare the underlying union +} + +int4 TypePartialUnion::compareDependency(const Datatype &op) const + +{ + if (submeta != op.getSubMeta()) return (submeta < op.getSubMeta()) ? -1 : 1; + TypePartialUnion *tp = (TypePartialUnion *) &op; // Both must be partial unions + if (container != tp->container) return (container < tp->container) ? -1 : 1; // Compare absolute pointers + if (offset != tp->offset) return (offset < tp->offset) ? -1 : 1; + return (op.getSize()-size); +} + +void TypePartialUnion::encode(Encoder &encoder) const + +{ + encoder.openElement(ELEM_TYPE); + encodeBasic(metatype,encoder); + encoder.writeSignedInteger(ATTRIB_OFFSET, offset); + container->encodeRef(encoder); + encoder.closeElement(ELEM_TYPE); +} + +Datatype *TypePartialUnion::resolveInFlow(PcodeOp *op,int4 slot) + +{ + Datatype *curType = container; + int4 curOff = offset; + while(curType != (Datatype *)0 && curType->getSize() > size) { + if (curType->getMetatype() == TYPE_UNION) { + const TypeField *field = curType->resolveTruncation(curOff, op, slot, curOff); + curType = (field == (const TypeField *)0) ? (Datatype *)0 : field->type; + } + else { + uintb newOff; + curType = curType->getSubType(curOff, &newOff); + curOff = newOff; + } + } + if (curType != (Datatype *)0 && curType->getSize() == size) + return curType; + return stripped; +} + +Datatype* TypePartialUnion::findResolve(const PcodeOp *op,int4 slot) + +{ + Datatype *curType = container; + int4 curOff = offset; + while(curType != (Datatype *)0 && curType->getSize() > size) { + if (curType->getMetatype() == TYPE_UNION) { + Datatype *newType = curType->findResolve(op, slot); + curType = (newType == curType) ? 
(Datatype *)0 : newType; + } + else { + uintb newOff; + curType = curType->getSubType(curOff, &newOff); + curOff = newOff; + } + } + if (curType != (Datatype *)0 && curType->getSize() == size) + return curType; + return stripped; +} + +int4 TypePartialUnion::findCompatibleResolve(Datatype *ct) const + +{ + return container->findCompatibleResolve(ct); +} + +const TypeField *TypePartialUnion::resolveTruncation(int4 off,PcodeOp *op,int4 slot,int4 &newoff) + +{ + return container->resolveTruncation(off + offset, op, slot, newoff); +} + /// Parse a \ element with children describing the data-type being pointed to /// and the parent data-type. /// \param decoder is the stream decoder @@ -3222,6 +3382,14 @@ TypeUnion *TypeFactory::getTypeUnion(const string &n) return (TypeUnion *) findAdd(tmp); } +TypePartialUnion *TypeFactory::getTypePartialUnion(TypeUnion *contain,int4 off,int4 sz) + +{ + Datatype *strip = getBase(sz, TYPE_UNKNOWN); + TypePartialUnion tpu(contain,off,sz,strip); + return (TypePartialUnion *) findAdd(tpu); +} + /// The created enumeration will have no named values and a default configuration /// Named values must be added later. /// \param n is the name of the enumeration diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh index 3ec138a1dc..78b6fe4d2f 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh @@ -73,50 +73,52 @@ extern void print_data(ostream &s,uint1 *buffer,int4 size,const Address &baseadd //extern bool print_string(ostream &s,uint1 *buffer,int4 size); /// The core meta-types supported by the decompiler. These are sizeless templates -/// for the elements making up the type algebra. +/// for the elements making up the type algebra. Index is important for Datatype::base2sub array. 
enum type_metatype { - TYPE_VOID = 13, ///< Standard "void" type, absence of type - TYPE_SPACEBASE = 12, ///< Placeholder for symbol/type look-up calculations - TYPE_UNKNOWN = 11, ///< An unknown low-level type. Treated as an unsigned integer. - TYPE_INT = 10, ///< Signed integer. Signed is considered less specific than unsigned in C - TYPE_UINT = 9, ///< Unsigned integer - TYPE_BOOL = 8, ///< Boolean - TYPE_CODE = 7, ///< Data is actual executable code - TYPE_FLOAT = 6, ///< Floating-point + TYPE_VOID = 14, ///< Standard "void" type, absence of type + TYPE_SPACEBASE = 13, ///< Placeholder for symbol/type look-up calculations + TYPE_UNKNOWN = 12, ///< An unknown low-level type. Treated as an unsigned integer. + TYPE_INT = 11, ///< Signed integer. Signed is considered less specific than unsigned in C + TYPE_UINT = 10, ///< Unsigned integer + TYPE_BOOL = 9, ///< Boolean + TYPE_CODE = 8, ///< Data is actual executable code + TYPE_FLOAT = 7, ///< Floating-point - TYPE_PTR = 5, ///< Pointer data-type - TYPE_PTRREL = 4, ///< Pointer relative to another data-type (specialization of TYPE_PTR) - TYPE_ARRAY = 3, ///< Array data-type, made up of a sequence of "element" datatype - TYPE_PARTIALSTRUCT = 2, ///< Part of a structure, stored separately from the whole - TYPE_STRUCT = 1, ///< Structure data-type, made up of component datatypes - TYPE_UNION = 0 ///< An overlapping union of multiple datatypes + TYPE_PTR = 6, ///< Pointer data-type + TYPE_PTRREL = 5, ///< Pointer relative to another data-type (specialization of TYPE_PTR) + TYPE_ARRAY = 4, ///< Array data-type, made up of a sequence of "element" datatype + TYPE_STRUCT = 3, ///< Structure data-type, made up of component datatypes + TYPE_UNION = 2, ///< An overlapping union of multiple datatypes + TYPE_PARTIALSTRUCT = 1, ///< Part of a structure, stored separately from the whole + TYPE_PARTIALUNION = 0 ///< Part of a union }; /// Specializations of the core meta-types. 
Each enumeration is associated with a specific #type_metatype. /// Ordering is important: The lower the number, the more \b specific the data-type, affecting propagation. enum sub_metatype { - SUB_VOID = 21, ///< Compare as a TYPE_VOID - SUB_SPACEBASE = 20, ///< Compare as a TYPE_SPACEBASE - SUB_UNKNOWN = 19, ///< Compare as a TYPE_UNKNOWN - SUB_INT_CHAR = 18, ///< Signed 1-byte character, sub-type of TYPE_INT - SUB_UINT_CHAR = 17, ///< Unsigned 1-byte character, sub-type of TYPE_UINT - SUB_INT_PLAIN = 16, ///< Compare as a plain TYPE_INT - SUB_UINT_PLAIN = 15, ///< Compare as a plain TYPE_UINT - SUB_INT_ENUM = 14, ///< Signed enum, sub-type of TYPE_INT - SUB_UINT_ENUM = 13, ///< Unsigned enum, sub-type of TYPE_UINT - SUB_INT_UNICODE = 12, ///< Signed wide character, sub-type of TYPE_INT - SUB_UINT_UNICODE = 11, ///< Unsigned wide character, sub-type of TYPE_UINT - SUB_BOOL = 10, ///< Compare as TYPE_BOOL - SUB_CODE = 9, ///< Compare as TYPE_CODE - SUB_FLOAT = 8, ///< Compare as TYPE_FLOAT - SUB_PTRREL_UNK = 7, ///< Pointer to unknown field of struct, sub-type of TYPE_PTR - SUB_PTR = 6, ///< Compare as TYPE_PTR - SUB_PTRREL = 5, ///< Pointer relative to another data-type, sub-type of TYPE_PTR - SUB_PTR_STRUCT = 4, ///< Pointer into struct, sub-type of TYPE_PTR - SUB_ARRAY = 3, ///< Compare as TYPE_ARRAY - SUB_PARTIALSTRUCT = 2, ///< Compare as TYPE_PARTIALSTRUCT - SUB_STRUCT = 1, ///< Compare as TYPE_STRUCT - SUB_UNION = 0 ///< Compare as TYPE_UNION + SUB_VOID = 22, ///< Compare as a TYPE_VOID + SUB_SPACEBASE = 21, ///< Compare as a TYPE_SPACEBASE + SUB_UNKNOWN = 20, ///< Compare as a TYPE_UNKNOWN + SUB_INT_CHAR = 19, ///< Signed 1-byte character, sub-type of TYPE_INT + SUB_UINT_CHAR = 18, ///< Unsigned 1-byte character, sub-type of TYPE_UINT + SUB_INT_PLAIN = 17, ///< Compare as a plain TYPE_INT + SUB_UINT_PLAIN = 16, ///< Compare as a plain TYPE_UINT + SUB_INT_ENUM = 15, ///< Signed enum, sub-type of TYPE_INT + SUB_UINT_ENUM = 14, ///< Unsigned enum, sub-type of 
TYPE_UINT + SUB_INT_UNICODE = 13, ///< Signed wide character, sub-type of TYPE_INT + SUB_UINT_UNICODE = 12, ///< Unsigned wide character, sub-type of TYPE_UINT + SUB_BOOL = 11, ///< Compare as TYPE_BOOL + SUB_CODE = 10, ///< Compare as TYPE_CODE + SUB_FLOAT = 9, ///< Compare as TYPE_FLOAT + SUB_PTRREL_UNK = 8, ///< Pointer to unknown field of struct, sub-type of TYPE_PTR + SUB_PTR = 7, ///< Compare as TYPE_PTR + SUB_PTRREL = 6, ///< Pointer relative to another data-type, sub-type of TYPE_PTR + SUB_PTR_STRUCT = 5, ///< Pointer into struct, sub-type of TYPE_PTR + SUB_ARRAY = 4, ///< Compare as TYPE_ARRAY + SUB_PARTIALSTRUCT = 3, ///< Compare as TYPE_PARTIALSTRUCT + SUB_STRUCT = 2, ///< Compare as TYPE_STRUCT + SUB_UNION = 1, ///< Compare as TYPE_UNION + SUB_PARTIALUNION = 0 ///< Compare as a TYPE_PARTIALUNION }; /// Convert type \b meta-type to name extern void metatype2string(type_metatype metatype,string &res); @@ -128,6 +130,7 @@ class Architecture; // Forward declarations class PcodeOp; class Scope; class TypeFactory; +class TypeField; struct DatatypeCompare; /// \brief The base datatype class for the decompiler. @@ -135,7 +138,7 @@ struct DatatypeCompare; /// Used for symbols, function prototypes, type propagation etc. 
class Datatype { protected: - static sub_metatype base2sub[14]; + static sub_metatype base2sub[15]; /// Boolean properties of datatypes enum { coretype = 1, ///< This is a basic type which will never be redefined @@ -200,6 +203,7 @@ public: const string &getName(void) const { return name; } ///< Get the type name Datatype *getTypedef(void) const { return typedefImm; } ///< Get the data-type immediately typedefed by \e this (or null) virtual void printRaw(ostream &s) const; ///< Print a description of the type to stream + virtual const TypeField *findTruncation(int4 off,int4 sz,const PcodeOp *op,int4 slot,int4 &newoff) const; virtual Datatype *getSubType(uintb off,uintb *newoff) const; ///< Recover component data-type one-level down virtual Datatype *nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const; virtual Datatype *nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const; @@ -214,6 +218,7 @@ public: virtual Datatype *resolveInFlow(PcodeOp *op,int4 slot); ///< Tailor data-type propagation based on Varnode use virtual Datatype* findResolve(const PcodeOp *op,int4 slot); ///< Find a previously resolved sub-type virtual int4 findCompatibleResolve(Datatype *ct) const; ///< Find a resolution compatible with the given data-type + virtual const TypeField *resolveTruncation(int4 offset,PcodeOp *op,int4 slot,int4 &newoff); int4 typeOrder(const Datatype &op) const { if (this==&op) return 0; return compare(op,10); } ///< Order this with -op- datatype int4 typeOrderBool(const Datatype &op) const; ///< Order \b this with -op-, treating \e bool data-type as special void encodeRef(Encoder &encoder) const; ///< Encode a reference of \b this to a stream @@ -434,7 +439,7 @@ public: TypeStruct(void) : Datatype(0,TYPE_STRUCT) { flags |= type_incomplete; } ///< Construct incomplete/empty TypeStruct vector::const_iterator beginField(void) const { return field.begin(); } ///< Beginning of fields vector::const_iterator endField(void) const { 
return field.end(); } ///< End of fields - const TypeField *resolveTruncation(int4 off,int4 sz,int4 *newoff) const; ///< Get field based on offset + virtual const TypeField *findTruncation(int4 off,int4 sz,const PcodeOp *op,int4 slot,int4 &newoff) const; virtual Datatype *getSubType(uintb off,uintb *newoff) const; virtual Datatype *nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const; virtual Datatype *nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const; @@ -464,7 +469,8 @@ public: TypeUnion(const TypeUnion &op); ///< Construct from another TypeUnion TypeUnion(void) : Datatype(0,TYPE_UNION) { flags |= (type_incomplete | needs_resolution); } ///< Construct incomplete TypeUnion const TypeField *getField(int4 i) const { return &field[i]; } ///< Get the i-th field of the union -// virtual Datatype *getSubType(uintb off,uintb *newoff) const; + virtual const TypeField *findTruncation(int4 offset,int4 sz,const PcodeOp *op,int4 slot,int4 &newoff) const; + // virtual Datatype *getSubType(uintb off,uintb *newoff) const; virtual int4 numDepend(void) const { return field.size(); } virtual Datatype *getDepend(int4 index) const { return field[index].type; } virtual int4 compare(const Datatype &op,int4 level) const; // For tree structure @@ -474,11 +480,43 @@ public: virtual Datatype *resolveInFlow(PcodeOp *op,int4 slot); virtual Datatype* findResolve(const PcodeOp *op,int4 slot); virtual int4 findCompatibleResolve(Datatype *ct) const; - const TypeField *resolveTruncation(int4 offset,PcodeOp *op,int4 slot,int4 &newoff); - const TypeField *findTruncation(int4 offset,const PcodeOp *op,int4 slot,int4 &newoff) const; + virtual const TypeField *resolveTruncation(int4 offset,PcodeOp *op,int4 slot,int4 &newoff); }; -/// The other data, the \b container, is typically a TypeStruct or TypeArray. 
Even though \b this pointer +/// \brief An internal data-type for holding information about a variable's relative position within a union data-type +/// +/// This is a data-type that can be assigned to a Varnode offset into a Symbol, where either the Symbol itself or +/// a sub-field is a TypeUnion. In these cases, we know the Varnode is properly contained within a TypeUnion, +/// but the lack of context prevents us from deciding which field of the TypeUnion applies (and possibly +/// the sub-field of the field). +class TypePartialUnion : public Datatype { +protected: + friend class TypeFactory; + Datatype *stripped; ///< The \e undefined data-type to use if a formal data-type is required. + TypeUnion *container; ///< Union data-type containing \b this partial data-type + int4 offset; ///< Offset (in bytes) into the \e container union +public: + TypePartialUnion(const TypePartialUnion &op); ///< Construct from another TypePartialUnion + TypePartialUnion(TypeUnion *contain,int4 off,int4 sz,Datatype *strip); ///< Constructor + TypeUnion *getParentUnion(void) const { return container; } ///< Get the union which \b this is part of + virtual void printRaw(ostream &s) const; ///< Print a description of the type to stream + virtual const TypeField *findTruncation(int4 off,int4 sz,const PcodeOp *op,int4 slot,int4 &newoff) const; + virtual int4 numDepend(void); + virtual Datatype *getDepend(int4 index); + virtual int4 compare(const Datatype &op,int4 level) const; + virtual int4 compareDependency(const Datatype &op) const; + virtual Datatype *clone(void) const { return new TypePartialUnion(*this); } + virtual void encode(Encoder &encoder) const; + virtual Datatype *getStripped(void) const { return stripped; } + virtual Datatype *resolveInFlow(PcodeOp *op,int4 slot); + virtual Datatype* findResolve(const PcodeOp *op,int4 slot); + virtual int4 findCompatibleResolve(Datatype *ct) const; + virtual const TypeField *resolveTruncation(int4 off,PcodeOp *op,int4 slot,int4 &newoff); 
+}; + +/// \brief Relative pointer: A pointer with a fixed offset into a specific structure or other data-type +/// +/// The other data-type, the \b container, is typically a TypeStruct or TypeArray. Even though \b this pointer /// does not point directly to the start of the container, it is possible to access the container through \b this, /// as the distance (the \b offset) to the start of the container is explicitly known. class TypePointerRel : public TypePointer { @@ -643,6 +681,7 @@ public: TypeArray *getTypeArray(int4 as,Datatype *ao); ///< Construct an array data-type TypeStruct *getTypeStruct(const string &n); ///< Create an (empty) structure TypeUnion *getTypeUnion(const string &n); ///< Create an (empty) union + TypePartialUnion *getTypePartialUnion(TypeUnion *contain,int4 off,int4 sz); ///< Create a partial union TypeEnum *getTypeEnum(const string &n); ///< Create an (empty) enumeration TypeSpacebase *getTypeSpacebase(AddrSpace *id,const Address &addr); ///< Create a "spacebase" type TypeCode *getTypeCode(ProtoModel *model,Datatype *outtype, diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc index 404ed9ad78..11000806f1 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc @@ -1791,7 +1791,18 @@ TypeOpMulti::TypeOpMulti(TypeFactory *t) : TypeOp(t,CPUI_MULTIEQUAL,"?") Datatype *TypeOpMulti::propagateType(Datatype *alttype,PcodeOp *op,Varnode *invn,Varnode *outvn, int4 inslot,int4 outslot) { - if ((inslot!=-1)&&(outslot!=-1)) return (Datatype *)0; // Must propagate input <-> output + if ((inslot!=-1)&&(outslot!=-1)) { + if (invn == outvn && outvn->getTempType()->needsResolution()) { + // If same Varnode occupies two input slots of the MULTIEQUAL + // the second input slot should inherit the resolution of the first + Funcdata *fd = op->getParent()->getFuncdata(); + Datatype *unionType = outvn->getTempType(); + 
const ResolvedUnion *res = fd->getUnionField(unionType, op, inslot); + if (res != (const ResolvedUnion *)0) + fd->setUnionField(unionType, op, outslot, *res); + } + return (Datatype *)0; // Must propagate input <-> output + } Datatype *newtype; if (invn->isSpacebase()) { AddrSpace *spc = tlst->getArch()->getDefaultDataSpace(); @@ -1939,15 +1950,16 @@ Datatype *TypeOpSubpiece::propagateType(Datatype *alttype,PcodeOp *op,Varnode *i int4 byteOff; int4 newoff; const TypeField *field; - if (alttype->getMetatype() == TYPE_UNION) { + type_metatype meta = alttype->getMetatype(); + if (meta == TYPE_UNION || meta == TYPE_PARTIALUNION) { // NOTE: We use an artificial slot here to store the field being truncated to // as the facing data-type for slot 0 is already to the parent (this TYPE_UNION) byteOff = computeByteOffsetForComposite(op); - field = ((TypeUnion *)alttype)->resolveTruncation(byteOff,op,1,newoff); + field = alttype->resolveTruncation(byteOff,op,1,newoff); } else if (alttype->getMetatype() == TYPE_STRUCT) { int4 byteOff = computeByteOffsetForComposite(op); - field = ((TypeStruct *)alttype)->resolveTruncation(byteOff, outvn->getSize(), &newoff); + field = alttype->findTruncation(byteOff, outvn->getSize(), op, 1, newoff); } else return (Datatype *)0; @@ -1972,22 +1984,13 @@ const TypeField *TypeOpSubpiece::testExtraction(bool useHigh,const PcodeOp *op,D { const Varnode *vn = op->getIn(0); - Datatype *ct = useHigh ? 
vn->getHigh()->getType() : vn->getType(); - if (ct->getMetatype() == TYPE_STRUCT) { - parent = ct; - int4 byteOff = computeByteOffsetForComposite(op); - return ((TypeStruct *)ct)->resolveTruncation(byteOff,op->getOut()->getSize(),&offset); - } - else if (ct->getMetatype() == TYPE_UNION) { - const Funcdata *fd = op->getParent()->getFuncdata(); - const ResolvedUnion *res = fd->getUnionField(ct, op, 1); // Use artificial slot - if (res != (const ResolvedUnion *)0 && res->getFieldNum() >= 0) { - parent = ct; - offset = 0; - return ((TypeUnion *)ct)->getField(res->getFieldNum()); - } - } - return (const TypeField *)0; + Datatype *ct = useHigh ? vn->getHighTypeReadFacing(op) : vn->getTypeReadFacing(op); + type_metatype meta = ct->getMetatype(); + if (meta != TYPE_STRUCT && meta != TYPE_UNION && meta != TYPE_PARTIALUNION) + return (const TypeField *)0; + parent = ct; + int4 byteOff = computeByteOffsetForComposite(op); + return ct->findTruncation(byteOff,op->getOut()->getSize(),op,1,offset); // Use artificial slot } /// \brief Compute the byte offset into an assumed composite data-type produced by the given CPUI_SUBPIECE diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/unionresolve.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/unionresolve.cc index 74ad895b8b..940d333225 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/unionresolve.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/unionresolve.cc @@ -39,6 +39,8 @@ ResolvedUnion::ResolvedUnion(Datatype *parent) ResolvedUnion::ResolvedUnion(Datatype *parent,int4 fldNum,TypeFactory &typegrp) { + if (parent->getMetatype() == TYPE_PARTIALUNION) + parent = ((TypePartialUnion *)parent)->getParentUnion(); baseType = parent; fieldNum = fldNum; lock = false; @@ -67,6 +69,8 @@ ResolveEdge::ResolveEdge(const Datatype *parent,const PcodeOp *op,int4 slot) typeId = ((TypePointer *)parent)->getPtrTo()->getId(); // Strip pointer encoding += 0x1000; // Encode the fact that a pointer is getting accessed } + else if 
(parent->getMetatype() == TYPE_PARTIALUNION) + typeId = ((TypePartialUnion *)parent)->getParentUnion()->getId(); else typeId = parent->getId(); } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc index c8c886d272..ff5678c042 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/variable.cc @@ -159,7 +159,7 @@ void HighVariable::updateType(void) const vn = getTypeRepresentative(); type = vn->getType(); - if (type->hasStripped()) + if (type->hasStripped() && type->getMetatype() != TYPE_PARTIALUNION) type = type->getStripped(); // Update lock flags diff --git a/Ghidra/Features/Decompiler/src/decompile/datatests/partialunion.xml b/Ghidra/Features/Decompiler/src/decompile/datatests/partialunion.xml new file mode 100644 index 0000000000..489cba0888 --- /dev/null +++ b/Ghidra/Features/Decompiler/src/decompile/datatests/partialunion.xml @@ -0,0 +1,47 @@ + + + + + 554889e5f30f +1145fcf30f1045fcf30f110578092000 +8b05760920005dc3 + + + 554889e5897dfc89 +75f8837dfc01750b8b45f889055b0920 +00eb33837dfc02750e8b45f883c00789 +0543092000eb1f837dfc0475198b45f8 +890536092000f30f100516010000f30f +110522092000905dc3 + + + 0000003f + + + + + +globvar\.b\.bval1 = val; +return globvar\.a\.aval2; +Var1 = globvar\.a\.aval2; +globvar\.a\.aval1 = param_2 \+ 7; +globvar\.b\.bval1 = 0\.5; +globvar\.a\.aval2 = .Var1; + diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompInterface.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompInterface.java index 6755c6c9c2..d59a8fbc21 100644 --- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompInterface.java +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompInterface.java @@ -31,6 +31,7 @@ import ghidra.program.model.lang.*; import ghidra.program.model.listing.Function; import ghidra.program.model.listing.Program; 
import ghidra.program.model.pcode.*; +import ghidra.util.Msg; import ghidra.util.task.CancelledListener; import ghidra.util.task.TaskMonitor; @@ -775,14 +776,9 @@ public class DecompInterface { AddressXML.encode(activeSet.mainQuery, funcEntry); decompProcess.sendCommandTimeout("decompileAt", timeoutSecs, activeSet); decompileMessage = decompCallback.getNativeMessage(); - if (debug != null) { - XmlEncode xmlEncode = new XmlEncode(); - options.encode(xmlEncode, this); - debug.shutdown(pcodelanguage, xmlEncode.toString()); - debug = null; - } } catch (Exception ex) { + decoder.clear(); // Clear any partial result decompileMessage = "Exception while decompiling " + func.getEntryPoint() + ": " + ex.getMessage() + '\n'; } @@ -792,6 +788,17 @@ public class DecompInterface { } } + try { + if (debug != null) { + XmlEncode xmlEncode = new XmlEncode(); + options.encode(xmlEncode, this); + debug.shutdown(pcodelanguage, xmlEncode.toString()); + debug = null; + } + } + catch (IOException e) { + Msg.error(debug, "Could not dump debug info"); + } DecompileProcess.DisposeState processState; if (decompProcess != null) { processState = decompProcess.getDisposeState(); diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/ForceUnionAction.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/ForceUnionAction.java index f3422f32a2..41f0d70d9f 100644 --- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/ForceUnionAction.java +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/ForceUnionAction.java @@ -93,6 +93,12 @@ public class ForceUnionAction extends AbstractDecompilerAction { if (innerType instanceof Pointer) { innerType = ((Pointer) innerType).getDataType(); } + else if (innerType instanceof PartialUnion) { + innerType = ((PartialUnion) innerType).getParent(); + if (innerType instanceof TypeDef) { + innerType = ((TypeDef) 
innerType).getBaseDataType(); + } + } if (innerType == unionDt) { return dt; } @@ -168,17 +174,26 @@ public class ForceUnionAction extends AbstractDecompilerAction { /** * Build a list of all the union field names for the user to select from, when determining * which data-type to force. Two lists are produced. The first contains every possible - * field name. The second list is filtered by the size of the Varnode being forced, - * which must match the size of the selected field data-type. + * field name. The second list is filtered by the size and offset of the Varnode being forced. * @param allFields will hold the unfiltered list of names - * @param size is the size of the Varnode to filter on * @return the filtered list of names */ - private String[] buildFieldOptions(ArrayList allFields, int size) { + private String[] buildFieldOptions(ArrayList allFields) { + int size = accessVn.getSize(); + int startOff = 0; + boolean exactMatch = true; + if (parentDt instanceof Pointer) { + size = 0; + } + if (parentDt instanceof PartialUnion) { + startOff = ((PartialUnion) parentDt).getOffset(); + exactMatch = false; + } + int endOff = startOff + size; DataTypeComponent[] components = unionDt.getDefinedComponents(); ArrayList res = new ArrayList<>(); allFields.add("(no field)"); - if (size == 0 || unionDt.getLength() == size) { + if (size == 0 || !exactMatch || size == parentDt.getLength()) { res.add("(no field)"); } for (DataTypeComponent component : components) { @@ -187,7 +202,11 @@ public class ForceUnionAction extends AbstractDecompilerAction { nm = component.getDefaultFieldName(); } allFields.add(nm); - if (size == 0 || component.getDataType().getLength() == size) { + int compStart = component.getOffset(); + int compEnd = compStart + component.getLength(); + + if (size == 0 || (exactMatch && startOff == compStart && endOff == compEnd) || + (!exactMatch && startOff >= compStart && endOff <= compEnd)) { res.add(nm); } } @@ -206,12 +225,8 @@ public class ForceUnionAction 
extends AbstractDecompilerAction { * @return the index of the selected field or -1 if "no field" was selected */ private boolean selectFieldNumber(String defaultFieldName) { - int size = 0; - if (!(parentDt instanceof Pointer)) { - size = accessVn.getSize(); - } ArrayList allFields = new ArrayList<>(); - String[] choices = buildFieldOptions(allFields, size); + String[] choices = buildFieldOptions(allFields); if (choices.length < 2) { // If only one field fits the Varnode OkDialog.show("No Field Choices", "Only one field fits the selected variable"); return false; diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/HighFunctionDBUtil.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/HighFunctionDBUtil.java index dc6e90d837..ff7473569b 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/HighFunctionDBUtil.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/HighFunctionDBUtil.java @@ -742,7 +742,7 @@ public class HighFunctionDBUtil { * pieces for building the dynamic LocalVariable. This method clears out any preexisting * union facet with the same dynamic hash and firstUseOffset. 
* @param function is the function affected by the union facet - * @param dt is the parent data-type, either the union or a pointer to it + * @param dt is the parent data-type; a union, a pointer to a union, or a partial union * @param fieldNum is the ordinal of the desired union field * @param addr is the first use address of the facet * @param hash is the dynamic hash @@ -752,6 +752,9 @@ public class HighFunctionDBUtil { */ public static void writeUnionFacet(Function function, DataType dt, int fieldNum, Address addr, long hash, SourceType source) throws InvalidInputException, DuplicateNameException { + if (dt instanceof PartialUnion) { + dt = ((PartialUnion) dt).getParent(); + } int firstUseOffset = (int) addr.subtract(function.getEntryPoint()); String symbolName = UnionFacetSymbol.buildSymbolName(fieldNum, addr); boolean nameCollision = false; diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PartialUnion.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PartialUnion.java new file mode 100644 index 0000000000..4c8cd70dd3 --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PartialUnion.java @@ -0,0 +1,122 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.program.model.pcode; + +import java.lang.UnsupportedOperationException; + +import ghidra.docking.settings.Settings; +import ghidra.docking.settings.SettingsDefinition; +import ghidra.program.model.data.*; +import ghidra.program.model.mem.MemBuffer; + +/** + * A data-type representing an unspecified piece of a parent Union data-type. This is used + * internally by the decompiler to label Varnodes representing partial symbols, where the + * part is known to be contained in a Union data-type. Within the isolated context of a Varnode, + * it's not possible to resolve to a specific field of the Union because the Varnode may be used + * in multiple ways. + */ +public class PartialUnion extends AbstractDataType { + private DataType unionDataType; // Either a Union or a Typedef of a Union + private int offset; // Offset in bytes of partial within parent + private int size; // Number of bytes in partial + + PartialUnion(DataTypeManager dtm, DataType parent, int off, int sz) { + super(CategoryPath.ROOT, "partialunion", dtm); + unionDataType = parent; + offset = off; + size = sz; + } + + /** + * @return the Union data-type of which this is a part + */ + public DataType getParent() { + return unionDataType; + } + + /** + * @return the offset, in bytes, of this part within its parent Union + */ + public int getOffset() { + return offset; + } + + @Override + public DataType clone(DataTypeManager dtm) { + // Internal to the PcodeDataTypeManager + throw new UnsupportedOperationException("may not be cloned"); + } + + @Override + public int getLength() { + return size; + } + + @Override + public String getDescription() { + return "Partial Union (internal)"; + } + + @Override + public Object getValue(MemBuffer buf, Settings settings, int length) { + return null; // Should not be placed on memory + } + + @Override + public String getRepresentation(MemBuffer buf, Settings settings, int length) { + return null; // Should not be placed on memory + } + + @Override + 
public SettingsDefinition[] getSettingsDefinitions() { + return unionDataType.getSettingsDefinitions(); + } + + @Override + public Settings getDefaultSettings() { + return unionDataType.getDefaultSettings(); + } + + @Override + public DataType copy(DataTypeManager dtm) { + // Internal to the PcodeDataTypeManager + throw new UnsupportedOperationException("may not be copied"); + } + + @Override + public Class getValueClass(Settings settings) { + return unionDataType.getValueClass(settings); + } + + @Override + public boolean isEquivalent(DataType dt) { + if (dt == null || !(dt instanceof PartialUnion)) { + return false; + } + PartialUnion op = (PartialUnion) dt; + if (offset != op.offset || size != op.size) { + return false; + } + return unionDataType.isEquivalent(op.unionDataType); + } + + @Override + public int getAlignment() { + return 0; + } + +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java index d3e59b6767..f512cbbc93 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java @@ -251,6 +251,13 @@ public class PcodeDataTypeManager { decoder.closeElement(el); return AbstractFloatDataType.getFloatDataType(size, progDataTypes); } + else if (meta.equals("partunion")) { + int size = (int) decoder.readSignedInteger(ATTRIB_SIZE); + int offset = (int) decoder.readSignedInteger(ATTRIB_OFFSET); + DataType dt = decodeDataType(decoder); + decoder.closeElement(el); + return new PartialUnion(progDataTypes, dt, offset, size); + } else { // We typically reach here if the decompiler invents a new type // probably an unknown with a non-standard size int size = (int) decoder.readSignedInteger(ATTRIB_SIZE);