From 12d3da029b2e718b565aad43425dc7b7cf1a30fb Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Mon, 8 Jul 2019 13:54:03 -0400 Subject: [PATCH] Refactor ParamEntry look-up allowing "contained by" discovery --- .../Decompiler/src/decompile/cpp/address.cc | 16 + .../Decompiler/src/decompile/cpp/address.hh | 1 + .../Decompiler/src/decompile/cpp/database.cc | 44 +- .../Decompiler/src/decompile/cpp/database.hh | 23 +- .../src/decompile/cpp/database_ghidra.hh | 2 - .../Decompiler/src/decompile/cpp/fspec.cc | 400 ++++++++++++++---- .../Decompiler/src/decompile/cpp/fspec.hh | 104 ++++- .../src/decompile/cpp/funcdata_op.cc | 4 +- .../src/decompile/cpp/funcdata_varnode.cc | 26 +- .../Decompiler/src/decompile/cpp/heritage.cc | 63 ++- .../Decompiler/src/decompile/cpp/heritage.hh | 7 + .../Decompiler/src/decompile/cpp/rangemap.hh | 246 ++++++----- .../x86/data/languages/x86-64-gcc.cspec | 40 +- 13 files changed, 690 insertions(+), 286 deletions(-) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/address.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/address.cc index 985262f383..aa16170c7c 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/address.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/address.cc @@ -95,6 +95,22 @@ void Address::toPhysical(void) base = phys; } +/// Return \b true if the range starting at \b this extending the given number of bytes +/// is contained by the second given range. 
+/// \param sz is the given number of bytes in \b this range +/// \param op2 is the start of the second given range +/// \param sz2 is the number of bytes in the second given range +/// \return \b true if the second given range contains \b this range +bool Address::containedBy(int4 sz,const Address &op2,int4 sz2) const + +{ + if (base != op2.base) return false; + if (op2.offset > offset) return false; + uintb off1 = offset + (sz-1); + uintb off2 = op2.offset + (sz2-1); + return (off2 >= off1); +} + /// Return -1 if (\e op2,\e sz2) is not properly contained in (\e this,\e sz). /// If it is contained, return the endian aware offset of (\e op2,\e sz2) /// I.e. if the least significant byte of the \e op2 range falls on the least significant diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/address.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/address.hh index db8cd5abe7..1f8e9159ea 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/address.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/address.hh @@ -75,6 +75,7 @@ public: Address operator+(int4 off) const; ///< Increment address by a number of bytes Address operator-(int4 off) const; ///< Decrement address by a number of bytes friend ostream &operator<<(ostream &s,const Address &addr); ///< Write out an address to stream + bool containedBy(int4 sz,const Address &op2,int4 sz2) const; ///< Determine if \e op2 range contains \b this range int4 justifiedContain(int4 sz,const Address &op2,int4 sz2,bool forceleft) const; ///< Determine if \e op2 is the least significant part of \e this. 
int4 overlap(int4 skip,const Address &op,int4 size) const; ///< Determine how two address ranges overlap bool isContiguous(int4 sz,const Address &loaddr,int4 losz) const; ///< Does \e this form a contigous range with \e loaddr diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc index 84819cb547..ea01246477 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/database.cc @@ -29,17 +29,6 @@ SymbolEntry::SymbolEntry(Symbol *sym) size = -1; } -/// This constructor is for use with rangemap container. It must be followed -/// by an initialize() call. -/// \param a is the first offset covered by the new SymbolEntry -/// \param b is the last offset covered -SymbolEntry::SymbolEntry(uintb a,uintb b) - -{ - addr = Address(addr.getSpace(),a); - size = (b-a)+1; -} - /// This is used specifically for \e dynamic Symbol objects, where the storage location /// is attached to a temporary register or a constant. The main address field (\b addr) /// is set to \e invalid, and the \b hash becomes the primary location information. @@ -64,12 +53,15 @@ SymbolEntry::SymbolEntry(Symbol *sym,uint4 exfl,uint8 h,int4 off,int4 sz,const R /// Assuming the boundary offsets have been specified with /// the constructor, fill in the rest of the data. 
/// \param data contains the raw initialization data -void SymbolEntry::initialize(const EntryInitData &data) +/// \param a is the starting offset of the entry +/// \param b is the ending offset of the entry +void SymbolEntry::initialize(const EntryInitData &data,uintb a,uintb b) { + addr = Address(data.space,a); + size = (b-a)+1; symbol = data.symbol; extraflags = data.extraflags; - addr = Address(data.space,addr.getOffset()); offset = data.offset; uselimit = data.uselimit; } @@ -2005,32 +1997,6 @@ SymbolEntry *ScopeInternal::findOverlap(const Address &addr,int4 size) const return (SymbolEntry *)0; } -SymbolEntry *ScopeInternal::findBefore(const Address &addr) const - -{ - EntryMap *rangemap = maptable[ addr.getSpace()->getIndex() ]; - if (rangemap != (EntryMap *)0) { - EntryMap::const_iterator iter; - iter = rangemap->find_lastbefore(addr.getOffset()); - if (iter != rangemap->end()) - return &(*iter); - } - return (SymbolEntry *)0; -} - -SymbolEntry *ScopeInternal::findAfter(const Address &addr) const - -{ - EntryMap *rangemap = maptable[ addr.getSpace()->getIndex() ]; - if (rangemap != (EntryMap *)0) { - EntryMap::const_iterator iter; - iter = rangemap->find_firstafter(addr.getOffset()); - if (iter != rangemap->end()) - return &(*iter); - } - return (SymbolEntry *)0; -} - void ScopeInternal::findByName(const string &name,vector &res) const { diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/database.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/database.hh index e32b4869ca..430a374304 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/database.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/database.hh @@ -112,7 +112,7 @@ public: typedef EntrySubsort subsorttype; ///< The sub-sort object for a rangemap typedef EntryInitData inittype; ///< Initialization data for a SymbolEntry in a rangemap - SymbolEntry(uintb a,uintb b); ///< Construct given just the offset range + SymbolEntry(void) {} ///< Constructor for use with rangemap 
SymbolEntry(Symbol *sym,uint4 exfl,uint8 h,int4 off,int4 sz,const RangeList &rnglist); ///< Construct a dynamic SymbolEntry bool isPiece(void) const { return ((extraflags&(Varnode::precislo|Varnode::precishi))!=0); } ///< Is \b this a high or low piece of the whole Symbol bool isDynamic(void) const { return addr.isInvalid(); } ///< Is \b storage \e dynamic @@ -122,7 +122,7 @@ public: uintb getFirst(void) const { return addr.getOffset(); } ///< Get the first offset of \b this storage location uintb getLast(void) const { return (addr.getOffset()+size-1); } ///< Get the last offset of \b this storage location subsorttype getSubsort(void) const; ///< Get the sub-sort object - void initialize(const EntryInitData &data); ///< Fully initialize \b this + void initialize(const EntryInitData &data,uintb a,uintb b); ///< Fully initialize \b this Symbol *getSymbol(void) const { return symbol; } ///< Get the Symbol associated with \b this const Address &getAddr(void) const { return addr; } ///< Get the starting address of \b this storage uint8 getHash(void) const { return hash; } ///< Get the hash used to identify \b this storage @@ -577,18 +577,6 @@ public: /// \return an overlapping SymbolEntry or NULL if none exists virtual SymbolEntry *findOverlap(const Address &addr,int4 size) const=0; - /// \brief Find first Symbol before (but not containing) a given address - /// - /// \param addr is the given address - /// \return the SymbolEntry occurring immediately before or NULL if none exists - virtual SymbolEntry *findBefore(const Address &addr) const=0; - - /// \brief Find first Symbol after (but not containing) a given address - /// - /// \param addr is the given address - /// \return a SymbolEntry occurring immediately after or NULL if none exists - virtual SymbolEntry *findAfter(const Address &addr) const=0; - /// \brief Find a Symbol by name within \b this Scope /// /// If there are multiple Symbols with the same name, all are passed back. 
@@ -741,8 +729,6 @@ public: virtual ExternRefSymbol *findExternalRef(const Address &addr) const; virtual LabSymbol *findCodeLabel(const Address &addr) const; virtual SymbolEntry *findOverlap(const Address &addr,int4 size) const; - virtual SymbolEntry *findBefore(const Address &addr) const; - virtual SymbolEntry *findAfter(const Address &addr) const; virtual void findByName(const string &name,vector &res) const; virtual Funcdata *resolveExternalRefFunction(ExternRefSymbol *sym) const; @@ -786,12 +772,13 @@ private: Address first; ///< The first address of the range Address last; ///< The last address of the range public: - ScopeMapper(Address f,Address l) { first=f; last=l; } ///< Construct given an address range + ScopeMapper(void) {} ///< Constructor for use with rangemap Address getFirst(void) const { return first; } ///< Get the first address in the range Address getLast(void) const { return last; } ///< Get the last address in the range NullSubsort getSubsort(void) const { return NullSubsort(); } ///< Get the sub-subsort object Scope *getScope(void) const { return scope; } ///< Get the Scope owning this address range - void initialize(const inittype &data) { scope = data; } ///< Initialize the range (with the owning Scope) + void initialize(const inittype &data,const Address &f,const Address &l) { + scope = data; first = f; last = l; } ///< Initialize the range (with the owning Scope) }; typedef rangemap ScopeResolve; ///< A map from address to the owning Scope diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/database_ghidra.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/database_ghidra.hh index 1fdd352bb5..f15d5897da 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/database_ghidra.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/database_ghidra.hh @@ -88,8 +88,6 @@ public: virtual Funcdata *resolveExternalRefFunction(ExternRefSymbol *sym) const; virtual SymbolEntry *findOverlap(const Address &addr,int4 size) const { throw 
LowlevelError("findOverlap unimplemented"); } - virtual SymbolEntry *findBefore(const Address &addr) const { throw LowlevelError("findBefore unimplemented"); } - virtual SymbolEntry *findAfter(const Address &addr) const { throw LowlevelError("findAfter unimplemented"); } virtual void findByName(const string &name,vector &res) const { throw LowlevelError("findByName unimplemented"); } virtual MapIterator begin(void) const { throw LowlevelError("begin unimplemented"); } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.cc index c38d6859ee..4cd948c50d 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.cc @@ -71,6 +71,19 @@ bool ParamEntry::contains(const ParamEntry &op2) const return true; } +/// \param addr is the starting address of the potential containing range +/// \param sz is the number of bytes in the range +/// \return \b true if the entire ParamEntry fits inside the range +bool ParamEntry::containedBy(const Address &addr,int4 sz) const + +{ + if (spaceid != addr.getSpace()) return false; + if (addressbase < addr.getOffset()) return false; + uintb entryoff = addressbase + size-1; + uintb rangeoff = addr.getOffset() + sz-1; + return (entryoff <= rangeoff); +} + /// Check if the given memory range is contained in \b this. /// If it is contained, return the endian aware offset of the containment. /// I.e. if the least significant byte of the given range falls on the least significant @@ -379,17 +392,18 @@ void ParamEntry::restoreXml(const Element *el,const AddrSpaceManager *manage,boo /// what portion(s) of the joined parameter are overlapped. This method sets flags on \b this /// to indicate the overlap. 
/// \param entry is the full parameter list to check for overlaps with \b this -void ParamEntry::extraChecks(vector &entry) +void ParamEntry::extraChecks(list &entry) { if (joinrec == (JoinRecord *)0) return; // Nothing to do if not multiprecision if (joinrec->numPieces() != 2) return; const VarnodeData &highPiece(joinrec->getPiece(0)); bool seenOnce = false; - for(int4 i=0;i::const_iterator iter; + for(iter=entry.begin();iter!=entry.end();++iter) { // Search for high piece, used as whole/low in another entry + AddrSpace *spc = (*iter).getSpace(); + uintb off = (*iter).getBase(); + int4 sz = (*iter).getSize(); if ((highPiece.offset == off)&&(highPiece.space == spc)&&(highPiece.size == sz)) { if (seenOnce) throw LowlevelError("Extra check hits twice"); seenOnce = true; @@ -410,21 +424,76 @@ ParamListStandard::ParamListStandard(const ParamListStandard &op2) pointermax = op2.pointermax; thisbeforeret = op2.thisbeforeret; nonfloatgroup = op2.nonfloatgroup; + populateResolver(); +} + +ParamListStandard::~ParamListStandard(void) + +{ + for(int4 i=0;i size) continue; - if (entry[i].justifiedContain(loc,size)==0) // Make sure the range is properly justified in entry - return i; + int4 index = loc.getSpace()->getIndex(); + if (index >= resolverMap.size()) + return (const ParamEntry *)0; + ParamEntryResolver *resolver = resolverMap[index]; + if (resolver == (ParamEntryResolver *)0) + return (const ParamEntry *)0; + pair res; + res = resolver->find(loc.getOffset()); + while(res.first != res.second) { + const ParamEntry *testEntry = (*res.first).getParamEntry(); + ++res.first; + if (testEntry->getMinSize() > size) continue; + if (testEntry->justifiedContain(loc,size)==0) // Make sure the range is properly justified in entry + return testEntry; } - return -1; + return (const ParamEntry *)0; +} + +int4 ParamListStandard::characterizeAsParam(const Address &loc,int4 size) const + +{ + int4 index = loc.getSpace()->getIndex(); + if (index >= resolverMap.size()) + return 0; + 
ParamEntryResolver *resolver = resolverMap[index]; + if (resolver == (ParamEntryResolver *)0) + return 0; + pair iterpair; + iterpair = resolver->find(loc.getOffset()); + int4 res = 0; + while(iterpair.first != iterpair.second) { + const ParamEntry *testEntry = (*iterpair.first).getParamEntry(); + if (testEntry->getMinSize() <= size && testEntry->justifiedContain(loc, size)==0) + return 1; + if (testEntry->containedBy(loc, size)) + res = 2; + ++iterpair.first; + } + if (res != 2 && iterpair.first != resolver->end()) { + iterpair.second = resolver->find_end(loc.getOffset() + (size-1)); + while(iterpair.first != iterpair.second) { + const ParamEntry *testEntry = (*iterpair.first).getParamEntry(); + if (testEntry->containedBy(loc, size)) { + res = 2; + break; + } + ++iterpair.first; + } + } + return res; } /// Given the next data-type and the status of previously allocated slots, @@ -437,17 +506,19 @@ int4 ParamListStandard::findEntry(const Address &loc,int4 size) const Address ParamListStandard::assignAddress(const Datatype *tp,vector &status) const { - for(int4 i=0;i::const_iterator iter; + for(iter=entry.begin();iter!=entry.end();++iter) { + const ParamEntry &curEntry( *iter ); + int4 grp = curEntry.getGroup(); if (status[grp]<0) continue; - if ((entry[i].getType() != TYPE_UNKNOWN)&& - tp->getMetatype() != entry[i].getType()) + if ((curEntry.getType() != TYPE_UNKNOWN)&& + tp->getMetatype() != curEntry.getType()) continue; // Wrong type - Address res = entry[i].getAddrBySlot(status[grp],tp->getSize()); + Address res = curEntry.getAddrBySlot(status[grp],tp->getSize()); if (res.isInvalid()) continue; // If -tp- doesn't fit an invalid address is returned - if (entry[i].isExclusion()) { - int4 maxgrp = grp + entry[i].getGroupSize(); + if (curEntry.isExclusion()) { + int4 maxgrp = grp + curEntry.getGroupSize(); for(int4 j=grp;jgetNumTrials();++i) { ParamTrial ¶mtrial(active->getTrial(i)); - int4 entslot = findEntry(paramtrial.getAddress(),paramtrial.getSize()); + const 
ParamEntry *entrySlot = findEntry(paramtrial.getAddress(),paramtrial.getSize()); // Note: if a trial is "definitely not used" but there is a matching entry, // we still include it in the map - if (entslot == -1) + if (entrySlot == (const ParamEntry *)0) paramtrial.markNoUse(); else { - const ParamEntry *curentry = &(entry[entslot]); - paramtrial.setEntry( curentry, 0 ); // Keep track of entry recovered for this trial + paramtrial.setEntry( entrySlot, 0 ); // Keep track of entry recovered for this trial - if (curentry->getType() == TYPE_FLOAT) + if (entrySlot->getType() == TYPE_FLOAT) seenfloattrial = true; else seeninttrial = true; // Make sure we list that the entries group is marked - int4 grp = curentry->getGroup(); + int4 grp = entrySlot->getGroup(); while(hitlist.size() <= grp) hitlist.push_back((const ParamEntry *)0); const ParamEntry *lastentry = hitlist[grp]; if (lastentry == (const ParamEntry *)0) - hitlist[grp] = curentry; // This is the first hit for this group + hitlist[grp] = entrySlot; // This is the first hit for this group } } @@ -548,8 +618,9 @@ void ParamListStandard::buildTrialMap(ParamActive *active) const const ParamEntry *curentry = hitlist[i]; if (curentry == (const ParamEntry *)0) { - for(int4 j=0;j::const_iterator iter; + for(iter=entry.begin();iter!=entry.end();++iter) { + curentry = &(*iter); if (curentry->getGroup() == i) break; // Find first entry of the missing group } if ((!seenfloattrial)&&(curentry->getType()==TYPE_FLOAT)) @@ -749,13 +820,43 @@ void ParamListStandard::calcDelay(void) { maxdelay = 0; - for(int4 i=0;igetDelay(); + list::const_iterator iter; + for(iter=entry.begin();iter!=entry.end();++iter) { + int4 delay = (*iter).getSpace()->getDelay(); if (delay > maxdelay) maxdelay = delay; } } +/// Enter all the ParamEntry objects into an interval map (based on address space) +void ParamListStandard::populateResolver(void) + +{ + int4 maxid = -1; + list::iterator iter; + for(iter=entry.begin();iter!=entry.end();++iter) { + int4 
id = (*iter).getSpace()->getIndex(); + if (id > maxid) + maxid = id; + } + resolverMap.resize(maxid+1, (ParamEntryResolver *)0); + int4 position = 0; + for(iter=entry.begin();iter!=entry.end();++iter) { + ParamEntry *paramEntry = &(*iter); + int4 spaceId = paramEntry->getSpace()->getIndex(); + ParamEntryResolver *resolver = resolverMap[spaceId]; + if (resolver == (ParamEntryResolver *)0) { + resolver = new ParamEntryResolver(); + resolverMap[spaceId] = resolver; + } + uintb first = paramEntry->getBase(); + uintb last = first + (paramEntry->getSize() - 1); + ParamEntryResolver::inittype initData(position,paramEntry); + position += 1; + resolver->insert(initData,first,last); + } +} + void ParamListStandard::fillinMap(ParamActive *active) const { @@ -782,25 +883,24 @@ void ParamListStandard::fillinMap(ParamActive *active) const bool ParamListStandard::checkJoin(const Address &hiaddr,int4 hisize,const Address &loaddr,int4 losize) const { - int4 enthi = findEntry(hiaddr,hisize); - if (enthi < 0) return false; - int4 entlo = findEntry(loaddr,losize); - if (entlo < 0) return false; - const ParamEntry &entryhi( entry[ enthi ] ); - const ParamEntry &entrylo( entry[ entlo ] ); - if (entryhi.getGroup() == entrylo.getGroup()) { - if (entryhi.isExclusion()||entrylo.isExclusion()) return false; + const ParamEntry *entryHi = findEntry(hiaddr,hisize); + if (entryHi == (const ParamEntry *)0) return false; + const ParamEntry *entryLo = findEntry(loaddr,losize); + if (entryLo == (const ParamEntry *)0) return false; + if (entryHi->getGroup() == entryLo->getGroup()) { + if (entryHi->isExclusion()||entryLo->isExclusion()) return false; if (!hiaddr.isContiguous(hisize,loaddr,losize)) return false; - if ((hiaddr.getOffset() % entryhi.getAlign()) != 0) return false; - if ((loaddr.getOffset() % entrylo.getAlign()) != 0) return false; + if ((hiaddr.getOffset() % entryHi->getAlign()) != 0) return false; + if ((loaddr.getOffset() % entryLo->getAlign()) != 0) return false; return true; } else { 
int4 sizesum = hisize + losize; - for(int4 i=0;i::const_iterator iter; + for(iter=entry.begin();iter!=entry.end();++iter) { + if ((*iter).getSize() < sizesum) continue; + if ((*iter).justifiedContain(loaddr,losize)!=0) continue; + if ((*iter).justifiedContain(hiaddr,hisize)!=losize) continue; return true; } } @@ -812,44 +912,77 @@ bool ParamListStandard::checkSplit(const Address &loc,int4 size,int4 splitpoint) { Address loc2 = loc + splitpoint; int4 size2 = size - splitpoint; - int4 entnum = findEntry(loc,splitpoint); - if (entnum == -1) return false; - entnum = findEntry(loc2,size2); - if (entnum == -1) return false; + const ParamEntry *entryNum = findEntry(loc,splitpoint); + if (entryNum == (const ParamEntry *)0) return false; + entryNum = findEntry(loc2,size2); + if (entryNum == (const ParamEntry *)0) return false; return true; } bool ParamListStandard::possibleParam(const Address &loc,int4 size) const { - return (-1 != findEntry(loc,size)); + return ((const ParamEntry *)0 != findEntry(loc,size)); } bool ParamListStandard::possibleParamWithSlot(const Address &loc,int4 size,int4 &slot,int4 &slotsize) const { - int4 num = findEntry(loc,size); - if (num == -1) return false; - const ParamEntry &curentry( entry[num] ); - slot = curentry.getSlot(loc,0); - if (curentry.isExclusion()) { - slotsize = curentry.getGroupSize(); + const ParamEntry *entryNum = findEntry(loc,size); + if (entryNum == (const ParamEntry *)0) return false; + slot = entryNum->getSlot(loc,0); + if (entryNum->isExclusion()) { + slotsize = entryNum->getGroupSize(); } else { - slotsize = ((size-1) / curentry.getAlign()) + 1; + slotsize = ((size-1) / entryNum->getAlign()) + 1; } return true; } +bool ParamListStandard::getBiggestContainedParam(const Address &loc,int4 size,VarnodeData &res) const + +{ + int4 index = loc.getSpace()->getIndex(); + if (index >= resolverMap.size()) + return false; + ParamEntryResolver *resolver = resolverMap[index]; + if (resolver == (ParamEntryResolver *)0) + return false; + 
const ParamEntry *maxEntry = (const ParamEntry *)0; + ParamEntryResolver::const_iterator iter = resolver->find_begin(loc.getOffset()); + ParamEntryResolver::const_iterator enditer = resolver->find_end(loc.getOffset() + (size-1)); + while(iter != enditer) { + const ParamEntry *testEntry = (*iter).getParamEntry(); + ++iter; + if (testEntry->containedBy(loc, size)) { + if (maxEntry == (const ParamEntry *)0) + maxEntry = testEntry; + else if (testEntry->getSize() > maxEntry->getSize()) + maxEntry = testEntry; + } + } + if (maxEntry != (const ParamEntry *)0) { // No entry may be contained in the range, so test for null before any dereference + if (!maxEntry->isExclusion()) // Only an exclusion entry is passed back + return false; + res.space = maxEntry->getSpace(); + res.offset = maxEntry->getBase(); + res.size = maxEntry->getSize(); + return true; + } + return false; +} + bool ParamListStandard::unjustifiedContainer(const Address &loc,int4 size,VarnodeData &res) const { - for(int4 i=0;i size) continue; - int4 just = entry[i].justifiedContain(loc,size); + list::const_iterator iter; + for(iter=entry.begin();iter!=entry.end();++iter) { + if ((*iter).getMinSize() > size) continue; + int4 just = (*iter).justifiedContain(loc,size); if (just < 0) continue; if (just == 0) return false; - entry[i].getContainer(loc,size,res); + (*iter).getContainer(loc,size,res); return true; } return false; @@ -858,9 +991,10 @@ bool ParamListStandard::unjustifiedContainer(const Address &loc,int4 size,Varnod OpCode ParamListStandard::assumedExtension(const Address &addr,int4 size,VarnodeData &res) const { - for(int4 i=0;i size) continue; - OpCode ext = entry[i].assumedExtension(addr,size,res); + list::const_iterator iter; + for(iter=entry.begin();iter!=entry.end();++iter) { + if ((*iter).getMinSize() > size) continue; + OpCode ext = (*iter).assumedExtension(addr,size,res); if (ext != CPUI_COPY) return ext; } @@ -870,11 +1004,11 @@ OpCode ParamListStandard::assumedExtension(const Address &addr,int4 size,Varnode void ParamListStandard::getRangeList(AddrSpace *spc,RangeList &res) const { - for(int4 i=0;i::const_iterator 
iter; + for(iter=entry.begin();iter!=entry.end();++iter) { + if ((*iter).getSpace() != spc) continue; + uintb baseoff = (*iter).getBase(); + uintb endoff = baseoff + (*iter).getSize() - 1; res.insertRange(spc,baseoff,endoff); } } @@ -932,6 +1066,7 @@ void ParamListStandard::restoreXml(const Element *el,const AddrSpaceManager *man } } calcDelay(); + populateResolver(); } ParamList *ParamListStandard::clone(void) const @@ -978,13 +1113,14 @@ void ParamListStandardOut::fillinMap(ParamActive *active) const { if (active->getNumTrials() == 0) return; // No trials to check - int4 bestentry = -1; + const ParamEntry *bestentry = (const ParamEntry *)0; int4 bestcover = 0; type_metatype bestmetatype = TYPE_PTR; // Find entry which is best covered by the active trials - for(int4 i=0;i::const_iterator iter; + for(iter=entry.begin();iter!=entry.end();++iter) { + const ParamEntry *curentry = &(*iter); bool putativematch = false; for(int4 j=0;jgetNumTrials();++j) { // Evaluate all trials in terms of current ParamEntry ParamTrial ¶mtrial(active->getTrial(j)); @@ -1022,24 +1158,23 @@ void ParamListStandardOut::fillinMap(ParamActive *active) const // Prefer a more generic type restriction if we have it // prefer the larger coverage if ((k==active->getNumTrials())&&((curentry->getType() > bestmetatype)||(offmatch > bestcover))) { - bestentry = i; + bestentry = curentry; bestcover = offmatch; bestmetatype = curentry->getType(); } } - if (bestentry==-1) { + if (bestentry==(const ParamEntry *)0) { for(int4 i=0;igetNumTrials();++i) active->getTrial(i).markNoUse(); } else { - const ParamEntry *curentry = &(entry[bestentry]); for(int4 i=0;igetNumTrials();++i) { ParamTrial ¶mtrial(active->getTrial(i)); if (paramtrial.isActive()) { - int4 res = curentry->justifiedContain(paramtrial.getAddress(),paramtrial.getSize()); + int4 res = bestentry->justifiedContain(paramtrial.getAddress(),paramtrial.getSize()); if (res >= 0) { paramtrial.markUsed(); // Only actives are ever marked used - 
paramtrial.setEntry(curentry,res); + paramtrial.setEntry(bestentry,res); } else { paramtrial.markNoUse(); @@ -1058,8 +1193,9 @@ void ParamListStandardOut::fillinMap(ParamActive *active) const bool ParamListStandardOut::possibleParam(const Address &loc,int4 size) const { - for(int4 i=0;i=0) + list::const_iterator iter; + for(iter=entry.begin();iter!=entry.end();++iter) { + if ((*iter).justifiedContain(loc,size)>=0) return true; } return false; @@ -1070,8 +1206,9 @@ void ParamListStandardOut::restoreXml(const Element *el,const AddrSpaceManager * { ParamListStandard::restoreXml(el,manage,effectlist,normalstack); // Check for double precision entries - for(int4 i=0;i::iterator iter; + for(iter=entry.begin();iter!=entry.end();++iter) + (*iter).extraChecks(entry); } ParamList *ParamListStandardOut::clone(void) const @@ -1089,12 +1226,11 @@ void ParamListRegister::fillinMap(ParamActive *active) const // Mark anything active as used for(int4 i=0;igetNumTrials();++i) { ParamTrial ¶mtrial(active->getTrial(i)); - int4 entslot = findEntry(paramtrial.getAddress(),paramtrial.getSize()); - if (entslot == -1) // There may be no matching entry (if the model was recovered late) + const ParamEntry *entrySlot = findEntry(paramtrial.getAddress(),paramtrial.getSize()); + if (entrySlot == (const ParamEntry *)0) // There may be no matching entry (if the model was recovered late) paramtrial.markNoUse(); else { - const ParamEntry *curentry = &(entry[entslot]); - paramtrial.setEntry( curentry,0 ); // Keep track of entry recovered for this trial + paramtrial.setEntry( entrySlot,0 ); // Keep track of entry recovered for this trial if (paramtrial.isActive()) paramtrial.markUsed(); } @@ -1123,30 +1259,31 @@ void ParamListMerged::foldIn(const ParamListStandard &op2) } if ((spacebase != op2.getSpacebase())&&(op2.getSpacebase() != (AddrSpace *)0)) throw LowlevelError("Cannot merge prototype models with different stacks"); - - for(int4 i=0;i::const_iterator iter2; + 
for(iter2=op2.getEntry().begin();iter2!=op2.getEntry().end();++iter2) { + const ParamEntry &opentry( *iter2 ); int4 typeint = 0; - for(j=0;j::iterator iter; + for(iter=entry.begin();iter!=entry.end();++iter) { + if ((*iter).contains(opentry)) { typeint = 2; break; } - if (opentry.contains( entry[j] )) { + if (opentry.contains( *iter )) { typeint = 1; break; } } if (typeint==2) { - if (entry[j].getMinSize() != opentry.getMinSize()) + if ((*iter).getMinSize() != opentry.getMinSize()) typeint = 0; } else if (typeint == 1) { - if (entry[j].getMinSize() != opentry.getMinSize()) + if ((*iter).getMinSize() != opentry.getMinSize()) typeint = 0; else - entry[j] = opentry; // Replace with the containing entry + *iter = opentry; // Replace with the containing entry } if (typeint == 0) entry.push_back(opentry); @@ -2114,6 +2251,8 @@ void ProtoModelMerged::restoreXml(const Element *el) foldIn(mymodel); modellist.push_back(mymodel); } + ((ParamListMerged *)input)->finalize(); + ((ParamListMerged *)output)->finalize(); } void ParameterBasic::setTypeLock(bool val) @@ -3239,6 +3378,44 @@ const VarnodeData &FuncProto::getLikelyTrash(int4 i) const return likelytrash[i]; } +/// \brief Decide whether a given storage location could be, or could hold, an input parameter +/// +/// If the input is locked, check if the location overlaps one of the current parameters. +/// Otherwise, check if the location overlaps an entry in the prototype model. 
+/// Return: +/// - 0 if the location neither contains nor is contained by a parameter storage location +/// - 1 if the location is contained by a parameter storage location +/// - 2 if the location contains a parameter storage location +/// \param addr is the starting address of the given storage location +/// \param size is the number of bytes in the storage +/// \return the characterization code +int4 FuncProto::characterizeAsInputParam(const Address &addr,int4 size) const + +{ + if (!isDotdotdot()) { // If the proto is varargs, go straight to the model + if ((flags&voidinputlock)!=0) return 0; + int4 num = numParams(); + if (num > 0) { + bool locktest = false; // Have tested against locked symbol + int4 characterCode = 0; + for(int4 i=0;iisTypeLocked()) continue; + locktest = true; + Address iaddr = param->getAddress(); + // If the parameter already exists, the varnode must be justified in the parameter relative + // to the endianness of the space, regardless of the forceleft flag + if (iaddr.justifiedContain(param->getSize(),addr,size,false)==0) + return 1; + if (iaddr.containedBy(param->getSize(), addr, size)) + characterCode = 2; + } + if (locktest) return characterCode; + } + } + return model->characterizeAsInputParam(addr, size); +} + /// \brief Decide whether a given storage location could be an input parameter /// /// If the input is locked, check if the location matches one of the current parameters. 
@@ -3336,6 +3513,41 @@ bool FuncProto::unjustifiedInputParam(const Address &addr,int4 size,VarnodeData return model->unjustifiedInputParam(addr,size,res); } +/// \brief Pass-back the biggest input parameter contained within the given range +/// +/// \param loc is the starting address of the given range +/// \param size is the number of bytes in the range +/// \param res will hold the parameter storage description being passed back +/// \return \b true if there is at least one parameter contained in the range +bool FuncProto::getBiggestContainedInputParam(const Address &loc,int4 size,VarnodeData &res) const + +{ + if (!isDotdotdot()) { // If the proto is varargs, go straight to the model + if ((flags&voidinputlock)!=0) return false; + int4 num = numParams(); + if (num > 0) { + bool locktest = false; // Have tested against locked symbol + res.size = 0; + for(int4 i=0;iisTypeLocked()) continue; + locktest = true; + Address iaddr = param->getAddress(); + if (iaddr.containedBy(param->getSize(), loc, size)) { + if (param->getSize() > res.size) { + res.space = iaddr.getSpace(); + res.offset = iaddr.getOffset(); + res.size = param->getSize(); + } + } + } + if (locktest) + return (res.size == 0); + } + } + return model->getBiggestContainedInputParam(loc,size,res); +} + /// \brief Decide if \b this can be safely restricted to match another prototype /// /// Do \b this and another given function prototype share enough of diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.hh index 7afcda59c5..1322980b56 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.hh @@ -20,6 +20,7 @@ #define __CPUI_FSPEC__ #include "op.hh" +#include "rangemap.hh" class JoinRecord; @@ -85,6 +86,7 @@ public: bool isExclusion(void) const { return (alignment==0); } ///< Return \b true if this holds a single parameter exclusively bool isReverseStack(void) const { 
return ((flags & reverse_stack)!=0); } ///< Return \b true if parameters are allocated in reverse order bool contains(const ParamEntry &op2) const; ///< Does \b this contain the indicated entry. + bool containedBy(const Address &addr,int4 sz) const; ///< Is this entry contained by the given range int4 justifiedContain(const Address &addr,int4 sz) const; ///< Calculate endian aware containment bool getContainer(const Address &addr,int4 sz,VarnodeData &res) const; OpCode assumedExtension(const Address &addr,int4 sz,VarnodeData &res) const; @@ -93,11 +95,51 @@ public: uintb getBase(void) const { return addressbase; } ///< Get the starting offset of \b this entry Address getAddrBySlot(int4 &slot,int4 sz) const; void restoreXml(const Element *el,const AddrSpaceManager *manage,bool normalstack); - void extraChecks(vector &entry); + void extraChecks(list &entry); bool isParamCheckHigh(void) const { return ((flags & extracheck_high)!=0); } ///< Return \b true if there is a high overlap bool isParamCheckLow(void) const { return ((flags & extracheck_low)!=0); } ///< Return \b true if there is a low overlap }; +/// \brief Class for storing ParamEntry objects in an interval range (rangemap) +class ParamEntryRange { + uintb first; ///< Starting offset of the ParamEntry's range + uintb last; ///< Ending offset of the ParamEntry's range + int4 position; ///< Position of the ParamEntry within the entire prototype list + ParamEntry *entry; ///< Pointer to the actual ParamEntry + + /// \brief Helper class for initializing ParamEntryRange in a range map + class InitData { + friend class ParamEntryRange; + int4 position; ///< Position (within the full list) being assigned to the ParamEntryRange + ParamEntry *entry; ///< Underlying ParamEntry being assigned to the ParamEntryRange + public: + InitData(int4 pos,ParamEntry *e) { position = pos; entry = e; } ///< Constructor + }; + + /// \brief Helper class for subsorting on position + class SubsortPosition { + int4 position; ///< The 
position value + public: + SubsortPosition(void) {} ///< Constructor for use with rangemap + SubsortPosition(int4 pos) { position = pos; } ///< Construct given position + SubsortPosition(bool val) { position = val ? 1000000 : 0; } ///< Constructor minimal/maximal subsort + bool operator<(const SubsortPosition &op2) { return position < op2.position; } ///< Compare operation + }; +public: + typedef uintb linetype; ///< The linear element for a rangemap + typedef SubsortPosition subsorttype; ///< The sub-sort object for a rangemap + typedef InitData inittype; ///< Initialization data for a ScopeMapper + + ParamEntryRange(void) {} ///< Constructor for use with rangemap + void initialize(const inittype &data,uintb f,uintb l) { + first = f; last = l; position = data.position; entry = data.entry; } ///< Initialize the range + uintb getFirst(void) const { return first; } ///< Get the first address in the range + uintb getLast(void) const { return last; } ///< Get the last address in the range + subsorttype getSubsort(void) const { return SubsortPosition(position); } ///< Get the sub-subsort object + ParamEntry *getParamEntry(void) const { return entry; } ///< Get pointer to actual ParamEntry +}; +typedef rangemap ParamEntryResolver; ///< A map from offset to ParamEntry + /// \brief A register or memory register that may be used to pass a parameter or return value /// /// The parameter recovery utilities (see ParamActive) use this to denote a putative @@ -344,6 +386,18 @@ public: /// \return \b true if the storage location can be split virtual bool checkSplit(const Address &loc,int4 size,int4 splitpoint) const=0; + /// \brief Characterize whether the given range overlaps parameter storage + /// + /// Does the range naturally fit inside a potential parameter entry from this list or does + /// it contain a parameter entry. 
Return one of three values indicating this characterization: + /// - 0 means there is no intersection between the range and any parameter in this list + /// - 1 means that at least one parameter contains the range in a properly justified manner + /// - 2 means no parameter contains the range, but the range contains at least one ParamEntry + /// \param loc is the starting address of the given range + /// \param size is the number of bytes in the given range + /// \return the characterization code + virtual int4 characterizeAsParam(const Address &loc,int4 size) const=0; + /// \brief Does the given storage location make sense as a parameter /// /// Within \b this model, decide if the storage location can be considered a parameter. @@ -363,6 +417,14 @@ public: /// \return \b true if the location can be a parameter virtual bool possibleParamWithSlot(const Address &loc,int4 size,int4 &slot,int4 &slotsize) const=0; + /// \brief Pass-back the biggest parameter contained within the given range + /// + /// \param loc is the starting address of the given range + /// \param size is the number of bytes in the range + /// \param res will hold the parameter storage description being passed back + /// \return \b true if there is at least one parameter contained in the range + virtual bool getBiggestContainedParam(const Address &loc,int4 size,VarnodeData &res) const=0; + /// \brief Check if the given storage location looks like an \e unjustified parameter /// /// The storage for a value may be contained in a normal parameter location but be @@ -433,9 +495,10 @@ protected: int4 pointermax; ///< If non-zero, maximum size of a data-type before converting to a pointer bool thisbeforeret; ///< Does a \b this parameter come before a hidden return parameter int4 nonfloatgroup; ///< Group of first entry which is not marked float - vector entry; ///< The ordered list of parameter entries + list entry; ///< The ordered list of parameter entries + vector resolverMap; ///< Map from space id to 
resolver AddrSpace *spacebase; ///< Address space containing relative offset parameters - int4 findEntry(const Address &loc,int4 size) const; ///< Given storage location find matching ParamEntry + const ParamEntry *findEntry(const Address &loc,int4 size) const; ///< Given storage location find matching ParamEntry Address assignAddress(const Datatype *tp,vector &status) const; ///< Assign storage for given parameter data-type void buildTrialMap(ParamActive *active) const; ///< Build map from parameter trials to model ParamEntrys void separateFloat(ParamActive *active,int4 &floatstart,int4 &floatstop,int4 &start,int4 &stop) const; @@ -443,18 +506,22 @@ protected: void forceNoUse(ParamActive *active,int4 start,int4 stop) const; void forceInactiveChain(ParamActive *active,int4 maxchain,int4 start,int4 stop) const; void calcDelay(void); ///< Calculate the maximum heritage delay for any potential parameter in this list + void populateResolver(void); ///< Build the ParamEntry resolver maps public: ParamListStandard(void) {} ///< Construct for use with restoreXml() ParamListStandard(const ParamListStandard &op2); ///< Copy constructor - const vector &getEntry(void) const { return entry; } ///< Get the list of parameter entries + virtual ~ParamListStandard(void); + const list &getEntry(void) const { return entry; } ///< Get the list of parameter entries virtual uint4 getType(void) const { return p_standard; } virtual void assignMap(const vector &proto,bool isinput, TypeFactory &typefactory,vector &res) const; virtual void fillinMap(ParamActive *active) const; virtual bool checkJoin(const Address &hiaddr,int4 hisize,const Address &loaddr,int4 losize) const; virtual bool checkSplit(const Address &loc,int4 size,int4 splitpoint) const; + virtual int4 characterizeAsParam(const Address &loc,int4 size) const; virtual bool possibleParam(const Address &loc,int4 size) const; virtual bool possibleParamWithSlot(const Address &loc,int4 size,int4 &slot,int4 &slotsize) const; + virtual 
bool getBiggestContainedParam(const Address &loc,int4 size,VarnodeData &res) const; virtual bool unjustifiedContainer(const Address &loc,int4 size,VarnodeData &res) const; virtual OpCode assumedExtension(const Address &addr,int4 size,VarnodeData &res) const; virtual AddrSpace *getSpacebase(void) const { return spacebase; } @@ -513,6 +580,7 @@ public: ParamListMerged(void) : ParamListStandard() {} ///< Constructor for use with restoreXml ParamListMerged(const ParamListMerged &op2) : ParamListStandard(op2) {} ///< Copy constructor void foldIn(const ParamListStandard &op2); ///< Add another model to the union + void finalize(void) { populateResolver(); } ///< Fold-ins are finished, finalize \b this virtual uint4 getType(void) const { return p_merged; } virtual void assignMap(const vector &proto,bool isinput, TypeFactory &typefactory,vector &res) const { @@ -636,6 +704,20 @@ public: int4 numLikelyTrash(void) const { return likelytrash.size(); } ///< Get the number of \e likelytrash locations const VarnodeData &getLikelyTrash(int4 i) const { return likelytrash[i]; } ///< Get the i-th \e likelytrashh location + /// \brief Characterize whether the given range overlaps parameter storage + /// + /// Does the range naturally fit inside a potential parameter entry from this model or does + /// it contain a parameter entry. 
Return one of three values indicating this characterization: + /// - 0 means there is no intersection between the range and any ParamEntry + /// - 1 means that at least one ParamEntry contains the range in a properly justified manner + /// - 2 means no ParamEntry contains the range, but the range contains at least one ParamEntry + /// \param loc is the starting address of the given range + /// \param size is the number of bytes in the given range + /// \return the characterization code + int4 characterizeAsInputParam(const Address &loc,int4 size) const { + return input->characterizeAsParam(loc, size); + } + /// \brief Does the given storage location make sense as an input parameter /// /// Within \b this model, decide if the storage location can be considered an input parameter. @@ -716,6 +798,16 @@ public: OpCode assumedOutputExtension(const Address &addr,int4 size,VarnodeData &res) const { return output->assumedExtension(addr,size,res); } + /// \brief Pass-back the biggest input parameter contained within the given range + /// + /// \param loc is the starting address of the given range + /// \param size is the number of bytes in the range + /// \param res will hold the parameter storage description being passed back + /// \return \b true if there is at least one parameter contained in the range + bool getBiggestContainedInputParam(const Address &loc,int4 size,VarnodeData &res) const { + return input->getBiggestContainedParam(loc, size, res); + } + AddrSpace *getSpacebase(void) const { return input->getSpacebase(); } ///< Get the stack space associated with \b this model bool isStackGrowsNegative(void) const { return stackgrowsnegative; } ///< Return \b true if the stack \e grows toward smaller addresses bool hasThisPointer(void) const { return hasThis; } ///< Is \b this a model for (non-static) class methods @@ -1245,6 +1337,7 @@ public: vector::const_iterator effectEnd(void) const; ///< Get iterator to end of EffectRecord list int4 numLikelyTrash(void) const; 
///< Get the number of \e likely-trash locations const VarnodeData &getLikelyTrash(int4 i) const; ///< Get the i-th \e likely-trash location + int4 characterizeAsInputParam(const Address &addr,int4 size) const; bool possibleInputParam(const Address &addr,int4 size) const; bool possibleOutputParam(const Address &addr,int4 size) const; @@ -1292,6 +1385,9 @@ public: OpCode assumedOutputExtension(const Address &addr,int4 size,VarnodeData &res) const { return model->assumedOutputExtension(addr,size,res); } + /// \brief Pass-back the biggest potential input parameter contained within the given range + bool getBiggestContainedInputParam(const Address &loc,int4 size,VarnodeData &res) const; + bool isCompatible(const FuncProto &op2) const; AddrSpace *getSpacebase(void) const { return model->getSpacebase(); } ///< Get the \e stack address space void printRaw(const string &funcname,ostream &s) const; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc index 7486aa5139..17b4a0322d 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_op.cc @@ -972,8 +972,8 @@ void Funcdata::overrideFlow(const Address &addr,uint4 type) } /// Do in-place replacement of -/// - `#c <= x` with `#c-1 < x` OR -/// - `x <= #c` with `x < #c+1` +/// - `c <= x` with `c-1 < x` OR +/// - `x <= c` with `x < c+1` /// /// \param data is the function being analyzed /// \param op is comparison PcodeOp diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc index 4ccf2648d1..bce59397ab 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc @@ -924,7 +924,7 @@ bool Funcdata::updateFlags(VarnodeLocSet::const_iterator &iter,uint4 flags,Datat /// The Symbol is really attached 
to the Varnode's HighVariable (which must exist). /// The only reason a Symbol doesn't get set is if, the HighVariable /// is global and there is no pre-existing Symbol. (see mapGlobals()) -/// \param is the given Varnode +/// \param vn is the given Varnode /// \return the associated Symbol or NULL Symbol *Funcdata::linkSymbol(Varnode *vn) @@ -1440,11 +1440,29 @@ int4 AncestorRealistic::enterNode(State &state) stateStack.push_back(State(op,0)); return enter_node; // Enter the new node case CPUI_SUBPIECE: + // Extracting to a temporary, or to the same storage location, or otherwise incidental + // are viewed as just another node on the path to traverse + if (op->getOut()->getSpace()->getType()==IPTR_INTERNAL||op->getIn(0)->isIncidentalCopy() + || (op->getOut()->overlap(*op->getIn(0)) == (int4)op->getIn(1)->getOffset())) { + stateStack.push_back(State(op,0)); + return enter_node; // Push into the new node + } + // For other SUBPIECES, do a minimal traversal to rule out unaffected or other invalid inputs, + // but otherwise treat it as valid, active, movement into the parameter + do { + Varnode *vn = op->getIn(0); + if ((!vn->isMark())&&(vn->isInput())) { + if (vn->isUnaffected()||(!vn->isDirectWrite())) + return pop_fail; + } + op = vn->getDef(); + } while((op!=(PcodeOp *)0)&&((op->code() == CPUI_COPY)||(op->code()==CPUI_SUBPIECE))); + return pop_solid; // treat the COPY as a solid movement case CPUI_COPY: // Copies to a temporary, or between varnodes with same storage location, or otherwise incidental // are viewed as just another node on the path to traverse - if ((op->getOut()->getSpace()->getType()==IPTR_INTERNAL) - ||(op->getOut()->getAddr() == op->getIn(0)->getAddr())||(op->getIn(0)->isIncidentalCopy())) { + if (op->getOut()->getSpace()->getType()==IPTR_INTERNAL||op->getIn(0)->isIncidentalCopy() + || (op->getOut()->getAddr() == op->getIn(0)->getAddr())) { stateStack.push_back(State(op,0)); return enter_node; // Push into the new node } @@ -1523,7 +1541,7 @@ 
int4 AncestorRealistic::uponPop(State &state,int4 pop_command) /// \param op is the CALL or RETURN to test parameter passing for /// \param slot is the index of the particular input varnode to test /// \param t is the ParamTrial object corresponding to the varnode -/// \param allowFailingPath is true if we allow and test for failing paths due to conditional execution +/// \param allowFail is \b true if we allow and test for failing paths due to conditional execution /// \return \b true if the varnode has realistic ancestors for a parameter passing location bool AncestorRealistic::execute(PcodeOp *op,int4 slot,ParamTrial *t,bool allowFail) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.cc index 0f07124895..c8e20762b2 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.cc @@ -672,6 +672,13 @@ bool LoadGuard::isGuarded(const Address &addr) const return true; } +/// \brief Make final determination of what range new LoadGuards are protecting +/// +/// Actual LOAD operations are guarded with an initial version of the LoadGuard record. +/// Now that heritage has completed, a full analysis of each LOAD is conducted, using +/// value set analysis, to reach a conclusion about what range of stack values the +/// LOAD might actually alias. All new LoadGuard records are updated with the analysis, +/// which then informs handling of LOAD COPYs and possible later heritage passes. 
void Heritage::analyzeNewLoadGuards(void) { @@ -809,7 +816,7 @@ bool Heritage::protectFreeStores(AddrSpace *spc,vector &freeStores) /// \brief Trace input stack-pointer to any indexed loads /// -/// Look for expressions of the form val = *(SP(i) + vn + #c), where the base stack +/// Look for expressions of the form val = *(SP(i) + vn + \#c), where the base stack /// pointer has an (optional) constant added to it and a non-constant index, then a /// value is loaded from the resulting address. The LOAD operations are added to the list /// of ops that potentially need to be guarded during a heritage pass. The routine can @@ -1029,6 +1036,39 @@ void Heritage::guard(const Address &addr,int4 size,vector &read,vecto } } +/// \brief Guard an address range that is larger than any single parameter +/// +/// In this situation, an address range is being heritaged, but only a piece of +/// it can be a parameter for a given call. We have to construct a SUBPIECE that +/// pulls out the potential parameter. 
+/// \param fc is the call site potentially taking a parameter +/// \param addr is the starting address of the range +/// \param size is the size of the range in bytes +void Heritage::guardCallOverlappingInput(FuncCallSpecs *fc,const Address &addr,int4 size) + +{ + VarnodeData vData; + + if (fc->getBiggestContainedInputParam(addr, size, vData)) { + ParamActive *active = fc->getActiveInput(); + Address taddr(vData.space,vData.offset); + if (active->whichTrial(taddr, size) < 0) { // If not already a trial + int4 truncateAmount = addr.justifiedContain(size, taddr, vData.size, false); + PcodeOp *op = fc->getOp(); + PcodeOp *subpieceOp = fd->newOp(2,op->getAddr()); + fd->opSetOpcode(subpieceOp, CPUI_SUBPIECE); + Varnode *wholeVn = fd->newVarnode(size,addr); + wholeVn->setActiveHeritage(); + fd->opSetInput(subpieceOp,wholeVn,0); + fd->opSetInput(subpieceOp,fd->newConstant(4,truncateAmount),1); + Varnode *vn = fd->newVarnodeOut(vData.size, taddr, subpieceOp); + fd->opInsertBefore(subpieceOp,op); + active->registerTrial(taddr, vData.size); + fd->opInsertInput(op, vn, op->numInput()); + } + } +} + /// \brief Guard CALL/CALLIND ops in preparation for renaming algorithm /// /// For the given address range, we decide what the data-flow effect is @@ -1082,15 +1122,20 @@ void Heritage::guardCalls(uint4 flags,const Address &addr,int4 size,vectorpossibleInputParam(taddr,size)) { - ParamActive *active = fc->getActiveInput(); - if (active->whichTrial(taddr,size)<0) { // If not already a trial - PcodeOp *op = fc->getOp(); - active->registerTrial(taddr,size); - Varnode *vn = fd->newVarnode(size,addr); - vn->setActiveHeritage(); - fd->opInsertInput(op,vn,op->numInput()); + if (tryregister) { + int4 inputCharacter = fc->characterizeAsInputParam(taddr,size); + if (inputCharacter == 1) { // Call could be using this range as an input parameter + ParamActive *active = fc->getActiveInput(); + if (active->whichTrial(taddr,size)<0) { // If not already a trial + PcodeOp *op = fc->getOp(); + 
active->registerTrial(taddr,size); + Varnode *vn = fd->newVarnode(size,addr); + vn->setActiveHeritage(); + fd->opInsertInput(op,vn,op->numInput()); + } } + else if (inputCharacter == 2) // Call may be using part of this range as an input parameter + guardCallOverlappingInput(fc, addr, size); } } // We do not guard the call if the effect is "unaffected" or "reload" diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.hh index 3bb9fcd35e..1aa7391bc1 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/heritage.hh @@ -73,6 +73,7 @@ public: }; class Funcdata; +class FuncCallSpecs; /// \brief Information about heritage passes performed for a specific address space /// @@ -183,6 +184,11 @@ class Heritage { uintb offset; ///< Offset relative to base uint4 traversals; ///< What kind of operations has this pointer accumulated list::const_iterator iter; ///< Next PcodeOp to follow + + /// \brief Constructor + /// \param v is the Varnode being visited + /// \param o is the current offset from the base pointer + /// \param trav indicates what configurations were seen along the path to this Varnode StackNode(Varnode *v,uintb o,uint4 trav) { vn = v; offset = o; @@ -239,6 +245,7 @@ class Heritage { void reprocessFreeStores(AddrSpace *spc,vector &freeStores); void guard(const Address &addr,int4 size,vector &read,vector &write,vector &inputvars); void guardInput(const Address &addr,int4 size,vector &input); + void guardCallOverlappingInput(FuncCallSpecs *fc,const Address &addr,int4 size); void guardCalls(uint4 flags,const Address &addr,int4 size,vector &write); void guardStores(const Address &addr,int4 size,vector &write); void guardLoads(uint4 flags,const Address &addr,int4 size,vector &write); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/rangemap.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/rangemap.hh index 
79fc2bfd50..f1b2679f0b 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/rangemap.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/rangemap.hh @@ -14,28 +14,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -// A container for records occupying (possibly overlapping) -// intervals. I.e. a map from a linear ordered domain to -// (multiple) records. -// recordtype is the type of a record -// must support -// constructor(first,last) -// getFirst() beginning of range -// getLast() end of range (inclusive) -// getSubsort() -// initialize() initialization with inittype object -// must define types -// linetype -// subsorttype -// inittype -// linetype is the type of elements in the linear domain -// must support <,<=,==,!=, +(integer) -(integer) -// subsorttype - overlapping intervals can be subsorted -// must suport < -// null or false initialization produces minimal value -// true initialization produces maximal value -// copy constructor -// inittype is extra initialization data for the recordtype +/// \file rangemap.hh +/// \brief Templates to define interval map containers #ifndef __RANGEMAP__ #define __RANGEMAP__ @@ -43,102 +23,160 @@ #include #include +/// \brief An interval map container +/// +/// A container for records occupying (possibly overlapping) +/// intervals. I.e. a map from a linear ordered domain to +/// (multiple) records. +/// The \b recordtype is the main object in the container, it must support: +/// - recordtype() a constructor taking no parameters +/// - getFirst() beginning of range +/// - getLast() end of range (inclusive) +/// - getSubsort() retrieve the subsorttype object (see below) +/// - initialize(inittype,linetype,linetype) an initializer routine +/// +/// The \b recordtype must define data-types: +/// - linetype +/// - subsorttype +/// - inittype +/// +/// \b linetype is the data-type of elements in the linear domain. 
It +/// must support: +/// - <,<= Comparisons +/// - ==,!= Equality +/// - + \ Addition of integers +/// - - \ Subtraction of integers +/// +/// \b subsorttype describes how overlapping intervals can be sub-sorted. It +/// must support: +/// - < +/// - subsorttype(\b false) constructor with \b false produces a minimal value +/// - subsorttype(\b true) constructor with \b true produces a maximal value +/// - copy constructor +/// +/// \b inittype is extra initialization data for the \b recordtype +/// +/// The main interval map is implemented as a \e multiset of disjoint sub-ranges mapping +/// to the \b recordtype objects. After deduping the sub-ranges form the common refinement +/// of all the possibly overlapping \b recordtype ranges. A sub-range is duplicated for each +/// distinct \b recordtype that overlaps that sub-range. The sub-range multiset is updated +/// with every insertion or deletion of \b recordtype objects into the container, which +/// may insert new or delete existing boundary points separating the disjoint subranges. template class rangemap { - // A class for describing a disjoint partition public: - typedef typename _recordtype::linetype linetype; - typedef typename _recordtype::subsorttype subsorttype; - typedef typename _recordtype::inittype inittype; + typedef typename _recordtype::linetype linetype; ///< Integer data-type defining the linear domain + typedef typename _recordtype::subsorttype subsorttype; ///< The data-type used for subsorting + typedef typename _recordtype::inittype inittype; ///< The data-type containing initialization data for records private: + /// \brief The internal \e sub-range object for the interval map + /// + /// It defines a disjoint range within the common refinement of all ranges + /// in the container. It also knows about its containing range and \b recordtype. 
class AddrRange { friend class rangemap<_recordtype>; friend class PartIterator; - mutable linetype first; // Part of range contained in partition - linetype last; - mutable linetype a,b; // Range occupied by the entire record - mutable subsorttype subsort; - AddrRange(linetype l) : subsort(false) { last = l; } - AddrRange(linetype l,const subsorttype &s) : subsort(s) { last = l; } + mutable linetype first; ///< Start of the disjoint sub-range + linetype last; ///< End of the disjoint sub-range + mutable linetype a; ///< Start of full range occupied by the entire \b recordtype + mutable linetype b; ///< End of full range occupied by the entire \b recordtype + mutable subsorttype subsort; ///< How \b this should be sub-sorted + mutable typename std::list<_recordtype>::iterator value; ///< Iterator pointing at the actual \b recordtype + AddrRange(linetype l) : subsort(false) { last = l; } ///< (Partial) constructor + AddrRange(linetype l,const subsorttype &s) : subsort(s) { last = l; } ///< (Partial) constructor given a subsort public: - mutable typename std::list<_recordtype>::iterator value; bool operator<(const AddrRange &op2) const { if (last != op2.last) return (last < op2.last); return (subsort < op2.subsort); - } - typename std::list<_recordtype>::iterator getValue(void) const { return value; } + } ///< Comparison method based on ending boundary point + typename std::list<_recordtype>::iterator getValue(void) const { return value; } ///< Retrieve the \b recordtype }; public: - class PartIterator { // Iterator over partitions - typename std::multiset::const_iterator iter; + /// \brief An iterator into the interval map container + /// + /// This is really an iterator to the underlying multiset, but dereferencing it returns the + /// \b recordtype. Iteration occurs over the disjoint sub-ranges, thus the same \b recordtype + /// may be visited multiple times by the iterator, depending on how much it overlaps other + /// \b recordtypes. 
The sub-ranges are sorted in linear order, then depending on the \b subsorttype. + class PartIterator { + typename std::multiset::const_iterator iter; ///< The underlying multiset iterator public: - PartIterator(void) {} - PartIterator(typename std::multiset::const_iterator i) { iter=i; } - _recordtype &operator*(void) { return *(*iter).value; } - PartIterator &operator++(void) { ++iter; return *this; } + PartIterator(void) {} ///< Constructor + PartIterator(typename std::multiset::const_iterator i) { iter=i; } ///< Construct given iterator + _recordtype &operator*(void) { return *(*iter).value; } ///< Dereference to the \b recordtype object + PartIterator &operator++(void) { ++iter; return *this; } ///< Pre-increment the iterator PartIterator operator++(int i) { - PartIterator orig(iter); ++iter; return orig; } - PartIterator &operator--(void) { --iter; return *this; } + PartIterator orig(iter); ++iter; return orig; } ///< Post-increment the iterator + PartIterator &operator--(void) { --iter; return *this; } ///< Pre-decrement the iterator PartIterator operator--(int i) { - PartIterator orig(iter); --iter; return orig; } + PartIterator orig(iter); --iter; return orig; } ///< Post-decrement the iterator PartIterator &operator=(const PartIterator &op2) { iter = op2.iter; return *this; - } + } ///< Assign to the iterator bool operator==(const PartIterator &op2) const { return (iter==op2.iter); - } + } ///< Test equality of iterators bool operator!=(const PartIterator &op2) const { return (iter!=op2.iter); - } - typename std::list<_recordtype>::iterator getValueIter(void) const { return (*iter).getValue(); } + } ///< Test inequality of iterators + typename std::list<_recordtype>::iterator getValueIter(void) const { + return (*iter).getValue(); } ///< Get the \b recordtype iterator }; - typedef PartIterator const_iterator; + typedef PartIterator const_iterator; ///< The main sub-range iterator data-type private: - std::multiset tree; - std::list<_recordtype> record; + 
std::multiset tree; ///< The underlying multiset of sub-ranges + std::list<_recordtype> record; ///< Storage for the actual record objects - void zip(linetype i,typename std::multiset::iterator iter); - void unzip(linetype i,typename std::multiset::iterator iter); + void zip(linetype i,typename std::multiset::iterator iter); ///< Remove the given partition boundary + void unzip(linetype i,typename std::multiset::iterator iter); ///< Insert the given partition boundary public: - bool empty(void) const { return record.empty(); } - void clear(void) { tree.clear(); record.clear(); } - typename std::list<_recordtype>::const_iterator begin_list(void) const { return record.begin(); } - typename std::list<_recordtype>::const_iterator end_list(void) const { return record.end(); } - typename std::list<_recordtype>::iterator begin_list(void) { return record.begin(); } - typename std::list<_recordtype>::iterator end_list(void) { return record.end(); } + bool empty(void) const { return record.empty(); } ///< Return \b true if the container is empty + void clear(void) { tree.clear(); record.clear(); } ///< Clear all records from the container + typename std::list<_recordtype>::const_iterator begin_list(void) const { return record.begin(); } ///< Beginning of records + typename std::list<_recordtype>::const_iterator end_list(void) const { return record.end(); } ///< End of records + typename std::list<_recordtype>::iterator begin_list(void) { return record.begin(); } ///< Beginning of records + typename std::list<_recordtype>::iterator end_list(void) { return record.end(); } ///< End of records - const_iterator begin(void) const { return PartIterator(tree.begin()); } - const_iterator end(void) const { return PartIterator(tree.end()); } + const_iterator begin(void) const { return PartIterator(tree.begin()); } ///< Beginning of sub-ranges + const_iterator end(void) const { return PartIterator(tree.end()); } ///< Ending of sub-ranges - // Find range of intervals intersecting a + /// 
\brief Find sub-ranges intersecting the given boundary point std::pair find(linetype a) const; - // Find range of intervals intersecting a, with subsort - // between (subsort1,subsort2) + /// \brief Find sub-ranges intersecting given boundary point, and between given \e subsorts std::pair find(linetype a,const subsorttype &subsort1,const subsorttype &subsort2) const; - // Find first interval after point, that does not intersect it - const_iterator find_firstafter(linetype point) const; + /// \brief Find beginning of sub-ranges that contain the given boundary point + const_iterator find_begin(linetype point) const; - // Find last interval after point, that does not intersect it - const_iterator find_lastbefore(linetype point) const; + /// \brief Find ending of sub-ranges that contain the given boundary point + const_iterator find_end(linetype point) const; - // Find first interval overlapping given interval + /// \brief Find first record overlapping given interval const_iterator find_overlap(linetype point,linetype end) const; + /// \brief Insert a new record into the container typename std::list<_recordtype>::iterator insert(const inittype &data,linetype a,linetype b); + + /// \brief Erase a given record from the container void erase(typename std::list<_recordtype>::iterator v); + + /// \brief Erase a record given an iterator void erase(const_iterator iter) { erase( iter.getValueIter() ); } }; +/// All sub-ranges that end with the given boundary point are deleted, and all sub-ranges +/// that begin with the given boundary point (+1) are extended to cover the deleted sub-range. +/// This should run in O(k). 
+/// \param i is the given boundary point +/// \param iter points to the first sub-range that ends with the given boundary point template void rangemap<_recordtype>::zip(linetype i,typename std::multiset::iterator iter) -{ // Remove the partition boundary occurring right after i - // This should run in O(k) +{ linetype f = (*iter).first; while((*iter).last == i) tree.erase(iter++); @@ -149,13 +187,15 @@ void rangemap<_recordtype>::zip(linetype i,typename std::multiset::it } } +/// All sub-ranges that contain the boundary point will be split into a sub-range +/// that ends at the boundary point and a sub-range that begins with the boundary point (+1). +/// This should run in O(k), where k is the number of intervals intersecting the boundary point. +/// \param i is the given boundary point +/// \param iter points to the first sub-range containing the boundary point template void rangemap<_recordtype>::unzip(linetype i,typename std::multiset::iterator iter) -{ // Create a new partition boundary right after i - // This should run in O(k), where k is the number - // of intervals intersecting the point i - // iter should be the first interval containing i +{ typename std::multiset::iterator hint = iter; if ((*iter).last == i) return; // Can't split size 1 (i.e. 
split already present) linetype f; @@ -174,11 +214,15 @@ void rangemap<_recordtype>::unzip(linetype i,typename std::multiset:: } } +/// \param data is other initialization data for the new record +/// \param a is the start of the range occupied by the new record +/// \param b is the (inclusive) end of the range +/// \return an iterator to the new record template typename std::list<_recordtype>::iterator rangemap<_recordtype>::insert(const inittype &data,linetype a,linetype b) -{ // Insert a new record into the container at inclusive range [a,b] +{ linetype f=a; typename std::list<_recordtype>::iterator liter; typename std::multiset::iterator low = tree.lower_bound(AddrRange(f)); @@ -188,8 +232,8 @@ rangemap<_recordtype>::insert(const inittype &data,linetype a,linetype b) unzip(f-1,low); // If so do the refinement } - record.push_front( _recordtype(a,b) ); - record.front().initialize( data ); + record.push_front( _recordtype() ); + record.front().initialize( data, a, b ); liter = record.begin(); AddrRange addrrange(b,(*liter).getSubsort()); @@ -233,6 +277,7 @@ rangemap<_recordtype>::insert(const inittype &data,linetype a,linetype b) return liter; } +/// \param v is the iterator to the record to be erased template void rangemap<_recordtype>::erase(typename std::list<_recordtype>::iterator v) @@ -281,11 +326,13 @@ void rangemap<_recordtype>::erase(typename std::list<_recordtype>::iterator v) record.erase(v); } +/// \param point is the given boundary point +/// \return begin/end iterators over all intersecting sub-ranges template std::pair::const_iterator,typename rangemap<_recordtype>::const_iterator> rangemap<_recordtype>::find(linetype point) const -{ // Get range of intervals which intersect point +{ AddrRange addrrange(point); typename std::multiset::const_iterator iter1,iter2; @@ -300,6 +347,10 @@ rangemap<_recordtype>::find(linetype point) const return std::pair(PartIterator(iter1),PartIterator(iter2)); } +/// \param point is the given boundary point +/// \param 
sub1 is the starting subsort +/// \param sub2 is the ending subsort +/// \return begin/end iterators over all intersecting and bounded sub-ranges template std::pair::const_iterator,typename rangemap<_recordtype>::const_iterator> rangemap<_recordtype>::find(linetype point,const subsorttype &sub1,const subsorttype &sub2) const @@ -318,39 +369,44 @@ rangemap<_recordtype>::find(linetype point,const subsorttype &sub1,const subsort return std::pair(PartIterator(iter1),PartIterator(iter2)); } +/// \param point is the given boundary point +/// \return iterator to the first sub-range that intersects the boundary point template typename rangemap<_recordtype>::const_iterator -rangemap<_recordtype>::find_lastbefore(linetype point) const +rangemap<_recordtype>::find_begin(linetype point) const { AddrRange addrrange(point); typename std::multiset::const_iterator iter; - - // First interval with last >= point + iter = tree.lower_bound(addrrange); - if (iter==tree.begin()) - return tree.end(); - --iter; return iter; } +/// \param point is the given boundary point +/// \return iterator to the first sub-range after the boundary point that does not intersect it template typename rangemap<_recordtype>::const_iterator -rangemap<_recordtype>::find_firstafter(linetype point) const +rangemap<_recordtype>::find_end(linetype point) const { - AddrRange addrrange(point,subsorttype(true)); + AddrRange addrend(point,subsorttype(true)); typename std::multiset::const_iterator iter; - iter = tree.upper_bound(addrrange); - while(iter != tree.end()) { - if (point < (*iter).a) - return iter; - ++iter; - } - return tree.end(); + iter = tree.upper_bound(addrend); + if ((iter==tree.end())||(point < (*iter).first)) + return iter; + + // If we reach here, (*iter).last is bigger than point (as per upper_bound) but + // point >= (*iter).first, i.e. point is contained in the sub-range. + // So we have to do one more search for first sub-range after the containing sub-range.
+ AddrRange addrbeyond((*iter).last,subsorttype(true)); + return tree.upper_bound(addrbeyond); } +/// \param point is the start of interval to test +/// \param end is the end of the interval to test +/// \return iterator to first sub-range of an intersecting record (or \b end) template typename rangemap<_recordtype>::const_iterator rangemap<_recordtype>::find_overlap(linetype point,linetype end) const @@ -362,7 +418,7 @@ rangemap<_recordtype>::find_overlap(linetype point,linetype end) const // First range where right boundary is equal to or past point iter = tree.lower_bound(addrrange); if (iter==tree.end()) return iter; - if (((*iter).first <= point)||((*iter).first<=end)) + if ((*iter).first<=end) return iter; return tree.end(); } diff --git a/Ghidra/Processors/x86/data/languages/x86-64-gcc.cspec b/Ghidra/Processors/x86/data/languages/x86-64-gcc.cspec index 2191210a34..98f7cd44a0 100644 --- a/Ghidra/Processors/x86/data/languages/x86-64-gcc.cspec +++ b/Ghidra/Processors/x86/data/languages/x86-64-gcc.cspec @@ -31,29 +31,29 @@ - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + @@ -77,9 +77,9 @@ - - - + + + @@ -89,7 +89,9 @@ + +