From 5e75fb19e79e4996e252bf1872ffd3ad93377784 Mon Sep 17 00:00:00 2001 From: caheckman <48068198+caheckman@users.noreply.github.com> Date: Wed, 2 Oct 2024 20:31:29 +0000 Subject: [PATCH] GP-4985 Check for overlapping structure fields and issue warnings --- .../Decompiler/src/decompile/cpp/funcdata.cc | 15 +- .../Decompiler/src/decompile/cpp/funcdata.hh | 11 +- .../Decompiler/src/decompile/cpp/grammar.cc | 24 +- .../Decompiler/src/decompile/cpp/grammar.y | 24 +- .../Decompiler/src/decompile/cpp/type.cc | 389 ++++++++++-------- .../Decompiler/src/decompile/cpp/type.hh | 50 ++- .../model/pcode/PcodeDataTypeManager.java | 8 +- 7 files changed, 306 insertions(+), 215 deletions(-) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.cc index 21abe0dd85..491bd3815e 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.cc @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -172,6 +172,8 @@ void Funcdata::stopProcessing(void) { flags |= processing_complete; obank.destroyDead(); // Free up anything in the dead list + if (!isJumptableRecoveryOn()) + issueDatatypeWarnings(); #ifdef CPUI_STATISTICS glb->stats->process(*this); #endif @@ -470,6 +472,15 @@ void Funcdata::clearCallSpecs(void) qlst.clear(); // Delete list of pointers } +void Funcdata::issueDatatypeWarnings(void) + +{ + list::const_iterator iter; + for(iter=glb->types->beginWarnings();iter!=glb->types->endWarnings();++iter) { + warningHeader((*iter).getWarning()); + } +} + FuncCallSpecs *Funcdata::getCallSpecs(const PcodeOp *op) const { diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh index c1cb07ab00..4ff51cf49a 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh @@ -122,16 +122,17 @@ class Funcdata { JumpTable::RecoveryMode stageJumpTable(Funcdata &partial,JumpTable *jt,PcodeOp *op,FlowInfo *flow); void switchOverJumpTables(const FlowInfo &flow); ///< Convert jump-table addresses to basic block indices void clearJumpTables(void); ///< Clear any jump-table information - - void sortCallSpecs(void); ///< Sort calls using a dominance based order - void deleteCallSpecs(PcodeOp *op); ///< Remove the specification for a particular call - void clearCallSpecs(void); ///< Remove all call specifications - BlockBasic *nodeSplitBlockEdge(BlockBasic *b,int4 inedge); PcodeOp *nodeSplitCloneOp(PcodeOp *op); void nodeSplitCloneVarnode(PcodeOp *op,PcodeOp *newop); void nodeSplitRawDuplicate(BlockBasic *b,BlockBasic *bprime); void nodeSplitInputPatch(BlockBasic *b,BlockBasic *bprime,int4 inedge); + + void sortCallSpecs(void); ///< Sort calls using a dominance based order + void deleteCallSpecs(PcodeOp *op); ///< Remove the specification for a particular call + void clearCallSpecs(void); ///< Remove all call specifications + void issueDatatypeWarnings(void); ///< Add warning headers for any data-types that have been modified + static bool descendantsOutside(Varnode *vn); static void encodeVarnode(Encoder &encoder,VarnodeLocSet::const_iterator iter,VarnodeLocSet::const_iterator enditer); static bool checkIndirectUse(Varnode *vn); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.cc index 998ea56c98..697888ee26 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.cc @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -2792,9 +2792,11 @@ Datatype *CParse::newStruct(const string &ident,vector *declis sublist.emplace_back(0,-1,decl->getIdentifier(),decl->buildType(glb)); } - TypeStruct::assignFieldOffsets(sublist); try { - glb->types->setFields(sublist,res,-1,-1,0); + int4 newSize; + int4 newAlign; + TypeStruct::assignFieldOffsets(sublist,newSize,newAlign); + glb->types->setFields(sublist,res,newSize,newAlign,0); } catch (LowlevelError &err) { setError(err.explain); @@ -2830,7 +2832,10 @@ Datatype *CParse::newUnion(const string &ident,vector *declist } try { - glb->types->setFields(sublist,res,-1,-1,0); + int4 newSize; + int4 newAlign; + TypeUnion::assignFieldOffsets(sublist,newSize,newAlign,res); + glb->types->setFields(sublist,res,newSize,newAlign,0); } catch (LowlevelError &err) { setError(err.explain); @@ -2886,8 +2891,13 @@ Datatype *CParse::newEnum(const string &ident,vector *vecenum) vallist.push_back(enumer->value); assignlist.push_back(enumer->constantassigned); } - if (!glb->types->setEnumValues(namelist,vallist,assignlist,res)) { - setError("Bad enumeration values"); + try { + map namemap; + TypeEnum::assignValues(namemap,namelist,vallist,assignlist,res); + glb->types->setEnumValues(namemap, res); + } + catch (LowlevelError &err) { + setError(err.explain); glb->types->destroyType(res); return (Datatype *)0; } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y index c95526decb..85591b2357 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -1046,9 +1046,11 @@ Datatype *CParse::newStruct(const string &ident,vector *declis sublist.emplace_back(0,-1,decl->getIdentifier(),decl->buildType(glb)); } - TypeStruct::assignFieldOffsets(sublist); try { - glb->types->setFields(sublist,res,-1,-1,0); + int4 newSize; + int4 newAlign; + TypeStruct::assignFieldOffsets(sublist,newSize,newAlign); + glb->types->setFields(sublist,res,newSize,newAlign,0); } catch (LowlevelError &err) { setError(err.explain); @@ -1084,7 +1086,10 @@ Datatype *CParse::newUnion(const string &ident,vector *declist } try { - glb->types->setFields(sublist,res,-1,-1,0); + int4 newSize; + int4 newAlign; + TypeUnion::assignFieldOffsets(sublist,newSize,newAlign,res); + glb->types->setFields(sublist,res,newSize,newAlign,0); } catch (LowlevelError &err) { setError(err.explain); @@ -1140,8 +1145,13 @@ Datatype *CParse::newEnum(const string &ident,vector *vecenum) vallist.push_back(enumer->value); assignlist.push_back(enumer->constantassigned); } - if (!glb->types->setEnumValues(namelist,vallist,assignlist,res)) { - setError("Bad enumeration values"); + try { + map namemap; + TypeEnum::assignValues(namemap,namelist,vallist,assignlist,res); + glb->types->setEnumValues(namemap, res); + } + catch (LowlevelError &err) { + setError(err.explain); glb->types->destroyType(res); return (Datatype *)0; } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc index 7cda43127f..229d07c0c6 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc @@ -513,16 +513,17 @@ void Datatype::encodeTypedef(Encoder &encoder) const encoder.closeElement(ELEM_DEF); } -/// Calculate \b size rounded up to be a multiple of \b alignment. -/// This value is returned by getAlignSize(). -void Datatype::calcAlignSize(void) +/// Calculate size rounded up to be a multiple of \b align. +/// \param sz is the number of bytes in the data-type before padding +/// \param align is the alignment of the data-type +/// \return the aligned size +int4 Datatype::calcAlignSize(int4 sz,int4 align) { - int4 mod = size % alignment; + int4 mod = sz % align; if (mod != 0) - alignSize = size + (alignment - mod); - else - alignSize = size; + return sz + (align - mod); + return sz; } /// A CPUI_PTRSUB must act on a pointer data-type where the given offset addresses a component. @@ -1485,13 +1486,15 @@ void TypeEnum::encode(Encoder &encoder) const /// Parse a \ element with children describing each specific enumeration value. /// \param decoder is the stream decoder /// \param typegrp is the factory owning \b this data-type -void TypeEnum::decode(Decoder &decoder,TypeFactory &typegrp) +/// \return any warning associated with the enum +string TypeEnum::decode(Decoder &decoder,TypeFactory &typegrp) { // uint4 elemId = decoder.openElement(); decodeBasic(decoder); submeta = (metatype == TYPE_INT) ? SUB_INT_ENUM : SUB_UINT_ENUM; map nmap; + string warning; for(;;) { uint4 childId = decoder.openElement(); @@ -1510,11 +1513,59 @@ void TypeEnum::decode(Decoder &decoder,TypeFactory &typegrp) } if (nm.size() == 0) throw LowlevelError(name + ": TypeEnum field missing name attribute"); - nmap[val] = nm; + if (nmap.find(val) != nmap.end()) { + if (warning.empty()) + warning = "Enum \"" + name + "\": Some values do not have unique names"; + } + else + nmap[val] = nm; decoder.closeElement(childId); } setNameMap(nmap); // decoder.closeElement(elemId); + return warning; +} + +/// Establish unique enumeration values for a TypeEnum. +/// Fill in any values for any names that weren't explicitly assigned and check for duplicates. +/// \param nmap will contain the map from values to names +/// \param namelist is the list of names in the enumeration +/// \param vallist is the corresponding list of values assigned to names in namelist +/// \param assignlist is true if the corresponding name in namelist has an assigned value +/// \param te is the TypeEnum that will eventually hold the enumeration values +void TypeEnum::assignValues(map &nmap,const vector &namelist,vector &vallist, + const vector &assignlist,const TypeEnum *te) +{ + map::iterator mapiter; + + uintb mask = calc_mask(te->getSize()); + uintb maxval = 0; + for(uint4 i=0;i maxval) + maxval = val; + val &= mask; + mapiter = nmap.find(val); + if (mapiter != nmap.end()) { + throw LowlevelError("Enum \""+te->name+"\": \""+namelist[i]+"\" is a duplicate value"); + } + nmap[val] = namelist[i]; + } + } + for(uint4 i=0;i0) is passed in. -/// Alignment is calculated from fields unless a \b fixedAlign (>0) is passed in. /// \param fd is the list of fields to copy in -/// \param fixedSize (if > 0) indicates an overriding size in bytes -/// \param fixedAlign (if > 0) indicates an overriding alignment in bytes -void TypeStruct::setFields(const vector &fd,int4 fixedSize,int4 fixedAlign) +/// \param newSize is the final size of the structure in bytes +/// \param newAlign is the final alignment of the structure +void TypeStruct::setFields(const vector &fd,int4 newSize,int4 newAlign) { - vector::const_iterator iter; - int4 end; - // Need to calculate size and alignment - int4 calcSize = 0; - int4 calcAlign = 1; - for(iter=fd.begin();iter!=fd.end();++iter) { - field.push_back(*iter); - Datatype *fieldType = (*iter).type; - end = (*iter).offset + fieldType->getSize(); - if (end > calcSize) - calcSize = end; - int4 curAlign = fieldType->getAlignment(); - if (curAlign > calcAlign) - calcAlign = curAlign; - } + field = fd; + size = newSize; + alignment = newAlign; if (field.size() == 1) { // A single field - if (field[0].type->getSize() == calcSize) // that fills the whole structure + if (field[0].type->getSize() == size) // that fills the whole structure flags |= needs_resolution; // needs special attention } - if (fixedSize > 0) { // Try to force a size - if (fixedSize < calcSize) // If the forced size is smaller, this is an error - throw LowlevelError("Trying to force too small a size on "+name); - size = fixedSize; - } - else - size = calcSize; - alignment = (fixedAlign < 1) ? calcAlign : fixedAlign; - calcAlignSize(); - if (fixedSize <= 0) { // Unless specifically overridden - size = alignSize; // pad out structure to with alignment bytes - } + alignSize = calcAlignSize(size,alignment); } /// Find the proper subfield given an offset. Return the index of that field @@ -1813,25 +1839,49 @@ void TypeStruct::encode(Encoder &encoder) const encoder.closeElement(ELEM_TYPE); } -/// Children of the structure element describe each field. +/// Read children of the structure element describing each field. Alignment is calculated from fields unless +/// the \b alignment field is already >0. The fields must be in order, fit within the \b size field, have a +/// valid name, and have a valid data-type, or an exception is thrown. Any fields that overlap their previous +/// field are thrown out and a warning message is returned. /// \param decoder is the stream decoder /// \param typegrp is the factory owning the new structure -void TypeStruct::decodeFields(Decoder &decoder,TypeFactory &typegrp) +/// \return any warning associated with the structure +string TypeStruct::decodeFields(Decoder &decoder,TypeFactory &typegrp) { int4 calcAlign = 1; - int4 maxoffset = 0; + int4 calcSize = 0; + int4 lastOff = -1; + string warning; while(decoder.peekElement() != 0) { field.emplace_back(decoder,typegrp); - int4 trialmax = field.back().offset + field.back().type->getSize(); - if (trialmax > maxoffset) - maxoffset = trialmax; - if (maxoffset > size) { + TypeField &curField(field.back()); + if (curField.type == (Datatype *)0 || curField.type->getMetatype() == TYPE_VOID) + throw LowlevelError("Bad field data-type for structure: "+getName()); + if (curField.name.size() == 0) + throw LowlevelError("Bad field name for structure: "+getName()); + if (curField.offset < lastOff) + throw LowlevelError("Fields are out of order"); + lastOff = curField.offset; + if (curField.offset < calcSize) { ostringstream s; - s << "Field " << field.back().name << " does not fit in structure " + name; + if (warning.empty()) { + s << "Struct \"" << name << "\": ignoring overlapping field \"" << curField.name << "\""; + } + else { + s << "Struct \"" << name << "\": ignoring multiple overlapping fields"; + } + warning = s.str(); + field.pop_back(); // Throw out the overlapping field + continue; + } + calcSize = curField.offset + curField.type->getSize(); + if (calcSize > size) { + ostringstream s; + s << "Field " << curField.name << " does not fit in structure " + name; throw LowlevelError(s.str()); } - int4 curAlign = field.back().type->getAlignment(); + int4 curAlign = curField.type->getAlignment(); if (curAlign > calcAlign) calcAlign = curAlign; } @@ -1845,7 +1895,8 @@ void TypeStruct::decodeFields(Decoder &decoder,TypeFactory &typegrp) } if (alignment < 1) alignment = calcAlign; - calcAlignSize(); + alignSize = calcAlignSize(size, alignment); + return warning; } /// If this method is called, the given data-type has a single component that fills it entirely @@ -1932,21 +1983,30 @@ int4 TypeStruct::findCompatibleResolve(Datatype *ct) const /// Assign an offset to fields in order so that each field starts at an aligned offset within the structure /// \param list is the list of fields -void TypeStruct::assignFieldOffsets(vector &list) +/// \param newSize passes back the calculated size of the structure +/// \param newAlign passes back the calculated alignment +void TypeStruct::assignFieldOffsets(vector &list,int4 &newSize,int4 &newAlign) { int4 offset = 0; + newAlign = 1; vector::iterator iter; for(iter=list.begin();iter!=list.end();++iter) { + if ((*iter).type->getMetatype() == TYPE_VOID) + throw LowlevelError("Illegal field data-type: void"); if ((*iter).offset != -1) continue; int4 cursize = (*iter).type->getAlignSize(); - int4 align = (*iter).type->getAlignment() - 1; + int4 align = (*iter).type->getAlignment(); + if (align > newAlign) + newAlign = align; + align -= 1; if (align > 0 && (offset & align)!=0) offset = (offset-(offset & align) + (align+1)); (*iter).offset = offset; (*iter).ident = offset; offset += cursize; } + newSize = calcAlignSize(offset, newAlign); } /// Copy a list of fields into this union, establishing its size. @@ -1954,34 +2014,15 @@ void TypeStruct::assignFieldOffsets(vector &list) /// Size is calculated from the fields unless a \b fixedSize (>0) is passed in. /// Alignment is calculated from fields unless a \b fixedAlign (>0) is passed in. /// \param fd is the list of fields to copy in -/// \param fixedSize (if > 0) indicates an overriding size in bytes -/// \param fixedAlign (if > 0) indicates an overriding alignment in bytes -void TypeUnion::setFields(const vector &fd,int4 fixedSize,int4 fixedAlign) +/// \param newSize is new size in bytes of the union +/// \param newAlign is the new alignment +void TypeUnion::setFields(const vector &fd,int4 newSize,int4 newAlign) { - vector::const_iterator iter; - // Need to calculate size and alignment - int4 calcSize = 0; - int4 calcAlign = 1; - for(iter=fd.begin();iter!=fd.end();++iter) { - field.push_back(*iter); - Datatype *fieldType = field.back().type; - int4 end = fieldType->getSize(); - if (end > calcSize) - calcSize = end; - int4 curAlign = fieldType->getAlignment(); - if (curAlign > calcAlign) - calcAlign = curAlign; - } - if (fixedSize > 0) { // If the caller is trying to force a size - if (fixedSize < calcSize) // If the forced size is smaller, this is an error - throw LowlevelError("Trying to force too small a size on "+name); - size = fixedSize; - } - else - size = calcSize; - alignment = (fixedAlign < 1) ? calcAlign : fixedAlign; - calcAlignSize(); + field = fd; + size = newSize; + alignment = newAlign; + alignSize = calcAlignSize(size,alignment); } /// Parse children of the \ element describing each field. @@ -2008,7 +2049,7 @@ void TypeUnion::decodeFields(Decoder &decoder,TypeFactory &typegrp) markComplete(); // Otherwise the union is complete if (alignment < 1) alignment = calcAlign; - calcAlignSize(); + alignSize = calcAlignSize(size,alignment); } TypeUnion::TypeUnion(const TypeUnion &op) @@ -2196,6 +2237,30 @@ int4 TypeUnion::findCompatibleResolve(Datatype *ct) const return -1; } +void TypeUnion::assignFieldOffsets(vector &list,int4 &newSize,int4 &newAlign,TypeUnion *tu) + +{ + vector::iterator iter; + + newSize = 0; + newAlign = 1; + for(iter=list.begin();iter!=list.end();++iter) { + Datatype *ct = (*iter).type; + // Do some sanity checks on the field + if (ct == (Datatype *)0 || ct->getMetatype() == TYPE_VOID) + throw LowlevelError("Bad field data-type for union: "+tu->getName()); + else if ((*iter).name.size() == 0) + throw LowlevelError("Bad field name for union: "+tu->getName()); + (*iter).offset = 0; + int4 end = ct->getSize(); + if (end > newSize) + newSize = end; + int4 curAlign = ct->getAlignment(); + if (curAlign > newAlign) + newAlign = curAlign; + } +} + TypePartialStruct::TypePartialStruct(const TypePartialStruct &op) : Datatype(op) { @@ -2209,7 +2274,7 @@ TypePartialStruct::TypePartialStruct(Datatype *contain,int4 off,int4 sz,Datatype { #ifdef CPUI_DEBUG if (contain->getMetatype() != TYPE_STRUCT && contain->getMetatype() != TYPE_ARRAY) - throw LowlevelError("Parent of partial struct is not a struture or array"); + throw LowlevelError("Parent of partial struct is not a structure or array"); #endif flags |= has_stripped; stripped = strip; @@ -3118,6 +3183,7 @@ void TypeFactory::clear(void) tree.clear(); nametree.clear(); clearCache(); + warnings.clear(); } /// Delete anything that isn't a core type @@ -3138,6 +3204,7 @@ void TypeFactory::clearNoncore(void) tree.erase(iter++); delete ct; } + warnings.clear(); } TypeFactory::~TypeFactory(void) @@ -3325,44 +3392,21 @@ void TypeFactory::setDisplayFormat(Datatype *ct,uint4 format) ct->setDisplayFormat(format); } -/// Make sure all the offsets are fully established then set fields of the structure -/// If \b fixedsize is greater than 0, force the final structure to have that size. +/// Set fields on a structure data-type, establishing its size, alignment, and other properties. /// This method should only be used on an incomplete structure. It will mark the structure as complete. /// \param fd is the list of fields to set /// \param ot is the TypeStruct object to modify -/// \param fixedsize is -1 or the forced size of the structure -/// \param fixedalign is -1 or the forced alignment for the structure +/// \param newSize is the new size of the structure in bytes +/// \param newAlign is the new alignment of the structure /// \param flags are other flags to set on the structure -void TypeFactory::setFields(vector &fd,TypeStruct *ot,int4 fixedsize,int4 fixedalign,uint4 flags) +void TypeFactory::setFields(const vector &fd,TypeStruct *ot,int4 newSize,int4 newAlign,uint4 flags) { if (!ot->isIncomplete()) throw LowlevelError("Can only set fields on an incomplete structure"); - int4 offset = 0; - vector::iterator iter; - - // Find the maximum offset, from the explicitly set offsets - for(iter=fd.begin();iter!=fd.end();++iter) { - Datatype *ct = (*iter).type; - // Do some sanity checks on the field - if (ct == (Datatype *)0 || ct->getMetatype() == TYPE_VOID) - throw LowlevelError("Bad field data-type for structure: "+ot->getName()); - else if ((*iter).name.size() == 0) - throw LowlevelError("Bad field name for structure: "+ot->getName()); - - if ((*iter).offset != -1) { - int4 end = (*iter).offset + ct->getSize(); - if (end > offset) - offset = end; - } - } - - sort(fd.begin(),fd.end()); // Sort fields by offset - - // We could check field overlapping here tree.erase(ot); - ot->setFields(fd,fixedsize,fixedalign); + ot->setFields(fd,newSize,newAlign); ot->flags &= ~(uint4)Datatype::type_incomplete; ot->flags |= (flags & (Datatype::opaque_string | Datatype::variable_length | Datatype::type_incomplete)); tree.insert(ot); @@ -3370,33 +3414,20 @@ void TypeFactory::setFields(vector &fd,TypeStruct *ot,int4 fixedsize, recalcPointerSubmeta(ot, SUB_PTR_STRUCT); } -/// If \b fixedsize is greater than 0, force the final union to have that size. /// This method should only be used on an incomplete union. It will mark the union as complete. /// \param fd is the list of fields to set /// \param ot is the TypeUnion object to modify -/// \param fixedsize is -1 or the forced size of the union -/// \param fixedalign is -1 or the forced alignment for the union +/// \param newSize is the size to associate with the union in bytes +/// \param newAlign is the alignment to set /// \param flags are other flags to set on the union -void TypeFactory::setFields(vector &fd,TypeUnion *ot,int4 fixedsize,int4 fixedalign,uint4 flags) +void TypeFactory::setFields(const vector &fd,TypeUnion *ot,int4 newSize,int4 newAlign,uint4 flags) { if (!ot->isIncomplete()) throw LowlevelError("Can only set fields on an incomplete union"); - vector::iterator iter; - - for(iter=fd.begin();iter!=fd.end();++iter) { - Datatype *ct = (*iter).type; - // Do some sanity checks on the field - if (ct == (Datatype *)0 || ct->getMetatype() == TYPE_VOID) - throw LowlevelError("Bad field data-type for union: "+ot->getName()); - else if ((*iter).offset != 0) - throw LowlevelError("Non-zero field offset for union: "+ot->getName()); - else if ((*iter).name.size() == 0) - throw LowlevelError("Bad field name for union: "+ot->getName()); - } tree.erase(ot); - ot->setFields(fd,fixedsize,fixedalign); + ot->setFields(fd,newSize,newAlign); ot->flags &= ~(uint4)Datatype::type_incomplete; ot->flags |= (flags & (Datatype::variable_length | Datatype::type_incomplete)); tree.insert(ot); @@ -3419,52 +3450,14 @@ void TypeFactory::setPrototype(const FuncProto *fp,TypeCode *newCode,uint4 flags tree.insert(newCode); } -/// Set the list of enumeration values and identifiers for a TypeEnum -/// Fill in any values for any names that weren't explicitly assigned -/// and check for duplicates. -/// \param namelist is the list of names in the enumeration -/// \param vallist is the corresponding list of values assigned to names in namelist -/// \param assignlist is true if the corresponding name in namelist has an assigned value -/// \param te is the enumeration object to modify -/// \return true if the modification is successful (no duplicate names) -bool TypeFactory::setEnumValues(const vector &namelist, - const vector &vallist, - const vector &assignlist, - TypeEnum *te) +/// \param nmap is the mapping from integer value to name string +/// \param te is the enumeration whose values/names are set +void TypeFactory::setEnumValues(const map &nmap,TypeEnum *te) + { - map nmap; - map::iterator mapiter; - - uintb mask = calc_mask(te->getSize()); - uintb maxval = 0; - for(uint4 i=0;i maxval) - maxval = val; - val &= mask; - mapiter = nmap.find(val); - if (mapiter != nmap.end()) return false; // Duplicate value - nmap[val] = namelist[i]; - } - } - for(uint4 i=0;isetNameMap(nmap); tree.insert(te); - return true; } /// Recursively write out all the components of a data-type in dependency order @@ -3673,6 +3666,34 @@ void TypeFactory::recalcPointerSubmeta(Datatype *base,sub_metatype sub) } } +/// Add the data-type and string to the \b warnings container. +/// \param dt is the data-type associated with the warning +/// \param warn is the warning string to be displayed to the user +void TypeFactory::insertWarning(Datatype *dt,string warn) + +{ + if (dt->getId() == 0) + throw LowlevelError("Can only issue warnings for named data-types"); + dt->flags |= Datatype::warning_issued; + warnings.emplace_back(dt,warn); +} + +/// Run through the \b warnings and delete any matching the given data-type +/// \param dt is the given data-type +void TypeFactory::removeWarning(Datatype *dt) + +{ + list::iterator iter = warnings.begin(); + while(iter != warnings.end()) { + if ((*iter).dataType->getId() == dt->getId() && (*iter).dataType->getName() == dt->getName()) { + iter = warnings.erase(iter); + } + else { + ++iter; + } + } +} + /// Find or create a data-type identical to the given data-type except for its name and id. /// If the name and id already describe an incompatible data-type, an exception is thrown. /// \param ct is the given data-type to clone @@ -3977,6 +3998,8 @@ void TypeFactory::destroyType(Datatype *ct) { if (ct->isCoreType()) throw LowlevelError("Cannot destroy core type"); + if (ct->hasWarning()) + removeWarning(ct); nametree.erase(ct); tree.erase(ct); delete ct; @@ -4163,6 +4186,22 @@ Datatype *TypeFactory::decodeTypedef(Decoder &decoder) return getTypedef(defedType, nm, id, format); } +/// \param decoder is the stream decoder +/// \param forcecore is \b true if the data-type is considered core +/// \return the newly minted enumeration data-type +Datatype *TypeFactory::decodeEnum(Decoder &decoder,bool forcecore) + +{ + TypeEnum te(1,TYPE_INT); // size and metatype are replaced + string warning = te.decode(decoder,*this); + if (forcecore) + te.flags |= Datatype::coretype; + Datatype *res = findAdd(te); + if (!warning.empty()) + insertWarning(res, warning); + return res; +} + /// If necessary create a stub object before parsing the field descriptions, to deal with recursive definitions /// \param decoder is the stream decoder /// \param forcecore is \b true if the data-type is considered core @@ -4181,7 +4220,7 @@ Datatype* TypeFactory::decodeStruct(Decoder &decoder,bool forcecore) } else if (ct->getMetatype() != TYPE_STRUCT) throw LowlevelError("Trying to redefine type: " + ts.name); - ts.decodeFields(decoder,*this); + string warning = ts.decodeFields(decoder,*this); if (!ct->isIncomplete()) { // Structure of this name was already present if (0 != ct->compareDependency(ts)) throw LowlevelError("Redefinition of structure: " + ts.name); @@ -4189,6 +4228,8 @@ Datatype* TypeFactory::decodeStruct(Decoder &decoder,bool forcecore) else { // If structure is a placeholder stub setFields(ts.field,(TypeStruct*)ct,ts.size,ts.alignment,ts.flags); // Define structure now by copying fields } + if (!warning.empty()) + insertWarning(ct, warning); // decoder.closeElement(elemId); return ct; } @@ -4349,12 +4390,8 @@ Datatype *TypeFactory::decodeTypeNoRef(Decoder &decoder,bool forcecore) return ct; } else if (attribId == ATTRIB_ENUM && decoder.readBool()) { - TypeEnum te(1,TYPE_INT); // size and metatype are replaced decoder.rewindAttributes(); - te.decode(decoder,*this); - if (forcecore) - te.flags |= Datatype::coretype; - ct = findAdd(te); + ct = decodeEnum(decoder, forcecore); decoder.closeElement(elemId); return ct; } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh index 32cf96dbb0..0fb1a2891a 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -177,7 +177,8 @@ protected: needs_resolution = 0x800, ///< Datatype (union, pointer to union) needs resolution before propagation force_format = 0x7000, ///< 3-bits encoding display format, 0=none, 1=hex, 2=dec, 3=oct, 4=bin, 5=char truncate_bigendian = 0x8000, ///< Pointer can be truncated and is big endian - pointer_to_array = 0x10000 ///< Data-type is a pointer to an array + pointer_to_array = 0x10000, ///< Data-type is a pointer to an array + warning_issued = 0x20000 ///< Data-type has an associated \e warning string }; friend class TypeFactory; friend struct DatatypeCompare; @@ -196,10 +197,11 @@ protected: void encodeTypedef(Encoder &encoder) const; ///< Encode \b this as a \e typedef element to a stream void markComplete(void) { flags &= ~(uint4)type_incomplete; } ///< Mark \b this data-type as completely defined void setDisplayFormat(uint4 format); ///< Set a specific display format - void calcAlignSize(void); ///< Calculate aligned size, assuming alignment is known virtual Datatype *clone(void) const=0; ///< Clone the data-type static uint8 hashName(const string &nm); ///< Produce a data-type id by hashing the type name static uint8 hashSize(uint8 id,int4 size); ///< Reversibly hash size into id +protected: + static int4 calcAlignSize(int4 sz,int4 align); ///< Calculate aligned size, given size and alignment of data-type public: /// Construct the base data-type copying low-level properties of another Datatype(const Datatype &op) { size = op.size; name=op.name; displayName=op.displayName; metatype=op.metatype; @@ -224,6 +226,7 @@ public: bool hasStripped(void) const { return (flags & has_stripped)!=0; } ///< Return \b true if \b this has a stripped form bool isIncomplete(void) const { return (flags & type_incomplete)!=0; } ///< Is \b this an incompletely defined data-type bool needsResolution(void) const { return (flags & needs_resolution)!=0; } ///< Is \b this a union or a pointer to union + bool hasWarning(void) const { return (flags & warning_issued)!=0; } ///< Has a \e warning been issued about \b this data-type uint4 getInheritable(void) const { return (flags & coretype); } ///< Get properties pointers inherit uint4 getDisplayFormat(void) const; ///< Get the display format for constants with \b this data-type type_metatype getMetatype(void) const { return metatype; } ///< Get the type \b meta-type @@ -468,7 +471,7 @@ protected: map namemap; ///< Map from integer to name vector masklist; ///< Masks for each bitfield within the enum void setNameMap(const map &nmap); ///< Establish the value -> name map - void decode(Decoder &decoder,TypeFactory &typegrp); ///< Restore \b this enum data-type from a stream + string decode(Decoder &decoder,TypeFactory &typegrp); ///< Restore \b this enum data-type from a stream public: /// Construct from another TypeEnum TypeEnum(const TypeEnum &op); @@ -485,6 +488,8 @@ public: virtual int4 compareDependency(const Datatype &op) const; virtual Datatype *clone(void) const { return new TypeEnum(*this); } virtual void encode(Encoder &encoder) const; + static void assignValues(map &nmap,const vector &namelist,vector &vallist, + const vector &assignlist,const TypeEnum *te); }; /// \brief A composite Datatype object: A \b structure with component \b fields @@ -495,7 +500,7 @@ protected: void setFields(const vector &fd,int4 fixedSize,int4 fixedAlign); ///< Establish fields for \b this int4 getFieldIter(int4 off) const; ///< Get index into field list int4 getLowerBoundField(int4 off) const; ///< Get index of last field before or equal to given offset - void decodeFields(Decoder &decoder,TypeFactory &typegrp); ///< Restore fields from a stream + string decodeFields(Decoder &decoder,TypeFactory &typegrp); ///< Restore fields from a stream public: TypeStruct(const TypeStruct &op); ///< Construct from another TypeStruct TypeStruct(void) : Datatype(0,-1,TYPE_STRUCT) { flags |= type_incomplete; } ///< Construct incomplete/empty TypeStruct @@ -515,7 +520,7 @@ public: virtual Datatype *resolveInFlow(PcodeOp *op,int4 slot); virtual Datatype* findResolve(const PcodeOp *op,int4 slot); virtual int4 findCompatibleResolve(Datatype *ct) const; - static void assignFieldOffsets(vector &list); ///< Assign field offsets + static void assignFieldOffsets(vector &list,int4 &newSize,int4 &newAlign); ///< Assign field offsets static int4 scoreSingleComponent(Datatype *parent,PcodeOp *op,int4 slot); ///< Determine best type fit for given PcodeOp use }; @@ -527,7 +532,7 @@ class TypeUnion : public Datatype { protected: friend class TypeFactory; vector field; ///< The list of fields - void setFields(const vector &fd,int4 fixedSize,int4 fixedAlign); ///< Establish fields for \b this + void setFields(const vector &fd,int4 newSize,int4 newAlign); ///< Establish fields for \b this void decodeFields(Decoder &decoder,TypeFactory &typegrp); ///< Restore fields from a stream public: TypeUnion(const TypeUnion &op); ///< Construct from another TypeUnion @@ -545,6 +550,7 @@ public: virtual Datatype* findResolve(const PcodeOp *op,int4 slot); virtual int4 findCompatibleResolve(Datatype *ct) const; virtual const TypeField *resolveTruncation(int8 offset,PcodeOp *op,int4 slot,int8 &newoff); + static void assignFieldOffsets(vector &list,int4 &newSize,int4 &newAlign,TypeUnion *tu); ///< Assign field offsets }; /// \brief A data-type that holds \e part of a TypeStruct or TypeArray @@ -698,6 +704,19 @@ public: virtual void encode(Encoder &encoder) const; }; +/// \brief A data-type associated with a \e warning string +/// +/// The warning should be presented to the user whenever the data-type is used. A warning is typically +/// issued for ill-formed data-types that have been modified to facilitate decompiler analysis. +class DatatypeWarning { + friend class TypeFactory; + Datatype *dataType; ///< Data-type associated with the warning + string warning; ///< An explanatory string which should be displayed to the user as a warning +public: + DatatypeWarning(Datatype *dt,string warn) { dataType = dt; warning = warn; } ///< Constructor + const string &getWarning(void) const { return warning; } ///< Get the warning string +}; + /// \brief Container class for all Datatype objects in an Architecture class TypeFactory { int4 sizeOfInt; ///< Size of the core "int" data-type @@ -716,6 +735,7 @@ class TypeFactory { Datatype *typecache16; ///< Specially cached 16-byte float type Datatype *type_nochar; ///< Same dimensions as char but acts and displays as an INT Datatype *charcache[5]; ///< Cached character data-types + list warnings; ///< Warnings for the user about data-types in \b this factory Datatype *findNoName(Datatype &ct); ///< Find data-type (in this container) by function void insert(Datatype *newtype); ///< Insert pointer into the cross-reference sets Datatype *findAdd(Datatype &ct); ///< Find data-type in this container or add it @@ -723,6 +743,7 @@ class TypeFactory { void decodeAlignmentMap(Decoder &decoder); ///< Parse a \ element void setDefaultAlignmentMap(void); ///< Provide default alignments for data-types Datatype *decodeTypedef(Decoder &decoder); ///< Restore a \ element describing a typedef + Datatype *decodeEnum(Decoder &decoder,bool forcecore); ///< Restore a \ element describing an enumeration Datatype *decodeStruct(Decoder &decoder,bool forcecore); ///< Restore a \ element describing a structure Datatype *decodeUnion(Decoder &decoder,bool forcecore); ///< Restore a \ element describing a union Datatype *decodeCode(Decoder &decoder,bool isConstructor,bool isDestructor,bool forcecore); ///< Restore an element describing a code object @@ -732,6 +753,8 @@ class TypeFactory { TypeUnicode *getTypeUnicode(const string &nm,int4 sz,type_metatype m); ///< Create a default "unicode" type TypeCode *getTypeCode(const string &n); ///< Create a default "code" type void recalcPointerSubmeta(Datatype *base,sub_metatype sub); ///< Recalculate submeta for pointers to given base data-type + void insertWarning(Datatype *dt,string warn); ///< Register a new data-type warning with \b this factory + void removeWarning(Datatype *dt); ///< Remove the warning associated with the given data-type protected: Architecture *glb; ///< The Architecture object that owns this TypeFactory Datatype *findByIdLocal(const string &nm,uint8 id) const; ///< Search locally by name and id @@ -754,13 +777,10 @@ public: Datatype *findByName(const string &n); ///< Return type of given name Datatype *setName(Datatype *ct,const string &n); ///< Set the given types name void setDisplayFormat(Datatype *ct,uint4 format); ///< Set the display format associated with the given data-type - void setFields(vector &fd,TypeStruct *ot,int4 fixedsize,int4 fixedalign,uint4 flags); ///< Set fields on a TypeStruct - void setFields(vector &fd,TypeUnion *ot,int4 fixedsize,int4 fixedalign,uint4 flags); ///< Set fields on a TypeUnion + void setFields(const vector &fd,TypeStruct *ot,int4 newSize,int4 newAlign,uint4 flags); ///< Set fields on a TypeStruct + void setFields(const vector &fd,TypeUnion *ot,int4 newSize,int4 newAlign,uint4 flags); ///< Set fields on a TypeUnion void setPrototype(const FuncProto *fp,TypeCode *newCode,uint4 flags); ///< Set the prototype on a TypeCode - bool setEnumValues(const vector &namelist, - const vector &vallist, - const vector &assignlist, - TypeEnum *te); ///< Set named values for an enumeration + void setEnumValues(const map &nmap,TypeEnum *te); ///< Set named values for an enumeration Datatype *decodeType(Decoder &decoder); ///< Restore Datatype from a stream Datatype *decodeTypeWithCodeFlags(Decoder &decoder,bool isConstructor,bool isDestructor); TypeVoid *getTypeVoid(void); ///< Get the "void" data-type @@ -798,6 +818,8 @@ public: void parseEnumConfig(Decoder &decoder); ///< Parse the \ tag void setCoreType(const string &name,int4 size,type_metatype meta,bool chartp); ///< Create a core data-type void cacheCoreTypes(void); ///< Cache common types + list::const_iterator beginWarnings(void) const { return warnings.begin(); } ///< Start of data-type warnings + list::const_iterator endWarnings(void) const { return warnings.end(); } ///< End of data-type warnings }; /// The display format for the data-type is changed based on the given format. A value of diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java index d2d0503538..ca854628b5 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java @@ -542,14 +542,14 @@ public class PcodeDataTypeManager { encoder.openElement(ELEM_TYPE); encodeNameIdAttributes(encoder, type); String metatype = type.isSigned() ? "int" : "uint"; - long[] keys = type.getValues(); + String[] names = type.getNames(); encoder.writeString(ATTRIB_METATYPE, metatype); encoder.writeSignedInteger(ATTRIB_SIZE, type.getLength()); encoder.writeBool(ATTRIB_ENUM, true); - for (long key : keys) { + for (String name : names) { encoder.openElement(ELEM_VAL); - encoder.writeString(ATTRIB_NAME, type.getName(key)); - encoder.writeSignedInteger(ATTRIB_VALUE, key); + encoder.writeString(ATTRIB_NAME, name); + encoder.writeSignedInteger(ATTRIB_VALUE, type.getValue(name)); encoder.closeElement(ELEM_VAL); } encoder.closeElement(ELEM_TYPE);