/* ### * IP: GHIDRA * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /// \file signature.hh /// \brief Classes for generating feature vectors representing individual functions #ifndef __SIGNATURE_HH__ #define __SIGNATURE_HH__ #include "funcdata.hh" namespace ghidra { typedef uint8 hashword; ///< Data-type for containing hash information extern AttributeId ATTRIB_BADDATA; ///< Marshaling attribute "baddata" extern AttributeId ATTRIB_HASH; ///< Marshaling attribute "hash" extern AttributeId ATTRIB_UNIMPL; ///< Marshaling attribute "unimpl" extern ElementId ELEM_BLOCKSIG; ///< Marshaling element \ extern ElementId ELEM_CALL; ///< Marshaling element \ extern ElementId ELEM_GENSIG; ///< Marshaling element \ extern ElementId ELEM_MAJOR; ///< Marshaling element \ extern ElementId ELEM_MINOR; ///< Marshaling element \ extern ElementId ELEM_COPYSIG; ///< Marshaling element \ extern ElementId ELEM_SETTINGS; ///< Marshaling element \ extern ElementId ELEM_SIG; ///< Marshaling element \ extern ElementId ELEM_SIGNATUREDESC; ///< Marshaling element \ extern ElementId ELEM_SIGNATURES; ///< Marshaling element \ extern ElementId ELEM_SIGSETTINGS; ///< Marshaling element \ extern ElementId ELEM_VARSIG; ///< Marshaling element \ /// \brief A \b feature describing some aspect of a function or other unit of code /// /// The underlying representation is just a 32-bit hash of the \e information representing /// the feature, but derived classes may be contain other meta-data describing where and how the /// feature was formed. Two features are generally unordered (they are either equal or not equal), /// but an ordering is used internally to normalize the vector representation and accelerate comparison. class Signature { uint4 sig; ///< Underlying 32-bit hash public: Signature(hashword h) { sig=(uint4)h; } ///< Constructor uint4 getHash(void) const { return sig; } ///< Get the underyling 32-bit hash of the feature void print(ostream &s) const; ///< Print the feature hash and a brief description of \b this feature to the given stream int4 compare(const Signature *op2) const; ///< Compare two features virtual ~Signature(void) {} ///< Destructor virtual void encode(Encoder &encoder) const; /// Encode \b this feature to the given stream virtual void decode(Decoder &decoder); /// Restore \b this feature from the given stream /// \brief Print a brief description of \b this feature to the given stream virtual void printOrigin(ostream &s) const { s << hex << "0x" << setfill('0') << setw(8) << sig; } /// \brief Compare two Signature pointers via their underlying hash values static bool comparePtr(Signature *a,Signature *b) { return (a->sig < b->sig); } }; /// \brief A node for data-flow \b feature generation /// /// A SignatureEntry is rooted at a specific Varnode in the data-flow of a function. /// During feature generation it iteratively hashes information about the Varnode and its nearest /// neighbors through the edges of the graph. Feature generation needs to explicitly label: /// - Varnodes that don't contribute meaningful information /// - Shadow Varnodes that are direct or indirect COPYs of other Varnodes /// - Stand-alone COPYs from a constant or input to a Varnode that is not directly read from again class SignatureEntry { /// Varnode properties that need to be explicit during feature generation enum SignatureFlags { SIG_NODE_TERMINAL = 0x1, ///< Varnode has no incoming edges SIG_NODE_COMMUTATIVE = 0x2, ///< No distinction between this Varnode's incoming edges SIG_NODE_NOT_EMITTED = 0x4, ///< Varnode is not emitted as a formal feature (it might be hashed with other features) SIG_NODE_STANDALONE = 0x8, ///< Varnode is a stand-alone COPY VISITED = 0x10, ///< Mark for spanning tree construction MARKER_ROOT = 0x20 ///< Special root status in marker subgraph }; /// \brief A path node for doing depth first traversals of data-flow informed by SignatureEntry struct DFSNode { SignatureEntry *entry; ///< The specific node in the traversal path list::const_iterator iter; ///< The edge to the next node in the path }; Varnode *vn; ///< The root Varnode uint4 flags; ///< Feature generation properties of this Varnode hashword hash[2]; ///< Current and previous hash const PcodeOp *op; ///< The \e effective defining PcodeOp of this Varnode int4 startvn; ///< First incoming edge (via the \e effective PcodeOp) int4 inSize; ///< Number of incoming edges int4 index; ///< Post-order index SignatureEntry *shadow; ///< (If non-null) the Varnode being \e shadowed by this hashword getOpHash(uint4 modifiers); ///< Get a hash encoding the OpCode of the \e effective defining PcodeOp bool isVisited(void) const { return ((flags&VISITED)!=0); } ///< Return \b true if \b this node has been visited before void setVisited(void) { flags |= VISITED; } ///< Mark that \b this node has been visited /// \brief Get the number of input edges for \b this in the noise reduced form of the data-flow graph /// /// \return the number of input edges int4 markerSizeIn(void) const { if ((flags&MARKER_ROOT)!=0) return 1; return numInputs(); } /// \brief Get a specific node coming into \b this in the noise reduced form of the data-flow graph /// /// \param i is the index of the incoming node /// \param vRoot is the virtual root of the noise reduced form /// \param sigMap is the map from a Varnode to its SignatureEntry overlay /// \return the incoming SignatureEntry SignatureEntry *getMarkerIn(int4 i,SignatureEntry *vRoot,const map &sigMap) const { if ((flags&MARKER_ROOT)!=0) return vRoot; return mapToEntry(op->getIn(i+startvn),sigMap); } void standaloneCopyHash(uint4 modifiers); ///< Calculate the hash for stand-alone COPY static bool testStandaloneCopy(Varnode *vn); ///< Determine if the given Varnode is a stand-alone COPY static void noisePostOrder(const vector &rootlist,vector &postOrder,map &sigMap); static void noiseDominator(vector &postOrder,map &sigMap); public: SignatureEntry(Varnode *v,uint4 modifiers); ///< Construct from a Varnode SignatureEntry(int4 ind); ///< Construct a virtual node bool isTerminal(void) const { return ((flags&SIG_NODE_TERMINAL)!=0); } ///< Return \b true if \b this node has no inputs bool isNotEmitted(void) const { return ((flags&SIG_NODE_NOT_EMITTED)!=0); } ///< Return \b true if \b this is not emitted as a feature bool isCommutative(void) const { return ((flags&SIG_NODE_COMMUTATIVE)!=0); } ///< Return \b true if inputs to \b this are unordered bool isStandaloneCopy(void) const { return ((flags&SIG_NODE_STANDALONE)!=0); } ///< Return \b true if \b this is a stand-alone COPY int4 numInputs(void) const { return inSize; } ///< Return the number incoming edges to \b this node /// \brief Get the i-th incoming node /// /// \param i is the index /// \param sigMap is the map from Varnode to its SignatureEntry overlay /// \return the selected incoming SignatureEntry node SignatureEntry *getIn(int4 i,const map &sigMap) const { return mapToEntryCollapse(op->getIn(i+startvn),sigMap); } void calculateShadow(const map &sigMap); ///< Determine if \b this node shadows another void localHash(uint4 modifiers); ///< Compute an initial hash based on local properties of the Varnode void flip(void) { hash[1] = hash[0]; } ///< Store hash from previous iteration and prepare for next iteration void hashIn(vector &neigh); ///< Hash info from other nodes into \b this Varnode *getVarnode(void) const { return vn; } ///< Get the underlying Varnode which \b this overlays hashword getHash(void) const { return hash[0]; } ///< Get the current hash value static SignatureEntry *mapToEntry(const Varnode *vn,const map &sigMap); static SignatureEntry *mapToEntryCollapse(const Varnode *vn,const map &sigMap); static void removeNoise(map &sigMap); static hashword hashSize(Varnode *vn,uint4 modifiers); #ifdef COPYNOISE_DEBUG void verifyNoiseRemoval(map &sigMap) const; ///< Verify \b shadow is set correctly for \b this static void verifyAllNoiseRemoval(map &sigMap); ///< Verify all nodes have \b shadow set correctly #endif }; /// \brief A node for control-flow feature generation /// /// A BlockSignatureEntry is rooted at a specific basic block in the control-flow of a function. /// During feature generation it iteratively hashes information about the basic block and its /// nearest neighbors through the edges of the control-flow graph. class BlockSignatureEntry { BlockBasic *bl; ///< The root basic block hashword hash[2]; ///< Current and previous hash public: BlockSignatureEntry(BlockBasic *b) { bl = b; } ///< Construct from a basic block void localHash(uint4 modifiers); ///< Compute an initial hash based on local properties of the basic block void flip(void) { hash[1] = hash[0]; } ///< Store hash from previous iteration and prepare for next iteration void hashIn(vector &neigh); ///< Hash info from other nodes into \b this BlockBasic *getBlock(void) const { return bl; } ///< Get the underlying basic block which \b this overlays hashword getHash(void) const { return hash[0]; } ///< Get the current hash value }; /// \brief A \e feature representing a portion of the data-flow graph rooted at a particular Varnode /// /// The feature recursively incorporates details about the Varnode, the PcodeOp that defined it and /// its input Varnodes, up to a specific depth. class VarnodeSignature : public Signature { const Varnode *vn; ///< The root Varnode public: VarnodeSignature(const Varnode *v,hashword h) : Signature(h) { vn = v; } ///< Constructor virtual void encode(Encoder &encoder) const; virtual void printOrigin(ostream &s) const { vn->printRaw(s); } }; /// \brief A \e feature rooted in a basic block /// /// There are two forms of a block feature. /// Form 1 contains only local control-flow information about the basic block. /// Form 2 is a feature that combines two operations that occur in sequence within the block. /// This form incorporates info about the operations and data-flow info about their inputs. class BlockSignature : public Signature { const BlockBasic *bl; ///< The root basic block const PcodeOp *op1; ///< (Form 2)The first operation in sequence in the feature const PcodeOp *op2; ///< (Form 2)The second operation in sequence in the feature public: BlockSignature(const BlockBasic *b,hashword h, const PcodeOp *o1,const PcodeOp *o2) : Signature(h) { bl = b; op1 = o1; op2 = o2; } ///< Constructor virtual void encode(Encoder &encoder) const; virtual void printOrigin(ostream &s) const { bl->printHeader(s); } }; /// \brief A feature representing 1 or more \e stand-alone copies in a basic block /// /// A COPY operation is considered stand-alone if either a constant or a function input /// is copied into a location that is then not read directly by the function. /// These COPYs are incorporated into a single feature, which encodes the number /// and type of COPYs but does not encode the order in which they occur within the block. class CopySignature : public Signature { const BlockBasic *bl; ///< The basic block containing the COPY public: CopySignature(const BlockBasic *b,hashword h) : Signature(h) { bl = b; } ///< Constructor virtual void encode(Encoder &encoder) const; virtual void printOrigin(ostream &s) const; }; /// \brief A container for collecting a set of features (a feature vector) for a single function /// /// This manager handles: /// - Configuring details of the signature generation process /// - Establishing the function being signatured , via setCurrentFunction() /// - Generating the features, via generate() /// - Outputting the features, via encode() or print() /// /// The manager can be reused for multiple functions. class SigManager { static uint4 settings; ///< Signature settings (across all managers) vector sigs; ///< Feature set for the current function void clearSignatures(void); ///< Clear all current Signature/feature objects from \b this manager protected: const Funcdata *fd; ///< Current function off of which we are generating features void addSignature(Signature *sig) { sigs.push_back(sig); } ///< Add a new feature to the manager public: SigManager(void) { fd = (const Funcdata *)0; } ///< Constructor virtual ~SigManager(void) { clearSignatures(); } ///< Destructor virtual void clear(void); ///< Clear all current Signature/feature resources virtual void initializeFromStream(istream &s)=0; ///< Read configuration information from a character stream virtual void setCurrentFunction(const Funcdata *f); ///< Set the function used for (future) feature generation virtual void generate(void)=0; ///< Generate all features for the current function int4 numSignatures(void) const { return sigs.size(); } ///< Get the number of features currently generated Signature *getSignature(int4 i) const { return sigs[i]; } ///< Get the i-th Signature/feature void getSignatureVector(vector &feature) const; ///< Get the feature vector as a simple array of hashes hashword getOverallHash(void) const; ///< Combine all feature hashes into one overall hash void sortByHash(void) { sort(sigs.begin(),sigs.end(),Signature::comparePtr); } ///< Sort all current features void print(ostream &s) const; ///< Print a brief description of all current features to a stream void encode(Encoder &encoder) const; ///< Encode all current features to the given stream static uint4 getSettings(void) { return settings; } ///< Get the settings currently being used for signature generation static void setSettings(uint4 newvalue); ///< Establish settings to use for future signature generation }; /// \brief A manager for generating Signatures/features on function data-flow and control-flow /// /// Features are extracted from the data-flow and control-flow graphs of the function. /// The different feature types produced by this manager are: /// - VarnodeSignature /// - BlockSignature /// - CopySignature class GraphSigManager : public SigManager { public: /// Signature generation settings enum Mods { SIG_COLLAPSE_SIZE = 0x1, ///< Treat certain varnode sizes as the same SIG_COLLAPSE_INDNOISE = 0x2, ///< Collapse varnodes that indirect copies of each other // SIG_CALL_TERMINAL = 0x8, ///< Do not consider data-flow across CALLs SIG_DONOTUSE_CONST = 0x10, ///< Do not use value of constant in hash SIG_DONOTUSE_INPUT = 0x20, ///< Do not use (fact of) being an input in hash SIG_DONOTUSE_PERSIST = 0x40 ///< Do not use (fact of) being a global in hash }; private: uint4 sigmods; ///< Current settings to use for signature generation int4 maxiter; ///< Maximum number of iterations across data-flow graph int4 maxblockiter; ///< Maximum number of block iterations int4 maxvarnode; ///< Maximum number of Varnodes to signature map sigmap; ///< Map from Varnode to SignatureEntry overlay map blockmap; ///< Map from basic block to BlockSignatureEntry overlay void signatureIterate(void); ///< Do one iteration of hashing on the SignatureEntrys void signatureBlockIterate(void); ///< Do one iteration of hashing on the BlockSignatureEntrys void collectVarnodeSigs(void); ///< Generate the final feature for each Varnode from its SignatureEntry overlay void collectBlockSigs(void); ///< Generate the final feature(s) for each basic block from its BlockSignatureEntry overlay void varnodeClear(void); ///< Clear all SignatureEntry overlay objects void blockClear(void); ///< Clear all BlockSignatureEntry overlay objects void initializeBlocks(void); ///< Initialize BlockSignatureEntry overlays for the current function void flipVarnodes(void); ///< Store off \e current Varnode hash values as \e previous hash values void flipBlocks(void); ///< Store off \e current block hash values as \e previous hash values public: virtual void clear(void); GraphSigManager(void); ///< Constructor virtual ~GraphSigManager(void) { varnodeClear(); } ///< Destructor void setMaxIteration(int4 val) { maxiter = val; } ///< Override the default iterations used for Varnode features void setMaxBlockIteration(int4 val) { maxblockiter = val; } ///< Override the default iterations used for block features void setMaxVarnode(int4 val) { maxvarnode = val; } ///< Set a maximum threshold for Varnodes in a function virtual void initializeFromStream(istream &s); virtual void setCurrentFunction(const Funcdata *f); virtual void generate(void); static bool testSettings(uint4 val); ///< Test for valid signature generation settings }; /// \brief Given a Varnode, find its SignatureEntry overlay /// /// \param vn is the given Varnode /// \param sigMap is the map from Varnode to SignatureEntry /// \return the corresponding SignatureEntry inline SignatureEntry *SignatureEntry::mapToEntry(const Varnode *vn,const map &sigMap) { map::const_iterator iter; iter = sigMap.find(vn->getCreateIndex()); return (*iter).second; } /// \brief Given a Varnode, find its SignatureEntry overlay, collapsing shadows /// /// If the corresponding SignatureEntry shadows another, the shadowed SignatureEntry is returned instead. /// \param vn is the given Varnode /// \param sigMap is the map from Varnode to SignatureEntry /// \return the corresponding SignatureEntry inline SignatureEntry *SignatureEntry::mapToEntryCollapse(const Varnode *vn,const map &sigMap) { SignatureEntry *res = mapToEntry(vn,sigMap); if (res->shadow == (SignatureEntry *)0) return res; return res->shadow; } /// \brief Calculate a hash describing the size of a given Varnode /// /// The hash is computed from the size of the Varnode in bytes, as an integer value. /// Depending on the signature settings, the hash incorporates the full value, or /// it may truncate a value greater than 4. /// \param vn is the given Varnode /// \param modifiers are the settings being used for signature generation /// \return the hash value inline hashword SignatureEntry::hashSize(Varnode *vn,uint4 modifiers) { hashword val = (hashword) vn->getSize(); // Size of varnode if ((modifiers&GraphSigManager::SIG_COLLAPSE_SIZE)!=0) { if (val>4) // Treat sizes 4 and larger the same val = 4; } return val ^ (val<<7) ^ (val<<14) ^ (val<<21); } extern void simpleSignature(Funcdata *fd,Encoder &encoder); ///< Generate features for a single function extern void debugSignature(Funcdata *fd,Encoder &encoder); ///< Generate features (with debug info) for a single function } // End namespace ghidra #endif