GP-4009 Introduced BSim functionality including support for PostgreSQL,

Elasticsearch and H2 databases.  Added BSim correlator to Version
Tracking.
This commit is contained in:
caheckman 2023-11-17 01:13:42 +00:00 committed by ghidra1
parent f0f5b8f2a4
commit 0865a3dfb0
509 changed files with 77125 additions and 934 deletions

View file

@ -84,14 +84,14 @@ DECCORE=capability architecture options graph cover block cast typeop database c
funcdata funcdata_block funcdata_op funcdata_varnode unionresolve pcodeinject \
heritage prefersplit rangeutil ruleaction subflow blockaction merge double \
transform coreaction condexe override dynamic crc32 prettyprint \
printlanguage printc printjava memstate opbehavior paramid $(COREEXT_NAMES)
printlanguage printc printjava memstate opbehavior paramid signature $(COREEXT_NAMES)
# Files used for any project that use the sleigh decoder
SLEIGH= sleigh pcodeparse pcodecompile sleighbase slghsymbol \
slghpatexpress slghpattern semantics context filemanage
# Additional files for the GHIDRA specific build
GHIDRA= ghidra_arch inject_ghidra ghidra_translate loadimage_ghidra \
typegrp_ghidra database_ghidra ghidra_context cpool_ghidra \
ghidra_process comment_ghidra string_ghidra $(GHIDRAEXT_NAMES)
ghidra_process comment_ghidra string_ghidra signature_ghidra $(GHIDRAEXT_NAMES)
# Additional files specific to the sleigh compiler
SLACOMP=slgh_compile slghparse slghscan
# Additional special files that should not be considered part of the library

View file

@ -0,0 +1,243 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "analyzesigs.hh"
#include "loadimage_bfd.hh"
namespace ghidra {
// The one static instance of the capability.
// Constructing this registers the capability
IfaceAnalyzeSigsCapability IfaceAnalyzeSigsCapability::ifaceAnalyzeSigsCapability;
// The constructor only labels the capability with the name "analyzesigs"
IfaceAnalyzeSigsCapability::IfaceAnalyzeSigsCapability(void)
{
name = "analyzesigs";
}
// Register every signature related console command with the given interface.
// Each command object is allocated here and handed to registerCom() together
// with the two words of its command name (e.g. "print" "signatures").
void IfaceAnalyzeSigsCapability::registerCommands(IfaceStatus *status)
{
status->registerCom(new IfcSignatureSettings(), "signature", "settings");
status->registerCom(new IfcPrintSignatures(),"print","signatures");
status->registerCom(new IfcSaveSignatures(),"save","signatures");
status->registerCom(new IfcSaveAllSignatures(),"saveall","signatures");
status->registerCom(new IfcProduceSignatures(),"produce","signatures");
}
/// \class IfcSignatureSettings
/// \brief Establish the global settings for signature generation : `signature settings <val>`
///
/// A single integer is parsed from the command line and installed, via SigManager::setSettings(),
/// as the settings for any future signature generation.  The base of the integer is not fixed,
/// so the user can choose it with the usual prefix.  A missing, unparsable or zero value is rejected.
void IfcSignatureSettings::execute(istream &s)

{
  // With no base flag set, the stream deduces the base from the text itself
  s.unsetf(ios::basefield);
  uint4 newSettings = 0;
  s >> newSettings;
  if (newSettings != 0) {
    SigManager::setSettings(newSettings);
    *status->optr << "Signature settings set to " << hex << newSettings << endl;
    return;
  }
  throw IfaceParseError("Must specify settings integer");
}
/// \class IfcPrintSignatures
/// \brief Generate and display the features of the current function: `print signatures [...]`
///
/// The current function must already have completed analysis.  A GraphSigManager is configured
/// from the remainder of the command line, features are generated for the function, and the
/// manager prints them to the bulk output stream after a one line header naming the function.
void IfcPrintSignatures::execute(istream &s)

{
  Funcdata *curfunc = dcp->fd;
  if (curfunc == (Funcdata *)0)
    throw IfaceExecutionError("No function selected");
  if (!curfunc->isProcComplete())
    throw IfaceExecutionError("Function has not been fully analyzed");

  GraphSigManager sigManager;
  sigManager.initializeFromStream(s);	// Optional parameters that alter generation

  *status->fileoptr << "Signatures for " << curfunc->getName() << endl;
  sigManager.setCurrentFunction(curfunc);
  sigManager.generate();
  sigManager.print(*status->fileoptr);
}
/// \class IfcSaveSignatures
/// \brief Generate features for the current function and write them to a file: `save signatures <filename> [...]`
///
/// The current function must already have completed analysis.  The first parameter names the
/// output file; anything after it is handed to the signature manager and can alter generation.
/// The features are generated first and the file is only opened afterwards, so a failure during
/// generation does not create or truncate the file.  Output is produced through an XmlEncode.
void IfcSaveSignatures::execute(istream &s)

{
  Funcdata *curfunc = dcp->fd;
  if (curfunc == (Funcdata *)0)
    throw IfaceExecutionError("No function selected");
  if (!curfunc->isProcComplete())
    throw IfaceExecutionError("Function has not been fully analyzed");

  string outname;
  s >> outname;
  if (outname.empty())
    throw IfaceExecutionError("Need name of file to save signatures to");

  GraphSigManager sigManager;
  sigManager.initializeFromStream(s);
  sigManager.setCurrentFunction(curfunc);
  sigManager.generate();

  ofstream outfile( outname.c_str() );
  if (!outfile)
    throw IfaceExecutionError("Unable to open signature save file: "+outname);

  XmlEncode encoder(outfile);
  sigManager.encode(encoder);
  outfile.close();

  *status->fileoptr << "Successfully saved signatures for " << curfunc->getName() << endl;
}
/// \class IfcSaveAllSignatures
/// \brief Calculate signatures for all functions and save them to a file: `saveall signatures <filename> [...]`
///
/// The first parameter names the output file.  Any additional parameters are handed to the
/// signature manager and can alter signature generation.  For the duration of the command:
///   - the bulk output stream (status->fileoptr) is redirected to the named file
///   - the "normalize" action is selected as the current action
///
/// iterateFunctionsAddrOrder() then drives iterationCallback(), which produces the actual output.
/// The previous output stream and the previously selected action are restored before this
/// method returns, including when an exception interrupts the iteration.
void IfcSaveAllSignatures::execute(istream &s)

{
  if (dcp->conf == (Architecture *)0)
    throw IfaceExecutionError("No architecture loaded");

  string sigfilename;
  s >> sigfilename;
  if (sigfilename.size() == 0)
    throw IfaceExecutionError("Need name of file to save signatures to");

  // Discard any manager left over from an earlier run.  The pointer is nulled before the
  // new allocation so that it never dangles if the allocation itself throws.
  delete smanage;
  smanage = (GraphSigManager *)0;
  smanage = new GraphSigManager();
  smanage->initializeFromStream(s); // configure the manager;

  // The stream lives on the stack so that it is closed and released on every exit path.
  // NOTE(review): the file is opened in text mode, although IfcSaveAllSignatures::iterationCallback
  // emits raw binary fields through write() -- confirm this is intended on platforms that
  // translate line endings.
  ofstream sigstream(sigfilename.c_str());
  if (!sigstream)
    throw IfaceExecutionError("Unable to open signature save file: "+sigfilename);

  ostream *saveoldfileptr = status->fileoptr;
  string oldactname = dcp->conf->allacts.getCurrentName();
  status->fileoptr = &sigstream;
  try {
    dcp->conf->allacts.setCurrent("normalize");
    iterateFunctionsAddrOrder();
  }
  catch(...) {
    // Undo the redirection and the action change before passing the error on,
    // otherwise status->fileoptr would be left pointing at the local stream.
    status->fileoptr = saveoldfileptr;
    dcp->conf->allacts.setCurrent(oldactname);
    delete smanage;
    smanage = (GraphSigManager *)0;
    throw;
  }

  sigstream.close();
  status->fileoptr = saveoldfileptr;
  dcp->conf->allacts.setCurrent(oldactname);
  delete smanage;
  smanage = (GraphSigManager *)0;
}
/// \brief Decompile one function, generate its features and append them to the output file
///
/// Functions without code are reported and skipped, as are functions for which the current
/// action throws a LowlevelError.  If at least one feature is generated, a record is written to
/// status->fileoptr consisting of, in order:
///   - the index of the address space of the function entry point (4 bytes)
///   - the offset of the entry point (sizeof(uintb) bytes)
///   - the number of features (4 bytes)
///   - the length of the function name (4 bytes)
///   - the characters of the function name
///   - the features themselves, produced through an XmlEncode
///
/// The integer fields are raw copies of the in-memory values.
/// \param fd is the function to process
void IfcSaveAllSignatures::iterationCallback(Funcdata *fd)

{
  if (fd->hasNoCode()) {
    *status->optr << "No code for " << fd->getName() << endl;
    return;
  }

  try {
    dcp->conf->clearAnalysis(fd); // Start from a clean slate
    dcp->conf->allacts.getCurrent()->reset(*fd);
    dcp->conf->allacts.getCurrent()->perform( *fd );
    *status->optr << "Decompiled " << fd->getName();
    *status->optr << '(' << dec << fd->getSize() << ')' << endl;
  }
  catch(LowlevelError &err) {
    *status->optr << "Skipping " << fd->getName() << ": " << err.explain << endl;
    return;
  }

  smanage->setCurrentFunction(fd);
  smanage->generate();
  uint4 featureCount = smanage->numSignatures();
  if (featureCount != 0) {
    ostream &out = *status->fileoptr;
    Address entry = fd->getAddress();
    uint4 spaceIndex = entry.getSpace()->getIndex();
    uintb entryOffset = entry.getOffset();
    uint4 nameLength = fd->getName().size();

    // Fixed size header fields followed by the name
    out.write((char *)&spaceIndex,4);
    out.write((char *)&entryOffset,sizeof(uintb));
    out.write((char *)&featureCount,4);
    out.write((char *)&nameLength,4);
    out.write(fd->getName().c_str(),nameLength);

    XmlEncode encoder(out);
    smanage->encode(encoder);
  }

  // Release per-function state before moving on to the next function
  smanage->clear();
  dcp->conf->clearAnalysis(fd);
}
/// \class IfcProduceSignatures
/// \brief Calculate signatures and save combined hashes to a file: `produce signatures <filename> [...]`
///
/// This callback is invoked once per function.  The function is decompiled with the currently
/// selected action and its features are generated.  The features of the function are combined
/// into a single overall hash, and one line is written to the output file: the name of the
/// function, followed by " = 0x" and the hash as zero-padded hexadecimal of width 16.
/// Functions without code, or for which the action throws a LowlevelError, are reported and skipped.
void IfcProduceSignatures::iterationCallback(Funcdata *fd)

{
  if (fd->hasNoCode()) {
    *status->optr << "No code for " << fd->getName() << endl;
    return;
  }

  try {
    dcp->conf->clearAnalysis(fd); // Start from a clean slate
    dcp->conf->allacts.getCurrent()->reset(*fd);
    dcp->conf->allacts.getCurrent()->perform( *fd );
    *status->optr << "Decompiled " << fd->getName();
    *status->optr << '(' << dec << fd->getSize() << ')' << endl;
  }
  catch(LowlevelError &err) {
    *status->optr << "Skipping " << fd->getName() << ": " << err.explain << endl;
    return;
  }

  smanage->setCurrentFunction(fd);
  smanage->generate();
  hashword overall = smanage->getOverallHash();

  ostream &out = *status->fileoptr;
  out << fd->getName() << " = 0x";
  out << hex << setfill('0') << setw(16) << overall << endl;

  // Release per-function state before moving on to the next function
  smanage->clear();
  dcp->conf->clearAnalysis(fd);
}
} // End namespace ghidra

View file

@ -0,0 +1,67 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// \file analyzesigs.hh
/// \brief Commands for feature/signature generation from the console interface
#ifndef __ANALYZESIGS_HH__
#define __ANALYZESIGS_HH__
#include "codedata.hh"
#include "signature.hh"
namespace ghidra {
/// \brief Interface capability point for console commands associated with signature/feature generation
///
/// The class is used as a singleton: the constructor is private, a single static instance is
/// declared, and the copy operations are declared but not implemented.
class IfaceAnalyzeSigsCapability : public IfaceCapability {
static IfaceAnalyzeSigsCapability ifaceAnalyzeSigsCapability; ///< Singleton instance
IfaceAnalyzeSigsCapability(void); ///< Construct the singleton
IfaceAnalyzeSigsCapability(const IfaceAnalyzeSigsCapability &op2); ///< Not implemented
IfaceAnalyzeSigsCapability &operator=(const IfaceAnalyzeSigsCapability &op2); ///< Not implemented
public:
virtual void registerCommands(IfaceStatus *status); ///< Register the signature commands with the given interface
};
// Console command `signature settings <val>`.
// The full description accompanies execute() in the implementation file.
class IfcSignatureSettings : public IfaceDecompCommand {
public:
virtual void execute(istream &s);
};
// Console command `print signatures [...]`.
// The full description accompanies execute() in the implementation file.
class IfcPrintSignatures : public IfaceDecompCommand {
public:
virtual void execute(istream &s);
};
// Console command `save signatures <filename> [...]`.
// The full description accompanies execute() in the implementation file.
class IfcSaveSignatures : public IfaceDecompCommand {
public:
virtual void execute(istream &s);
};
class IfcSaveAllSignatures : public IfaceDecompCommand {
protected:
GraphSigManager *smanage; ///< Manager for generating signatures
public:
IfcSaveAllSignatures(void) { smanage = (GraphSigManager *)0; } ///< Constructor
virtual ~IfcSaveAllSignatures(void) { if (smanage != (GraphSigManager *)0) delete smanage; }
virtual void execute(istream &s);
virtual void iterationCallback(Funcdata *fd);
};
// Console command `produce signatures <filename> [...]`.
// Only iterationCallback() is overridden; execute() and the signature manager
// are inherited from IfcSaveAllSignatures.
class IfcProduceSignatures : public IfcSaveAllSignatures {
public:
virtual void iterationCallback(Funcdata *fd);
};
} // End namespace ghidra
#endif

View file

@ -32,7 +32,7 @@ using std::sqrt;
vector<ArchitectureCapability *> ArchitectureCapability::thelist;
const uint4 ArchitectureCapability::majorversion = 5;
const uint4 ArchitectureCapability::majorversion = 6;
const uint4 ArchitectureCapability::minorversion = 0;
AttributeId ATTRIB_ADDRESS = AttributeId("address",148);

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,357 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// \file signature.hh
/// \brief Classes for generating feature vectors representing individual functions
#ifndef __SIGNATURE_HH__
#define __SIGNATURE_HH__
#include "funcdata.hh"
namespace ghidra {
typedef uint8 hashword; ///< Data-type for containing hash information
extern AttributeId ATTRIB_BADDATA; ///< Marshaling attribute "baddata"
extern AttributeId ATTRIB_HASH; ///< Marshaling attribute "hash"
extern AttributeId ATTRIB_UNIMPL; ///< Marshaling attribute "unimpl"
extern ElementId ELEM_BLOCKSIG; ///< Marshaling element \<blocksig>
extern ElementId ELEM_CALL; ///< Marshaling element \<call>
extern ElementId ELEM_GENSIG; ///< Marshaling element \<gensig>
extern ElementId ELEM_MAJOR; ///< Marshaling element \<major>
extern ElementId ELEM_MINOR; ///< Marshaling element \<minor>
extern ElementId ELEM_COPYSIG; ///< Marshaling element \<copysig>
extern ElementId ELEM_SETTINGS; ///< Marshaling element \<settings>
extern ElementId ELEM_SIG; ///< Marshaling element \<sig>
extern ElementId ELEM_SIGNATUREDESC; ///< Marshaling element \<signaturedesc>
extern ElementId ELEM_SIGNATURES; ///< Marshaling element \<signatures>
extern ElementId ELEM_SIGSETTINGS; ///< Marshaling element \<sigsettings>
extern ElementId ELEM_VARSIG; ///< Marshaling element \<varsig>
/// \brief A \b feature describing some aspect of a function or other unit of code
///
/// The underlying representation is just a 32-bit hash of the \e information representing
/// the feature, but derived classes may be contain other meta-data describing where and how the
/// feature was formed. Two features are generally unordered (they are either equal or not equal),
/// but an ordering is used internally to normalize the vector representation and accelerate comparison.
class Signature {
uint4 sig; ///< Underlying 32-bit hash
public:
Signature(hashword h) { sig=(uint4)h; } ///< Constructor
uint4 getHash(void) const { return sig; } ///< Get the underyling 32-bit hash of the feature
void print(ostream &s) const; ///< Print the feature hash and a brief description of \b this feature to the given stream
int4 compare(const Signature *op2) const; ///< Compare two features
virtual ~Signature(void) {} ///< Destructor
virtual void encode(Encoder &encoder) const; /// Encode \b this feature to the given stream
virtual void decode(Decoder &decoder); /// Restore \b this feature from the given stream
/// \brief Print a brief description of \b this feature to the given stream
virtual void printOrigin(ostream &s) const {
s << hex << "0x" << setfill('0') << setw(8) << sig;
}
/// \brief Compare two Signature pointers via their underlying hash values
static bool comparePtr(Signature *a,Signature *b) { return (a->sig < b->sig); }
};
/// \brief A node for data-flow \b feature generation
///
/// A SignatureEntry is rooted at a specific Varnode in the data-flow of a function.
/// During feature generation it iteratively hashes information about the Varnode and its nearest
/// neighbors through the edges of the graph. Feature generation needs to explicitly label:
/// - Varnodes that don't contribute meaningful information
/// - Shadow Varnodes that are direct or indirect COPYs of other Varnodes
/// - Stand-alone COPYs from a constant or input to a Varnode that is not directly read from again
///
/// The maps passed to the static helpers are keyed by an integer; mapToEntry() looks entries up
/// using Varnode::getCreateIndex().
class SignatureEntry {
/// Varnode properties that need to be explicit during feature generation
enum SignatureFlags {
SIG_NODE_TERMINAL = 0x1, ///< Varnode has no incoming edges
SIG_NODE_COMMUTATIVE = 0x2, ///< No distinction between this Varnode's incoming edges
SIG_NODE_NOT_EMITTED = 0x4, ///< Varnode is not emitted as a formal feature (it might be hashed with other features)
SIG_NODE_STANDALONE = 0x8, ///< Varnode is a stand-alone COPY
VISITED = 0x10, ///< Mark for spanning tree construction
MARKER_ROOT = 0x20 ///< Special root status in marker subgraph
};
/// \brief A path node for doing depth first traversals of data-flow informed by SignatureEntry
struct DFSNode {
SignatureEntry *entry; ///< The specific node in the traversal path
list<PcodeOp *>::const_iterator iter; ///< The edge to the next node in the path
};
Varnode *vn; ///< The root Varnode
uint4 flags; ///< Feature generation properties of this Varnode
hashword hash[2]; ///< Current and previous hash
const PcodeOp *op; ///< The \e effective defining PcodeOp of this Varnode
int4 startvn; ///< First incoming edge (via the \e effective PcodeOp)
int4 inSize; ///< Number of incoming edges
int4 index; ///< Post-order index
SignatureEntry *shadow; ///< (If non-null) the Varnode being \e shadowed by this
hashword getOpHash(uint4 modifiers); ///< Get a hash encoding the OpCode of the \e effective defining PcodeOp
bool isVisited(void) const { return ((flags&VISITED)!=0); } ///< Return \b true if \b this node has been visited before
void setVisited(void) { flags |= VISITED; } ///< Mark that \b this node has been visited
/// \brief Get the number of input edges for \b this in the noise reduced form of the data-flow graph
///
/// A node marked as MARKER_ROOT always reports exactly one input edge.
/// \return the number of input edges
int4 markerSizeIn(void) const {
if ((flags&MARKER_ROOT)!=0) return 1;
return numInputs();
}
/// \brief Get a specific node coming into \b this in the noise reduced form of the data-flow graph
///
/// For a node marked as MARKER_ROOT, the virtual root is returned regardless of the index.
/// \param i is the index of the incoming node
/// \param vRoot is the virtual root of the noise reduced form
/// \param sigMap is the map from a Varnode to its SignatureEntry overlay
/// \return the incoming SignatureEntry
SignatureEntry *getMarkerIn(int4 i,SignatureEntry *vRoot,const map<int4,SignatureEntry *> &sigMap) const {
if ((flags&MARKER_ROOT)!=0) return vRoot;
return mapToEntry(op->getIn(i+startvn),sigMap);
}
void standaloneCopyHash(uint4 modifiers); ///< Calculate the hash for stand-alone COPY
static bool testStandaloneCopy(Varnode *vn); ///< Determine if the given Varnode is a stand-alone COPY
// NOTE(review): presumably computes a post-order over the noise reduced graph starting from
// the given roots -- confirm against the implementation
static void noisePostOrder(const vector<SignatureEntry *> &rootlist,vector<SignatureEntry *> &postOrder,map<int4,SignatureEntry *> &sigMap);
// NOTE(review): presumably a dominator calculation over the nodes in post-order -- confirm
// against the implementation
static void noiseDominator(vector<SignatureEntry *> &postOrder,map<int4,SignatureEntry *> &sigMap);
public:
SignatureEntry(Varnode *v,uint4 modifiers); ///< Construct from a Varnode
SignatureEntry(int4 ind); ///< Construct a virtual node
bool isTerminal(void) const { return ((flags&SIG_NODE_TERMINAL)!=0); } ///< Return \b true if \b this node has no inputs
bool isNotEmitted(void) const { return ((flags&SIG_NODE_NOT_EMITTED)!=0); } ///< Return \b true if \b this is not emitted as a feature
bool isCommutative(void) const { return ((flags&SIG_NODE_COMMUTATIVE)!=0); } ///< Return \b true if inputs to \b this are unordered
bool isStandaloneCopy(void) const { return ((flags&SIG_NODE_STANDALONE)!=0); } ///< Return \b true if \b this is a stand-alone COPY
int4 numInputs(void) const { return inSize; } ///< Return the number of incoming edges to \b this node
/// \brief Get the i-th incoming node
///
/// If the incoming node shadows another node, the shadowed node is returned instead.
/// \param i is the index
/// \param sigMap is the map from Varnode to its SignatureEntry overlay
/// \return the selected incoming SignatureEntry node
SignatureEntry *getIn(int4 i,const map<int4,SignatureEntry *> &sigMap) const {
return mapToEntryCollapse(op->getIn(i+startvn),sigMap);
}
void calculateShadow(const map<int4,SignatureEntry *> &sigMap); ///< Determine if \b this node shadows another
void localHash(uint4 modifiers); ///< Compute an initial hash based on local properties of the Varnode
void flip(void) { hash[1] = hash[0]; } ///< Store hash from previous iteration and prepare for next iteration
void hashIn(vector<SignatureEntry *> &neigh); ///< Hash info from other nodes into \b this
Varnode *getVarnode(void) const { return vn; } ///< Get the underlying Varnode which \b this overlays
hashword getHash(void) const { return hash[0]; } ///< Get the current hash value
static SignatureEntry *mapToEntry(const Varnode *vn,const map<int4,SignatureEntry *> &sigMap);
static SignatureEntry *mapToEntryCollapse(const Varnode *vn,const map<int4,SignatureEntry *> &sigMap);
// NOTE(review): presumably the entry point of the noise reduction that uses noisePostOrder()
// and noiseDominator() -- confirm against the implementation
static void removeNoise(map<int4,SignatureEntry *> &sigMap);
static hashword hashSize(Varnode *vn,uint4 modifiers);
#ifdef COPYNOISE_DEBUG
void verifyNoiseRemoval(map<int4,SignatureEntry *> &sigMap) const; ///< Verify \b shadow is set correctly for \b this
static void verifyAllNoiseRemoval(map<int4,SignatureEntry *> &sigMap); ///< Verify all nodes have \b shadow set correctly
#endif
};
/// \brief A node for control-flow feature generation
///
/// A BlockSignatureEntry is rooted at a specific basic block in the control-flow of a function.
/// During feature generation it iteratively hashes information about the basic block and its
/// nearest neighbors through the edges of the control-flow graph.
class BlockSignatureEntry {
  BlockBasic *bl;		///< The root basic block
  hashword hash[2];		///< Current and previous hash
public:
  /// \brief Construct from a basic block
  ///
  /// Both hash slots start out as zero, so that getHash() and flip() never read an
  /// indeterminate value, even if they are reached before any hash has been computed.
  /// \param b is the basic block to overlay
  BlockSignatureEntry(BlockBasic *b) : bl(b) { hash[0] = 0; hash[1] = 0; }
  void localHash(uint4 modifiers);	///< Compute an initial hash based on local properties of the basic block
  void flip(void) { hash[1] = hash[0]; }	///< Store hash from previous iteration and prepare for next iteration
  void hashIn(vector<BlockSignatureEntry *> &neigh);	///< Hash info from other nodes into \b this
  BlockBasic *getBlock(void) const { return bl; }	///< Get the underlying basic block which \b this overlays
  hashword getHash(void) const { return hash[0]; }	///< Get the current hash value
};
/// \brief A \e feature representing a portion of the data-flow graph rooted at a particular Varnode
///
/// The feature recursively incorporates details about the Varnode, the PcodeOp that defined it and
/// its input Varnodes, up to a specific depth.
class VarnodeSignature : public Signature {
const Varnode *vn; ///< The root Varnode
public:
VarnodeSignature(const Varnode *v,hashword h) : Signature(h) { vn = v; } ///< Constructor
virtual void encode(Encoder &encoder) const;
virtual void printOrigin(ostream &s) const { vn->printRaw(s); }
};
/// \brief A \e feature rooted in a basic block
///
/// There are two forms of a block feature.
/// Form 1 contains only local control-flow information about the basic block.
/// Form 2 is a feature that combines two operations that occur in sequence within the block.
/// This form incorporates info about the operations and data-flow info about their inputs.
class BlockSignature : public Signature {
const BlockBasic *bl; ///< The root basic block
const PcodeOp *op1; ///< (Form 2)The first operation in sequence in the feature
const PcodeOp *op2; ///< (Form 2)The second operation in sequence in the feature
public:
BlockSignature(const BlockBasic *b,hashword h,
const PcodeOp *o1,const PcodeOp *o2) : Signature(h)
{ bl = b; op1 = o1; op2 = o2; } ///< Constructor
virtual void encode(Encoder &encoder) const;
virtual void printOrigin(ostream &s) const { bl->printHeader(s); }
};
/// \brief A feature representing 1 or more \e stand-alone copies in a basic block
///
/// A COPY operation is considered stand-alone if either a constant or a function input
/// is copied into a location that is then not read directly by the function.
/// These COPYs are incorporated into a single feature, which encodes the number
/// and type of COPYs but does not encode the order in which they occur within the block.
class CopySignature : public Signature {
const BlockBasic *bl; ///< The basic block containing the COPY
public:
CopySignature(const BlockBasic *b,hashword h)
: Signature(h) { bl = b; } ///< Constructor
virtual void encode(Encoder &encoder) const;
virtual void printOrigin(ostream &s) const;
};
/// \brief A container for collecting a set of features (a feature vector) for a single function
///
/// This manager handles:
/// - Configuring details of the signature generation process
/// - Establishing the function being signatured, via setCurrentFunction()
/// - Generating the features, via generate()
/// - Outputting the features, via encode() or print()
///
/// The manager can be reused for multiple functions.
/// The class is abstract: initializeFromStream() and generate() must be provided by a derived class.
/// The settings value is static and therefore shared by every manager.
class SigManager {
static uint4 settings; ///< Signature settings (across all managers)
vector<Signature *> sigs; ///< Feature set for the current function
void clearSignatures(void); ///< Clear all current Signature/feature objects from \b this manager
protected:
const Funcdata *fd; ///< Current function off of which we are generating features
void addSignature(Signature *sig) { sigs.push_back(sig); } ///< Add a new feature to the manager
public:
SigManager(void) { fd = (const Funcdata *)0; } ///< Constructor
virtual ~SigManager(void) { clearSignatures(); } ///< Destructor
virtual void clear(void); ///< Clear all current Signature/feature resources
virtual void initializeFromStream(istream &s)=0; ///< Read configuration information from a character stream
virtual void setCurrentFunction(const Funcdata *f); ///< Set the function used for (future) feature generation
virtual void generate(void)=0; ///< Generate all features for the current function
int4 numSignatures(void) const { return sigs.size(); } ///< Get the number of features currently generated
Signature *getSignature(int4 i) const { return sigs[i]; } ///< Get the i-th Signature/feature (the index is not range checked)
void getSignatureVector(vector<uint4> &feature) const; ///< Get the feature vector as a simple array of hashes
hashword getOverallHash(void) const; ///< Combine all feature hashes into one overall hash
void sortByHash(void) { sort(sigs.begin(),sigs.end(),Signature::comparePtr); } ///< Sort all current features by their hash
void print(ostream &s) const; ///< Print a brief description of all current features to a stream
void encode(Encoder &encoder) const; ///< Encode all current features to the given stream
static uint4 getSettings(void) { return settings; } ///< Get the settings currently being used for signature generation
static void setSettings(uint4 newvalue); ///< Establish settings to use for future signature generation
};
/// \brief A manager for generating Signatures/features on function data-flow and control-flow
///
/// Features are extracted from the data-flow and control-flow graphs of the function.
/// The different feature types produced by this manager are:
/// - VarnodeSignature
/// - BlockSignature
/// - CopySignature
///
/// The manager keeps two sets of overlay objects while it works, one per Varnode and one
/// per basic block.  Both sets are released when the manager is destroyed.
class GraphSigManager : public SigManager {
public:
  /// Signature generation settings
  enum Mods {
    SIG_COLLAPSE_SIZE = 0x1,		///< Treat certain varnode sizes as the same
    SIG_COLLAPSE_INDNOISE = 0x2,	///< Collapse varnodes that are indirect copies of each other
    // SIG_CALL_TERMINAL = 0x8, ///< Do not consider data-flow across CALLs
    SIG_DONOTUSE_CONST = 0x10,		///< Do not use value of constant in hash
    SIG_DONOTUSE_INPUT = 0x20,		///< Do not use (fact of) being an input in hash
    SIG_DONOTUSE_PERSIST = 0x40		///< Do not use (fact of) being a global in hash
  };
private:
  uint4 sigmods;			///< Current settings to use for signature generation
  int4 maxiter;				///< Maximum number of iterations across data-flow graph
  int4 maxblockiter;			///< Maximum number of block iterations
  int4 maxvarnode;			///< Maximum number of Varnodes to signature
  map<int4,SignatureEntry *> sigmap;	///< Map from Varnode to SignatureEntry overlay
  map<int4,BlockSignatureEntry *> blockmap;	///< Map from basic block to BlockSignatureEntry overlay
  void signatureIterate(void);		///< Do one iteration of hashing on the SignatureEntrys
  void signatureBlockIterate(void);	///< Do one iteration of hashing on the BlockSignatureEntrys
  void collectVarnodeSigs(void);	///< Generate the final feature for each Varnode from its SignatureEntry overlay
  void collectBlockSigs(void);		///< Generate the final feature(s) for each basic block from its BlockSignatureEntry overlay
  void varnodeClear(void);		///< Clear all SignatureEntry overlay objects
  void blockClear(void);		///< Clear all BlockSignatureEntry overlay objects
  void initializeBlocks(void);		///< Initialize BlockSignatureEntry overlays for the current function
  void flipVarnodes(void);		///< Store off \e current Varnode hash values as \e previous hash values
  void flipBlocks(void);		///< Store off \e current block hash values as \e previous hash values
public:
  virtual void clear(void);
  GraphSigManager(void);		///< Constructor

  /// \brief Destructor
  ///
  /// Both kinds of overlay are cleared.  Clearing only the Varnode overlays would leave any
  /// BlockSignatureEntry objects still held in the block map unreleased when a manager is
  /// destroyed without a prior call to clear().
  virtual ~GraphSigManager(void) { varnodeClear(); blockClear(); }
  void setMaxIteration(int4 val) { maxiter = val; }	///< Override the default iterations used for Varnode features
  void setMaxBlockIteration(int4 val) { maxblockiter = val; }	///< Override the default iterations used for block features
  void setMaxVarnode(int4 val) { maxvarnode = val; }	///< Set a maximum threshold for Varnodes in a function
  virtual void initializeFromStream(istream &s);
  virtual void setCurrentFunction(const Funcdata *f);
  virtual void generate(void);
  static bool testSettings(uint4 val);	///< Test for valid signature generation settings
};
/// \brief Given a Varnode, find its SignatureEntry overlay
///
/// The map is searched using the creation index of the Varnode.  The result of the search is
/// used without checking, so the Varnode must have an entry in the map.
/// \param vn is the given Varnode
/// \param sigMap is the map from Varnode to SignatureEntry
/// \return the corresponding SignatureEntry
inline SignatureEntry *SignatureEntry::mapToEntry(const Varnode *vn,const map<int4,SignatureEntry *> &sigMap)

{
  return sigMap.find(vn->getCreateIndex())->second;
}
/// \brief Given a Varnode, find its SignatureEntry overlay, collapsing shadows
///
/// The overlay is looked up with mapToEntry().  If that overlay shadows another
/// SignatureEntry, the shadowed SignatureEntry is returned in its place.
/// \param vn is the given Varnode
/// \param sigMap is the map from Varnode to SignatureEntry
/// \return the corresponding SignatureEntry
inline SignatureEntry *SignatureEntry::mapToEntryCollapse(const Varnode *vn,const map<int4,SignatureEntry *> &sigMap)

{
  SignatureEntry *entry = mapToEntry(vn,sigMap);
  return (entry->shadow != (SignatureEntry *)0) ? entry->shadow : entry;
}
/// \brief Calculate a hash describing the size of a given Varnode
///
/// The hash is derived from the size of the Varnode in bytes, as an integer value.
/// If the SIG_COLLAPSE_SIZE setting is present, any size greater than 4 is treated as 4,
/// so that sizes of 4 and larger all produce the same hash.  The value is then mixed
/// by XORing it with copies of itself shifted left by 7, 14 and 21 bits.
/// \param vn is the given Varnode
/// \param modifiers are the settings being used for signature generation
/// \return the hash value
inline hashword SignatureEntry::hashSize(Varnode *vn,uint4 modifiers)

{
  const bool collapse = (modifiers & GraphSigManager::SIG_COLLAPSE_SIZE) != 0;
  hashword sz = (hashword) vn->getSize();
  if (collapse && sz > 4)
    sz = 4;
  hashword res = sz;
  res ^= (sz << 7);
  res ^= (sz << 14);
  res ^= (sz << 21);
  return res;
}
extern void simpleSignature(Funcdata *fd,Encoder &encoder); ///< Generate features for a single function
extern void debugSignature(Funcdata *fd,Encoder &encoder); ///< Generate features (with debug info) for a single function
} // End namespace ghidra
#endif

View file

@ -0,0 +1,119 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "signature_ghidra.hh"
#include "signature.hh"
namespace ghidra {
// Constructing the singleton registers the capability
// NOTE(review): the constructor visible in the header only sets the capability name, so the
// registration itself presumably happens in the GhidraCapability base class -- confirm.
GhidraSignatureCapability GhidraSignatureCapability::ghidraSignatureCapability;
/// \brief Install the signature related commands into the command map
///
/// Four command objects are allocated and stored under their command names:
/// feature generation in normal and debug form, plus retrieval and update of the
/// signature settings.  The objects are allocated with \b new and handed to the map;
/// this method never frees them (presumably the owner of \b commandmap does -- confirm).
void GhidraSignatureCapability::initialize(void)
{
// The boolean selects the debug (verbose) response format
commandmap["generateSignatures"] = new SignaturesAt(false);
commandmap["debugSignatures"] = new SignaturesAt(true);
commandmap["getSignatureSettings"] = new GetSignatureSettings();
commandmap["setSignatureSettings"] = new SetSignatureSettings();
}
void SignaturesAt::loadParameters(void)
{
GhidraCommand::loadParameters();
PackedDecode decoder(ghidra);
ArchitectureGhidra::readStringStream(sin,decoder);
addr = Address::decode(decoder); // Parse XML for functions address
}
/// \brief Generate features for the function at the loaded address and send them to the client
///
/// The function is looked up in the global scope.  If it has not started processing yet,
/// it is run through the "normalize" action, temporarily making that the current action
/// if it is not already.  The previously current action is restored afterwards, even if
/// the action throws, so that a failure here cannot change the action used by later commands.
/// The features are then encoded directly onto the output stream, framed by two 4-byte
/// marker sequences.  The \b debug flag selects between debugSignature() and simpleSignature().
/// \throw LowlevelError if no function exists at the address
void SignaturesAt::rawAction(void)
{
  Funcdata *fd = ghidra->symboltab->getGlobalScope()->queryFunction(addr);
  if (fd == (Funcdata *)0) {
    ostringstream s;
    s << "Bad address for signatures: " << addr.getShortcut();
    addr.printRaw(s);
    s << '\n';
    throw LowlevelError(s.str());
  }
  if (!fd->isProcStarted()) {
    string curname = ghidra->allacts.getCurrentName();
    const bool switched = (curname != "normalize");	// Do we need to swap the current action
    Action *sigact;
    if (switched)
      sigact = ghidra->allacts.setCurrent("normalize");
    else
      sigact = ghidra->allacts.getCurrent();
    try {
#ifdef __REMOTE_SOCKET__
      connect_to_console(fd);
#endif
      sigact->reset(*fd);
      sigact->perform(*fd);
    }
    catch(...) {
      // Don't leave "normalize" installed as the current action for subsequent commands
      if (switched)
	ghidra->allacts.setCurrent(curname);
      throw;
    }
    if (switched)
      ghidra->allacts.setCurrent(curname);
  }
  sout.write("\000\000\001\016",4);	// Marker preceding the encoded response
  PackedEncode encoder(sout);		// Encode the response directly onto the output stream
  if (debug)
    debugSignature(fd,encoder);
  else
    simpleSignature(fd,encoder);
  sout.write("\000\000\001\017",4);	// Marker following the encoded response
}
void GetSignatureSettings::rawAction(void)
{
sout.write("\000\000\001\016",4); // Write output XML directly to outstream
PackedEncode encoder(sout);
encoder.openElement(ELEM_SIGSETTINGS);
encoder.openElement(ELEM_MAJOR);
encoder.writeSignedInteger(ATTRIB_CONTENT, ArchitectureCapability::getMajorVersion());
encoder.closeElement(ELEM_MAJOR);
encoder.openElement(ELEM_MINOR);
encoder.writeSignedInteger(ATTRIB_CONTENT, ArchitectureCapability::getMinorVersion());
encoder.closeElement(ELEM_MINOR);
encoder.openElement(ELEM_SETTINGS);
encoder.writeUnsignedInteger(ATTRIB_CONTENT, SigManager::getSettings());
encoder.closeElement(ELEM_SETTINGS);
encoder.closeElement(ELEM_SIGSETTINGS);
sout.write("\000\000\001\017",4);
}
void SetSignatureSettings::loadParameters(void)
{
string settingString;
GhidraCommand::loadParameters();
ArchitectureGhidra::readStringStream(sin,settingString);
istringstream s(settingString);
s.unsetf(ios::dec | ios::hex | ios::oct);
s >> settings;
}
/// \brief Validate the requested settings, install them if valid, and report the outcome
///
/// The value read by loadParameters() is checked with GraphSigManager::testSettings().
/// A rejected value leaves the current settings alone and the string "f" is written to
/// the output stream.  An accepted value is passed to SigManager::setSettings() and
/// the string "t" is written.
void SetSignatureSettings::rawAction(void)
{
  if (!GraphSigManager::testSettings(settings)) {
    ArchitectureGhidra::writeStringStream(sout,"f");
    return;
  }
  SigManager::setSettings(settings);
  ArchitectureGhidra::writeStringStream(sout,"t");
}
} // End namespace ghidra

View file

@ -0,0 +1,78 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// \file signature_ghidra.hh
/// \brief Feature/Signature generation commands that can be issued to the decompiler by the Ghidra client
#ifndef __GHIDRA_SIGNATURES_HH__
#define __GHIDRA_SIGNATURES_HH__
#include "ghidra_process.hh"
namespace ghidra {
/// \brief Signature command capability
///
/// This class is instantiated as a singleton and registers commands that the Ghidra client can issue
/// for generating feature vectors extracted from decompiled functions.
///
/// The constructors and the assignment operator are private, so the only instance is the
/// static member declared inside the class.
class GhidraSignatureCapability : public GhidraCapability {
static GhidraSignatureCapability ghidraSignatureCapability; ///< Singleton instance
GhidraSignatureCapability(void) { name = "signature"; } ///< Construct the singleton instance
// Copy construction and assignment are declared private and left without a definition to forbid copying
GhidraSignatureCapability(const GhidraSignatureCapability &op2); ///< Not implemented
GhidraSignatureCapability &operator=(const GhidraSignatureCapability &op2); ///< Not implemented
public:
virtual void initialize(void); ///< Install the signature commands into the command map
};
/// \brief Command that produces the feature vector for a single function
///
/// The client supplies the entry point address of a function.  Unless the function has
/// already been processed, it is decompiled using the "normalize" simplification style.
/// Features are then extracted from the function and returned to the Ghidra client.
/// Two forms of the command exist, chosen at construction time:
///   - the normal form returns a stream-lined encoding of the features for efficient transfer
///   - the debug form returns more descriptive meta-data with the features, suitable for
///     debugging or exploring the feature generation process
class SignaturesAt : public GhidraCommand {
  bool debug;					///< True if the command should return verbose feature encodings
  Address addr;					///< The entry point of the function to generate features for
  virtual void loadParameters(void);		///< Read the function address sent by the client
public:
  SignaturesAt(bool dbg) : debug(dbg) {}	///< Constructor specifying response format
  virtual void rawAction(void);			///< Generate the features and send them to the client
};
/// \brief Command to retrieve current decompiler settings being used for feature/signature generation
///
/// The command returns an opaque integer indicating the state of boolean properties affecting
/// feature generation. The reserved value of 0 indicates that no settings have been provided to the
/// decompiler process.  The response also carries the major and minor version numbers
/// reported by ArchitectureCapability.  The command declares no parameters of its own.
class GetSignatureSettings : public GhidraCommand {
public:
virtual void rawAction(void); ///< Encode the version numbers and settings value to the client
};
/// \brief Command to provide the global settings used by the decompiler process during feature/signature generation
///
/// The command expects to receive an opaque integer value encoding the state of boolean properties affecting
/// feature generation. The command returns 't' indicating a valid setting was received or 'f' for an invalid setting.
class SetSignatureSettings : public GhidraCommand {
uint4 settings; ///< Opaque settings value being requested
virtual void loadParameters(void);
public:
virtual void rawAction(void);
};
} // End namespace ghidra
#endif