ghidra/Ghidra/Features/Decompiler/src/decompile/cpp/dynamic.cc
2020-01-31 17:21:49 -05:00

533 lines
16 KiB
C++

/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dynamic.hh"
#include "funcdata.hh"
#include "crc32.hh"
// Table for how to hash op-codes. The table is indexed by the op-code of a PcodeOp and
// yields the value that is actually folded into the hash. Certain operators
// (i.e. ADD SUB PTRADD PTRSUB) are lumped into one hash value.
// A zero entry indicates the operator should be skipped when building hash edges.
uint4 DynamicHash::transtable[] = {
0,
CPUI_COPY, CPUI_LOAD, CPUI_STORE, CPUI_BRANCH, CPUI_CBRANCH, CPUI_BRANCHIND,
CPUI_CALL, CPUI_CALLIND, CPUI_CALLOTHER, CPUI_RETURN,
CPUI_INT_EQUAL, CPUI_INT_EQUAL, // NOT_EQUAL hashes same as EQUAL
CPUI_INT_SLESS, CPUI_INT_SLESS, // SLESSEQUAL hashes same as SLESS
CPUI_INT_LESS, CPUI_INT_LESS, // LESSEQUAL hashes same as LESS
CPUI_INT_ZEXT, CPUI_INT_SEXT,
CPUI_INT_ADD, CPUI_INT_ADD, // SUB hashes same as ADD
CPUI_INT_CARRY, CPUI_INT_SCARRY, CPUI_INT_SBORROW,
CPUI_INT_2COMP, CPUI_INT_NEGATE,
CPUI_INT_XOR, CPUI_INT_AND, CPUI_INT_OR, CPUI_INT_MULT, // LEFT hashes same as MULT
CPUI_INT_RIGHT, CPUI_INT_SRIGHT,
CPUI_INT_MULT, CPUI_INT_DIV, CPUI_INT_SDIV, CPUI_INT_REM, CPUI_INT_SREM,
CPUI_BOOL_NEGATE, CPUI_BOOL_XOR, CPUI_BOOL_AND, CPUI_BOOL_OR,
CPUI_FLOAT_EQUAL, CPUI_FLOAT_EQUAL, // NOTEQUAL hashes same as EQUAL
CPUI_FLOAT_LESS, CPUI_FLOAT_LESS, // LESSEQUAL hashes same as LESS
0, // Unused slot - skip
CPUI_FLOAT_NAN,
CPUI_FLOAT_ADD, CPUI_FLOAT_DIV, CPUI_FLOAT_MULT, CPUI_FLOAT_ADD, // SUB hashes same as ADD
CPUI_FLOAT_NEG, CPUI_FLOAT_ABS, CPUI_FLOAT_SQRT,
CPUI_FLOAT_INT2FLOAT, CPUI_FLOAT_FLOAT2FLOAT, CPUI_FLOAT_TRUNC, CPUI_FLOAT_CEIL, CPUI_FLOAT_FLOOR,
CPUI_FLOAT_ROUND,
CPUI_MULTIEQUAL, CPUI_INDIRECT, CPUI_PIECE, CPUI_SUBPIECE,
0, // CAST is skipped
CPUI_INT_ADD, CPUI_INT_ADD, // PTRADD and PTRSUB hash same as INT_ADD
CPUI_SEGMENTOP, CPUI_CPOOLREF, CPUI_NEW, CPUI_INSERT, CPUI_EXTRACT, CPUI_POPCOUNT
};
/// Edges are placed in a canonical order so that the hash is reproducible.
/// The comparison is made on the address of the PcodeOp's sequence number first,
/// then on the order field of the sequence number, and finally on the Varnode slot.
/// \param op2 is the edge to compare \b this to
/// \return \b true if \b this should be ordered before the other edge
bool ToOpEdge::operator<(const ToOpEdge &op2) const
{
  const Address &thisAddr( op->getSeqNum().getAddr() );
  const Address &otherAddr( op2.op->getSeqNum().getAddr() );
  if (thisAddr == otherAddr) {
    // Same address: fall back on the order within the sequence
    const uintm thisOrder = op->getSeqNum().getOrder();
    const uintm otherOrder = op2.op->getSeqNum().getOrder();
    if (thisOrder == otherOrder)
      return (slot < op2.slot);		// Same op: the slot is the final tie-breaker
    return (thisOrder < otherOrder);
  }
  return (thisAddr < otherAddr);
}
/// Fold \b this edge into a running hash.  In order, the accumulator receives:
///   - the Varnode slot
///   - the op-code of the PcodeOp, translated through DynamicHash::transtable
///   - the offset of the PcodeOp's address, one update per byte of the address size
///
/// Because the op-code is translated, op-codes that share a table entry
/// contribute the same value to the hash.
/// \param reg is the incoming hash accumulator value
/// \return the accumulator value with \b this edge folded in
uint4 ToOpEdge::hash(uint4 reg) const
{
  uint4 acc = crc_update(reg,(uint4)slot);
  acc = crc_update(acc,DynamicHash::transtable[op->code()]);

  const Address &opAddr( op->getSeqNum().getAddr() );
  uintb remaining = opAddr.getOffset();
  for(int4 bytesLeft=opAddr.getAddrSize();bytesLeft>0;--bytesLeft) {
    acc = crc_update(acc,(uint4)remaining);	// Hash in the address
    remaining >>= 8;				// Advance to the next byte of the offset
  }
  return acc;
}
/// When building the edge, certain p-code ops (CAST) are effectively ignored so that
/// we get the same hash whether or not these ops are present.
/// \param vn is the given Varnode
void DynamicHash::buildVnUp(const Varnode *vn)
{
const PcodeOp *op;
for(;;) {
if (!vn->isWritten()) return;
op = vn->getDef();
if (transtable[op->code()] != 0) break; // Do not ignore this operation
vn = op->getIn(0);
}
opedge.push_back(ToOpEdge(op,-1));
}
/// When building edges, certain p-code ops (CAST) are effectively ignored so that
/// we get the same hash whether or not these ops are present.
/// \param vn is the given Varnode
void DynamicHash::buildVnDown(const Varnode *vn)
{
list<PcodeOp *>::const_iterator iter;
uint4 insize = opedge.size();
for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) {
const PcodeOp *op = *iter;
const Varnode *tmpvn = vn;
while(transtable[op->code()]==0) {
tmpvn = op->getOut();
if (tmpvn == (const Varnode *)0) {
op = (const PcodeOp *)0;
break;
}
op = tmpvn->loneDescend();
if (op == (const PcodeOp *)0) break;
}
if (op == (const PcodeOp *)0) continue;
int4 slot = op->getSlot(tmpvn);
opedge.push_back(ToOpEdge(op,slot));
}
if ((uint4)opedge.size()-insize > 1)
sort(opedge.begin()+insize,opedge.end());
}
/// \param op is the given PcodeOp thats already in the sub-graph
void DynamicHash::buildOpUp(const PcodeOp *op)
{
for(int4 i=0;i<op->numInput();++i) {
const Varnode *vn = op->getIn(i);
vnedge.push_back(vn);
}
}
/// \param op is the given PcodeOp thats already in the sub-graph
void DynamicHash::buildOpDown(const PcodeOp *op)
{
const Varnode *vn = op->getOut();
if (vn == (const Varnode *)0) return;
vnedge.push_back(vn);
}
void DynamicHash::gatherUnmarkedVn(void)
{
for(int4 i=0;i<vnedge.size();++i) {
const Varnode *vn = vnedge[i];
if (vn->isMark()) continue;
markvn.push_back(vn);
vn->setMark();
}
vnedge.clear();
}
void DynamicHash::gatherUnmarkedOp(void)
{
for(;opedgeproc<opedge.size();++opedgeproc) {
const PcodeOp *op = opedge[opedgeproc].getOp();
if (op->isMark()) continue;
markop.push_back(op);
op->setMark();
}
}
/// Empty the edge lists and the lists of marked objects so a new hash can be calculated.
/// The processing counters are not touched here; calcHash() resets them itself.
void DynamicHash::clear(void)
{
  opedge.clear();	// Edges to PcodeOps
  vnedge.clear();	// Pending Varnode edges
  markvn.clear();	// Varnodes in the sub-graph
  markop.clear();	// PcodeOps in the sub-graph
}
/// A sub-graph is grown outward from the given root Varnode and the edges of that
/// sub-graph are folded into a hash.  The \e method selects how far the sub-graph extends:
///   - Method 0 uses just the p-code ops immediately writing or reading the root
///   - Method 1 adds the inputs of those ops and the ops writing those inputs
///   - Method 2 adds the outputs of those ops and the ops reading those outputs
///   - Method 3 adds the inputs of those ops and the ops reading those inputs
///
/// Any other method value leaves the sub-graph as in method 0.
///
/// The final 64-bit value is laid out as:
///   - bits 0-31:  the 32-bit neighborhood hash
///   - bits 32-36: the slot of the root in the selected op (31 encodes an output)
///   - bits 37-43: the op-code of the selected op
///   - bits 44-47: the method
///   - bit 48:     set if no op directly attached to the root was found
///
/// If no edge could be built, the hash is set to 0 and the address to a default Address.
/// The resulting hash and address can be obtained after calling this method
/// through getHash() and getAddress().  The edge and mark lists are not emptied here;
/// callers in this file invoke clear() beforehand.
/// \param root is the given root Varnode
/// \param method is the hashing method to use: 0, 1, 2, 3
void DynamicHash::calcHash(const Varnode *root,uint4 method)
{
  opedgeproc = 0;
  opproc = 0;
  vnproc = 0;

  // First level: ops writing and reading the root
  vnedge.push_back(root);
  gatherUnmarkedVn();
  for(uint4 i=vnproc;i<markvn.size();++i)
    buildVnUp(markvn[i]);
  for(;vnproc<markvn.size();++vnproc)
    buildVnDown(markvn[vnproc]);

  // Second level, only for methods 1, 2 and 3
  if ((method >= 1)&&(method <= 3)) {
    const bool opInputs = (method != 2);	// Methods 1 and 3 extend through op inputs
    const bool vnWriters = (method == 1);	// Only method 1 extends to ops writing the new Varnodes
    gatherUnmarkedOp();
    for(;opproc<markop.size();++opproc) {
      if (opInputs)
	buildOpUp(markop[opproc]);
      else
	buildOpDown(markop[opproc]);
    }
    gatherUnmarkedVn();
    for(;vnproc<markvn.size();++vnproc) {
      if (vnWriters)
	buildVnUp(markvn[vnproc]);
      else
	buildVnDown(markvn[vnproc]);
    }
  }

  // Clear our marks
  for(uint4 i=0;i<markvn.size();++i)
    markvn[i]->clearMark();
  for(uint4 i=0;i<markop.size();++i)
    markop[i]->clearMark();

  if (opedge.empty()) {			// Nothing to hash
    hash = (uint8)0;
    addrresult = Address();
    return;
  }

  // Calculate the 32-bit hash, starting with information about the root
  uint4 reg = crc_update(0x3ba0fe06,(uint4)root->getSize());
  if (root->isConstant()) {
    uintb val = root->getOffset();
    for(int4 i=0;i<root->getSize();++i) {
      reg = crc_update(reg,(uint4)val);
      val >>= 8;
    }
  }
  for(uint4 i=0;i<opedge.size();++i)
    reg = opedge[i].hash(reg);

  // Select the op reported in the final hash.  Default to the first edge, which is
  // used if everything attached to the root was a skip op.
  const PcodeOp *op = opedge[0].getOp();
  int4 slot = opedge[0].getSlot();
  bool attachedop = false;
  for(uint4 ct=0;ct<opedge.size();++ct) {	// Find op that is directly attached to -root- i.e. not a skip op
    const PcodeOp *curop = opedge[ct].getOp();
    const int4 curslot = opedge[ct].getSlot();
    const bool direct = (curslot < 0) ? (curop->getOut() == root) : (curop->getIn(curslot) == root);
    if (direct) {
      op = curop;
      slot = curslot;
      attachedop = true;
      break;
    }
  }

  // Build the final 64-bit hash (15 bits unused)
  uint8 packed = attachedop ? 0 : 1;
  packed = (packed << 4) | method;			// 4-bits
  packed = (packed << 7) | (uint8)op->code();		// 7-bits
  packed = (packed << 5) | (uint8)(slot & 0x1f);	// 5-bits
  packed = (packed << 32) | (uint8)reg;			// 32-bits for the neighborhood hash
  hash = packed;
  addrresult = op->getSeqNum().getAddr();
}
/// Collect the set of Varnodes at the same address as the given Varnode.
/// Starting with method 0, increment the method and calculate hashes
/// of the Varnodes until the given Varnode has a unique hash within the set.
/// The resulting hash and address can be obtained after calling this method
/// through getHash() and getAddress().
///
/// In the rare situation that the last method still does not yield a unique hash,
/// the hash encodes:
/// - the smallest number of hash collisions
/// - the method that produced the smallest number of hash collisions
/// - the position of the root within the collision list
///
/// For most cases, this will still uniquely identify the root Varnode.
/// \param root is the given root Varnode
/// \param fd is the function (holding the data-flow graph)
void DynamicHash::uniqueHash(const Varnode *root,Funcdata *fd)
{
vector<Varnode *> vnlist;
vector<Varnode *> vnlist2;
vector<Varnode *> champion;
uint4 method;
uint8 tmphash;
Address tmpaddr;
uint4 maxduplicates = 8;
for(method=0;method<4;++method) {
clear();
calcHash(root,method);
if (hash == 0) return; // Can't get a good hash
tmphash = hash;
tmpaddr = addrresult;
vnlist.clear();
vnlist2.clear();
gatherFirstLevelVars(vnlist,fd,tmpaddr,tmphash);
for(uint4 i=0;i<vnlist.size();++i) {
Varnode *tmpvn = vnlist[i];
clear();
calcHash(tmpvn,method);
if (hash == tmphash) { // Hash collision
vnlist2.push_back(tmpvn);
if (vnlist2.size()>maxduplicates) break;
}
}
if (vnlist2.size() <= maxduplicates) {
if ((champion.size()==0)||(vnlist2.size() < champion.size())) {
champion = vnlist2;
if (champion.size()==1) break; // Current hash is unique
}
}
}
if (champion.empty()) {
hash = (uint8)0;
addrresult = Address(); // Couldn't find a unique hash
return;
}
uint4 total = (uint4)champion.size() - 1; // total is in range [0,maxduplicates-1]
uint4 pos;
for(pos=0;pos<=total;++pos)
if (champion[pos] == root) break;
if (pos > total) {
hash = (uint8)0;
addrresult = Address();
return;
}
hash = tmphash | ((uint8)pos << 49); // Store three bits for position with list of duplicate hashes
hash |= ((uint8)total << 52); // Store three bits for total number of duplicate hashes
addrresult = tmpaddr;
}
/// \brief Given an address and hash, find the unique matching Varnode
///
/// The method, number of collisions, and position are pulled out of the hash.
/// Hashes for the method are performed at Varnodes linked to the given address,
/// and the Varnode which matches the hash (and the position) is returned.
/// If the number of collisions for the hash does not match, this method
/// will not return a Varnode, even if the position looks valid.
/// Likewise, a hash whose encoded position does not index into the list of
/// matching Varnodes yields NULL rather than reading past the end of the list.
/// \param fd is the function containing the data-flow
/// \param addr is the given address
/// \param h is the hash
/// \return the matching Varnode or NULL
Varnode *DynamicHash::findVarnode(const Funcdata *fd,const Address &addr,uint8 h)
{
  uint4 method = getMethodFromHash(h);
  uint4 total = getTotalFromHash(h);
  uint4 pos = getPositionFromHash(h);
  clearTotalPosition(h);		// Compare against the raw hash only
  vector<Varnode *> vnlist;
  vector<Varnode *> vnlist2;
  gatherFirstLevelVars(vnlist,fd,addr,h);
  for(uint4 i=0;i<vnlist.size();++i) {
    Varnode *tmpvn = vnlist[i];
    clear();
    calcHash(tmpvn,method);
    if (hash == h)
      vnlist2.push_back(tmpvn);
  }
  if (total != vnlist2.size()) return (Varnode *)0;
  if (pos >= vnlist2.size()) return (Varnode *)0;	// Position field is inconsistent with the total
  return vnlist2[pos];
}
/// \brief Get the Varnodes immediately attached to PcodeOps at the given address
///
/// Every PcodeOp at the address whose op-code matches the one encoded in the hash is
/// examined.  If the encoded slot is negative the op's output is the candidate, otherwise
/// the input at that slot (when the op has that many inputs).  If the hash indicates the
/// original Varnode was not directly attached, a single skipped op (one with a zero
/// translation table entry) adjacent to the candidate is stepped over.
/// Varnodes are passed back in sequence with a list container.
/// \param varlist is the container that will hold the matching Varnodes
/// \param fd is the function holding the data-flow
/// \param addr is the given address
/// \param h is the given hash
void DynamicHash::gatherFirstLevelVars(vector<Varnode *> &varlist,const Funcdata *fd,const Address &addr,uint8 h)
{
  const OpCode opc = getOpCodeFromHash(h);
  const int4 slot = getSlotFromHash(h);
  const bool isnotattached = getIsNotAttached(h);

  const PcodeOpTree::const_iterator enditer = fd->endOp(addr);
  for(PcodeOpTree::const_iterator iter=fd->beginOp(addr);iter!=enditer;++iter) {
    PcodeOp *op = (*iter).second;
    if (op->code() != opc) continue;

    if (slot < 0) {				// Candidate is the output of the op
      Varnode *vn = op->getOut();
      if (vn == (Varnode *)0) continue;
      if (isnotattached) {			// If original varnode was not attached to (this) op
	PcodeOp *reader = vn->loneDescend();
	if ((reader != (PcodeOp *)0)&&(transtable[reader->code()] == 0)) {	// Check for skipped op
	  vn = reader->getOut();
	  if (vn == (Varnode *)0) continue;
	}
      }
      varlist.push_back(vn);
    }
    else if (slot < op->numInput()) {		// Candidate is an input of the op
      Varnode *vn = op->getIn(slot);
      if (isnotattached) {
	PcodeOp *defOp = vn->getDef();
	if ((defOp != (PcodeOp *)0)&&(transtable[defOp->code()]==0))
	  vn = defOp->getIn(0);			// Step back over the skipped op
      }
      varlist.push_back(vn);
    }
  }
}
/// The \e slot is stored in the 5 bits starting at bit 32 of the hash.
/// A stored value of 31 stands for an output attachment.
/// \param h is the hash value
/// \return the slot index or -1 if the Varnode was attached as output
int4 DynamicHash::getSlotFromHash(uint8 h)
{
  const int4 field = (int4)((h>>32)&0x1f);
  return (field == 31) ? -1 : field;
}
/// The \e method used to produce the hash is stored in the 4 bits starting at bit 44.
/// \param h is the hash value
/// \return the method: 0, 1, 2, 3
uint4 DynamicHash::getMethodFromHash(uint8 h)
{
  const uint8 field = (h>>44)&0xf;
  return (uint4)field;
}
/// The op-code of the p-code op attached to the root Varnode is stored in the
/// 7 bits starting at bit 37 of the hash.
/// \param h is the hash value
/// \return the op-code
OpCode DynamicHash::getOpCodeFromHash(uint8 h)
{
  const uint8 field = (h>>37)&0x7f;
  return (OpCode)field;
}
/// The position of the root Varnode within the list of hash collisions is stored
/// in the 3 bits starting at bit 49 of the hash.
/// \param h is the hash value
/// \return the position of the root
uint4 DynamicHash::getPositionFromHash(uint8 h)
{
  const uint8 field = (h>>49)&7;
  return (uint4)field;
}
/// The total number of collisions is stored, less one, in the 3 bits starting
/// at bit 52 of the hash.
/// \param h is the hash value
/// \return the total number of collisions
uint4 DynamicHash::getTotalFromHash(uint8 h)
{
  const uint4 stored = (uint4)((h>>52)&7);
  return stored + 1;			// The field holds the count minus one
}
/// Bit 48 of the hash records whether the root was directly attached to its PcodeOp;
/// the bit is set when it was \e not.
/// \param h is the hash value
/// \return \b true if the root was not attached
bool DynamicHash::getIsNotAttached(uint8 h)
{
  const uint8 flag = (h>>48)&1;
  return (flag != 0);
}
/// The position and total collisions fields (the 6 bits starting at bit 49) are filled
/// in by the uniqueness calculation and need to be zeroed when comparing raw hashes.
/// \param h is a reference to the hash to modify
void DynamicHash::clearTotalPosition(uint8 &h)
{
  const uint8 fieldMask = ((uint8)0x3f) << 49;
  h &= ~fieldMask;
}