mirror of
https://github.com/NationalSecurityAgency/ghidra.git
synced 2025-10-04 02:09:44 +02:00
GP-4733 Heap strings
This commit is contained in:
parent
265f0072f3
commit
9a416877e8
11 changed files with 770 additions and 240 deletions
|
@ -4,9 +4,9 @@
|
|||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -18,27 +18,177 @@
|
|||
|
||||
namespace ghidra {
|
||||
|
||||
/// Smallest number of ops a StringSequence may retain; candidate sequences with fewer are rejected
const int4 StringSequence::MINIMUM_SEQUENCE_LENGTH = 4;
|
||||
/// Smallest number of elements an ArraySequence may contain; candidate sequences with fewer are rejected
const int4 ArraySequence::MINIMUM_SEQUENCE_LENGTH = 4;
|
||||
|
||||
/// Initialize the sequence with the \b root operation which writes the earliest character in the memory region.
|
||||
/// \param fdata is the function containing the sequence
|
||||
/// \param ct is the data-type of an element in the array
|
||||
/// \param root is the PcodeOp to be interpreted as the root, copying the earliest element
|
||||
ArraySequence::ArraySequence(Funcdata &fdata,Datatype *ct,PcodeOp *root)
|
||||
:data(fdata)
|
||||
{
|
||||
rootOp = root;
|
||||
charType = ct;
|
||||
block = rootOp->getParent();
|
||||
numElements = 0;
|
||||
}
|
||||
|
||||
/// The output Varnodes themselves should be verified to only be read outside of the basic block.
|
||||
/// So effectively only LOADs, STOREs, and CALLs can really interfere. Check for these between the given ops.
|
||||
/// \param startOp is the starting op to check
|
||||
/// \param endOp is the ending op
|
||||
/// \return \b true if there is no interference, \b false if there is possible interference
|
||||
bool ArraySequence::interfereBetween(PcodeOp *startOp,PcodeOp *endOp)
|
||||
|
||||
{
|
||||
startOp = startOp->nextOp();
|
||||
while(startOp != endOp) {
|
||||
if (startOp->getEvalType() == PcodeOp::special) {
|
||||
OpCode opc = startOp->code();
|
||||
if (opc != CPUI_INDIRECT && opc != CPUI_CALLOTHER &&
|
||||
opc != CPUI_SEGMENTOP && opc != CPUI_CPOOLREF && opc != CPUI_NEW)
|
||||
return false;
|
||||
}
|
||||
startOp = startOp->nextOp();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Sort the ops based on block order. Starting with the root op, walk backward until an interfering
|
||||
/// gap is found or until the earliest op is reached. Similarly, walk forward until an interfering gap is found.
|
||||
/// Truncate the op array to be this smaller set. If too many were truncated, return \b false.
|
||||
/// \return \b true if a maximal set of ops is found containing at the least the minimum number required
|
||||
bool ArraySequence::checkInterference(void)
|
||||
|
||||
{
|
||||
sort(moveOps.begin(),moveOps.end()); // Sort ops based on basic block order
|
||||
int4 pos;
|
||||
for(pos=0;pos<moveOps.size();++pos) {
|
||||
if (moveOps[pos].op == rootOp) break;
|
||||
}
|
||||
if (pos == moveOps.size()) return false;
|
||||
PcodeOp *curOp = moveOps[pos].op;
|
||||
int4 startingPos,endingPos;
|
||||
for(startingPos=pos-1;startingPos>=0;--startingPos) {
|
||||
PcodeOp *prevOp = moveOps[startingPos].op;
|
||||
if (!interfereBetween(prevOp,curOp))
|
||||
break;
|
||||
curOp = prevOp;
|
||||
}
|
||||
startingPos += 1;
|
||||
curOp = moveOps[pos].op;
|
||||
for(endingPos=pos+1;endingPos < moveOps.size();++endingPos) {
|
||||
PcodeOp *nextOp = moveOps[endingPos].op;
|
||||
if (!interfereBetween(curOp,nextOp))
|
||||
break;
|
||||
curOp = nextOp;
|
||||
}
|
||||
if (endingPos- startingPos < MINIMUM_SEQUENCE_LENGTH)
|
||||
return false;
|
||||
if (startingPos > 0) {
|
||||
for(int4 i=startingPos;i<endingPos;++i) {
|
||||
moveOps[i-startingPos] = moveOps[i];
|
||||
}
|
||||
}
|
||||
moveOps.resize(endingPos-startingPos,WriteNode(0,(PcodeOp *)0,-1));
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Create an array of bytes being written into the memory region.
|
||||
/// Run through the ops and place their constant input (at given \b slot) into the array based on their
|
||||
/// offset, relative to the given root offset.
|
||||
/// If there are gaps in the byte array, remove any op that doesn't write to the contiguous
|
||||
/// region in front of the root. Return 0 if the contiguous region is too small.
|
||||
/// \param sz is the maximum size of the byte array
|
||||
/// \param slot is the slot to fetch input constants from
|
||||
/// \param rootOff is the root offset
|
||||
/// \param bigEndian is \b true if constant inputs have big endian encoding
|
||||
/// \return the number of characters in the contiguous region
|
||||
int4 ArraySequence::formByteArray(int4 sz,int4 slot,uint8 rootOff,bool bigEndian)
|
||||
|
||||
{
|
||||
byteArray.resize(sz,0);
|
||||
vector<uint1> used(sz,0);
|
||||
int4 elSize = charType->getSize();
|
||||
for(int4 i=0;i<moveOps.size();++i) {
|
||||
int4 bytePos = moveOps[i].offset - rootOff;
|
||||
if (bytePos + elSize > sz) continue;
|
||||
uint8 val = moveOps[i].op->getIn(slot)->getOffset();
|
||||
used[bytePos] = (val == 0) ? 2 : 1; // Mark byte as used, a 2 indicates a null terminator
|
||||
if (bigEndian) {
|
||||
for(int4 j=0;j<elSize;++j) {
|
||||
uint1 b = (val >> (elSize-1-j)*8) & 0xff;
|
||||
byteArray[bytePos+j] = b;
|
||||
}
|
||||
}
|
||||
else {
|
||||
for(int4 j=0;j<elSize;++j) {
|
||||
byteArray[bytePos+j] = (uint1)val;
|
||||
val >>= 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
int4 bigElSize = charType->getAlignSize();
|
||||
int4 maxEl = used.size() / bigElSize;
|
||||
int4 count;
|
||||
for(count=0;count<maxEl;count += 1) {
|
||||
uint1 val = used[ count * bigElSize ];
|
||||
if (val != 1) { // Count number of characters not including null terminator
|
||||
if (val == 2)
|
||||
count += 1; // Allow a single null terminator
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (count < MINIMUM_SEQUENCE_LENGTH)
|
||||
return 0;
|
||||
if (count != moveOps.size()) {
|
||||
uint8 maxOff = rootOff + count * bigElSize;
|
||||
vector<WriteNode> finalOps;
|
||||
for(int4 i=0;i<moveOps.size();++i) {
|
||||
if (moveOps[i].offset < maxOff)
|
||||
finalOps.push_back(moveOps[i]);
|
||||
}
|
||||
moveOps.swap(finalOps);
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/// Use the \b charType to select the appropriate string copying function. If a match to the \b charType
|
||||
/// doesn't exist, use a built-in \b memcpy function. The id of the selected built-in function is returned.
|
||||
/// The value indicating either the number of characters or number of bytes being copied is also passed back.
|
||||
/// \param index will hold the number of elements being copied
|
||||
uint4 ArraySequence::selectStringCopyFunction(int4 &index)
|
||||
|
||||
{
|
||||
TypeFactory *types = data.getArch()->types;
|
||||
if (charType == types->getTypeChar(types->getSizeOfChar())) {
|
||||
index = numElements;
|
||||
return UserPcodeOp::BUILTIN_STRNCPY;
|
||||
}
|
||||
else if (charType == types->getTypeChar(types->getSizeOfWChar())) {
|
||||
index = numElements;
|
||||
return UserPcodeOp::BUILTIN_WCSNCPY;
|
||||
}
|
||||
index = numElements * charType->getAlignSize();
|
||||
return UserPcodeOp::BUILTIN_MEMCPY;
|
||||
}
|
||||
|
||||
/// \brief Set-up for recovering COPY ops into a memory range, given a Symbol and an Address being COPYed into
|
||||
///
|
||||
/// The SymbolEntry and Address are passed in, with an expected data-type. Check if there is an array
|
||||
/// of the data-type within the Symbol, and if so, initialize the memory range for the
|
||||
/// sequence. Follow on with gathering PcodeOps and testing if the sequence is viable. If not, the
|
||||
/// size of the memory range will be set to zero.
|
||||
/// of the data-type within the Symbol, and if so, initialize the memory range for the sequence.
|
||||
/// Follow on with gathering PcodeOps and testing if the sequence is viable. If not, the size of the memory
|
||||
/// range will be set to zero.
|
||||
/// \param fdata is the function containing the root COPY
|
||||
/// \param ct is the specific data-type for which there should be an array
|
||||
/// \param ent is the given Symbol
|
||||
/// \param root is the COPY holding the constant
|
||||
/// \param addr is the Address being COPYed into
|
||||
StringSequence::StringSequence(Funcdata &fdata,Datatype *ct,SymbolEntry *ent,PcodeOp *root,const Address &addr)
|
||||
: data(fdata)
|
||||
: ArraySequence(fdata,ct,root)
|
||||
{
|
||||
rootOp = root;
|
||||
rootAddr = addr;
|
||||
charType = ct;
|
||||
entry = ent;
|
||||
size = 0;
|
||||
if (entry->getAddr().getSpace() != addr.getSpace())
|
||||
return;
|
||||
int8 off = rootAddr.getOffset() - entry->getFirst();
|
||||
|
@ -47,43 +197,33 @@ StringSequence::StringSequence(Funcdata &fdata,Datatype *ct,SymbolEntry *ent,Pco
|
|||
if (rootOp->getIn(0)->getOffset() == 0)
|
||||
return;
|
||||
Datatype *parentType = entry->getSymbol()->getType();
|
||||
Datatype *lastType = (Datatype *)0;
|
||||
Datatype *arrayType = (Datatype *)0;
|
||||
int8 lastOff = 0;
|
||||
do {
|
||||
if (parentType == ct)
|
||||
break;
|
||||
lastType = parentType;
|
||||
arrayType = parentType;
|
||||
lastOff = off;
|
||||
parentType = parentType->getSubType(off, &off);
|
||||
} while(parentType != (Datatype *)0);
|
||||
if (parentType != ct || lastType == (Datatype *)0 || lastType->getMetatype() != TYPE_ARRAY)
|
||||
if (parentType != ct || arrayType == (Datatype *)0 || arrayType->getMetatype() != TYPE_ARRAY)
|
||||
return;
|
||||
startAddr = rootAddr - lastOff;
|
||||
size = ((TypeArray *)lastType)->numElements() * charType->getAlignSize();
|
||||
block = rootOp->getParent();
|
||||
if (collectCopyOps()) {
|
||||
if (checkCopyInterference()) {
|
||||
if (formByteArray()) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
clear();
|
||||
}
|
||||
|
||||
void StringSequence::clear(void)
|
||||
|
||||
{
|
||||
size = 0;
|
||||
moveOps.clear();
|
||||
if (!collectCopyOps(arrayType->getSize()))
|
||||
return;
|
||||
if (!checkInterference())
|
||||
return;
|
||||
int4 arrSize = arrayType->getSize() - (int4)(rootAddr.getOffset() - startAddr.getOffset());
|
||||
numElements = formByteArray(arrSize,0,rootAddr.getOffset(),rootAddr.isBigEndian());
|
||||
}
|
||||
|
||||
/// The COPYs must be in the same basic block.
|
||||
/// If any COPY size does not match the \b copyType, return \b false.
|
||||
/// If there is a COPY to the array entry before rootVn, return \b false.
|
||||
/// Otherwise earlier COPYs are skipped. No COPYs are collected after the first gap (entry with no COPY to it).
|
||||
/// \param size is the number of bytes in the memory region
|
||||
/// \return \b true to indicate legal COPY ops of constants were recovered.
|
||||
bool StringSequence::collectCopyOps(void)
|
||||
bool StringSequence::collectCopyOps(int4 size)
|
||||
|
||||
{
|
||||
Address endAddr = startAddr + (size - 1); // startAddr - endAddr bounds the formal array
|
||||
|
@ -122,67 +262,6 @@ bool StringSequence::collectCopyOps(void)
|
|||
return (moveOps.size() >= MINIMUM_SEQUENCE_LENGTH);
|
||||
}
|
||||
|
||||
/// The output Varnodes themselves should be verified to only be read outside of the basic block.
|
||||
/// So effectively only LOADs, STOREs, and CALLs can really interfere. Check for these between the given ops.
|
||||
/// \param startOp is the is the starting COPY
|
||||
/// \param endOp is the ending COPY
|
||||
/// \return \b true if there is no interference, \b false if there is possible interference
|
||||
bool StringSequence::checkBetweenCopy(PcodeOp *startOp,PcodeOp *endOp)
|
||||
|
||||
{
|
||||
startOp = startOp->nextOp();
|
||||
while(startOp != endOp) {
|
||||
if (startOp->getEvalType() == PcodeOp::special) {
|
||||
OpCode opc = startOp->code();
|
||||
if (opc != CPUI_INDIRECT && opc != CPUI_CALLOTHER &&
|
||||
opc != CPUI_SEGMENTOP && opc != CPUI_CPOOLREF && opc != CPUI_NEW)
|
||||
return false;
|
||||
}
|
||||
startOp = startOp->nextOp();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Sort the COPY ops based on block order. Starting with the root COPY, walk backward until an interfering
|
||||
/// gap is found or until the earliest COPY is reached. Similarly, walk forward until an interfering gap is found.
|
||||
/// Truncate the COPY op array to be this smaller set. If too many were truncated, return \b false.
|
||||
/// \return \b true if a maximal set of COPYs is found containing at the least the minimum number required
|
||||
bool StringSequence::checkCopyInterference(void)
|
||||
|
||||
{
|
||||
sort(moveOps.begin(),moveOps.end()); // Sort COPYs based on basic block order
|
||||
int4 pos;
|
||||
for(pos=0;pos<moveOps.size();++pos) {
|
||||
if (moveOps[pos].op == rootOp) break;
|
||||
}
|
||||
if (pos == moveOps.size()) return false;
|
||||
PcodeOp *curOp = moveOps[pos].op;
|
||||
int4 startingPos,endingPos;
|
||||
for(startingPos=pos-1;startingPos>=0;--startingPos) {
|
||||
PcodeOp *prevOp = moveOps[startingPos].op;
|
||||
if (!checkBetweenCopy(prevOp,curOp))
|
||||
break;
|
||||
curOp = prevOp;
|
||||
}
|
||||
startingPos += 1;
|
||||
curOp = moveOps[pos].op;
|
||||
for(endingPos=pos+1;endingPos < moveOps.size();++endingPos) {
|
||||
PcodeOp *nextOp = moveOps[endingPos].op;
|
||||
if (!checkBetweenCopy(curOp,nextOp))
|
||||
break;
|
||||
curOp = nextOp;
|
||||
}
|
||||
if (endingPos- startingPos < MINIMUM_SEQUENCE_LENGTH)
|
||||
return false;
|
||||
if (startingPos > 0) {
|
||||
for(int4 i=startingPos;i<endingPos;++i) {
|
||||
moveOps[i-startingPos] = moveOps[i];
|
||||
}
|
||||
}
|
||||
moveOps.resize(endingPos-startingPos,WriteNode(0,(PcodeOp *)0,-1));
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Construct a Varnode, with data-type, that acts as a pointer (in)to the Symbol to the root Address
|
||||
///
|
||||
/// First, a PTRSUB is built from the base register to the Symbol. Then depending on its data-type, additional
|
||||
|
@ -258,82 +337,6 @@ Varnode *StringSequence::constructTypedPointer(PcodeOp *insertPoint)
|
|||
return spacePtr;
|
||||
}
|
||||
|
||||
/// Create an array of bytes from the root Varnode to the extent of the memory region.
|
||||
/// Run through the COPYs and place their constant input into the array.
|
||||
/// If there are gaps in the byte array, remove any COPY that doesn't write to the contiguous
|
||||
/// region in front of the root Varnode. Return \b false if the contiguous region is too small.
|
||||
/// \return \b true if there exists enough COPYs that write into the region in front of the root Varnode without gaps
|
||||
bool StringSequence::formByteArray(void)
|
||||
|
||||
{
|
||||
int4 diff = rootAddr.getOffset() - startAddr.getOffset();
|
||||
byteArray.resize(size-diff,0);
|
||||
vector<uint1> used(size-diff,0);
|
||||
int4 elSize = charType->getSize();
|
||||
bool isBigEndian = rootAddr.isBigEndian();
|
||||
for(int4 i=0;i<moveOps.size();++i) {
|
||||
int4 bytePos = moveOps[i].offset - rootAddr.getOffset();
|
||||
if (used[bytePos] != 0)
|
||||
return false; // Multiple COPYs to same place
|
||||
uint8 val = moveOps[i].op->getIn(0)->getOffset();
|
||||
used[bytePos] = (val == 0) ? 2 : 1; // Mark byte as used, a 2 indicates a null terminator
|
||||
if (isBigEndian) {
|
||||
for(int4 j=0;j<elSize;++j) {
|
||||
uint1 b = (val >> (elSize-1-j)*8) & 0xff;
|
||||
byteArray[bytePos+j] = b;
|
||||
}
|
||||
}
|
||||
else {
|
||||
for(int4 j=0;j<elSize;++j) {
|
||||
byteArray[bytePos+j] = (uint1)val;
|
||||
val >>= 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
int4 bigElSize = charType->getAlignSize();
|
||||
int4 count;
|
||||
for(count=0;count<moveOps.size();++count) {
|
||||
uint1 val = used[ count * bigElSize ];
|
||||
if (val != 1) { // Count number of characters not including null terminator
|
||||
if (val == 2)
|
||||
count += 1; // Allow a single null terminator
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (count < MINIMUM_SEQUENCE_LENGTH)
|
||||
return false;
|
||||
if (count != moveOps.size()) {
|
||||
uintb maxOff = rootAddr.getOffset() + count * bigElSize;
|
||||
vector<WriteNode> finalOps;
|
||||
for(int4 i=0;i<moveOps.size();++i) {
|
||||
if (moveOps[i].offset < maxOff)
|
||||
finalOps.push_back(moveOps[i]);
|
||||
}
|
||||
moveOps.swap(finalOps);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Use the \b charType to select the appropriate string copying function. If a match to the \b charType
|
||||
/// doesn't exist, use a built-in \b memcpy function. The id of the selected built-in function is returned.
|
||||
/// The value indicating either the number of characters or number of bytes being copied is also passed back.
|
||||
/// \param index will hold the number of elements being copied
|
||||
uint4 StringSequence::selectStringCopyFunction(int4 &index)
|
||||
|
||||
{
|
||||
TypeFactory *types = data.getArch()->types;
|
||||
if (charType == types->getTypeChar(types->getSizeOfChar())) {
|
||||
index = moveOps.size();
|
||||
return UserPcodeOp::BUILTIN_STRNCPY;
|
||||
}
|
||||
else if (charType == types->getTypeChar(types->getSizeOfWChar())) {
|
||||
index = moveOps.size();
|
||||
return UserPcodeOp::BUILTIN_WCSNCPY;
|
||||
}
|
||||
index = moveOps.size() * charType->getSize();
|
||||
return UserPcodeOp::BUILTIN_MEMCPY;
|
||||
}
|
||||
|
||||
/// A built-in user-op that copies string data is created. Its first (destination) parameter is constructed
|
||||
/// as a pointer to the array holding the character data, which may be nested in other arrays or structures.
|
||||
/// The second (source) parameter is an \e internal \e string constructed from the \b byteArray. The
|
||||
|
@ -386,7 +389,7 @@ void StringSequence::removeForward(const WriteNode &curNode,map<PcodeOp *,list<W
|
|||
map<PcodeOp *,list<WriteNode>::iterator>::iterator miter = xref.find(op);
|
||||
if (miter != xref.end()) {
|
||||
// We have seen the PIECE twice
|
||||
uintb off = (*(*miter).second).offset;
|
||||
uint8 off = (*(*miter).second).offset;
|
||||
if (curNode.offset < off)
|
||||
off = curNode.offset;
|
||||
points.erase((*miter).second);
|
||||
|
@ -456,19 +459,382 @@ bool StringSequence::transform(void)
|
|||
return true;
|
||||
}
|
||||
|
||||
void RuleStringSequence::getOpList(vector<uint4> &oplist) const
|
||||
/// From a starting pointer, backtrack through PTRADDs to a putative root Varnode pointer.
|
||||
/// \param initPtr is pointer Varnode into the root STORE
|
||||
void HeapSequence::findBasePointer(Varnode *initPtr)
|
||||
|
||||
{
|
||||
basePointer = initPtr;
|
||||
while(basePointer->isWritten()) {
|
||||
PcodeOp *op = basePointer->getDef();
|
||||
if (op->code() != CPUI_PTRADD) break;
|
||||
int8 sz = op->getIn(2)->getOffset();
|
||||
if (sz != charType->getAlignSize()) break;
|
||||
basePointer = op->getIn(0);
|
||||
}
|
||||
}
|
||||
|
||||
/// Find STOREs with pointers derived from the \b basePointer and that are in the same
|
||||
/// basic block as the root STORE. The root STORE is \e not included in the resulting set.
|
||||
/// \param stores holds the collected STOREs
|
||||
void HeapSequence::findInitialStores(vector<PcodeOp *> &stores)
|
||||
|
||||
{
|
||||
Datatype *ptrType = rootOp->getIn(1)->getTypeReadFacing(rootOp);
|
||||
vector<Varnode *> ptradds;
|
||||
ptradds.push_back(basePointer);
|
||||
int4 pos = 0;
|
||||
int4 alignSize = charType->getAlignSize();
|
||||
while(pos < ptradds.size()) {
|
||||
Varnode *vn = ptradds[pos];
|
||||
pos += 1;
|
||||
list<PcodeOp *>::const_iterator iter;
|
||||
for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) {
|
||||
PcodeOp *op = *iter;
|
||||
OpCode opc = op->code();
|
||||
if (opc == CPUI_PTRADD) {
|
||||
if (op->getIn(0) != vn) continue;
|
||||
if (op->getOut()->getTypeDefFacing() != ptrType) continue;
|
||||
if (op->getIn(2)->getOffset() != alignSize) continue;
|
||||
ptradds.push_back(op->getOut());
|
||||
}
|
||||
else if (opc == CPUI_STORE && op->getParent() == block && op != rootOp) {
|
||||
if (op->getIn(1) != vn) continue;
|
||||
stores.push_back(op);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Recursively walk an ADD tree from a given root, collecting offsets and non-constant elements
|
||||
///
|
||||
/// The constant offsets are returned as a final summed offset. Any non-constant Varnodes encountered are
|
||||
/// passed back in a list. Recursion is depth limited.
|
||||
/// \param vn is the given root of ADD tree
|
||||
/// \param nonConst will hold the list of non-constant Varnodes in the tree
|
||||
/// \param maxDepth is the maximum recursion depth
|
||||
/// \return the sum of all constant offsets
|
||||
uint8 HeapSequence::calcAddElements(Varnode *vn,vector<Varnode *> &nonConst,int4 maxDepth)
|
||||
|
||||
{
|
||||
if (vn->isConstant())
|
||||
return vn->getOffset();
|
||||
if (!vn->isWritten()||vn->getDef()->code() != CPUI_INT_ADD || maxDepth == 0) {
|
||||
nonConst.push_back(vn);
|
||||
return 0;
|
||||
}
|
||||
uint8 res = calcAddElements(vn->getDef()->getIn(0),nonConst,maxDepth-1);
|
||||
res += calcAddElements(vn->getDef()->getIn(1),nonConst,maxDepth-1);
|
||||
return res;
|
||||
}
|
||||
|
||||
/// \brief Calculate the offset and any non-constant additive elements between the given Varnode and the \b basePointer
|
||||
///
|
||||
/// Walk backward from the given Varnode thru PTRADDs and ADDs, summing any offsets encountered.
|
||||
/// Any non-constant Varnodes encountered in the path, that are not themselves a pointer, are passed back in a list.
|
||||
/// \param vn is the given Varnode to trace back to the \b basePointer
|
||||
/// \param nonConst will hold the list of non-constant Varnodes being passed back
|
||||
/// \return the sum off constant offsets on the path
|
||||
uint8 HeapSequence::calcPtraddOffset(Varnode *vn,vector<Varnode *> &nonConst)
|
||||
|
||||
{
|
||||
uint8 res = 0;
|
||||
while(vn != basePointer) {
|
||||
PcodeOp *ptradd = vn->getDef();
|
||||
uint8 off = calcAddElements(ptradd->getIn(1),nonConst,3);
|
||||
off *= (uint8)ptradd->getIn(2)->getOffset();
|
||||
res += off;
|
||||
vn = ptradd->getIn(0);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/// \brief Determine if two sets of Varnodes are equal
|
||||
///
|
||||
/// The sets are passed in as arrays that are assumed sorted. If the sets contain the
|
||||
/// exact same Varnodes, \b true is returned, \b false otherwise.
|
||||
/// \param op1 is the first set
|
||||
/// \param op2 is the second set
|
||||
/// \return \b true if and only if the sets are equal
|
||||
bool HeapSequence::setsEqual(const vector<Varnode *> &op1,const vector<Varnode *> &op2)
|
||||
|
||||
{
|
||||
if (op1.size() != op2.size()) return false;
|
||||
for(int4 i=0;i<op1.size();++i) {
|
||||
if (op1[i] != op2[i]) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \param op is the STORE to test
|
||||
/// \return \b true if the value being STOREd has the right size and type
|
||||
bool HeapSequence::testValue(PcodeOp *op)
|
||||
|
||||
{
|
||||
Varnode *vn = op->getIn(2);
|
||||
if (!vn->isConstant())
|
||||
return false;
|
||||
if (vn->getSize() != charType->getSize())
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Walk forward from the base pointer to all STORE ops from that pointer, keeping track of the offset.
|
||||
/// The final set of STOREs will all be in the same basic block as the root STORE and have
|
||||
/// a greater than or equal offset. If the minimum sequence size is reached, \b true is returned.
|
||||
/// \return \b true if the minimum number of STOREs is collected.
|
||||
bool HeapSequence::collectStoreOps(void)
|
||||
|
||||
{
|
||||
vector<PcodeOp *> initStores;
|
||||
findInitialStores(initStores);
|
||||
if (initStores.size() + 1 < MINIMUM_SEQUENCE_LENGTH)
|
||||
return false;
|
||||
baseOffset = calcPtraddOffset(rootOp->getIn(1), nonConstAdds);
|
||||
vector<Varnode *> nonConstComp;
|
||||
for(int4 i=0;i<initStores.size();++i) {
|
||||
PcodeOp *op = initStores[i];
|
||||
nonConstComp.clear();
|
||||
uint8 curOffset = calcPtraddOffset(op->getIn(1), nonConstComp);
|
||||
if (setsEqual(nonConstAdds, nonConstComp)) {
|
||||
if (curOffset < baseOffset)
|
||||
return false; // Root is not the earliest STORE
|
||||
if (!testValue(op))
|
||||
return false;
|
||||
moveOps.emplace_back(curOffset - baseOffset,op,-1);
|
||||
}
|
||||
}
|
||||
moveOps.emplace_back(0,rootOp,-1);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// A built-in user-op that copies string data is created. Its first (destination) parameter is
|
||||
/// the base pointer of the STOREs. with the base offset added to it.
|
||||
/// The second (source) parameter is an \e internal \e string constructed from the \b byteArray. The
|
||||
/// third parameter is the constant indicating the length of the string. The \e user-op is inserted just before
|
||||
/// the last PcodeOp moving a character into the memory region.
|
||||
/// \return the constructed PcodeOp representing the \b memcpy
|
||||
PcodeOp *HeapSequence::buildStringCopy(void)
|
||||
|
||||
{
|
||||
PcodeOp *insertPoint = moveOps[0].op; // Earliest STORE in the block
|
||||
Datatype *charPtrType = rootOp->getIn(1)->getTypeReadFacing(rootOp);
|
||||
int4 numBytes = numElements * charType->getSize();
|
||||
Architecture *glb = data.getArch();
|
||||
Varnode *srcPtr = data.getInternalString(byteArray.data(), numBytes, charPtrType, insertPoint);
|
||||
if (srcPtr == (Varnode *)0)
|
||||
return (PcodeOp *)0;
|
||||
Varnode *destPtr = basePointer;
|
||||
if (baseOffset != 0 || !nonConstAdds.empty()) { // Create the index Varnode
|
||||
Varnode *indexVn = (Varnode *)0;
|
||||
Datatype *intType = glb->types->getBase(basePointer->getSize(), TYPE_INT);
|
||||
if (nonConstAdds.size() > 0) { // Add in any non-constant Varnodes
|
||||
indexVn = nonConstAdds[0];
|
||||
for(int4 i=1;i<nonConstAdds.size();++i) {
|
||||
PcodeOp *addOp = data.newOp(2,insertPoint->getAddr());
|
||||
data.opSetOpcode(addOp, CPUI_INT_ADD);
|
||||
data.opSetInput(addOp, indexVn, 0);
|
||||
data.opSetInput(addOp, nonConstAdds[i],1);
|
||||
indexVn = data.newUniqueOut(indexVn->getSize(), addOp);
|
||||
indexVn->updateType(intType, false, false);
|
||||
data.opInsertBefore(addOp, insertPoint);
|
||||
}
|
||||
}
|
||||
if (baseOffset != 0) { // Add in any non-zero constant
|
||||
uint8 numEl = baseOffset / charType->getAlignSize();
|
||||
Varnode *cvn = data.newConstant(basePointer->getSize(), numEl);
|
||||
if (indexVn == (Varnode *)0)
|
||||
indexVn = cvn;
|
||||
else {
|
||||
PcodeOp *addOp = data.newOp(2,insertPoint->getAddr());
|
||||
data.opSetOpcode(addOp, CPUI_INT_ADD);
|
||||
data.opSetInput(addOp, indexVn, 0);
|
||||
data.opSetInput(addOp, cvn,1);
|
||||
indexVn = data.newUniqueOut(indexVn->getSize(), addOp);
|
||||
indexVn->updateType(intType, false, false);
|
||||
data.opInsertBefore(addOp, insertPoint);
|
||||
}
|
||||
}
|
||||
PcodeOp *ptrAdd = data.newOp(3,insertPoint->getAddr());
|
||||
data.opSetOpcode(ptrAdd, CPUI_PTRADD);
|
||||
destPtr = data.newUniqueOut(basePointer->getSize(), ptrAdd);
|
||||
data.opSetInput(ptrAdd,basePointer,0);
|
||||
data.opSetInput(ptrAdd,indexVn,1);
|
||||
data.opSetInput(ptrAdd,data.newConstant(basePointer->getSize(), charType->getAlignSize()),2);
|
||||
destPtr->updateType(charPtrType, false, false);
|
||||
data.opInsertBefore(ptrAdd, insertPoint);
|
||||
}
|
||||
int4 index;
|
||||
uint4 builtInId = selectStringCopyFunction(index);
|
||||
glb->userops.registerBuiltin(builtInId);
|
||||
PcodeOp *copyOp = data.newOp(4,insertPoint->getAddr());
|
||||
data.opSetOpcode(copyOp, CPUI_CALLOTHER);
|
||||
data.opSetInput(copyOp, data.newConstant(4, builtInId), 0);
|
||||
data.opSetInput(copyOp, destPtr, 1);
|
||||
data.opSetInput(copyOp, srcPtr, 2);
|
||||
Varnode *lenVn = data.newConstant(4,index);
|
||||
lenVn->updateType(copyOp->inputTypeLocal(3), false, false);
|
||||
data.opSetInput(copyOp, lenVn, 3);
|
||||
data.opInsertBefore(copyOp, insertPoint);
|
||||
return copyOp;
|
||||
}
|
||||
|
||||
/// \brief Gather INDIRECT ops attached to the final sequence STOREs and their input/output Varnode pairs
|
||||
///
|
||||
/// There may be chained INDIRECTs for a single storage location as it crosses multiple STORE ops. Only
|
||||
/// the initial input and final output are gathered.
|
||||
/// \param indirects will hold the INDIRECT ops attached to sequence STOREs
|
||||
/// \param pairs will hold Varnode pairs where the first in the pair is the input and the second is the output
|
||||
void HeapSequence::gatherIndirectPairs(vector<PcodeOp *> &indirects,vector<Varnode *> &pairs)
|
||||
|
||||
{
|
||||
for(int4 i=0;i<moveOps.size();++i) {
|
||||
PcodeOp *op = moveOps[i].op->previousOp();
|
||||
while(op != (PcodeOp *)0) {
|
||||
if (op->code() != CPUI_INDIRECT) break;
|
||||
op->setMark();
|
||||
indirects.push_back(op);
|
||||
op = op->previousOp();
|
||||
}
|
||||
}
|
||||
for(int4 i=0;i<indirects.size();++i) {
|
||||
PcodeOp *op = indirects[i];
|
||||
Varnode *outvn = op->getOut();
|
||||
bool hasUse = false;
|
||||
list<PcodeOp *>::const_iterator iter;
|
||||
for(iter=outvn->beginDescend();iter!=outvn->endDescend();++iter) {
|
||||
PcodeOp *useOp = *iter;
|
||||
if (!useOp->isMark()) { // Look for read of outvn that is not by another STORE INDIRECT
|
||||
hasUse = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (hasUse) { // If it has another use
|
||||
Varnode *invn = op->getIn(0);
|
||||
while(invn->isWritten()) {
|
||||
PcodeOp *defOp = invn->getDef(); // Trace back to input Varnode that is not defined by a STORE INDIRECT
|
||||
if (!defOp->isMark()) break;
|
||||
invn = defOp->getIn(0);
|
||||
}
|
||||
pairs.push_back(invn);
|
||||
pairs.push_back(outvn);
|
||||
data.opUnsetOutput(op);
|
||||
}
|
||||
}
|
||||
for(int4 i=0;i<indirects.size();++i)
|
||||
indirects[i]->clearMark();
|
||||
}
|
||||
|
||||
/// \brief Remove the given PcodeOp and any other ops that uniquely produce its inputs
|
||||
///
|
||||
/// The given PcodeOp is always removed. PcodeOps are recursively removed, if the only data-flow
|
||||
/// path of their output is to the given op, and they are not a CALL or are otherwise special.
|
||||
/// \param op is the given PcodeOp to remove
|
||||
/// \param scratch is scratch space for holding
|
||||
void HeapSequence::removeRecursive(PcodeOp *op,vector<PcodeOp *> &scratch)
|
||||
|
||||
{
|
||||
scratch.clear();
|
||||
scratch.push_back(op);
|
||||
int4 pos = 0;
|
||||
while(pos < scratch.size()) {
|
||||
op = scratch[pos];
|
||||
pos += 1;
|
||||
for(int4 i=0;i<op->numInput();++i) {
|
||||
Varnode *vn = op->getIn(i);
|
||||
if (!vn->isWritten() || vn->isAutoLive()) continue;
|
||||
if (vn->loneDescend() == (PcodeOp *)0) continue;
|
||||
PcodeOp *defOp = vn->getDef();
|
||||
if (defOp->isCall() || defOp->isIndirectSource()) continue;
|
||||
scratch.push_back(defOp);
|
||||
}
|
||||
data.opDestroy(op);
|
||||
}
|
||||
}
|
||||
|
||||
/// If the STORE pointer no longer has any other uses, remove the PTRADD producing it, recursively,
|
||||
/// up to the base pointer. INDIRECT ops surrounding any STORE that is removed are replaced with
|
||||
/// INDIRECTs around the user-op replacing the STOREs.
|
||||
/// \param replaceOp is the user-op replacement for the STOREs
|
||||
void HeapSequence::removeStoreOps(PcodeOp *replaceOp)
|
||||
|
||||
{
|
||||
vector<PcodeOp *> indirects;
|
||||
vector<Varnode *> indirectPairs;
|
||||
vector<PcodeOp *> scratch;
|
||||
gatherIndirectPairs(indirects, indirectPairs);
|
||||
for(int4 i=0;i<moveOps.size();++i) {
|
||||
PcodeOp *op = moveOps[i].op;
|
||||
removeRecursive(op, scratch);
|
||||
}
|
||||
for(int4 i=0;i<indirects.size();++i) {
|
||||
data.opDestroy(indirects[i]);
|
||||
}
|
||||
for(int4 i=0;i<indirectPairs.size();i+=2) {
|
||||
Varnode *invn = indirectPairs[i];
|
||||
Varnode *outvn = indirectPairs[i+1];
|
||||
PcodeOp *newInd = data.newOp(2,replaceOp->getAddr());
|
||||
data.opSetOpcode(newInd, CPUI_INDIRECT);
|
||||
data.opSetOutput(newInd,outvn);
|
||||
data.opSetInput(newInd,invn,0);
|
||||
data.opSetInput(newInd,data.newVarnodeIop(replaceOp),1);
|
||||
data.opInsertBefore(newInd, replaceOp);
|
||||
}
|
||||
}
|
||||
|
||||
/// \brief Constructor for the sequence of STORE ops
|
||||
///
|
||||
/// From a given STORE op, construct the sequence of STOREs off of the same root pointer.
|
||||
/// The STOREs must be in the same basic block. They can be out of order but must fill out a contiguous
|
||||
/// region of memory with a minimum number of character elements. The values being stored are accumulated
|
||||
/// in a byte array. The initial STORE must have the earliest offset in the sequence. If a sequence
|
||||
/// matching these conditions isn't found, the constructed object will be in an invalid state, and
|
||||
/// isInvalid() will return \b true.
|
||||
/// \param fdata is the function containing the sequence
|
||||
/// \param ct is the character data-type being STOREd
|
||||
/// \param root is the given (putative) initial STORE in the sequence
|
||||
HeapSequence::HeapSequence(Funcdata &fdata,Datatype *ct,PcodeOp *root)
|
||||
: ArraySequence(fdata,ct,root)
|
||||
{
|
||||
baseOffset = 0;
|
||||
findBasePointer(rootOp->getIn(1));
|
||||
if (!collectStoreOps())
|
||||
return;
|
||||
if (!checkInterference())
|
||||
return;
|
||||
int4 arrSize = moveOps.size() * charType->getAlignSize();
|
||||
bool bigEndian = moveOps[0].op->getIn(0)->getSpaceFromConst()->isBigEndian();
|
||||
numElements = formByteArray(arrSize, 2, 0, bigEndian);
|
||||
}
|
||||
|
||||
/// The user-op representing the string move is created and all the STORE ops are removed.
|
||||
/// If successful \b true is returned. The transform fails (only) if the accumulated bytes do not
|
||||
/// represent a legal unicode string.
|
||||
/// \return \b true if STOREs are successfully converted to a user-op with a string representation
|
||||
bool HeapSequence::transform(void)
|
||||
|
||||
{
|
||||
PcodeOp *memCpyOp = buildStringCopy();
|
||||
if (memCpyOp == (PcodeOp *)0)
|
||||
return false;
|
||||
removeStoreOps(memCpyOp);
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Add CPUI_COPY, the only op-code this rule registers, to the list.
/// \param oplist is the list that the op-code is appended to
void RuleStringCopy::getOpList(vector<uint4> &oplist) const

{
  const uint4 triggerOpc = CPUI_COPY;
  oplist.push_back(triggerOpc);
}
|
||||
|
||||
/// \class RuleStringSequence
|
||||
/// \brief Replace a sequence of COPY ops moving single characters with a \b memcpy CALLOTHER copying a whole string
|
||||
/// \class RuleStringCopy
|
||||
/// \brief Replace a sequence of COPY ops moving single characters with a CALLOTHER copying a whole string
|
||||
///
|
||||
/// Given a root COPY of a constant character, search for other COPYs in the same basic block that form a sequence
|
||||
/// of characters that can be interpreted as a single string. Replace the sequence of COPYs with a single
|
||||
/// \b memcpy CALLOTHER.
|
||||
int4 RuleStringSequence::applyOp(PcodeOp *op,Funcdata &data)
|
||||
/// \b memcpy or \b wcsncpy user-op.
|
||||
int4 RuleStringCopy::applyOp(PcodeOp *op,Funcdata &data)
|
||||
|
||||
{
|
||||
if (!op->getIn(0)->isConstant()) return 0; // Constant
|
||||
|
@ -488,4 +854,34 @@ int4 RuleStringSequence::applyOp(PcodeOp *op,Funcdata &data)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/// Add CPUI_STORE, the only op-code this rule registers, to the list.
/// \param oplist is the list that the op-code is appended to
void RuleStringStore::getOpList(vector<uint4> &oplist) const

{
  const uint4 triggerOpc = CPUI_STORE;
  oplist.push_back(triggerOpc);
}
|
||||
|
||||
/// \class RuleStringStore
|
||||
/// \brief Replace a sequence of STORE ops moving single characters with a CALLOTHER copying a whole string
|
||||
///
|
||||
/// Given a root STORE of a constant character, search for other STOREs in the same basic block off of the
|
||||
/// same base pointer that form a sequence a sequence that can be interpreted as a single string. Replace
|
||||
/// the STOREs with a single \b strncpy or \b wcsncpy user-op.
|
||||
int4 RuleStringStore::applyOp(PcodeOp *op,Funcdata &data)
|
||||
|
||||
{
|
||||
if (!op->getIn(2)->isConstant()) return 0; // Constant
|
||||
Varnode *ptrvn = op->getIn(1);
|
||||
Datatype *ct = ptrvn->getTypeReadFacing(op);
|
||||
if (ct->getMetatype() != TYPE_PTR) return 0;
|
||||
ct = ((TypePointer *)ct)->getPtrTo();
|
||||
if (!ct->isCharPrint()) return 0; // Copied to a "char" data-type Varnode
|
||||
if (ct->isOpaqueString()) return 0;
|
||||
HeapSequence sequence(data,ct,op);
|
||||
if (!sequence.isValid())
|
||||
return 0;
|
||||
if (!sequence.transform())
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
} // End namespace ghidra
|
||||
|
|
|
@ -4,9 +4,9 @@
|
|||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -22,43 +22,53 @@
|
|||
|
||||
namespace ghidra {
|
||||
|
||||
/// \brief A class for collecting sequences of COPY ops that hold string data
|
||||
/// \brief A sequence of PcodeOps that move data in-to/out-of an array data-type.
|
||||
///
|
||||
/// Given a starting Address and a Symbol with a character array as a component, a class instance collects
|
||||
/// a maximal set of COPY ops that can be treated as writing a single string into memory. Then, if the
|
||||
/// transform() method is called, an explicit string is constructed, and the COPYs are replaced with a
|
||||
/// \b memcpy CALLOTHER that takes the string as its source input.
|
||||
class StringSequence {
|
||||
/// A container for a sequence of PcodeOps within a basic block where we are trying to determine if the sequence
|
||||
/// can be replaced with a single \b memcpy style user-op.
|
||||
class ArraySequence {
|
||||
public:
|
||||
static const int4 MINIMUM_SEQUENCE_LENGTH; ///< Minimum number of sequential characters to trigger replacement with CALLOTHER
|
||||
/// \brief Helper class holding a data-flow edge and optionally a memory offset being COPYed into or from
|
||||
class WriteNode {
|
||||
friend class StringSequence;
|
||||
uintb offset; ///< Offset into the memory region
|
||||
public:
|
||||
uint8 offset; ///< Offset into the memory region
|
||||
PcodeOp *op; ///< PcodeOp moving into/outof memory region
|
||||
int4 slot; ///< either input slot (>=0) or output (-1)
|
||||
public:
|
||||
WriteNode(uintb off,PcodeOp *o,int4 sl) { offset = off; op = o; slot = sl; } ///< Constructor
|
||||
|
||||
WriteNode(uint8 off,PcodeOp *o,int4 sl) { offset = off; op = o; slot = sl; } ///< Constructor
|
||||
/// \brief Compare two nodes by their order within a basic block
|
||||
bool operator<(const WriteNode &node2) const { return op->getSeqNum().getOrder() < node2.op->getSeqNum().getOrder(); }
|
||||
/// \brief Compare two PcodeOps based on the position of the element they copy within the sequence
|
||||
static bool compareOffset(const WriteNode &a,const WriteNode &b) { return a.offset < b.offset; }
|
||||
};
|
||||
private:
|
||||
Funcdata &data; ///< Function being analyzed
|
||||
protected:
|
||||
Funcdata &data; ///< The function containing the sequence
|
||||
PcodeOp *rootOp; ///< The root PcodeOp
|
||||
Datatype *charType; ///< Element data-type
|
||||
BlockBasic *block; ///< Basic block containing all the COPY/STORE ops
|
||||
int4 numElements; ///< Number of elements in the final sequence
|
||||
vector<WriteNode> moveOps; ///< COPY/STORE into the array memory region
|
||||
vector<uint1> byteArray; ///< Constants collected in a single array
|
||||
static bool interfereBetween(PcodeOp *startOp,PcodeOp *endOp); ///< Check for interfering ops between the two given ops
|
||||
bool checkInterference(void); ///< Find maximal set of ops containing the root with no interfering ops in between
|
||||
int4 formByteArray(int4 sz,int4 slot,uint8 rootOff,bool bigEndian); ///< Put constant values from COPYs into a single byte array
|
||||
uint4 selectStringCopyFunction(int4 &index); ///< Pick either strncpy, wcsncpy, or memcpy function used to copy string
|
||||
public:
|
||||
ArraySequence(Funcdata &fdata,Datatype *ct,PcodeOp *root); ///< Constructor
|
||||
bool isValid(void) const { return numElements != 0; } ///< Return \b true if sequence is found
|
||||
};
|
||||
|
||||
/// \brief A class for collecting sequences of COPY ops writing characters to the same string
|
||||
///
|
||||
/// Given a starting Address and a Symbol with a character array as a component, a class instance collects
|
||||
/// a maximal set of COPY ops that can be treated as writing a single string into memory. Then, if the
|
||||
/// transform() method is called, an explicit string is constructed, and the COPYs are replaced with a
|
||||
/// \b strncpy or similar CALLOTHER that takes the string as its source input.
|
||||
class StringSequence : public ArraySequence {
|
||||
Address rootAddr; ///< Address within the memory region associated with the root PcodeOp
|
||||
Address startAddr; ///< Starting address of the memory region
|
||||
SymbolEntry *entry; ///< Symbol at the root Address
|
||||
int4 size; ///< Size of the memory region in bytes
|
||||
Datatype *charType; ///< Element data-type
|
||||
BlockBasic *block; ///< Basic block containing all the COPY ops
|
||||
vector<WriteNode> moveOps; ///< COPYs into the array memory region
|
||||
vector<uint1> byteArray; ///< Constants collected in a single array
|
||||
bool collectCopyOps(void); ///< Collect ops COPYing constants into the memory region
|
||||
bool checkBetweenCopy(PcodeOp *startOp,PcodeOp *endOp); ///< Check for interfering ops between the two given COPYs
|
||||
bool checkCopyInterference(void); ///< Find maximal set of COPYs containing the root COPY with no interfering ops in between
|
||||
bool formByteArray(void); ///< Put constant values from COPYs into a single byte array
|
||||
uint4 selectStringCopyFunction(int4 &index); ///< Pick either strncpy, wcsncpy, or memcpy function used to copy string
|
||||
bool collectCopyOps(int size); ///< Collect ops COPYing constants into the memory region
|
||||
PcodeOp *buildStringCopy(void); ///< Build the strncpy,wcsncpy, or memcpy function with string as input
|
||||
static void removeForward(const WriteNode &curNode,map<PcodeOp *,list<WriteNode>::iterator> &xref,
|
||||
list<WriteNode> &points,vector<WriteNode> &deadOps);
|
||||
|
@ -66,17 +76,51 @@ private:
|
|||
Varnode *constructTypedPointer(PcodeOp *insertPoint);
|
||||
public:
|
||||
StringSequence(Funcdata &fdata,Datatype *ct,SymbolEntry *ent,PcodeOp *root,const Address &addr);
|
||||
bool isValid(void) const { return size != 0; } ///< Return \b true if COPYs are found that look like a valid string
|
||||
void clear(void); ///< Clear any resources used and mark the sequence as invalid
|
||||
bool transform(void); ///< Transform COPYs into a single memcpy user-op
|
||||
};
|
||||
|
||||
class RuleStringSequence : public Rule {
|
||||
/// \brief A sequence of STORE operations writing characters through the same string pointer
|
||||
///
|
||||
/// Given an initial STORE, a class instance collects a maximal set of STORE ops that can be treated as writing
|
||||
/// a single string into memory. If the transform() method is called, an explicit string is constructed, and
|
||||
/// the STOREs are replaced with a \b strncpy or similar CALLOTHER that takes the string as its source input.
|
||||
class HeapSequence : public ArraySequence {
|
||||
Varnode *basePointer; ///< Pointer that sequence is stored to
|
||||
uint8 baseOffset; ///< Offset relative to pointer to root STORE
|
||||
vector<Varnode *> nonConstAdds; ///< non-constant Varnodes being added into pointer calculation
|
||||
void findBasePointer(Varnode *initPtr); ///< Find the base pointer for the sequence
|
||||
void findInitialStores(vector<PcodeOp *> &stores);
|
||||
static uint8 calcAddElements(Varnode *vn,vector<Varnode *> &nonConst,int4 maxDepth);
|
||||
uint8 calcPtraddOffset(Varnode *vn,vector<Varnode *> &nonConst);
|
||||
static bool setsEqual(const vector<Varnode *> &op1,const vector<Varnode *> &op2);
|
||||
bool testValue(PcodeOp *op); ///< Test if a STORE value has the matching form for the sequence
|
||||
bool collectStoreOps(void); ///< Collect ops STOREing into a memory region from the same root pointer
|
||||
PcodeOp *buildStringCopy(void); ///< Build the strncpy,wcsncpy, or memcpy function with string as input
|
||||
void gatherIndirectPairs(vector<PcodeOp *> &indirects,vector<Varnode *> &pairs);
|
||||
void removeRecursive(PcodeOp *op,vector<PcodeOp *> &scratch);
|
||||
void removeStoreOps(PcodeOp *replaceOp); ///< Remove all STORE ops from the basic block
|
||||
public:
|
||||
RuleStringSequence(const string &g) : Rule( g, 0, "stringsequence") {} ///< Constructor
|
||||
HeapSequence(Funcdata &fdata,Datatype *ct,PcodeOp *root);
|
||||
bool transform(void); ///< Transform STOREs into a single memcpy user-op
|
||||
};
|
||||
|
||||
class RuleStringCopy : public Rule {
|
||||
public:
|
||||
RuleStringCopy(const string &g) : Rule( g, 0, "stringcopy") {} ///< Constructor
|
||||
virtual Rule *clone(const ActionGroupList &grouplist) const {
|
||||
if (!grouplist.contains(getGroup())) return (Rule *)0;
|
||||
return new RuleStringSequence(getGroup());
|
||||
return new RuleStringCopy(getGroup());
|
||||
}
|
||||
virtual void getOpList(vector<uint4> &oplist) const;
|
||||
virtual int4 applyOp(PcodeOp *op,Funcdata &data);
|
||||
};
|
||||
|
||||
class RuleStringStore : public Rule {
|
||||
public:
|
||||
RuleStringStore(const string &g) : Rule( g, 0, "stringstore") {} ///< Constructor
|
||||
virtual Rule *clone(const ActionGroupList &grouplist) const {
|
||||
if (!grouplist.contains(getGroup())) return (Rule *)0;
|
||||
return new RuleStringStore(getGroup());
|
||||
}
|
||||
virtual void getOpList(vector<uint4> &oplist) const;
|
||||
virtual int4 applyOp(PcodeOp *op,Funcdata &data);
|
||||
|
|
|
@ -5597,7 +5597,8 @@ void ActionDatabase::universalAction(Architecture *conf)
|
|||
actcleanup->addRule( new RuleSplitCopy("splitcopy") );
|
||||
actcleanup->addRule( new RuleSplitLoad("splitpointer") );
|
||||
actcleanup->addRule( new RuleSplitStore("splitpointer") );
|
||||
actcleanup->addRule( new RuleStringSequence("constsequence"));
|
||||
actcleanup->addRule( new RuleStringCopy("constsequence"));
|
||||
actcleanup->addRule( new RuleStringStore("constsequence"));
|
||||
}
|
||||
act->addAction( actcleanup );
|
||||
|
||||
|
|
|
@ -4,9 +4,9 @@
|
|||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -5126,7 +5126,7 @@ void FuncCallSpecs::commitNewInputs(Funcdata &data,vector<Varnode *> &newinput)
|
|||
/// Any other intersecting outputs are updated to be either truncations or extensions of this.
|
||||
/// Any active trials are updated,
|
||||
/// \param data is the calling function
|
||||
/// \param newout is the list of intersecting outputs
|
||||
/// \param newoutput is the list of intersecting outputs
|
||||
void FuncCallSpecs::commitNewOutputs(Funcdata &data,vector<Varnode *> &newoutput)
|
||||
|
||||
{
|
||||
|
|
|
@ -4,9 +4,9 @@
|
|||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -399,8 +399,12 @@ int4 StringManager::getCodepoint(const uint1 *buf,int4 charsize,bool bigend,int4
|
|||
}
|
||||
else
|
||||
return -1;
|
||||
if (codepoint >= 0xd800 && codepoint <= 0xdfff)
|
||||
return -1; // Reserved for surrogates, invalid codepoints
|
||||
if (codepoint >= 0xd800) {
|
||||
if (codepoint > 0x10ffff) // Bigger than maximum codepoint
|
||||
return -1;
|
||||
if (codepoint <= 0xdfff)
|
||||
return -1; // Reserved for surrogates, invalid codepoints
|
||||
}
|
||||
skip = sk;
|
||||
return codepoint;
|
||||
}
|
||||
|
|
|
@ -4,9 +4,9 @@
|
|||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -116,7 +116,7 @@ SubvariableFlow::ReplaceVarnode *SubvariableFlow::setReplacement(Varnode *vn,uin
|
|||
return (ReplaceVarnode *)0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (vn->isInput()) { // Must be careful with inputs
|
||||
// Inputs must come in from the right register/memory
|
||||
if (bitsize < 8) return (ReplaceVarnode *)0; // Dont create input flag
|
||||
|
@ -408,7 +408,7 @@ bool SubvariableFlow::traceForward(ReplaceVarnode *rvn)
|
|||
hcount += 1;
|
||||
break;
|
||||
case CPUI_INT_ADD:
|
||||
if ((rvn->mask & 1)==0)
|
||||
if ((rvn->mask & 1)==0)
|
||||
return false; // Cannot account for carry
|
||||
rop = createOpDown(CPUI_INT_ADD,2,op,rvn,slot);
|
||||
if (!createLink(rop,rvn->mask,-1,outvn)) return false;
|
||||
|
@ -779,7 +779,7 @@ bool SubvariableFlow::traceBackward(ReplaceVarnode *rvn)
|
|||
default:
|
||||
break; // Everything else we abort
|
||||
}
|
||||
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1749,7 +1749,7 @@ bool SplitFlow::doTrace(void)
|
|||
/// If \b pointer Varnode is written by an INT_ADD, PTRSUB, or PTRADD from another pointer
|
||||
/// to a structure or array, update \b pointer Varnode, \b baseOffset, and \b ptrType to this.
|
||||
/// \return \b true if \b pointer was successfully updated
|
||||
bool SplitDatatype::RootPointer::backUpPointer(void)
|
||||
bool SplitDatatype::RootPointer::backUpPointer(Datatype *impliedBase)
|
||||
|
||||
{
|
||||
if (!pointer->isWritten())
|
||||
|
@ -1767,8 +1767,10 @@ bool SplitDatatype::RootPointer::backUpPointer(void)
|
|||
return false;
|
||||
Datatype *parent = ((TypePointer *)ct)->getPtrTo();
|
||||
type_metatype meta = parent->getMetatype();
|
||||
if (meta != TYPE_STRUCT && meta != TYPE_ARRAY)
|
||||
return false;
|
||||
if (meta != TYPE_STRUCT && meta != TYPE_ARRAY) {
|
||||
if (opc != CPUI_PTRADD || parent != impliedBase)
|
||||
return false;
|
||||
}
|
||||
ptrType = (TypePointer *)ct;
|
||||
int4 off = (int4)cvn->getOffset();
|
||||
if (opc == CPUI_PTRADD)
|
||||
|
@ -1789,8 +1791,13 @@ bool SplitDatatype::RootPointer::backUpPointer(void)
|
|||
bool SplitDatatype::RootPointer::find(PcodeOp *op,Datatype *valueType)
|
||||
|
||||
{
|
||||
Datatype *impliedBase = (Datatype *)0;
|
||||
if (valueType->getMetatype() == TYPE_PARTIALSTRUCT)
|
||||
valueType = ((TypePartialStruct *)valueType)->getParent();
|
||||
else if (valueType->getMetatype() == TYPE_ARRAY) {
|
||||
valueType = ((TypeArray *)valueType)->getBase();
|
||||
impliedBase = valueType;
|
||||
}
|
||||
loadStore = op;
|
||||
baseOffset = 0;
|
||||
firstPointer = pointer = op->getIn(1);
|
||||
|
@ -1799,14 +1806,16 @@ bool SplitDatatype::RootPointer::find(PcodeOp *op,Datatype *valueType)
|
|||
return false;
|
||||
ptrType = (TypePointer *)ct;
|
||||
if (ptrType->getPtrTo() != valueType) {
|
||||
if (!backUpPointer())
|
||||
if (impliedBase != (Datatype *)0)
|
||||
return false;
|
||||
if (!backUpPointer(impliedBase))
|
||||
return false;
|
||||
if (ptrType->getPtrTo() != valueType)
|
||||
return false;
|
||||
}
|
||||
for(int4 i=0;i<2;++i) {
|
||||
for(int4 i=0;i<3;++i) {
|
||||
if (pointer->isAddrTied() || pointer->loneDescend() == (PcodeOp *)0) break;
|
||||
if (!backUpPointer())
|
||||
if (!backUpPointer(impliedBase))
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
|
@ -1929,6 +1938,13 @@ bool SplitDatatype::testDatatypeCompatibility(Datatype *inBase,Datatype *outBase
|
|||
return false;
|
||||
if (!inConstant && inBase == outBase && inBase->getMetatype() == TYPE_STRUCT)
|
||||
return false; // Don't split a whole structure unless it is getting initialized from a constant
|
||||
if (isLoadStore && outCategory == 1 && inBase->getMetatype() == TYPE_ARRAY)
|
||||
return false; // Don't split array pointer writing into primitive
|
||||
if (isLoadStore && inCategory == 1 && !inConstant && outBase->getMetatype() == TYPE_ARRAY)
|
||||
return false; // Don't split primitive into an array pointer, TODO: We could check if primitive is defined by PIECE
|
||||
if (isLoadStore && inCategory == 0 && outCategory == 0 && !inConstant &&
|
||||
inBase->getMetatype() == TYPE_ARRAY && outBase->getMetatype() == TYPE_ARRAY)
|
||||
return false; // Don't split copies between arrays
|
||||
bool inHole;
|
||||
bool outHole;
|
||||
int4 curOff = 0;
|
||||
|
@ -2322,6 +2338,7 @@ SplitDatatype::SplitDatatype(Funcdata &func)
|
|||
types = glb->types;
|
||||
splitStructures = (glb->split_datatype_config & OptionSplitDatatypes::option_struct) != 0;
|
||||
splitArrays = (glb->split_datatype_config & OptionSplitDatatypes::option_array) != 0;
|
||||
isLoadStore = false;
|
||||
}
|
||||
|
||||
/// Based on the input and output data-types, determine if and how the given COPY operation
|
||||
|
@ -2372,6 +2389,7 @@ bool SplitDatatype::splitCopy(PcodeOp *copyOp,Datatype *inType,Datatype *outType
|
|||
bool SplitDatatype::splitLoad(PcodeOp *loadOp,Datatype *inType)
|
||||
|
||||
{
|
||||
isLoadStore = true;
|
||||
Varnode *outVn = loadOp->getOut();
|
||||
PcodeOp *copyOp = (PcodeOp *)0;
|
||||
if (!outVn->isAddrTied())
|
||||
|
@ -2423,6 +2441,7 @@ bool SplitDatatype::splitLoad(PcodeOp *loadOp,Datatype *inType)
|
|||
bool SplitDatatype::splitStore(PcodeOp *storeOp,Datatype *outType)
|
||||
|
||||
{
|
||||
isLoadStore = true;
|
||||
Varnode *inVn = storeOp->getIn(2);
|
||||
PcodeOp *loadOp = (PcodeOp *)0;
|
||||
Datatype *inType = (Datatype *)0;
|
||||
|
@ -2538,10 +2557,17 @@ Datatype *SplitDatatype::getValueDatatype(PcodeOp *loadStore,int4 size,TypeFacto
|
|||
baseOffset = 0;
|
||||
}
|
||||
type_metatype metain = resType->getMetatype();
|
||||
if (metain != TYPE_STRUCT && metain == TYPE_ARRAY)
|
||||
return (Datatype *)0;
|
||||
return tlst->getExactPiece(resType, baseOffset, size);
|
||||
}
|
||||
if (resType->getAlignSize() < size) {
|
||||
if (metain == TYPE_INT || metain == TYPE_UINT || metain == TYPE_BOOL || metain == TYPE_FLOAT || metain == TYPE_PTR) {
|
||||
if ((size % resType->getAlignSize()) == 0) {
|
||||
int4 numEl = size / resType->getAlignSize();
|
||||
return tlst->getTypeArray(numEl, resType);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (metain == TYPE_STRUCT || metain == TYPE_ARRAY)
|
||||
return tlst->getExactPiece(resType, baseOffset, size);
|
||||
return (Datatype *)0;}
|
||||
|
||||
/// \brief Create and return a placeholder associated with the given Varnode
|
||||
///
|
||||
|
@ -2757,7 +2783,7 @@ bool SubfloatFlow::traceBackward(TransformVar *rvn)
|
|||
default:
|
||||
break; // Everything else we abort
|
||||
}
|
||||
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -173,7 +173,7 @@ class SplitDatatype {
|
|||
Varnode *firstPointer; ///< Direct pointer input for LOAD or STORE
|
||||
Varnode *pointer; ///< The root pointer
|
||||
int4 baseOffset; ///< Offset of the LOAD or STORE relative to root pointer
|
||||
bool backUpPointer(void); ///< Follow flow of \b pointer back thru INT_ADD or PTRSUB
|
||||
bool backUpPointer(Datatype *impliedBase); ///< Follow flow of \b pointer back thru INT_ADD or PTRSUB
|
||||
public:
|
||||
bool find(PcodeOp *op,Datatype *valueType); ///< Locate root pointer for underlying LOAD or STORE
|
||||
void freePointerChain(Funcdata &data); ///< Remove unused pointer calculations
|
||||
|
@ -183,6 +183,7 @@ class SplitDatatype {
|
|||
vector<Component> dataTypePieces; ///< Sequence of all data-type pairs being copied
|
||||
bool splitStructures; ///< Whether or not structures should be split
|
||||
bool splitArrays; ///< Whether or not arrays should be split
|
||||
bool isLoadStore; ///< True if trying to split LOAD or STORE
|
||||
Datatype *getComponent(Datatype *ct,int4 offset,bool &isHole);
|
||||
int4 categorizeDatatype(Datatype *ct); ///< Categorize if and how data-type should be split
|
||||
bool testDatatypeCompatibility(Datatype *inBase,Datatype *outBase,bool inConstant);
|
||||
|
|
|
@ -3688,7 +3688,7 @@ TypePointer *TypeFactory::getTypePointer(int4 s,Datatype *pt,uint4 ws,const stri
|
|||
return res;
|
||||
}
|
||||
|
||||
// Don't create more than a depth of 2, i.e. ptr->ptr->ptr->...
|
||||
/// Don't create more than a depth of 1, i.e. ptr->ptr
|
||||
/// \param s is the size of the pointer
|
||||
/// \param pt is the pointed-to data-type
|
||||
/// \param ws is the wordsize associated with the pointer
|
||||
|
@ -3697,16 +3697,8 @@ TypePointer *TypeFactory::getTypePointerNoDepth(int4 s,Datatype *pt,uint4 ws)
|
|||
|
||||
{
|
||||
if (pt->getMetatype()==TYPE_PTR) {
|
||||
Datatype *basetype = ((TypePointer *)pt)->getPtrTo();
|
||||
type_metatype meta = basetype->getMetatype();
|
||||
// Make sure that at least we return a pointer to something the size of -pt-
|
||||
if (meta == TYPE_PTR)
|
||||
pt = getBase(pt->getSize(),TYPE_UNKNOWN); // Pass back unknown *
|
||||
else if (meta == TYPE_UNKNOWN) {
|
||||
if (basetype->getSize() == pt->getSize()) // If -pt- is pointer to UNKNOWN of the size of a pointer
|
||||
return (TypePointer *)pt; // Just return pt, don't add another pointer
|
||||
pt = getBase(pt->getSize(),TYPE_UNKNOWN); // Otherwise construct pointer to UNKNOWN of size of pointer
|
||||
}
|
||||
pt = getBase(pt->getSize(),TYPE_UNKNOWN); // Pass back unknown *
|
||||
}
|
||||
return getTypePointer(s,pt,ws);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue