Merge remote-tracking branch 'origin/GP-2603_ConditionalConstPhiNode'

(Closes #4527)
This commit is contained in:
Ryan Kurtz 2023-02-21 12:43:25 -05:00
commit ebde3f7250
6 changed files with 388 additions and 19 deletions

View file

@ -13,6 +13,8 @@ src/decompile/cpp/.gitignore||GHIDRA||||END|
src/decompile/cpp/Doxyfile||GHIDRA|||Most of this file is autogenerated by doxygen which falls under the GPL - output from GPL products are NOT GPL! - mjbell4|END|
src/decompile/cpp/Makefile||GHIDRA||||END|
src/decompile/datatests/concat.xml||GHIDRA||||END|
src/decompile/datatests/condconst.xml||GHIDRA||||END|
src/decompile/datatests/condmulti.xml||GHIDRA||||END|
src/decompile/datatests/convert.xml||GHIDRA||||END|
src/decompile/datatests/deadvolatile.xml||GHIDRA||||END|
src/decompile/datatests/deindirect.xml||GHIDRA||||END|

View file

@ -3946,6 +3946,242 @@ int4 ActionDeadCode::apply(Funcdata &data)
return 0;
}
void ActionConditionalConst::clearMarks(const vector<PcodeOp *> &opList)
{
for(int4 i=0;i<opList.size();++i)
opList[i]->clearMark();
}
/// \brief Gather the COPY, INDIRECT, and MULTIEQUAL ops that a Varnode flows into, ignoring excised edges
///
/// Starting at the given Varnode, data-flow is followed forward through COPY, INDIRECT, and
/// MULTIEQUAL ops.  A MULTIEQUAL only counts as reached if the flow arrives on at least one input
/// edge that is not in the excised list.  Every op reached is appended to the output list and has
/// its mark set.  The list of excised edges is sorted in place by this method.
/// \param vn is the Varnode where the flow starts
/// \param phiNodeEdges is the list of edges to excise (sorted as a side-effect)
/// \param reachable will hold the list of ops that have been reached
void ActionConditionalConst::collectReachable(Varnode *vn,vector<PcodeOpNode> &phiNodeEdges,vector<PcodeOp *> &reachable)
{
  // binary_search below needs the excised edges in sorted order
  sort(phiNodeEdges.begin(),phiNodeEdges.end());
  if (vn->isWritten()) {
    PcodeOp *defOp = vn->getDef();
    if (defOp->code() == CPUI_MULTIEQUAL) {
      // Treat a defining MULTIEQUAL as "reachable".  This lets flowToAlternatePath discover
      // a loop back to vn from the constBlock, even if no other non-constant path survives
      defOp->setMark();
      reachable.push_back(defOp);
    }
  }
  Varnode *curVn = vn;
  int4 nextIndex = 0;				// Index of the next reached op whose output must be followed
  while(true) {
    list<PcodeOp *>::const_iterator iter;
    for(iter=curVn->beginDescend();iter!=curVn->endDescend();++iter) {
      PcodeOp *descOp = *iter;
      if (descOp->isMark()) continue;		// Already collected
      OpCode opc = descOp->code();
      if (opc == CPUI_MULTIEQUAL) {
	bool liveEdge = false;			// Set if some non-excised edge carries curVn
	PcodeOpNode edge(descOp,0);
	while(edge.slot < descOp->numInput()) {
	  if (descOp->getIn(edge.slot) == curVn &&
	      !binary_search(phiNodeEdges.begin(),phiNodeEdges.end(),edge)) {
	    liveEdge = true;
	    break;
	  }
	  edge.slot += 1;
	}
	if (!liveEdge) continue;		// Flow only arrives thru excised edges
      }
      else if (opc != CPUI_COPY && opc != CPUI_INDIRECT)
	continue;				// Flow is not followed thru any other kind of op
      reachable.push_back(descOp);
      descOp->setMark();
    }
    if (nextIndex >= reachable.size()) break;	// Every reached op has been followed
    curVn = reachable[nextIndex]->getOut();
    nextIndex += 1;
  }
}
/// \brief Determine if the output of an op merges back into the marked alternate flow
///
/// Ops along the alternate flow are assumed to be marked already.  Starting from the output of
/// the given op, flow is followed forward through MULTIEQUAL, INDIRECT, and COPY ops.
/// If the op itself is marked, or a marked MULTIEQUAL is encountered, an alternate path exists.
/// Varnode marks set during the walk are cleared before returning.
/// \param op is the given PcodeOp
/// \return \b true if there is an alternate path
bool ActionConditionalConst::flowToAlternatePath(PcodeOp *op)
{
  if (op->isMark()) return true;
  vector<Varnode *> visited;			// Varnodes marked by this walk
  Varnode *startVn = op->getOut();
  startVn->setMark();
  visited.push_back(startVn);
  bool joins = false;
  for(int4 pos=0;!joins && pos<visited.size();++pos) {
    Varnode *curVn = visited[pos];
    list<PcodeOp *>::const_iterator iter;
    for(iter=curVn->beginDescend();iter!=curVn->endDescend();++iter) {
      PcodeOp *nextOp = *iter;
      OpCode opc = nextOp->code();
      bool isPhi = (opc == CPUI_MULTIEQUAL);
      if (isPhi && nextOp->isMark()) {		// Ran into the alternate flow
	joins = true;
	break;
      }
      if (!isPhi && opc != CPUI_COPY && opc != CPUI_INDIRECT)
	continue;				// Flow is not followed thru other ops
      Varnode *outVn = nextOp->getOut();
      if (!outVn->isMark()) {			// Only queue a Varnode once
	outVn->setMark();
	visited.push_back(outVn);
      }
    }
  }
  vector<Varnode *>::const_iterator viter;
  for(viter=visited.begin();viter!=visited.end();++viter)
    (*viter)->clearMark();
  return joins;
}
/// \brief Test if flow from a specific edge is disjoint from other edges
///
/// All MULTIEQUAL and COPY ops reachable from the edge are marked. If any other edge
/// is in this marked set, mark both edges in the result set.
/// \param edges is the set of edges
/// \param i is the index of the specific edge to test
/// \param result is the array of marks to be returned
/// \return \b true if the selected edge flows together with any other edge
bool ActionConditionalConst::flowTogether(const vector<PcodeOpNode> &edges,int4 i,vector<int4> &result)
{
vector<PcodeOp *> reachable;
vector<PcodeOpNode> excise; // No edge excised
collectReachable(edges[i].op->getOut(),excise,reachable);
bool res = false;
for(int4 j=0;j<edges.size();++j) {
if (i == j) continue;
if (result[j] == 0) continue; // Check for disconnected path
if (edges[j].op->isMark()) {
result[i] = 2; // Disconnected paths, which flow together
result[j] = 2;
res = true;
}
}
clearMarks(reachable);
return res;
}
/// \brief Insert a COPY of a constant at the bottom of a basic block
///
/// The COPY goes immediately before the final op of the block if that op is a branch,
/// otherwise at the very end of the block.  The COPY takes its address from the block's final op,
/// or from the alternate op if the block has no ops at all.
/// \param op is an alternate "last" op, supplying the address when the block is empty
/// \param bl is the basic block
/// \param constVn is the constant to be assigned
/// \param data is the function containing the block
/// \return the new output Varnode of the COPY
Varnode *ActionConditionalConst::placeCopy(PcodeOp *op,BlockBasic *bl,Varnode *constVn,Funcdata &data)
{
  PcodeOp *tailOp = bl->lastOp();
  list<PcodeOp *>::iterator insertPoint;
  Address copyAddr;
  if (tailOp != (PcodeOp *)0 && tailOp->isBranch()) {
    insertPoint = tailOp->getBasicIter();	// Insert before any branch
    copyAddr = tailOp->getAddr();
  }
  else {
    insertPoint = bl->endOp();
    if (tailOp == (PcodeOp *)0)
      copyAddr = op->getAddr();			// Empty block, borrow address from the alternate op
    else
      copyAddr = tailOp->getAddr();
  }
  PcodeOp *copyOp = data.newOp(1,copyAddr);
  data.opSetOpcode(copyOp, CPUI_COPY);
  Varnode *resVn = data.newUniqueOut(constVn->getSize(), copyOp);
  data.opSetInput(copyOp,constVn,0);
  data.opInsert(copyOp, bl, insertPoint);
  return resVn;
}
/// \brief Place a single COPY assignment shared by multiple MULTIEQUALs
///
/// Find the common ancestor block among all MULTIEQUALs marked as flowing together.
/// Place a COPY assigning a constant at the bottom of this block.
/// Replace all the input edge Varnodes on the MULTIEQUALs with the output of this COPY.
/// If no edge carries the "flow together" mark, the function returns without changing anything.
/// \param phiNodeEdges is the list of MULTIEQUALs and their incoming edges
/// \param marks are the marks applied to the MULTIEQUALs (2 == flowtogether)
/// \param constVn is the constant being assigned by the COPY
/// \param data is the function
void ActionConditionalConst::placeMultipleConstants(vector<PcodeOpNode> &phiNodeEdges,vector<int4> &marks,
						    Varnode *constVn,Funcdata &data)
{
  vector<FlowBlock *> blocks;
  PcodeOp *op = (PcodeOp *)0;
  for(int4 i=0;i<phiNodeEdges.size();++i) {
    if (marks[i] != 2) continue;	// Check that the MULTIEQUAL is marked as flowing together
    op = phiNodeEdges[i].op;
    FlowBlock *bl = op->getParent();
    bl = bl->getIn(phiNodeEdges[i].slot);	// Block the marked edge comes in from
    blocks.push_back(bl);
  }
  // Without any marked edge there is no block set to search and no op to take an address from
  if (blocks.empty()) return;
  BlockBasic *rootBlock = (BlockBasic *)FlowBlock::findCommonBlock(blocks);
  Varnode *outVn = placeCopy(op, rootBlock, constVn, data);
  for(int4 i=0;i<phiNodeEdges.size();++i) {
    if (marks[i] != 2) continue;
    data.opSetInput(phiNodeEdges[i].op, outVn, phiNodeEdges[i].slot);	// All marked edges share the one COPY
  }
}
/// \brief Substitute a constant on MULTIEQUAL edges that have no alternate flow
///
/// The given Varnode is known to hold a constant value along a set of MULTIEQUAL edges.  With those
/// edges excised from the data-flow, any MULTIEQUAL whose output does not rejoin the flow of the
/// Varnode along some other path has its edge replaced with (a COPY of) the constant.  Edges whose
/// disconnected paths flow together share a single COPY; the rest each get their own.
/// \param varVn is the given Varnode
/// \param constVn is the constant to replace it with
/// \param phiNodeEdges is the set of edges the Varnode is known to be constant on
/// \param data is the function containing this data-flow
void ActionConditionalConst::handlePhiNodes(Varnode *varVn,Varnode *constVn,vector<PcodeOpNode> &phiNodeEdges,Funcdata &data)
{
  vector<PcodeOp *> alternateFlow;
  vector<int4> results(phiNodeEdges.size(),0);	// 0 = connected, 1 = disconnected, 2 = disconnected and flowing together
  collectReachable(varVn,phiNodeEdges,alternateFlow);
  int4 numDisconnected = 0;
  for(int4 i=0;i<phiNodeEdges.size();++i) {
    if (flowToAlternatePath(phiNodeEdges[i].op)) continue;
    results[i] = 1;				// Mark as disconnecting
    numDisconnected += 1;
  }
  clearMarks(alternateFlow);

  bool hasFlowTogether = false;
  if (numDisconnected > 1) {
    // Multiple MULTIEQUALs are disjoint from the non-constant flow, check if they flow into each other
    for(int4 i=0;i<results.size();++i) {
      if (results[i] != 0 && flowTogether(phiNodeEdges,i,results))
	hasFlowTogether = true;
    }
  }

  // Each edge with its own disconnected path going forward gets a private COPY assignment
  for(int4 i=0;i<phiNodeEdges.size();++i) {
    if (results[i] == 1) {			// Disconnected path that does not flow into another path
      PcodeOp *phiOp = phiNodeEdges[i].op;
      int4 inSlot = phiNodeEdges[i].slot;
      BlockBasic *inBlock = (BlockBasic *)phiOp->getParent()->getIn(inSlot);
      Varnode *copyVn = placeCopy(phiOp, inBlock, constVn, data);
      data.opSetInput(phiOp,copyVn,inSlot);
      count += 1;
    }
  }
  if (hasFlowTogether) {
    // Edges that flow together share one COPY assignment
    placeMultipleConstants(phiNodeEdges, results, constVn, data);
    count += 1;
  }
}
/// \brief Replace reads of a given Varnode with a constant.
///
/// For each read op, check that it is in or dominated by a specific block we know
@ -3953,44 +4189,66 @@ int4 ActionDeadCode::apply(Funcdata &data)
/// \param varVn is the given Varnode
/// \param constVn is the constant Varnode to replace with
/// \param constBlock is the block which dominates ops reading the constant value
/// \param useMultiequal is \b true if conditional constants can be applied to MULTIEQUAL ops
/// \param data is the function being analyzed
void ActionConditionalConst::propagateConstant(Varnode *varVn,Varnode *constVn,FlowBlock *constBlock,Funcdata &data)
void ActionConditionalConst::propagateConstant(Varnode *varVn,Varnode *constVn,FlowBlock *constBlock,bool useMultiequal,Funcdata &data)
{
// NOTE(review): this listing looks like a rendered diff with removed and added lines interleaved
// (two signatures, two forms of the domination test) -- confirm which lines are live before editing.
// MULTIEQUAL edges reading varVn from a block dominated by constBlock, gathered for handlePhiNodes
vector<PcodeOpNode> phiNodeEdges;
list<PcodeOp *>::const_iterator iter,enditer;
iter = varVn->beginDescend();
enditer = varVn->endDescend();
// Block containing the op that defines varVn, or null if varVn is not written
FlowBlock *rootBlock = (FlowBlock *)0;
if (varVn->isWritten())
rootBlock = varVn->getDef()->getParent();
while(iter != enditer) {
PcodeOp *op = *iter;
++iter; // Advance iterator before possibly destroying descendant
if (op->isMarker()) continue; // Don't propagate constant into these
if (op->code() == CPUI_COPY) { // Don't propagate into COPY unless...
while(iter != enditer && *iter == op)
++iter; // Advance iterator off of current op, as this descendant may be erased
OpCode opc = op->code();
if (opc == CPUI_INDIRECT) // Don't propagate constant into these
continue;
else if (opc == CPUI_MULTIEQUAL) {
if (!useMultiequal)
continue;
// Skip a MULTIEQUAL whose output sits at the same address as an address-tied varVn
if (varVn->isAddrTied() && varVn->getAddr() == op->getOut()->getAddr())
continue;
FlowBlock *bl = op->getParent();
// Record every incoming edge that carries varVn and whose source block is dominated by constBlock
for(int4 slot=0;slot<op->numInput();++slot) {
if (op->getIn(slot) == varVn) {
if (constBlock->dominates(bl->getIn(slot))) {
phiNodeEdges.emplace_back(op,slot);
}
}
}
// The MULTIEQUAL itself is not modified here; its edges are dealt with after the loop
continue;
}
else if (opc == CPUI_COPY) { // Don't propagate into COPY unless...
PcodeOp *followOp = op->getOut()->loneDescend();
if (followOp == (PcodeOp *)0) continue;
if (followOp->isMarker()) continue;
if (followOp->code() == CPUI_COPY) continue;
// ...unless COPY is into something more interesting
}
FlowBlock *bl = op->getParent();
// Walk up immediate dominators from the op's block, stopping at the block defining varVn
while(bl != (FlowBlock *)0) {
if (bl == rootBlock) break;
if (bl == constBlock) { // Is op dominated by constBlock?
int4 slot = op->getSlot(varVn);
data.opSetInput(op,data.newConstant(varVn->getSize(),constVn->getOffset()),slot); // Replace ref with constant!
count += 1; // We made a change
break;
}
bl = bl->getImmedDom();
if (constBlock->dominates(op->getParent())) {
int4 slot = op->getSlot(varVn);
data.opSetInput(op,constVn,slot); // Replace ref with constant!
count += 1; // We made a change
}
}
// Deal with the collected MULTIEQUAL edges as a group, once all descendants have been visited
if (!phiNodeEdges.empty())
handlePhiNodes(varVn, constVn, phiNodeEdges, data);
}
int4 ActionConditionalConst::apply(Funcdata &data)
{
bool useMultiequal = true;
AddrSpace *stackSpace = data.getArch()->getStackSpace();
if (stackSpace != (AddrSpace *)0) {
// Determining if conditional constants should apply to MULTIEQUAL operations may require
// flow calculations.
int4 numPasses = data.numHeritagePasses(stackSpace);
if (numPasses <= 0) // If the stack hasn't been heritaged yet
useMultiequal = false; // Don't propagate into MULTIEQUAL
}
const BlockGraph &blockGraph(data.getBasicBlocks());
for(int4 i=0;i<blockGraph.getSize();++i) {
FlowBlock *bl = blockGraph.getBlock(i);
@ -4029,7 +4287,7 @@ int4 ActionConditionalConst::apply(Funcdata &data)
constEdge = 1 - constEdge;
FlowBlock *constBlock = bl->getOut(constEdge);
if (!constBlock->restrictedByConditional(bl)) continue; // Make sure condition holds
propagateConstant(varVn,constVn,constBlock,data);
propagateConstant(varVn,constVn,constBlock,useMultiequal,data);
}
return 0;
}

View file

@ -565,6 +565,14 @@ public:
/// \brief Propagate conditional constants
class ActionConditionalConst : public Action {
static void clearMarks(const vector<PcodeOp *> &opList);
static void collectReachable(Varnode *vn,vector<PcodeOpNode> &phiNodeEdges,vector<PcodeOp *> &reachable);
static bool flowToAlternatePath(PcodeOp *op);
static bool flowTogether(const vector<PcodeOpNode> &edges,int4 i,vector<int4> &result);
static Varnode *placeCopy(PcodeOp *op,BlockBasic *bl,Varnode *constVn,Funcdata &data);
static void placeMultipleConstants(vector<PcodeOpNode> &phiNodeEdges,vector<int4> &marks,Varnode *constVn,Funcdata &data);
void handlePhiNodes(Varnode *varVn,Varnode *constVn,vector<PcodeOpNode> &phiNodeEdges,Funcdata &data);
void propagateConstant(Varnode *varVn,Varnode *constVn,FlowBlock *constBlock,bool useMultiequal,Funcdata &data);
public:
ActionConditionalConst(const string &g) : Action(0,"condconst",g) {} ///< Constructor
virtual Action *clone(const ActionGroupList &grouplist) const {
@ -572,7 +580,6 @@ public:
return new ActionConditionalConst(getGroup());
}
virtual int4 apply(Funcdata &data);
void propagateConstant(Varnode *varVn,Varnode *constVn,FlowBlock *constBlock,Funcdata &data);
};
/// \brief Normalize jump-table construction.

View file

@ -252,6 +252,7 @@ struct PcodeOpNode {
int4 slot; ///< Slot indicating the input Varnode end-point of the edge
PcodeOpNode(void) { op = (PcodeOp *)0; slot = 0; } ///< Unused constructor
PcodeOpNode(PcodeOp *o,int4 s) { op = o; slot = s; } ///< Constructor
bool operator<(const PcodeOpNode &op2) const; ///< Simple comparator for putting edges in a sorted container
};
/// \brief A node in a tree structure of CPUI_PIECE operations
@ -357,4 +358,17 @@ extern int4 functionalEqualityLevel(Varnode *vn1,Varnode *vn2,Varnode **res1,Var
extern bool functionalEquality(Varnode *vn1,Varnode *vn2);
extern bool functionalDifference(Varnode *vn1,Varnode *vn2,int4 depth);
/// Edges on different PcodeOps are ordered by the time field of each op's sequence number.
/// Edges on the same PcodeOp (identical pointer) are ordered by slot.
/// \param op2 is the other edge to compare with \b this
/// \return true if \b this should come before the other edge
inline bool PcodeOpNode::operator<(const PcodeOpNode &op2) const
{
  if (op == op2.op)
    return (slot < op2.slot);		// Same op: the slot decides, equal slots are not "less"
  return (op->getSeqNum().getTime() < op2.op->getSeqNum().getTime());
}
#endif

View file

@ -0,0 +1,59 @@
<decompilertest>
<binaryimage arch="x86:LE:64:default:gcc">
<!--
Examples of varnode values being used at points where they are known to be constant.
The use should be replaced with a constant Varnode in most cases. The example in
condconst_conn is a situation where the varnode should not be replaced.
-->
<bytechunk space="ram" offset="0x1006fa" readonly="true">
554889e54889
7df88975f48955f0894dec837df40075
0e8b55f48b45f001c2488b45f8891048
8b45f8488d50048b45f48902837df40a
750d488b45f8488d50088b45f4890283
7dec077412488b45f84883c00c8b55ec
83c2088910eb10488b45f84883c0108b
55ec83c2098910905dc3554889e5897d
fc837dfc0075098b45fc890594082000
8b45fc890597082000837dfc0a75128b
45fc8905800820008b45fc89057b0820
00905dc3554889e5897dec8975e8c745
fc0a000000837dec007517837de80a75
088b45ec8945fceb0fc745fc14000000
eb068b45ec8945fc8b45fc5dc3
</bytechunk>
<symbol space="ram" offset="0x1006fa" name="condconst1"/>
<symbol space="ram" offset="0x10076a" name="condconst_copy"/>
<symbol space="ram" offset="0x1007a4" name="condconst_conn"/>
</binaryimage>
<script>
<com>parse line extern void condconst1(int4 *ptr,int4 a,int4 b,int4 c);</com>
<com>parse line extern void condconst_copy(int4 d);</com>
<com>parse line extern int4 condconst_conn(int4 x,int4 y);</com>
<com>map addr r0x301014 int4 glob1</com>
<com>map addr r0x301020 int4 glob2</com>
<com>map addr r0x301018 int4 glob3</com>
<com>map addr r0x30101c int4 glob4</com>
<com>lo fu condconst1</com>
<com>decompile</com>
<com>print C</com>
<com>lo fu condconst_copy</com>
<com>decompile</com>
<com>print C</com>
<com>lo fu condconst_conn</com>
<com>decompile</com>
<com>print C</com>
</script>
<stringmatch name="Conditional Constant #1" min="1" max="1">\*ptr = b;</stringmatch>
<stringmatch name="Conditional Constant #2" min="1" max="1">ptr\[1\] = a;</stringmatch>
<stringmatch name="Conditional Constant #3" min="1" max="1">ptr\[2\] = 10;</stringmatch>
<stringmatch name="Conditional Constant #4" min="1" max="1">ptr\[3\] = c \+ 8;</stringmatch>
<stringmatch name="Conditional Constant #5" min="1" max="1">ptr\[4\] = 0x10;</stringmatch>
<stringmatch name="Conditional Constant #6" min="1" max="1">glob1 = 0;</stringmatch>
<stringmatch name="Conditional Constant #7" min="1" max="1">glob2 = d;</stringmatch>
<stringmatch name="Conditional Constant #8" min="1" max="1">glob3 = 10;</stringmatch>
<stringmatch name="Conditional Constant #9" min="1" max="1">glob4 = 10;</stringmatch>
<stringmatch name="Conditional Constant #10" min="1" max="1">iStack_c = x;</stringmatch>
<stringmatch name="Conditional Constant #11" min="1" max="1">iStack_c = 0x14;</stringmatch>
<stringmatch name="Conditional Constant #12" min="0" max="0">iStack_c = 10;</stringmatch>
</decompilertest>

View file

@ -0,0 +1,29 @@
<decompilertest>
<binaryimage arch="x86:LE:64:default:gcc">
<!--
An example of a conditional constant propagated along multiple paths that flow together
-->
<bytechunk space="ram" offset="0x100825" readonly="true">
554889e5897dfc8975f883
7dfc0075098b45fc8905d60720008b45
fc8905d9072000837dfc0a75188b45fc
8905c2072000837df8067f098b45fc89
05b7072000905dc3
</bytechunk>
<symbol space="ram" offset="0x100825" name="condconst_multi"/>
</binaryimage>
<script>
<com>parse line extern int4 condconst_multi(int4 a,int4 b);</com>
<com>map addr r0x301014 int4 glob1</com>
<com>map addr r0x301020 int4 glob2</com>
<com>map addr r0x301018 int4 glob3</com>
<com>map addr r0x30101c int4 glob4</com>
<com>lo fu condconst_multi</com>
<com>decompile</com>
<com>print C</com>
</script>
<stringmatch name="Conditional Multi #1" min="1" max="1">glob1 = 0;</stringmatch>
<stringmatch name="Conditional Multi #2" min="1" max="1">glob2 = a;</stringmatch>
<stringmatch name="Conditional Multi #3" min="1" max="1">glob3 = 10[,;]</stringmatch>
<stringmatch name="Conditional Multi #4" min="1" max="1">glob4 = 10;</stringmatch>
</decompilertest>