ghidra/Ghidra/Features/Decompiler/src/decompile/cpp/blockaction.hh

/* ###
 * IP: GHIDRA
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __BLOCKACTION_HH__
#define __BLOCKACTION_HH__

/// \file blockaction.hh
/// \brief Actions and classes associated with transforming and structuring the control-flow graph

#include "action.hh"

namespace ghidra {

/// \brief Class for holding an edge while the underlying graph is being manipulated
///
/// The original FlowBlock nodes that define the end-points of the edge may get
/// collapsed, but the edge may still exist between higher level components.
/// The edge can still be retrieved via the getCurrentEdge() method.
class FloatingEdge {
  FlowBlock *top;		///< Starting FlowBlock of the edge
  FlowBlock *bottom;		///< Ending FlowBlock of the edge
public:
  FloatingEdge(FlowBlock *t,FlowBlock *b) { top = t; bottom = b; }	///< Construct given end points
  FlowBlock *getTop(void) const { return top; }			///< Get the starting FlowBlock
  FlowBlock *getBottom(void) const { return bottom; }		///< Get the ending FlowBlock
  FlowBlock *getCurrentEdge(int4 &outedge,FlowBlock *graph);	///< Get the current form of the edge
};

/// \brief A description of the body of a loop.
///
/// Following Tarjan, assuming there are no \e irreducible edges, a loop body is defined
/// by the \e head (or entry-point) and 1 or more tails, which each have a \e back \e edge into
/// the head.
class LoopBody {
  FlowBlock *head;		///< head of the loop
  vector<FlowBlock *> tails;	///< (Possibly multiple) nodes with back edge returning to the head
  int4 depth;			///< Nested depth of this loop
  int4 uniquecount;		///< Total number of unique head and tail nodes
  FlowBlock *exitblock;		///< Official exit block from loop, or NULL
  list<FloatingEdge> exitedges;	///< Edges that exit to the formal exit block
  LoopBody *immed_container;	///< Immediately containing loop body, or NULL
  void extendToContainer(const LoopBody &container,vector<FlowBlock *> &body) const;
public:
  LoopBody(FlowBlock *h) { head=h; immed_container = (LoopBody *)0; depth=0; }	///< Construct with a loop head
  FlowBlock *getHead(void) const { return head; }			///< Return the head FlowBlock of the loop
  FlowBlock *getCurrentBounds(FlowBlock **top,FlowBlock *graph);	///< Return current loop bounds (\b head and \b bottom).
  void addTail(FlowBlock *bl) { tails.push_back(bl); }			///< Add a \e tail to the loop
  FlowBlock *getExitBlock(void) const { return exitblock; }		///< Get the exit FlowBlock or NULL
  void findBase(vector<FlowBlock *> &body);				///< Mark the body FlowBlocks of \b this loop
  void extend(vector<FlowBlock *> &body) const;				///< Extend body (to blocks that never exit)
  void findExit(const vector<FlowBlock *> &body);			///< Choose the exit block for \b this loop
  void orderTails(void);						///< Find preferred \b tail
  void labelExitEdges(const vector<FlowBlock *> &body);			///< Label edges that exit the loop
  void labelContainments(const vector<FlowBlock *> &body,const vector<LoopBody *> &looporder);
  void emitLikelyEdges(list<FloatingEdge> &likely,FlowBlock *graph);	///< Collect likely \e unstructured edges
  void setExitMarks(FlowBlock *graph);					///< Mark all the exits to this loop
  void clearExitMarks(FlowBlock *graph);				///< Clear the mark on all the exits to this loop
  bool operator<(const LoopBody &op2) const { return (depth > op2.depth); }	///< Order loop bodies by depth
  static void mergeIdenticalHeads(vector<LoopBody *> &looporder);	///< Merge loop bodies that share the same \e head
  static bool compare_ends(LoopBody *a,LoopBody *b);			///< Compare the \b head then \b tail
  static int4 compare_head(LoopBody *a,FlowBlock *looptop);		///< Compare just the \b head
  static LoopBody *find(FlowBlock *looptop,const vector<LoopBody *> &looporder);	///< Find a LoopBody
  static void clearMarks(vector<FlowBlock *> &body);			///< Clear the body marks
};

/// \brief Algorithm for selecting unstructured edges based an Directed Acyclic Graphs (DAG)
///
/// With the exception of the back edges in loops, structured code tends to form a DAG.
/// Within the DAG, all building blocks of structured code have a single node entry point
/// and (at most) one exit block. Given root points, this class traces edges with this kind of
/// structure.  Paths can recursively split at any point, starting a new \e active BranchPoint, but
/// the BranchPoint can't be \e retired until all paths emanating from its start either terminate
/// or come back together at the same FlowBlock node. Once a BranchPoint is retired, all the edges
/// traversed from the start FlowBlock to the end FlowBlock are likely structurable. After pushing
/// the traces as far as possible and retiring as much as possible, any \e active edge left
/// is a candidate for an unstructured branch.
///
/// Ultimately this produces a list of \e likely \e gotos, which is used whenever the structuring
/// algorithm (ActionBlockStructure) gets stuck.
///
/// The tracing can be restricted to a \e loopbody by setting the top FlowBlock of the loop as
/// the root, and the loop exit block as the finish block.  Additionally, any edges that
/// exit the loop should be marked using LoopBody::setExitMarks().
class TraceDAG {

  struct BlockTrace;

  /// A node in the control-flow graph with multiple outgoing edges in the DAG. Ideally, all
  /// these paths eventually merge at the same node.
  struct BranchPoint {
    BranchPoint *parent;	///< The parent BranchPoint along which \b this is only one path
    int4 pathout;		///< Index (of the out edge from the parent) of the path along which \b this lies
    FlowBlock *top;		///< FlowBlock that embodies the branch point
    vector<BlockTrace *> paths;	///< BlockTrace for each possible path out of \b this BlockPoint
    int4 depth;			///< Depth of BranchPoints from the root
    bool ismark;		///< Possible mark
    void createTraces(void);	///< Given the BlockTrace objects, given a new BranchPoint
  public:
    void markPath(void);	///< Mark a path from \b this up to the root BranchPoint
    int4 distance(BranchPoint *op2);	///< Calculate distance between two BranchPoints
    FlowBlock *getPathStart(int4 i);	///< Get the start of the i-th BlockTrace
    BranchPoint(void);		///< Create the (unique) root branch point
    BranchPoint(BlockTrace *parenttrace);	///< Construct given a parent BlockTrace
    ~BranchPoint(void);		///< BranchPoint owns its BlockTraces
  };

  /// \brief A trace of a single path out of a BranchPoint
  ///
  /// Once a BranchPoint is retired with 1 outgoing edge, the multiple paths coming out of
  /// the BranchPoint are considered a single path for the parent BlockTrace.
  struct BlockTrace {
    enum {
      f_active = 1,		///< This BlockTrace is \e active.
      f_terminal = 2		///< All paths from this point exit (without merging back to parent)
    };
    uint4 flags;		///< Properties of the BlockTrace
    BranchPoint *top;		///< Parent BranchPoint for which this is a path
    int4 pathout;		///< Index of the out-edge for this path (relative to the parent BranchPoint)
    FlowBlock *bottom;		///< Current node being traversed along 1 path from decision point
    FlowBlock *destnode;	///< Next FlowBlock node \b this BlockTrace will try to push into
    int4 edgelump;		///< If >1, edge to \b destnode is "virtual" representing multiple edges coming together
    list<BlockTrace *>::iterator activeiter; ///< Position of \b this in the active trace list
    BranchPoint *derivedbp;	///< BranchPoint blocker \b this traces into
  public:
    BlockTrace(BranchPoint *t,int4 po,int4 eo);		///< Construct given a parent BranchPoint and path index
    BlockTrace(BranchPoint *root,int4 po,FlowBlock *bl);	///< Construct a root BlockTrace
    bool isActive(void) const { return ((flags & f_active)!=0); }	///< Return \b true if \b this is active
    bool isTerminal(void) const { return ((flags & f_terminal)!=0); }	///< Return \b true is \b this terminates
  };

  /// \brief Record for scoring a BlockTrace for suitability as an unstructured branch
  ///
  /// This class holds various metrics about BlockTraces that are used to sort them.
  struct BadEdgeScore {
    FlowBlock *exitproto;	///< Putative exit block for the BlockTrace
    BlockTrace *trace;		///< The active BlockTrace being considered
    int4 distance;		///< Minimum distance crossed by \b this and any other BlockTrace sharing same exit block
    int4 terminal;		///< 1 if BlockTrace destination has no exit, 0 otherwise
    int4 siblingedge;		///< Number of active BlockTraces with same BranchPoint and exit as \b this
    bool compareFinal(const BadEdgeScore &op2) const;	///< Compare BadEdgeScore for unstructured suitability
    bool operator<(const BadEdgeScore &op2) const;	///< Compare for grouping
  };

  list<FloatingEdge> &likelygoto;	///< A reference to the list of likely goto edges being produced
  vector<FlowBlock *> rootlist;		///< List of root FlowBlocks to trace from
  vector<BranchPoint *> branchlist;	///< Current set of BranchPoints that have been traced
  int4 activecount;			///< Number of active BlockTrace objects
  int4 missedactivecount;		///< Current number of active BlockTraces that can't be pushed further
  list<BlockTrace *> activetrace;	///< The list of \e active BlockTrace objects
  list<BlockTrace *>::iterator current_activeiter;	///< The current \e active BlockTrace being pushed
  FlowBlock *finishblock;		///< Designated exit block for the DAG (or null)
  void removeTrace(BlockTrace *trace);	///< Remove the indicated BlockTrace
  void processExitConflict(list<BadEdgeScore>::iterator start,list<BadEdgeScore>::iterator end);
  BlockTrace *selectBadEdge(void);	///< Select the the most likely unstructured edge from active BlockTraces
  void insertActive(BlockTrace *trace);	///< Move a BlockTrace into the \e active category
  void removeActive(BlockTrace *trace);	///< Remove a BlockTrace from the \e active category
  bool checkOpen(BlockTrace *trace);	///< Check if we can push the given BlockTrace into its next node
  list<BlockTrace *>::iterator openBranch(BlockTrace *parent);	///< Open a new BranchPoint along a given BlockTrace
  bool checkRetirement(BlockTrace *trace,FlowBlock *&exitblock);	///< Check if a given BlockTrace can be retired
  list<BlockTrace *>::iterator retireBranch(BranchPoint *bp,FlowBlock *exitblock);
  void clearVisitCount(void);		/// Clear the \b visitcount field of any FlowBlock we have modified
public:
  TraceDAG(list<FloatingEdge> &lg);	///< Construct given the container for likely unstructured edges
  ~TraceDAG(void);			///< Destructor
  void addRoot(FlowBlock *root) { rootlist.push_back(root); }	///< Add a root FlowBlock to the trace
  void initialize(void);		///< Create the initial BranchPoint and BlockTrace objects
  void pushBranches(void);		///< Push the trace through, removing edges as necessary
  void setFinishBlock(FlowBlock *bl) { finishblock = bl; }	///< Mark an exit point not to trace beyond
};

/// \brief Build a code structure from a control-flow graph (BlockGraph).
///
/// This class manages the main control-flow structuring algorithm for the decompiler.
/// In short:
///    - Start with a control-flow graph of basic blocks.
///    - Repeatedly apply:
///       - Search for sub-graphs matching specific code structure elements.
///       - Note the structure element and collapse the component nodes to a single node.
///    - If the process gets stuck, remove appropriate edges, marking them as unstructured.
class CollapseStructure {
  bool finaltrace;				///< Have we a made search for unstructured edges in the final DAG
  bool likelylistfull;				///< Have we generated a \e likely \e goto list for the current innermost loop
  list<FloatingEdge> likelygoto;		///< The current \e likely \e goto list
  list<FloatingEdge>::iterator likelyiter;	///< Iterator to the next most \e likely \e goto edge
  list<LoopBody> loopbody;			///< The list of loop bodies for this control-flow graph
  list<LoopBody>::iterator loopbodyiter;	///< Current (innermost) loop being structured
  BlockGraph &graph;				///< The control-flow graph
  int4 dataflow_changecount;			///< Number of data-flow changes made during structuring
  bool checkSwitchSkips(FlowBlock *switchbl,FlowBlock *exitblock);
  void onlyReachableFromRoot(FlowBlock *root,vector<FlowBlock *> &body);
  int4 markExitsAsGotos(vector<FlowBlock *> &body);	///< Mark edges exiting the body as \e unstructured gotos
  bool clipExtraRoots(void);			///< Mark edges between root components as \e unstructured gotos
  void labelLoops(vector<LoopBody *> &looporder);	///< Identify all the loops in this graph
  void orderLoopBodies(void);			///< Identify and label all loop structure for this graph
  bool updateLoopBody(void);			///< Find likely \e unstructured edges within the innermost loop body
  FlowBlock *selectGoto(void);			///< Select an edge to mark as  \e unstructured
  bool ruleBlockGoto(FlowBlock *bl);		///< Attempt to apply the BlockGoto structure
  bool ruleBlockCat(FlowBlock *bl);		///< Attempt to apply a BlockList structure
  bool ruleBlockOr(FlowBlock *bl);		///< Attempt to apply a BlockCondition structure
  bool ruleBlockProperIf(FlowBlock *bl);	///< Attempt to apply a 2 component form of BlockIf
  bool ruleBlockIfElse(FlowBlock *bl);		///< Attempt to apply a 3 component form of BlockIf
  bool ruleBlockIfNoExit(FlowBlock *bl);	///< Attempt to apply BlockIf where the body does not exit
  bool ruleBlockWhileDo(FlowBlock *bl);		///< Attempt to apply the BlockWhileDo structure
  bool ruleBlockDoWhile(FlowBlock *bl);		///< Attempt to apply the BlockDoWhile structure
  bool ruleBlockInfLoop(FlowBlock *bl);		///< Attempt to apply the BlockInfLoop structure
  bool ruleBlockSwitch(FlowBlock *bl);		///< Attempt to apply the BlockSwitch structure
  bool ruleCaseFallthru(FlowBlock *bl);		///< Attempt to one switch case falling through to another
  int4 collapseInternal(FlowBlock *targetbl);	///< The main collapsing loop
  void collapseConditions(void);		///< Simplify conditionals
public:
  CollapseStructure(BlockGraph &g);		///< Construct given a control-flow graph
  int4 getChangeCount(void) const { return dataflow_changecount; }	///< Get number of data-flow changes
  void collapseAll(void);			///< Run the whole algorithm
};

/// \brief Discover and eliminate \e split conditions
///
/// A \b split condition is when a conditional expression, resulting in a CBRANCH,
/// is duplicated across two blocks that would otherwise merge.
/// Instead of a single conditional in a merged block,
/// there are two copies of the conditional, two splitting blocks and no direct merge.
class ConditionalJoin {
  /// \brief A pair of Varnode objects that have been split (and should be merged)
  struct MergePair {
    Varnode *side1;		///< Varnode coming from block1
    Varnode *side2;		///< Varnode coming from block2
    MergePair(Varnode *s1,Varnode *s2) { side1 = s1; side2 = s2; }	///< Construct from Varnode objects
    bool operator<(const MergePair &op2) const;				///< Lexicographic comparator
  };
  Funcdata &data;			///< The function being analyzed
  BlockBasic *block1;			///< Side 1 of the (putative) split
  BlockBasic *block2;			///< Side 2 of the (putative) split
  BlockBasic *exita;			///< First (common) exit point
  BlockBasic *exitb;			///< Second (common) exit point
  int4 a_in1;				///< In edge of \b exita coming from \b block1
  int4 a_in2;				///< In edge of \b exita coming from \b block2
  int4 b_in1;				///< In edge of \b exitb coming from \b block1
  int4 b_in2;				///< In edge of \b exitb coming from \b block2
  PcodeOp *cbranch1;			///< CBRANCH at bottom of \b block1
  PcodeOp *cbranch2;			///< CBRANCH at bottom of \b block2
  BlockBasic *joinblock;		///< The new joined condition block
  map<MergePair,Varnode *> mergeneed;	///< Map from the MergePair of Varnodes to the merged Varnode
  bool findDups(void);			///< Search for duplicate conditional expressions
  void checkExitBlock(BlockBasic *exit,int4 in1,int4 in2);
  void cutDownMultiequals(BlockBasic *exit,int4 in1,int4 in2);
  void setupMultiequals(void);		///< Join the Varnodes in the new \b joinblock
  void moveCbranch(void);	 	//< Move one of the duplicated CBRANCHs into the new \b joinblock
public:
  ConditionalJoin(Funcdata &fd) : data(fd) { }	///< Constructor
  bool match(BlockBasic *b1,BlockBasic *b2);	///< Test blocks for the merge condition
  void execute(void);				///< Execute the merge
  void clear(void);				///< Clear for a new test
};

/// \brief Give each control-flow structure an opportunity to make a final transform
///
/// This is currently used to set up \e for loops via BlockWhileDo
class ActionStructureTransform : public Action {
public:
  ActionStructureTransform(const string &g) : Action(0,"structuretransform",g) {}	///< Constructor
  virtual Action *clone(const ActionGroupList &grouplist) const {
    if (!grouplist.contains(getGroup())) return (Action *)0;
    return new ActionStructureTransform(getGroup());
  }
  virtual int4 apply(Funcdata &data);
};

/// \brief Flip conditional control-flow so that \e preferred comparison operators are used
///
/// This is used as an alternative to the standard algorithm that structures control-flow, when
/// normalization of the data-flow is important but structured source code doesn't need to be emitted.
class ActionNormalizeBranches : public Action {
public:
  ActionNormalizeBranches(const string &g) : Action(0,"normalizebranches",g) {}	///< Constructor
  virtual Action *clone(const ActionGroupList &grouplist) const {
    if (!grouplist.contains(getGroup())) return (Action *)0;
    return new ActionNormalizeBranches(getGroup());
  }
  virtual int4 apply(Funcdata &data);
};

/// \brief  Attempt to normalize symmetric block structures.
///
/// This is used in conjunction with the action ActionBlockStructure
/// to make the most natural choice, when there is a choice in how code is structured.
/// This uses the preferComplement() method on structured FlowBlocks to choose between symmetric
/// structurings, such as an if/else where the \b true and \b false blocks can be swapped.
class ActionPreferComplement : public Action {
public:
  ActionPreferComplement(const string &g) : Action(0,"prefercomplement",g) {}	///< Constructor
  virtual Action *clone(const ActionGroupList &grouplist) const {
    if (!grouplist.contains(getGroup())) return (Action *)0;
    return new ActionPreferComplement(getGroup());
  }
  virtual int4 apply(Funcdata &data);
};

/// \brief Structure control-flow using standard high-level code constructs.
class ActionBlockStructure : public Action {
public:
  ActionBlockStructure(const string &g) : Action(0,"blockstructure",g) {}	///< Constructor
  virtual Action *clone(const ActionGroupList &grouplist) const {
    if (!grouplist.contains(getGroup())) return (Action *)0;
    return new ActionBlockStructure(getGroup());
  }
  virtual int4 apply(Funcdata &data);
};

/// \brief Perform final organization of the control-flow structure
///
/// Label unstructured edges, order switch cases, and order disjoint components of the control-flow
class ActionFinalStructure : public Action {
public:
  ActionFinalStructure(const string &g) : Action(0,"finalstructure",g) {}	///< Constructor
  virtual Action *clone(const ActionGroupList &grouplist) const {
    if (!grouplist.contains(getGroup())) return (Action *)0;
    return new ActionFinalStructure(getGroup());
  }
  virtual int4 apply(Funcdata &data);
};

/// \brief Split the epilog code of the function
///
/// Introduce RETURN operations corresponding to individual branches flowing to the epilog.
class ActionReturnSplit : public Action {
  static void gatherReturnGotos(FlowBlock *parent,vector<FlowBlock *> &vec);
  static bool isSplittable(BlockBasic *b);		///< Determine if a RETURN block can be split
public:
  ActionReturnSplit(const string &g) : Action(0,"returnsplit",g) {}		///< Constructor
  virtual Action *clone(const ActionGroupList &grouplist) const {
    if (!grouplist.contains(getGroup())) return (Action *)0;
    return new ActionReturnSplit(getGroup());
  }
  virtual int4 apply(Funcdata &data);
};

/// \brief  Look for conditional branch expressions that have been split and rejoin them
class ActionNodeJoin : public Action {
public:
  ActionNodeJoin(const string &g) : Action(0,"nodejoin",g) {}			///< Constructor
  virtual Action *clone(const ActionGroupList &grouplist) const {
    if (!grouplist.contains(getGroup())) return (Action *)0;
    return new ActionNodeJoin(getGroup());
  }
  virtual int4 apply(Funcdata &data);
};

} // End namespace ghidra
#endif