GP-3696 - cleaning up function compare windows.

This commit is contained in:
ghidragon 2024-05-23 11:26:17 -04:00
parent 770f5447e1
commit 5ea8e97805
77 changed files with 4065 additions and 5654 deletions

View file

@ -15,6 +15,8 @@
*/
package ghidra.program.model.correlate;
import static ghidra.util.datastruct.Duo.Side.*;
import java.util.*;
import java.util.Map.Entry;
@ -23,7 +25,9 @@ import ghidra.program.model.address.AddressSetView;
import ghidra.program.model.block.CodeBlock;
import ghidra.program.model.listing.*;
import ghidra.program.model.mem.MemoryAccessException;
import ghidra.program.util.FunctionAddressCorrelation;
import ghidra.program.util.ListingAddressCorrelation;
import ghidra.util.datastruct.Duo;
import ghidra.util.datastruct.Duo.Side;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.TaskMonitor;
@ -41,7 +45,6 @@ import ghidra.util.task.TaskMonitor;
* 5) Sequences with no corresponding match are also removed from consideration.
* 6) Sequences are limited to a single basic-block, and the algorithm is basic-block aware.
* Once a match establishes a correspondence between a pair of basic blocks, the algorithm uses
* that information to further narrow in on and disambiguate matching sequences.
* 7) If a particular sequence has matches that are not unique, the algorithm tries to disambiguate the potential
* matches by looking at parent/child relationships of the containing basic-blocks. (see DisambiguateStrategy)
* 8) Multiple passes are attempted, each time the set of potential sequences is completely regenerated,
@ -49,7 +52,7 @@ import ghidra.util.task.TaskMonitor;
* allows matches discovered by earlier passes to disambiguate sequences in later passes.
*
*/
public class HashedFunctionAddressCorrelation implements FunctionAddressCorrelation {
public class HashedFunctionAddressCorrelation implements ListingAddressCorrelation {
/**
* A helper class for sorting through, disambiguating, sequences with identical hashes
@ -59,64 +62,54 @@ public class HashedFunctionAddressCorrelation implements FunctionAddressCorrelat
public Hash hash; // The disambiguating (secondary) hash
public int count; // Number of sequences (n-grams) in the subset matching the secondary hash
public InstructHash instruct; // (Starting Instruction of) the n-gram
public DisambiguatorEntry(Hash h,InstructHash inst) {
public DisambiguatorEntry(Hash h, InstructHash inst) {
hash = h;
instruct = inst;
count = 1;
}
}
private Function srcFunction;
private Function destFunction;
private TreeMap<Address,Address> srcToDest; // Final source -> destination address mapping
private TreeMap<Address,Address> destToSrc; // Final destination -> source address mapping
private Duo<Function> functions;
private TreeMap<Address, Address> srcToDest; // Final source -> destination address mapping
private TreeMap<Address, Address> destToSrc; // Final destination -> source address mapping
private HashStore srcStore; // Sorted list of source n-grams from which to draw potential matches
private HashStore destStore; // List of destination n-grams
private HashCalculator hashCalc; // Object that calculates n-gram hashes
private TaskMonitor monitor;
/**
* Correlates addresses between the two specified functions.
* @param function1 the first function
* @param function2 the second function
* @param mon the task monitor that indicates progress and allows the user to cancel.
* @param leftFunction the first function
* @param rightFunction the second function
* @param monitor the task monitor that indicates progress and allows the user to cancel.
* @throws CancelledException if the user cancels
* @throws MemoryAccessException if either function's memory can't be accessed.
* @throws IllegalArgumentException if leftFunction or rightFunction is null
*/
public HashedFunctionAddressCorrelation(Function function1, Function function2, TaskMonitor mon)
throws CancelledException, MemoryAccessException {
srcFunction = function1;
destFunction = function2;
monitor = mon;
srcToDest = new TreeMap<Address,Address>();
destToSrc = new TreeMap<Address,Address>();
if (function1 == null || function2 == null)
return;
srcStore = new HashStore(function1, monitor);
destStore = new HashStore(function2, monitor);
public HashedFunctionAddressCorrelation(Function leftFunction, Function rightFunction,
TaskMonitor monitor) throws CancelledException, MemoryAccessException {
// Reject null functions before any state is built; both hash stores below are
// constructed directly from these arguments.
if (leftFunction == null || rightFunction == null) {
throw new IllegalArgumentException("Functions can't be null!");
}
this.functions = new Duo<>(leftFunction, rightFunction);
this.monitor = monitor;
srcToDest = new TreeMap<Address, Address>();
destToSrc = new TreeMap<Address, Address>();
// The left function feeds the "source" store and the right function the "destination" store.
srcStore = new HashStore(leftFunction, monitor);
destStore = new HashStore(rightFunction, monitor);
hashCalc = new MnemonicHashCalculator();
calculate();
// buildFinalMaps() inverts srcToDest into destToSrc, so it has to run after matching.
buildFinalMaps();
}
@Override
public Program getFirstProgram() {
return srcFunction.getProgram();
public Program getProgram(Side side) {
return functions.get(side).getProgram();
}
@Override
public Program getSecondProgram() {
return destFunction.getProgram();
}
@Override
public AddressSetView getAddressesInFirst() {
return srcFunction.getBody();
}
@Override
public AddressSetView getAddressesInSecond() {
return destFunction.getBody();
public AddressSetView getAddresses(Side side) {
return functions.get(side).getBody();
}
/**
@ -126,7 +119,7 @@ public class HashedFunctionAddressCorrelation implements FunctionAddressCorrelat
public int getTotalInstructionsInFirst() {
return srcStore.getTotalInstructions();
}
/**
* Gets the total number of instructions that are in the second function.
* @return the second function's instruction count.
@ -161,7 +154,7 @@ public class HashedFunctionAddressCorrelation implements FunctionAddressCorrelat
public List<Instruction> getUnmatchedInstructionsInFirst() {
return srcStore.getUnmatchedInstructions();
}
/**
* Determines the number of instructions from the second function that do not match an
* instruction in the first function.
@ -180,7 +173,8 @@ public class HashedFunctionAddressCorrelation implements FunctionAddressCorrelat
* @param destInstruct is (the starting Instruction of) the destination n-gram
* @throws MemoryAccessException
*/
private void declareMatch(HashEntry srcEntry,InstructHash srcInstruct,HashEntry destEntry,InstructHash destInstruct) throws MemoryAccessException {
private void declareMatch(HashEntry srcEntry, InstructHash srcInstruct, HashEntry destEntry,
InstructHash destInstruct) throws MemoryAccessException {
boolean cancelMatch = false;
int matchSize = srcEntry.hash.size;
// It's possible that some instructions of the n-gram have already been matched
@ -192,7 +186,8 @@ public class HashedFunctionAddressCorrelation implements FunctionAddressCorrelat
destStore.removeHash(destEntry); // Remove this HashEntry
cancelMatch = true; // Cancel the match
}
if (cancelMatch) return;
if (cancelMatch)
return;
ArrayList<Instruction> srcInstructVec = new ArrayList<Instruction>();
ArrayList<Instruction> destInstructVec = new ArrayList<Instruction>();
ArrayList<CodeBlock> srcBlockVec = new ArrayList<CodeBlock>();
@ -202,7 +197,7 @@ public class HashedFunctionAddressCorrelation implements FunctionAddressCorrelat
HashStore.extendMatch(matchSize, srcInstruct, srcMatch, destInstruct, destMatch, hashCalc);
srcStore.matchHash(srcMatch, srcInstructVec, srcBlockVec);
destStore.matchHash(destMatch, destInstructVec, destBlockVec);
for(int i=0;i<srcInstructVec.size();++i)
for (int i = 0; i < srcInstructVec.size(); ++i)
srcToDest.put(srcInstructVec.get(i).getAddress(), destInstructVec.get(i).getAddress());
}
@ -219,16 +214,16 @@ public class HashedFunctionAddressCorrelation implements FunctionAddressCorrelat
private static TreeMap<Hash, DisambiguatorEntry> constructDisambiguatorTree(HashEntry entry,
HashStore store, DisambiguateStrategy strategy)
throws CancelledException, MemoryAccessException {
TreeMap<Hash,DisambiguatorEntry> entryMap = new TreeMap<Hash,DisambiguatorEntry>();
TreeMap<Hash, DisambiguatorEntry> entryMap = new TreeMap<Hash, DisambiguatorEntry>();
int matchSize = entry.hash.size;
for(InstructHash curInstruct : entry.instList) {
for (InstructHash curInstruct : entry.instList) {
ArrayList<Hash> hashList = strategy.calcHashes(curInstruct, matchSize, store);
Iterator<Hash> iter = hashList.iterator();
while(iter.hasNext()) {
while (iter.hasNext()) {
Hash curHash = iter.next();
DisambiguatorEntry curEntry = entryMap.get(curHash);
if (curEntry == null) {
curEntry = new DisambiguatorEntry(curHash,curInstruct);
curEntry = new DisambiguatorEntry(curHash, curInstruct);
entryMap.put(curHash, curEntry);
}
else
@ -247,24 +242,30 @@ public class HashedFunctionAddressCorrelation implements FunctionAddressCorrelat
* @throws CancelledException
* @throws MemoryAccessException
*/
private int disambiguateNgramsWithStrategy(DisambiguateStrategy strategy,HashEntry srcEntry,HashEntry destEntry) throws CancelledException, MemoryAccessException {
private int disambiguateNgramsWithStrategy(DisambiguateStrategy strategy, HashEntry srcEntry,
HashEntry destEntry) throws CancelledException, MemoryAccessException {
TreeMap<Hash, DisambiguatorEntry> srcDisambig =
constructDisambiguatorTree(srcEntry, srcStore, strategy);
TreeMap<Hash, DisambiguatorEntry> destDisambig =
constructDisambiguatorTree(destEntry, destStore, strategy);
int count = 0;
Iterator<DisambiguatorEntry> iter = srcDisambig.values().iterator();
while(iter.hasNext()) {
while (iter.hasNext()) {
DisambiguatorEntry srcDisEntry = iter.next();
if (srcDisEntry.count != 1) continue;
if (srcDisEntry.count != 1)
continue;
// It's possible for this InstructHash to have been matched by an earlier DisambiguatorEntry
if (srcDisEntry.instruct.isMatched) continue;
if (srcDisEntry.instruct.isMatched)
continue;
DisambiguatorEntry destDisEntry = destDisambig.get(srcDisEntry.hash);
if (destDisEntry == null) continue;
if (destDisEntry.count != 1) continue;
if (destDisEntry.instruct.isMatched) continue;
if (destDisEntry == null)
continue;
if (destDisEntry.count != 1)
continue;
if (destDisEntry.instruct.isMatched)
continue;
// If both sides have exactly one matching InstructHash, call it a match
declareMatch(srcEntry,srcDisEntry.instruct,destEntry,destDisEntry.instruct);
declareMatch(srcEntry, srcDisEntry.instruct, destEntry, destDisEntry.instruct);
count += 1;
}
return count;
@ -278,21 +279,27 @@ public class HashedFunctionAddressCorrelation implements FunctionAddressCorrelat
* @throws CancelledException
* @throws MemoryAccessException
*/
private boolean disambiguateMatchingNgrams(HashEntry srcEntry,HashEntry destEntry) throws CancelledException, MemoryAccessException {
private boolean disambiguateMatchingNgrams(HashEntry srcEntry, HashEntry destEntry)
throws CancelledException, MemoryAccessException {
if (srcEntry.hasDuplicateBlocks())
return false;
if (destEntry.hasDuplicateBlocks())
return false;
if (srcEntry.hash.size != destEntry.hash.size)
return false; // This likely never happens, because we know the hash values are equal
int count = disambiguateNgramsWithStrategy(new DisambiguateByParent(),srcEntry,destEntry);
if (count != 0) return true;
count = disambiguateNgramsWithStrategy(new DisambiguateByChild(),srcEntry,destEntry);
if (count != 0) return true;
count = disambiguateNgramsWithStrategy(new DisambiguateByBytes(),srcEntry,destEntry);
if (count != 0) return true;
count= disambiguateNgramsWithStrategy(new DisambiguateByParentWithOrder(),srcEntry,destEntry);
if (count != 0) return true;
int count = disambiguateNgramsWithStrategy(new DisambiguateByParent(), srcEntry, destEntry);
if (count != 0)
return true;
count = disambiguateNgramsWithStrategy(new DisambiguateByChild(), srcEntry, destEntry);
if (count != 0)
return true;
count = disambiguateNgramsWithStrategy(new DisambiguateByBytes(), srcEntry, destEntry);
if (count != 0)
return true;
count = disambiguateNgramsWithStrategy(new DisambiguateByParentWithOrder(), srcEntry,
destEntry);
if (count != 0)
return true;
return false;
}
@ -303,7 +310,7 @@ public class HashedFunctionAddressCorrelation implements FunctionAddressCorrelat
* @throws CancelledException
*/
private void findMatches() throws MemoryAccessException, CancelledException {
while(!srcStore.isEmpty() && !destStore.isEmpty()) {
while (!srcStore.isEmpty() && !destStore.isEmpty()) {
HashEntry srcEntry = srcStore.getFirstEntry();
HashEntry destEntry = destStore.getEntry(srcEntry.hash);
if (destEntry == null) {
@ -311,7 +318,8 @@ public class HashedFunctionAddressCorrelation implements FunctionAddressCorrelat
}
else if (srcEntry.instList.size() == 1 && destEntry.instList.size() == 1) {
// Found a unique match
declareMatch(srcEntry,srcEntry.instList.getFirst(),destEntry,destEntry.instList.getFirst());
declareMatch(srcEntry, srcEntry.instList.getFirst(), destEntry,
destEntry.instList.getFirst());
}
else {
HashEntry destEntry2 = destStore.getFirstEntry();
@ -321,14 +329,15 @@ public class HashedFunctionAddressCorrelation implements FunctionAddressCorrelat
}
else if (srcEntry2.instList.size() == 1 && destEntry2.instList.size() == 1) {
// Found a unique match
declareMatch(srcEntry2,srcEntry2.instList.getFirst(),destEntry2,destEntry2.instList.getFirst());
declareMatch(srcEntry2, srcEntry2.instList.getFirst(), destEntry2,
destEntry2.instList.getFirst());
}
else {
if (!disambiguateMatchingNgrams(srcEntry, destEntry))
srcStore.removeHash(srcEntry);
}
}
}
}
}
/**
@ -344,21 +353,24 @@ public class HashedFunctionAddressCorrelation implements FunctionAddressCorrelat
* @throws MemoryAccessException
* @throws CancelledException
*/
private void runPasses(int minLength,int maxLength,boolean wholeBlock,boolean matchBlock,int maxPasses) throws MemoryAccessException, CancelledException {
private void runPasses(int minLength, int maxLength, boolean wholeBlock, boolean matchBlock,
int maxPasses) throws MemoryAccessException, CancelledException {
srcStore.calcHashes(minLength, maxLength, wholeBlock, matchBlock, hashCalc);
destStore.calcHashes(minLength, maxLength, wholeBlock, matchBlock, hashCalc);
for(int pass=0;pass < maxPasses;++pass) {
for (int pass = 0; pass < maxPasses; ++pass) {
int curMatch = srcStore.numMatchedInstructions();
if (curMatch == srcStore.getTotalInstructions()) break; // quit if there are no unmatched instructions
if (curMatch == srcStore.getTotalInstructions())
break; // quit if there are no unmatched instructions
srcStore.clearSort();
destStore.clearSort();
srcStore.insertHashes();
destStore.insertHashes();
findMatches();
if (curMatch == srcStore.numMatchedInstructions()) break; // quit if no new matched instructions
}
if (curMatch == srcStore.numMatchedInstructions())
break; // quit if no new matched instructions
}
}
/**
@ -377,33 +389,40 @@ public class HashedFunctionAddressCorrelation implements FunctionAddressCorrelat
findMatches();
if (srcStore.numMatchedInstructions() == srcStore.getTotalInstructions()) return;
if (destStore.numMatchedInstructions() == destStore.getTotalInstructions()) return;
if (srcStore.numMatchedInstructions() == srcStore.getTotalInstructions())
return;
if (destStore.numMatchedInstructions() == destStore.getTotalInstructions())
return;
// Now try multiple passes of 3 and 4 long n-grams hopefully filling in a lot of small holes in our match
// given a scaffolding of previously matched basic blocks
runPasses(3,4,true,true,10);
runPasses(3, 4, true, true, 10);
if (srcStore.numMatchedInstructions() == srcStore.getTotalInstructions()) return;
if (destStore.numMatchedInstructions() == destStore.getTotalInstructions()) return;
if (srcStore.numMatchedInstructions() == srcStore.getTotalInstructions())
return;
if (destStore.numMatchedInstructions() == destStore.getTotalInstructions())
return;
// Repeat with big n-grams
int curMatch = srcStore.numMatchedInstructions();
runPasses(5,10,false,false,3);
runPasses(5, 10, false, false, 3);
if (srcStore.numMatchedInstructions() == curMatch)
return; // No progress
if (srcStore.numMatchedInstructions() == srcStore.getTotalInstructions())
return;
if (destStore.numMatchedInstructions() == destStore.getTotalInstructions())
return;
if (srcStore.numMatchedInstructions() == curMatch) return; // No progress
if (srcStore.numMatchedInstructions() == srcStore.getTotalInstructions()) return;
if (destStore.numMatchedInstructions() == destStore.getTotalInstructions()) return;
// Repeat with small n-grams
runPasses(3,4,true,true,10);
runPasses(3, 4, true, true, 10);
}
/**
* {@literal Given the src -> dest map, build the dest -> src map}
* <p>
* Reads every entry of srcToDest and stores the swapped pair (value as key, key as value)
* in destToSrc. srcToDest itself is not modified. Entries are visited in ascending
* source-address order because srcToDest is a TreeMap.
*/
private void buildFinalMaps() {
for(Entry<Address,Address> entry : srcToDest.entrySet()) {
for (Entry<Address, Address> entry : srcToDest.entrySet()) {
destToSrc.put(entry.getValue(), entry.getKey()); // Build the reverse map of srcToDest
}
}
@ -412,27 +431,20 @@ public class HashedFunctionAddressCorrelation implements FunctionAddressCorrelat
* Gets an iterator of the matching addresses from the first function to the second.
* @return the iterator
*/
public Iterator<Entry<Address,Address>> getFirstToSecondIterator() {
public Iterator<Entry<Address, Address>> getFirstToSecondIterator() {
return srcToDest.entrySet().iterator();
}
@Override
public Address getAddressInSecond(Address addressInFirst) {
return srcToDest.get(addressInFirst);
}
@Override
public Address getAddressInFirst(Address addressInSecond) {
return destToSrc.get(addressInSecond);
public Address getAddress(Side side, Address otherSideAddress) {
// "side" names the side of the RESULT, so the lookup key comes from the opposite side.
// Asking for a LEFT (source) address means otherSideAddress is a RIGHT (destination)
// address, hence the destToSrc lookup. Either lookup yields null when the map holds
// no entry for the key.
if (side == LEFT) {
return destToSrc.get(otherSideAddress);
}
return srcToDest.get(otherSideAddress);
}
@Override
public Function getFirstFunction() {
return srcFunction;
}
@Override
public Function getSecondFunction() {
return destFunction;
public Function getFunction(Side side) {
return functions.get(side);
}
}

View file

@ -15,25 +15,32 @@
*/
package ghidra.program.util;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSetView;
import ghidra.program.model.listing.Function;
import ghidra.program.model.listing.Program;
import ghidra.util.datastruct.Duo.Side;
/**
* This is the interface for a correlator that associates instructions from one function to
* instructions from another function. Given an address from one function it determines the matching
* address in the other function if possible.
* <p>
* NOTE(review): the paragraph above was written for the FunctionAddressCorrelation interface.
* DummyListingAddressCorrelation is a concrete class, not an interface, and it correlates
* nothing: each of its four overrides (getProgram, getFunction, getAddresses, getAddress)
* returns null for every argument.
*/
public interface FunctionAddressCorrelation extends ListingAddressCorrelation {
public class DummyListingAddressCorrelation implements ListingAddressCorrelation {
/**
* Gets the first function for this address correlator.
* @return the first function.
*/
public Function getFirstFunction();
@Override
public Program getProgram(Side side) {
return null;
}
/**
* Gets the second function for this address correlator.
* @return the second function.
*/
public Function getSecondFunction();
@Override
public Function getFunction(Side side) {
return null;
}
@Override
public AddressSetView getAddresses(Side side) {
return null;
}
@Override
public Address getAddress(Side side, Address otherSideAddress) {
return null;
}
}

View file

@ -17,54 +17,47 @@ package ghidra.program.util;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSetView;
import ghidra.program.model.listing.Function;
import ghidra.program.model.listing.Program;
import ghidra.util.datastruct.Duo.Side;
/**
* This is the interface for a correlator that associates addresses from one program with
* addresses from another program or it can associate addresses from one part of a program
* with addresses from another part of the same program. Given an address from the address set
* in the first program it determines the matching address from the address set for the second
* program if possible.
* with addresses from another part of the same program. Given an address from one program, it
* can provide the corresponding address for the other program. The two programs are referred to
* as the LEFT program and the RIGHT program. See {@link ghidra.util.datastruct.Duo.Side}
*/
public interface ListingAddressCorrelation {
/**
* Gets the program containing the first set of addresses.
* @return the program for the first set of addresses.
* Gets the program for the given side.
* @param side LEFT or RIGHT
* @return the program for the given side
*/
public abstract Program getFirstProgram();
public abstract Program getProgram(Side side);
/**
* Gets the program containing the second set of addresses.
* This program may be different from or the same as the first program.
* @return the program for the second set of addresses.
* Gets the function for the given side. This will be null if the addresses are not function
* based.
* @param side LEFT or RIGHT
* @return the function for the given side or null if not function based
*/
public abstract Program getSecondProgram();
public abstract Function getFunction(Side side);
/**
* Gets the first set of addresses for this correlator.
* @return the first set of addresses.
* Gets the addresses that are part of the correlator for the given side
* @param side LEFT or RIGHT
* @return the addresses that are part of the correlator for the given side
*/
public abstract AddressSetView getAddressesInFirst();
public abstract AddressSetView getAddresses(Side side);
/**
* Gets the second set of addresses for this correlator.
* @return the second set of addresses.
* Gets the address for the given side that matches the given address from the other side.
* @param side the side to get an address for
* @param otherSideAddress the address from the other side to find a match for
* @return the address for the given side that matches the given address from the other side,
* or null if a match couldn't be determined.
*/
public abstract AddressSetView getAddressesInSecond();
/**
* Determine the address from the second set that matches the specified address in the first set.
* @param addressInFirst the address in the first address set.
* @return the matching address in the second set or null if a match couldn't be determined.
*/
public abstract Address getAddressInSecond(Address addressInFirst);
/**
* Determine the address from the first set that matches the specified address in the second set.
* @param addressInSecond the address in the second address set.
* @return the matching address in the first set or null if a match couldn't be determined.
*/
public abstract Address getAddressInFirst(Address addressInSecond);
public abstract Address getAddress(Side side, Address otherSideAddress);
}