mirror of
https://github.com/NationalSecurityAgency/ghidra.git
synced 2025-10-03 17:59:46 +02:00
GP-4400: ML extension improvements
This commit is contained in:
parent
47bd5a50cb
commit
efb837ef34
9 changed files with 316 additions and 109 deletions
|
@ -24,8 +24,7 @@ import ghidra.app.cmd.disassemble.DisassembleCommand;
|
|||
import ghidra.app.cmd.function.CreateFunctionCmd;
|
||||
import ghidra.app.script.GhidraScript;
|
||||
import ghidra.machinelearning.functionfinding.*;
|
||||
import ghidra.program.model.address.Address;
|
||||
import ghidra.program.model.address.AddressSet;
|
||||
import ghidra.program.model.address.*;
|
||||
import ghidra.program.model.block.BasicBlockModel;
|
||||
|
||||
//NOTE: This script is referenced by name in the help for the
|
||||
|
@ -131,7 +130,7 @@ public class FindFunctionsRFExampleScript extends GhidraScript {
|
|||
new GetAddressesToClassifyTask(currentProgram, minUndefinedRange);
|
||||
getAddressTask.run(monitor);
|
||||
|
||||
AddressSet toClassify = getAddressTask.getAddressesToClassify();
|
||||
AddressSetView toClassify = getAddressTask.getAddressesToClassify();
|
||||
|
||||
Map<Address, Double> potentialStarts = classifier.classify(toClassify, monitor);
|
||||
|
||||
|
|
|
@ -111,6 +111,11 @@
|
|||
of Starts</A> field doesn't cause all starts to be used for training (leaving
|
||||
none for testing).</P>
|
||||
|
||||
<H4><A name="MinimumUndefRangeSize"></A> Minimum Undefined Range Size </H4>
|
||||
<P> This value is the minimum size of an undefined address range that will be considered when
|
||||
applying the model to a program. Defaults to the value stored in the plugin options, see
|
||||
<A href="#MinLengthUndefinedRange">Minimum Length of Undefined Ranges to Search</A>. </P>
|
||||
|
||||
<H4><A name="RestrictSearchToAlignedAddresses"></A> Restrict Search to Aligned Addresses </H4>
|
||||
<P> If this is checked, only addresses which are zero modulo the value in the
|
||||
<A href="#AlignmentModulus">Alignment Modulus</A> combo box are searched for function starts.
|
||||
|
@ -139,12 +144,16 @@
|
|||
options). The results are displayed in a
|
||||
<A href="#FunctionStartTable"> Function Start Table</A>. </P>
|
||||
|
||||
<H4><A name="ApplyModelTo"></A> Apply Model To... Action </H4>
|
||||
<H4><A name="ApplyModelToOtherProgram"></A> Apply Model To Other Program... Action </H4>
|
||||
<P> This action will open a dialog to select another program in the current project and
|
||||
then apply the model to it. Note that the only check that the model is compatible with
|
||||
the selected program is that any context registers specified when training must be
|
||||
present in the selected program. </P>
|
||||
|
||||
<H4><A name="ApplyModelToSelection"></A> Apply Model To Selection Action </H4>
|
||||
<P> This action will apply the model to the current selection in the program used to train it.
|
||||
</P>
|
||||
|
||||
<H4><A name="DebugModel"></A> Debug Model Action </H4>
|
||||
<P> This action will display a <A href="#DebugModelTable"> Debug Model Table</A>, which shows
|
||||
all of the errors encountered when applying the model to its test set. </P>
|
||||
|
|
|
@ -36,7 +36,7 @@ import ghidra.app.services.ProgramManager;
|
|||
import ghidra.framework.main.ProgramFileChooser;
|
||||
import ghidra.framework.model.DomainFile;
|
||||
import ghidra.framework.preferences.Preferences;
|
||||
import ghidra.program.model.address.AddressSet;
|
||||
import ghidra.program.model.address.AddressSetView;
|
||||
import ghidra.program.model.listing.*;
|
||||
import ghidra.util.HelpLocation;
|
||||
import ghidra.util.Msg;
|
||||
|
@ -97,6 +97,10 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
|
|||
private static final String ALIGNMENT_MODULUS_TIP =
|
||||
"Use to define the alignment for restricted search";
|
||||
|
||||
private static final String MIN_UNDEFINED_RANGE_SIZE_TEXT = "Minimum Undefined Range Size";
|
||||
private static final String MIN_UNDEFINED_RANGE_SIZE_TIP =
|
||||
"Minimum size of an undefined range of addresses to search over for function starts";
|
||||
|
||||
private static final String DEFAULT_INITIAL_BYTES = "8,16";
|
||||
private static final String INITIAL_BYTES_PROPERTY = "functionStartRFParams_initialBytes";
|
||||
private static final String DEFAULT_PRE_BYTES = "2,8";
|
||||
|
@ -123,16 +127,22 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
|
|||
|
||||
private static final String APPLY_MODEL_ACTION_NAME = "ApplyModel";
|
||||
private static final String APPLY_MODEL_MENU_TEXT = "Apply Model";
|
||||
private static final String APPLY_MODEL_TO_ACTION_NAME = "ApplyModelTo";
|
||||
private static final String APPLY_MODEL_TO_MENU_TEXT = "Apply Model To...";
|
||||
private static final String APPLY_MODEL_TO_ACTION_NAME = "ApplyModelToOtherProgram";
|
||||
private static final String APPLY_MODEL_TO_MENU_TEXT = "Apply Model To Other Program...";
|
||||
private static final String APPLY_MODEL_SELECTION_ACTION_NAME = "ApplyModelToSelection";
|
||||
private static final String APPLY_MODEL_SELECTION_MENU_TEXT = "Apply Model To Selection";
|
||||
private static final String DEBUG_MODEL_ACTION_NAME = "DebugModel";
|
||||
private static final String DEBUG_MODEL_MENU_TEXT = "DEBUG - Show test set errors";
|
||||
|
||||
private static final String ACTION_GROUP_APPLY_LOCAL = "A0_ApplyLocal";
|
||||
private static final String ACTION_GROUP_APPLY_OTHER = "A1_ApplyOther";
|
||||
|
||||
private JTextField initialBytesField;
|
||||
private JTextField preBytesField;
|
||||
private JTextField factorField;
|
||||
private IntegerTextField minimumSizeField;
|
||||
private IntegerTextField maxStartsField;
|
||||
private IntegerTextField minUndefRangeField;
|
||||
private JTextField contextRegistersField;
|
||||
private JLabel numFuncsField;
|
||||
private JScrollPane tableScrollPane;
|
||||
|
@ -284,19 +294,35 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
|
|||
.popupWhen(c -> trainingSource != null)
|
||||
.enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1)
|
||||
.popupMenuPath(APPLY_MODEL_MENU_TEXT)
|
||||
.popupMenuGroup(ACTION_GROUP_APPLY_LOCAL)
|
||||
.inWindow(ActionBuilder.When.ALWAYS)
|
||||
.onAction(c -> {
|
||||
searchTrainingProgram(tableModel.getLastSelectedObjects().get(0));
|
||||
searchTrainingProgram(tableModel.getLastSelectedObjects().get(0), false);
|
||||
})
|
||||
.build();
|
||||
addAction(applyAction);
|
||||
|
||||
DockingAction applySelectionAction =
|
||||
new ActionBuilder(APPLY_MODEL_SELECTION_ACTION_NAME, plugin.getName())
|
||||
.description("Apply Model to Current Program Selection")
|
||||
.popupWhen(c -> trainingSource != null)
|
||||
.enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1)
|
||||
.popupMenuPath(APPLY_MODEL_SELECTION_MENU_TEXT)
|
||||
.popupMenuGroup(ACTION_GROUP_APPLY_LOCAL)
|
||||
.inWindow(ActionBuilder.When.ALWAYS)
|
||||
.onAction(c -> {
|
||||
searchTrainingProgram(tableModel.getLastSelectedObjects().get(0), true);
|
||||
})
|
||||
.build();
|
||||
addAction(applySelectionAction);
|
||||
|
||||
DockingAction applyToAction =
|
||||
new ActionBuilder(APPLY_MODEL_TO_ACTION_NAME, plugin.getName())
|
||||
.description("Choose Program and Apply Model to it")
|
||||
.popupWhen(c -> trainingSource != null)
|
||||
.enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1)
|
||||
.popupMenuPath(APPLY_MODEL_TO_MENU_TEXT)
|
||||
.popupMenuGroup(ACTION_GROUP_APPLY_OTHER)
|
||||
.inWindow(ActionBuilder.When.ALWAYS)
|
||||
.onAction(c -> {
|
||||
searchOtherProgram(tableModel.getLastSelectedObjects().get(0));
|
||||
|
@ -309,6 +335,7 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
|
|||
.popupWhen(c -> trainingSource != null)
|
||||
.enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1)
|
||||
.popupMenuPath(DEBUG_MODEL_MENU_TEXT)
|
||||
.popupMenuGroup(ACTION_GROUP_APPLY_OTHER)
|
||||
.inWindow(ActionBuilder.When.ALWAYS)
|
||||
.onAction(c -> {
|
||||
showTestErrors(tableModel.getLastSelectedObjects().get(0));
|
||||
|
@ -406,6 +433,14 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
|
|||
updateNumFuncsField();
|
||||
funcDataPanel.add(numFuncsField);
|
||||
|
||||
JLabel minUndefRangeLabel = new GDLabel(MIN_UNDEFINED_RANGE_SIZE_TEXT);
|
||||
minUndefRangeLabel.setToolTipText(MIN_UNDEFINED_RANGE_SIZE_TIP);
|
||||
funcDataPanel.add(minUndefRangeLabel);
|
||||
minUndefRangeField = new IntegerTextField();
|
||||
minUndefRangeField.setAllowNegativeValues(false);
|
||||
minUndefRangeField.setValue(plugin.getMinUndefinedRangeSize());
|
||||
funcDataPanel.add(minUndefRangeField.getComponent());
|
||||
|
||||
JLabel restrictLabel = new GDLabel(RESTRICT_SEARCH_TEXT);
|
||||
restrictLabel.setToolTipText(RESTRICT_SEARCH_TIP);
|
||||
funcDataPanel.add(restrictLabel);
|
||||
|
@ -478,8 +513,8 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
|
|||
numFuncsField.setText(Integer.toString(numFuncs));
|
||||
}
|
||||
|
||||
private void searchTrainingProgram(RandomForestRowObject modelRow) {
|
||||
searchProgram(trainingSource, modelRow);
|
||||
private void searchTrainingProgram(RandomForestRowObject modelRow, boolean useSelection) {
|
||||
searchProgram(trainingSource, modelRow, useSelection);
|
||||
}
|
||||
|
||||
private void searchOtherProgram(RandomForestRowObject modelRow) {
|
||||
|
@ -499,7 +534,7 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
|
|||
" is not compatible with training source program " + trainingSource.getName());
|
||||
return;
|
||||
}
|
||||
searchProgram(p, modelRow);
|
||||
searchProgram(p, modelRow, false);
|
||||
}
|
||||
|
||||
private void showTestErrors(RandomForestRowObject modelRow) {
|
||||
|
@ -508,21 +543,32 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
|
|||
addGeneralActions(provider, trainingSource);
|
||||
}
|
||||
|
||||
private void searchProgram(Program targetProgram, RandomForestRowObject modelRow) {
|
||||
GetAddressesToClassifyTask getTask =
|
||||
new GetAddressesToClassifyTask(targetProgram, plugin.getMinUndefinedRangeSize());
|
||||
private void searchProgram(Program targetProgram, RandomForestRowObject modelRow,
|
||||
boolean useSelection) {
|
||||
|
||||
GetAddressesToClassifyTask getTask = null;
|
||||
if (useSelection) {
|
||||
getTask =
|
||||
new GetAddressesToClassifyTask(targetProgram, 1, plugin.getProgramSelection());
|
||||
}
|
||||
else {
|
||||
getTask =
|
||||
new GetAddressesToClassifyTask(targetProgram, minUndefRangeField.getLongValue());
|
||||
}
|
||||
|
||||
//don't want to use the dialog's progress bar
|
||||
TaskLauncher.launchModal("Gathering Addresses To Classify", getTask);
|
||||
if (getTask.isCancelled()) {
|
||||
return;
|
||||
}
|
||||
AddressSet execNonFunc = null;
|
||||
AddressSetView execNonFunc = null;
|
||||
if (restrictBox.isSelected()) {
|
||||
execNonFunc = getTask.getAddressesToClassify((long) modBox.getSelectedItem());
|
||||
}
|
||||
else {
|
||||
execNonFunc = getTask.getAddressesToClassify();
|
||||
}
|
||||
|
||||
FunctionStartTableProvider provider =
|
||||
new FunctionStartTableProvider(plugin, targetProgram, execNonFunc, modelRow, false);
|
||||
addGeneralActions(provider, targetProgram);
|
||||
|
|
|
@ -24,8 +24,9 @@ import ghidra.docking.settings.Settings;
|
|||
import ghidra.framework.plugintool.PluginTool;
|
||||
import ghidra.framework.plugintool.ServiceProvider;
|
||||
import ghidra.program.model.address.Address;
|
||||
import ghidra.program.model.address.AddressSet;
|
||||
import ghidra.program.model.address.AddressSetView;
|
||||
import ghidra.program.model.block.BasicBlockModel;
|
||||
import ghidra.program.model.listing.Instruction;
|
||||
import ghidra.program.model.listing.Program;
|
||||
import ghidra.util.datastruct.Accumulator;
|
||||
import ghidra.util.exception.CancelledException;
|
||||
|
@ -38,7 +39,7 @@ import ghidra.util.task.TaskMonitor;
|
|||
*/
|
||||
public class FunctionStartTableModel extends AddressBasedTableModel<FunctionStartRowObject> {
|
||||
private RandomForestRowObject modelRow;
|
||||
private AddressSet addressesToClassify;
|
||||
private AddressSetView addressesToClassify;
|
||||
private boolean debug;
|
||||
private BasicBlockModel blockModel;
|
||||
private Map<Address, Double> addressToProbability;
|
||||
|
@ -54,7 +55,7 @@ public class FunctionStartTableModel extends AddressBasedTableModel<FunctionStar
|
|||
* @param modelRow trained model info
|
||||
* @param debug is table displaying debug data
|
||||
*/
|
||||
public FunctionStartTableModel(PluginTool plugin, Program program, AddressSet toClassify,
|
||||
public FunctionStartTableModel(PluginTool plugin, Program program, AddressSetView toClassify,
|
||||
RandomForestRowObject modelRow, boolean debug) {
|
||||
super(program.getName(), plugin, program, null, false);
|
||||
this.modelRow = modelRow;
|
||||
|
@ -100,6 +101,7 @@ public class FunctionStartTableModel extends AddressBasedTableModel<FunctionStar
|
|||
descriptor.addVisibleColumn(new AddressTableColumn());
|
||||
descriptor.addVisibleColumn(new ProbabilityTableColumn(), 0, false);
|
||||
descriptor.addVisibleColumn(new InterpretationTableColumn());
|
||||
descriptor.addVisibleColumn(new FallthroughTableColumn());
|
||||
descriptor.addVisibleColumn(new DataReferencesTableColumn());
|
||||
descriptor.addVisibleColumn(new UnconditionalFlowReferencesTableColumn());
|
||||
descriptor.addVisibleColumn(new ConditionalFlowReferencesTableColumn());
|
||||
|
@ -173,6 +175,32 @@ public class FunctionStartTableModel extends AddressBasedTableModel<FunctionStar
|
|||
}
|
||||
}
|
||||
|
||||
private class FallthroughTableColumn
|
||||
extends AbstractDynamicTableColumn<FunctionStartRowObject, Boolean, Object> {
|
||||
|
||||
@Override
|
||||
public String getColumnName() {
|
||||
return "Is Fallthrough Target";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boolean getValue(FunctionStartRowObject rowObject, Settings settings, Object data,
|
||||
ServiceProvider services) throws IllegalArgumentException {
|
||||
Instruction preInstr =
|
||||
program.getListing().getInstructionBefore(rowObject.getAddress());
|
||||
if (preInstr == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!preInstr.hasFallthrough()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return preInstr.getFallThrough().equals(rowObject.getAddress());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private class DataReferencesTableColumn
|
||||
extends AbstractDynamicTableColumn<FunctionStartRowObject, Integer, Object> {
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ import javax.swing.*;
|
|||
import ghidra.framework.model.DomainObjectChangedEvent;
|
||||
import ghidra.framework.model.DomainObjectListener;
|
||||
import ghidra.framework.plugintool.ComponentProviderAdapter;
|
||||
import ghidra.program.model.address.AddressSet;
|
||||
import ghidra.program.model.address.AddressSetView;
|
||||
import ghidra.program.model.listing.Program;
|
||||
import ghidra.util.HelpLocation;
|
||||
import ghidra.util.table.*;
|
||||
|
@ -42,7 +42,7 @@ public class FunctionStartTableProvider extends ProgramAssociatedComponentProvid
|
|||
private RandomForestFunctionFinderPlugin plugin;
|
||||
private Program program;
|
||||
private RandomForestRowObject modelRow;
|
||||
private AddressSet toClassify;
|
||||
private AddressSetView toClassify;
|
||||
private boolean debug;
|
||||
private String subTitle;
|
||||
private GhidraTable startTable;
|
||||
|
@ -59,7 +59,7 @@ public class FunctionStartTableProvider extends ProgramAssociatedComponentProvid
|
|||
* @param debug whether to display debug version of table
|
||||
*/
|
||||
public FunctionStartTableProvider(RandomForestFunctionFinderPlugin plugin, Program program,
|
||||
AddressSet toClassify, RandomForestRowObject modelRow, boolean debug) {
|
||||
AddressSetView toClassify, RandomForestRowObject modelRow, boolean debug) {
|
||||
super(
|
||||
debug ? "Debug: Test Set Errors in " + program.getDomainFile().getPathname()
|
||||
: "Potential Functions in " + program.getDomainFile().getPathname(),
|
||||
|
|
|
@ -17,6 +17,7 @@ package ghidra.machinelearning.functionfinding;
|
|||
|
||||
import ghidra.program.model.address.*;
|
||||
import ghidra.program.model.listing.*;
|
||||
import ghidra.program.util.ProgramSelection;
|
||||
import ghidra.util.exception.CancelledException;
|
||||
import ghidra.util.task.Task;
|
||||
import ghidra.util.task.TaskMonitor;
|
||||
|
@ -27,8 +28,25 @@ import ghidra.util.task.TaskMonitor;
|
|||
|
||||
public class GetAddressesToClassifyTask extends Task {
|
||||
private Program prog;
|
||||
private AddressSet execNonFunc;
|
||||
private AddressSetView execNonFunc;
|
||||
private long minUndefinedRangeSize;
|
||||
private ProgramSelection userSelection;
|
||||
|
||||
/**
|
||||
* Creates a {@link Task} that creates a set of addresses to check for function starts. The
|
||||
* {code minUndefinedRangeSize} parameter determines how large a run of undefined bytes must be
|
||||
* to be checked for function starts
|
||||
* @param prog source program
|
||||
* @param minUndefinedRangeSize minimum size of undefined range
|
||||
* @param selection program selection used to filter addresses
|
||||
*/
|
||||
public GetAddressesToClassifyTask(Program prog, long minUndefinedRangeSize,
|
||||
ProgramSelection selection) {
|
||||
super("Gathering Addresses to Classify", true, true, false, false);
|
||||
this.prog = prog;
|
||||
this.minUndefinedRangeSize = minUndefinedRangeSize;
|
||||
this.userSelection = selection;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@link Task} that creates a set of addresses to check for function starts. The
|
||||
|
@ -38,25 +56,29 @@ public class GetAddressesToClassifyTask extends Task {
|
|||
* @param minUndefinedRangeSize minimum size of undefined range
|
||||
*/
|
||||
public GetAddressesToClassifyTask(Program prog, long minUndefinedRangeSize) {
|
||||
super("Gathering Addresses to Classify", true, true, false, false);
|
||||
this.prog = prog;
|
||||
this.minUndefinedRangeSize = minUndefinedRangeSize;
|
||||
this(prog, minUndefinedRangeSize, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run(TaskMonitor monitor) throws CancelledException {
|
||||
execNonFunc = new AddressSet();
|
||||
AddressSetView executable = prog.getMemory().getExecuteSet();
|
||||
AddressSetView initialized = prog.getMemory().getLoadedAndInitializedAddressSet();
|
||||
execNonFunc = executable.intersect(initialized);
|
||||
monitor.initialize(prog.getFunctionManager().getFunctionCount());
|
||||
FunctionIterator fIter = prog.getFunctionManager().getFunctions(true);
|
||||
while (fIter.hasNext()) {
|
||||
monitor.checkCancelled();
|
||||
monitor.incrementProgress(1);
|
||||
Function func = fIter.next();
|
||||
execNonFunc = execNonFunc.subtract(func.getBody());
|
||||
if (userSelection != null) {
|
||||
execNonFunc = userSelection;
|
||||
}
|
||||
else {
|
||||
AddressSetView executable = prog.getMemory().getExecuteSet();
|
||||
AddressSetView initialized = prog.getMemory().getLoadedAndInitializedAddressSet();
|
||||
execNonFunc = executable.intersect(initialized);
|
||||
monitor.initialize(prog.getFunctionManager().getFunctionCount());
|
||||
FunctionIterator fIter = prog.getFunctionManager().getFunctions(true);
|
||||
while (fIter.hasNext()) {
|
||||
monitor.checkCancelled();
|
||||
monitor.incrementProgress(1);
|
||||
Function func = fIter.next();
|
||||
execNonFunc = execNonFunc.subtract(func.getBody());
|
||||
}
|
||||
}
|
||||
|
||||
//remove small undefined ranges to avoid (for example) searching for
|
||||
//function starts in an address range of length 3 between two known
|
||||
//functions. "small" is controlled by a plugin option.
|
||||
|
@ -77,7 +99,7 @@ public class GetAddressesToClassifyTask extends Task {
|
|||
* Returns the set of addresses to classify
|
||||
* @return addresses
|
||||
*/
|
||||
public AddressSet getAddressesToClassify() {
|
||||
public AddressSetView getAddressesToClassify() {
|
||||
return execNonFunc;
|
||||
}
|
||||
|
||||
|
@ -87,7 +109,7 @@ public class GetAddressesToClassifyTask extends Task {
|
|||
* @param modulus alignment modulus
|
||||
* @return aligned addresses
|
||||
*/
|
||||
public AddressSet getAddressesToClassify(long modulus) {
|
||||
public AddressSetView getAddressesToClassify(long modulus) {
|
||||
AddressSet aligned = new AddressSet();
|
||||
for (Address a : execNonFunc.getAddresses(true)) {
|
||||
if (a.getOffset() % modulus == 0) {
|
||||
|
|
|
@ -18,6 +18,7 @@ package ghidra.machinelearning.functionfinding;
|
|||
import java.util.*;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
|
||||
import ghidra.pcodeCPort.utils.MutableLong;
|
||||
import ghidra.program.model.address.*;
|
||||
import ghidra.util.exception.CancelledException;
|
||||
import ghidra.util.task.TaskMonitor;
|
||||
|
@ -43,25 +44,32 @@ public class RandomSubsetUtils {
|
|||
* @return random subset of size k
|
||||
* @throws CancelledException if monitor is canceled
|
||||
*/
|
||||
public static AddressSet randomSubset(AddressSetView addresses, long k, TaskMonitor monitor)
|
||||
throws CancelledException {
|
||||
List<Long> sortedRandom = generateRandomIntegerSubset(addresses.getNumAddresses(), k);
|
||||
Collections.sort(sortedRandom);
|
||||
public static AddressSet randomSubset(AddressSetView addresses, long k,
|
||||
TaskMonitor monitor) throws CancelledException {
|
||||
long[] sortedRandom = generateRandomIntegerSubset(addresses.getNumAddresses(), k);
|
||||
Arrays.sort(sortedRandom);
|
||||
AddressSet randomAddresses = new AddressSet();
|
||||
AddressIterator iter = addresses.getAddresses(true);
|
||||
int addressesAdded = 0;
|
||||
int addressesVisited = 0;
|
||||
|
||||
long addressesVisited = 0;
|
||||
int listIndex = 0;
|
||||
while (iter.hasNext() && addressesAdded < k) {
|
||||
monitor.checkCancelled();
|
||||
Address addr = iter.next();
|
||||
if (sortedRandom.get(listIndex) == addressesVisited) {
|
||||
for (AddressRange range : addresses) {
|
||||
long rangeEnd = addressesVisited + range.getLength();
|
||||
for (; listIndex < k; listIndex++) {
|
||||
monitor.checkCancelled();
|
||||
long next = sortedRandom[listIndex];
|
||||
if (next >= rangeEnd) {
|
||||
// Next address is outside of this range
|
||||
break;
|
||||
}
|
||||
Address addr = range.getMinAddress().add(next - addressesVisited);
|
||||
randomAddresses.add(addr);
|
||||
addressesAdded += 1;
|
||||
listIndex += 1;
|
||||
}
|
||||
addressesVisited += 1;
|
||||
if (listIndex == k) {
|
||||
break;
|
||||
}
|
||||
addressesVisited += range.getLength();
|
||||
}
|
||||
|
||||
return randomAddresses;
|
||||
}
|
||||
|
||||
|
@ -72,24 +80,34 @@ public class RandomSubsetUtils {
|
|||
* @param k size of random subset (must be >= 0)
|
||||
* @return list of indices of elements in random subset
|
||||
*/
|
||||
public static List<Long> generateRandomIntegerSubset(long n, long k) {
|
||||
public static long[] generateRandomIntegerSubset(long n, long k) {
|
||||
if (n < 0) {
|
||||
throw new IllegalArgumentException("n cannot be negative");
|
||||
}
|
||||
if (k < 0) {
|
||||
throw new IllegalArgumentException("k cannot be negative");
|
||||
}
|
||||
if (k > Integer.MAX_VALUE) {
|
||||
// Could probably just switch k to an int. Since we were using ArrayList before
|
||||
// that was already going to blow up if k > Integer.MAX_VALUE
|
||||
throw new IllegalArgumentException("k cannot exceed bounds of integer");
|
||||
}
|
||||
if (n < k) {
|
||||
throw new IllegalArgumentException(
|
||||
"size of subset (" + k + ") cannot be larger than size of set (" + n + ")");
|
||||
}
|
||||
Map<Long, Long> permutation = new HashMap<>();
|
||||
for (long i = 0; i < k; ++i) {
|
||||
|
||||
Map<Long, MutableLong> permutation = new HashMap<>();
|
||||
|
||||
for (long i = 0; i < k; i++) {
|
||||
swap(permutation, i, ThreadLocalRandom.current().nextLong(i, n));
|
||||
}
|
||||
List<Long> random = new ArrayList<>();
|
||||
for (long i = 0; i < k; i++) {
|
||||
random.add(permutation.getOrDefault(i, i));
|
||||
|
||||
long[] random = new long[(int) k];
|
||||
|
||||
for (int i = 0; i < k; i++) {
|
||||
random[i] =
|
||||
permutation.computeIfAbsent(Long.valueOf(i), key -> new MutableLong(key)).get();
|
||||
}
|
||||
return random;
|
||||
}
|
||||
|
@ -102,14 +120,15 @@ public class RandomSubsetUtils {
|
|||
* @param i index
|
||||
* @param j index
|
||||
*/
|
||||
public static void swap(Map<Long, Long> permutation, long i, long j) {
|
||||
public static void swap(Map<Long, MutableLong> permutation, long i, long j) {
|
||||
if (i == j) {
|
||||
return;
|
||||
}
|
||||
long ith = permutation.getOrDefault(i, i);
|
||||
long jth = permutation.getOrDefault(j, j);
|
||||
permutation.put(i, jth);
|
||||
permutation.put(j, ith);
|
||||
MutableLong ith = permutation.computeIfAbsent(i, key -> new MutableLong(key));
|
||||
MutableLong jth = permutation.computeIfAbsent(j, key -> new MutableLong(key));
|
||||
long temp = ith.get();
|
||||
ith.set(jth.get());
|
||||
jth.set(temp);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -20,6 +20,8 @@ import java.util.List;
|
|||
|
||||
import docking.widgets.table.AbstractDynamicTableColumn;
|
||||
import docking.widgets.table.TableColumnDescriptor;
|
||||
import ghidra.app.util.PseudoDisassembler;
|
||||
import ghidra.app.util.PseudoInstruction;
|
||||
import ghidra.docking.settings.Settings;
|
||||
import ghidra.framework.plugintool.PluginTool;
|
||||
import ghidra.framework.plugintool.ServiceProvider;
|
||||
|
@ -75,6 +77,7 @@ public class SimilarStartsTableModel extends AddressBasedTableModel<SimilarStart
|
|||
descriptor.addVisibleColumn(new AddressTableColumn());
|
||||
descriptor.addVisibleColumn(new SimilarityTableColumn(), 1, false);
|
||||
descriptor.addVisibleColumn(new ByteStringTableColumn());
|
||||
descriptor.addVisibleColumn(new DisassemblyTableColumn());
|
||||
return descriptor;
|
||||
}
|
||||
|
||||
|
@ -161,4 +164,61 @@ public class SimilarStartsTableModel extends AddressBasedTableModel<SimilarStart
|
|||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
private class DisassemblyTableColumn
|
||||
extends AbstractDynamicTableColumn<SimilarStartRowObject, String, Object> {
|
||||
@Override
|
||||
public String getColumnName() {
|
||||
return "Disassembly";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getValue(SimilarStartRowObject rowObject, Settings settings, Object data,
|
||||
ServiceProvider services) throws IllegalArgumentException {
|
||||
PseudoDisassembler disasm = new PseudoDisassembler(program);
|
||||
|
||||
StringBuilder sb1 = new StringBuilder();
|
||||
try {
|
||||
Address addr = rowObject.funcStart().subtract(randomForestRow.getNumPreBytes());
|
||||
|
||||
while (addr.compareTo(rowObject.funcStart()) < 0) {
|
||||
PseudoInstruction instr = disasm.disassemble(addr);
|
||||
if (instr.isValid()) {
|
||||
sb1.append(instr.toString());
|
||||
sb1.append(" ");
|
||||
}
|
||||
else {
|
||||
sb1.append("? ");
|
||||
}
|
||||
addr = instr.getMaxAddress().add(1);
|
||||
}
|
||||
}
|
||||
catch (Exception e) {
|
||||
sb1 = new StringBuilder("?? ");
|
||||
}
|
||||
|
||||
StringBuilder sb2 = new StringBuilder();
|
||||
try {
|
||||
Address addr = rowObject.funcStart();
|
||||
while (addr.compareTo(
|
||||
rowObject.funcStart().add(randomForestRow.getNumInitialBytes())) < 0) {
|
||||
PseudoInstruction instr = disasm.disassemble(addr);
|
||||
if (instr.isValid()) {
|
||||
sb2.append(instr.toString());
|
||||
sb2.append(" ");
|
||||
}
|
||||
else {
|
||||
sb2.append("? ");
|
||||
}
|
||||
addr = instr.getMaxAddress().add(1);
|
||||
}
|
||||
}
|
||||
catch (Exception e) {
|
||||
sb2 = new StringBuilder("??");
|
||||
}
|
||||
|
||||
return sb1.toString() + "* " + sb2.toString();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -22,8 +22,8 @@ import java.util.*;
|
|||
import org.junit.Test;
|
||||
|
||||
import generic.test.AbstractGenericTest;
|
||||
import ghidra.program.model.address.AddressSet;
|
||||
import ghidra.program.model.address.TestAddress;
|
||||
import ghidra.pcodeCPort.utils.MutableLong;
|
||||
import ghidra.program.model.address.*;
|
||||
import ghidra.util.exception.CancelledException;
|
||||
import ghidra.util.task.TaskMonitor;
|
||||
|
||||
|
@ -31,64 +31,88 @@ public class RandomSubsetTest extends AbstractGenericTest {
|
|||
|
||||
@Test
|
||||
public void testGenerateTrivialSubsets() {
|
||||
List<Long> empty = RandomSubsetUtils.generateRandomIntegerSubset(10, 0);
|
||||
assertEquals(0, empty.size());
|
||||
long[] empty = RandomSubsetUtils.generateRandomIntegerSubset(10, 0);
|
||||
assertEquals(0, empty.length);
|
||||
empty = RandomSubsetUtils.generateRandomIntegerSubset(0, 0);
|
||||
assertEquals(0, empty.size());
|
||||
List<Long> complete = RandomSubsetUtils.generateRandomIntegerSubset(1000000, 1000000);
|
||||
Collections.sort(complete);
|
||||
Iterator<Long> iter = complete.iterator();
|
||||
long current = 0;
|
||||
while (iter.hasNext()) {
|
||||
long elem = iter.next();
|
||||
assertEquals(current++, elem);
|
||||
assertEquals(0, empty.length);
|
||||
long[] complete = RandomSubsetUtils.generateRandomIntegerSubset(1000000, 1000000);
|
||||
Arrays.sort(complete);
|
||||
for (int current = 0; current < complete.length; current++) {
|
||||
long elem = complete[current];
|
||||
assertEquals(current, elem);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasicRandomSubsetOfAddresses() throws CancelledException {
|
||||
// Check we are drawing unique addresses from the set
|
||||
AddressSet addrs = new AddressSet();
|
||||
for (long i = 0; i < 10000; ++i) {
|
||||
addrs.add(new TestAddress(i));
|
||||
}
|
||||
AddressSet rand = RandomSubsetUtils.randomSubset(addrs, 9998, TaskMonitor.DUMMY);
|
||||
assertEquals(9998, rand.getNumAddresses());
|
||||
|
||||
addrs.clear();
|
||||
|
||||
// Check we correctly draw from multiple non-contiguous ranges
|
||||
for (long i = 0; i < 10000; i += 1000) {
|
||||
if (i % 2000 != 0)
|
||||
continue;
|
||||
for (long j = i; j < i + 1000; j++) {
|
||||
addrs.add(new TestAddress(j));
|
||||
}
|
||||
}
|
||||
rand = RandomSubsetUtils.randomSubset(addrs, 4998, TaskMonitor.DUMMY);
|
||||
assertEquals(4998, rand.getNumAddresses());
|
||||
|
||||
for (Address addr : rand.getAddresses(true)) {
|
||||
assertTrue(addrs.contains(addr));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSwap() {
|
||||
Map<Long, Long> permuted = new HashMap<>();
|
||||
Map<Long, MutableLong> permuted = new HashMap<>();
|
||||
assertTrue(permuted.isEmpty());
|
||||
//should do nothing
|
||||
RandomSubsetUtils.swap(permuted, 1, 1);
|
||||
assertTrue(permuted.isEmpty());
|
||||
permuted.put(0l, 5l);
|
||||
permuted.put(1l, 10l);
|
||||
permuted.put(0l, new MutableLong(5l));
|
||||
permuted.put(1l, new MutableLong(10l));
|
||||
RandomSubsetUtils.swap(permuted, 0, 1);
|
||||
assertEquals(2, permuted.size());
|
||||
assertEquals(Long.valueOf(5), permuted.get(1l));
|
||||
assertEquals(Long.valueOf(10), permuted.get(0l));
|
||||
assertEquals(5l, permuted.get(1l).get());
|
||||
assertEquals(10l, permuted.get(0l).get());
|
||||
RandomSubsetUtils.swap(permuted, 100l, 200L);
|
||||
assertEquals(4, permuted.size());
|
||||
assertEquals(Long.valueOf(100), permuted.get(200l));
|
||||
assertEquals(Long.valueOf(200), permuted.get(100l));
|
||||
assertEquals(100l, permuted.get(200l).get());
|
||||
assertEquals(200l, permuted.get(100l).get());
|
||||
}
|
||||
|
||||
/**
|
||||
@Test
|
||||
public void timingTest() throws CancelledException {
|
||||
AddressSet big = new AddressSet(new TestAddress(0), new TestAddress(999999));
|
||||
|
||||
long start = System.nanoTime();
|
||||
List<Long> complete = RandomSubset.generateRandomIntegerSubset(1000000, 500000);
|
||||
long end = System.nanoTime();
|
||||
Msg.info(this, "choosing random subset of integers: " +
|
||||
(end - start) / RandomForestTrainingTask.NANOSECONDS_PER_SECOND);
|
||||
start = System.nanoTime();
|
||||
AddressSet random = RandomSubset.randomSubset(big, 500000, TaskMonitor.DUMMY);
|
||||
end = System.nanoTime();
|
||||
Msg.info(this, "choosing random subset of addresses: " +
|
||||
(end - start) / RandomForestTrainingTask.NANOSECONDS_PER_SECOND);
|
||||
}*/
|
||||
// @Test
|
||||
// public void timingTest() throws CancelledException, InterruptedException {
|
||||
// AddressSet big = new AddressSet(new TestAddress(0), new TestAddress(9999999));
|
||||
//
|
||||
// Thread.sleep(10000);
|
||||
// long start;
|
||||
// long end;
|
||||
//
|
||||
// for (int i = 0; i < 10; i++) {
|
||||
// start = System.nanoTime();
|
||||
// long[] complete = RandomSubsetUtils.generateRandomIntegerSubset(10000000, 5000000);
|
||||
// end = System.nanoTime();
|
||||
// Msg.info(this, "choosing random subset of integers: " +
|
||||
// (end - start) / RandomForestTrainingTask.NANOSECONDS_PER_SECOND);
|
||||
// }
|
||||
//
|
||||
// for (int i = 0; i < 10; i++) {
|
||||
// start = System.nanoTime();
|
||||
// AddressSet random1 = RandomSubsetUtils.randomSubset(big, 5000000, TaskMonitor.DUMMY);
|
||||
// end = System.nanoTime();
|
||||
// Msg.info(this, "choosing random subset of addresses: " +
|
||||
// (end - start) / RandomForestTrainingTask.NANOSECONDS_PER_SECOND);
|
||||
// }
|
||||
// }
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue