From efb837ef34845b8885dcabcd263d2ce2fb5a96eb Mon Sep 17 00:00:00 2001
From: ghintern
Date: Tue, 24 Jun 2025 16:14:40 +0000
Subject: [PATCH 1/2] GP-4400: ML extension improvements
---
.../FindFunctionsRFExampleScript.java | 5 +-
.../RandomForestFunctionFinderPlugin.htm | 11 ++-
.../FunctionStartRFParamsDialog.java | 68 ++++++++++---
.../FunctionStartTableModel.java | 38 ++++++-
.../FunctionStartTableProvider.java | 10 +-
.../GetAddressesToClassifyTask.java | 58 +++++++----
.../functionfinding/RandomSubsetUtils.java | 73 +++++++++-----
.../SimilarStartsTableModel.java | 64 +++++++++++-
.../functionfinding/RandomSubsetTest.java | 98 ++++++++++++-------
9 files changed, 316 insertions(+), 109 deletions(-)
diff --git a/Ghidra/Extensions/MachineLearning/ghidra_scripts/FindFunctionsRFExampleScript.java b/Ghidra/Extensions/MachineLearning/ghidra_scripts/FindFunctionsRFExampleScript.java
index f44bb261bb..36ae82d9c8 100644
--- a/Ghidra/Extensions/MachineLearning/ghidra_scripts/FindFunctionsRFExampleScript.java
+++ b/Ghidra/Extensions/MachineLearning/ghidra_scripts/FindFunctionsRFExampleScript.java
@@ -24,8 +24,7 @@ import ghidra.app.cmd.disassemble.DisassembleCommand;
import ghidra.app.cmd.function.CreateFunctionCmd;
import ghidra.app.script.GhidraScript;
import ghidra.machinelearning.functionfinding.*;
-import ghidra.program.model.address.Address;
-import ghidra.program.model.address.AddressSet;
+import ghidra.program.model.address.*;
import ghidra.program.model.block.BasicBlockModel;
//NOTE: This script is referenced by name in the help for the
@@ -131,7 +130,7 @@ public class FindFunctionsRFExampleScript extends GhidraScript {
new GetAddressesToClassifyTask(currentProgram, minUndefinedRange);
getAddressTask.run(monitor);
- AddressSet toClassify = getAddressTask.getAddressesToClassify();
+ AddressSetView toClassify = getAddressTask.getAddressesToClassify();
Map potentialStarts = classifier.classify(toClassify, monitor);
diff --git a/Ghidra/Extensions/MachineLearning/src/main/help/help/topics/RandomForestFunctionFinderPlugin/RandomForestFunctionFinderPlugin.htm b/Ghidra/Extensions/MachineLearning/src/main/help/help/topics/RandomForestFunctionFinderPlugin/RandomForestFunctionFinderPlugin.htm
index d99f9b8772..334fd2bd80 100644
--- a/Ghidra/Extensions/MachineLearning/src/main/help/help/topics/RandomForestFunctionFinderPlugin/RandomForestFunctionFinderPlugin.htm
+++ b/Ghidra/Extensions/MachineLearning/src/main/help/help/topics/RandomForestFunctionFinderPlugin/RandomForestFunctionFinderPlugin.htm
@@ -110,6 +110,11 @@
this to ensure that the value in Maximum Number
of Starts field doesn't cause all starts to be used for training (leaving
none for testing).
+
+ Minimum Undefined Range Size
+ This value is the minimum size of an undefined address range that will be considered when
+ applying the model to a program. It defaults to the value stored in the plugin options; see
+ Minimum Length of Undefined Ranges to Search.
Restrict Search to Aligned Addresses
If this is checked, only addresses which are zero modulo the value in the
@@ -139,11 +144,15 @@
options). The results are displayed in a
Function Start Table.
- Apply Model To... Action
+ Apply Model To Other Program... Action
This action will open a dialog to select another program in the current project and
then apply the model to it. Note that the only check that the model is compatible with
the selected program is that any context registers specified when training must be
present in the selected program.
+
+ Apply Model To Selection Action
+ This action will apply the model to the current selection in the program used to train it.
+
Debug Model Action
This action will display a Debug Model Table, which shows
diff --git a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartRFParamsDialog.java b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartRFParamsDialog.java
index 85900e10f1..e8b89eb6d9 100644
--- a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartRFParamsDialog.java
+++ b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartRFParamsDialog.java
@@ -36,7 +36,7 @@ import ghidra.app.services.ProgramManager;
import ghidra.framework.main.ProgramFileChooser;
import ghidra.framework.model.DomainFile;
import ghidra.framework.preferences.Preferences;
-import ghidra.program.model.address.AddressSet;
+import ghidra.program.model.address.AddressSetView;
import ghidra.program.model.listing.*;
import ghidra.util.HelpLocation;
import ghidra.util.Msg;
@@ -97,6 +97,10 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
private static final String ALIGNMENT_MODULUS_TIP =
"Use to define the alignment for restricted search";
+ private static final String MIN_UNDEFINED_RANGE_SIZE_TEXT = "Minimum Undefined Range Size";
+ private static final String MIN_UNDEFINED_RANGE_SIZE_TIP =
+ "Minimum size of an undefined range of addresses to search over for function starts";
+
private static final String DEFAULT_INITIAL_BYTES = "8,16";
private static final String INITIAL_BYTES_PROPERTY = "functionStartRFParams_initialBytes";
private static final String DEFAULT_PRE_BYTES = "2,8";
@@ -123,16 +127,22 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
private static final String APPLY_MODEL_ACTION_NAME = "ApplyModel";
private static final String APPLY_MODEL_MENU_TEXT = "Apply Model";
- private static final String APPLY_MODEL_TO_ACTION_NAME = "ApplyModelTo";
- private static final String APPLY_MODEL_TO_MENU_TEXT = "Apply Model To...";
+ private static final String APPLY_MODEL_TO_ACTION_NAME = "ApplyModelToOtherProgram";
+ private static final String APPLY_MODEL_TO_MENU_TEXT = "Apply Model To Other Program...";
+ private static final String APPLY_MODEL_SELECTION_ACTION_NAME = "ApplyModelToSelection";
+ private static final String APPLY_MODEL_SELECTION_MENU_TEXT = "Apply Model To Selection";
private static final String DEBUG_MODEL_ACTION_NAME = "DebugModel";
private static final String DEBUG_MODEL_MENU_TEXT = "DEBUG - Show test set errors";
+ private static final String ACTION_GROUP_APPLY_LOCAL = "A0_ApplyLocal";
+ private static final String ACTION_GROUP_APPLY_OTHER = "A1_ApplyOther";
+
private JTextField initialBytesField;
private JTextField preBytesField;
private JTextField factorField;
private IntegerTextField minimumSizeField;
private IntegerTextField maxStartsField;
+ private IntegerTextField minUndefRangeField;
private JTextField contextRegistersField;
private JLabel numFuncsField;
private JScrollPane tableScrollPane;
@@ -284,19 +294,35 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
.popupWhen(c -> trainingSource != null)
.enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1)
.popupMenuPath(APPLY_MODEL_MENU_TEXT)
+ .popupMenuGroup(ACTION_GROUP_APPLY_LOCAL)
.inWindow(ActionBuilder.When.ALWAYS)
.onAction(c -> {
- searchTrainingProgram(tableModel.getLastSelectedObjects().get(0));
+ searchTrainingProgram(tableModel.getLastSelectedObjects().get(0), false);
})
.build();
addAction(applyAction);
+ DockingAction applySelectionAction =
+ new ActionBuilder(APPLY_MODEL_SELECTION_ACTION_NAME, plugin.getName())
+ .description("Apply Model to Current Program Selection")
+ .popupWhen(c -> trainingSource != null)
+ .enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1)
+ .popupMenuPath(APPLY_MODEL_SELECTION_MENU_TEXT)
+ .popupMenuGroup(ACTION_GROUP_APPLY_LOCAL)
+ .inWindow(ActionBuilder.When.ALWAYS)
+ .onAction(c -> {
+ searchTrainingProgram(tableModel.getLastSelectedObjects().get(0), true);
+ })
+ .build();
+ addAction(applySelectionAction);
+
DockingAction applyToAction =
new ActionBuilder(APPLY_MODEL_TO_ACTION_NAME, plugin.getName())
.description("Choose Program and Apply Model to it")
.popupWhen(c -> trainingSource != null)
.enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1)
.popupMenuPath(APPLY_MODEL_TO_MENU_TEXT)
+ .popupMenuGroup(ACTION_GROUP_APPLY_OTHER)
.inWindow(ActionBuilder.When.ALWAYS)
.onAction(c -> {
searchOtherProgram(tableModel.getLastSelectedObjects().get(0));
@@ -309,6 +335,7 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
.popupWhen(c -> trainingSource != null)
.enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1)
.popupMenuPath(DEBUG_MODEL_MENU_TEXT)
+ .popupMenuGroup(ACTION_GROUP_APPLY_OTHER)
.inWindow(ActionBuilder.When.ALWAYS)
.onAction(c -> {
showTestErrors(tableModel.getLastSelectedObjects().get(0));
@@ -406,6 +433,14 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
updateNumFuncsField();
funcDataPanel.add(numFuncsField);
+ JLabel minUndefRangeLabel = new GDLabel(MIN_UNDEFINED_RANGE_SIZE_TEXT);
+ minUndefRangeLabel.setToolTipText(MIN_UNDEFINED_RANGE_SIZE_TIP);
+ funcDataPanel.add(minUndefRangeLabel);
+ minUndefRangeField = new IntegerTextField();
+ minUndefRangeField.setAllowNegativeValues(false);
+ minUndefRangeField.setValue(plugin.getMinUndefinedRangeSize());
+ funcDataPanel.add(minUndefRangeField.getComponent());
+
JLabel restrictLabel = new GDLabel(RESTRICT_SEARCH_TEXT);
restrictLabel.setToolTipText(RESTRICT_SEARCH_TIP);
funcDataPanel.add(restrictLabel);
@@ -478,8 +513,8 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
numFuncsField.setText(Integer.toString(numFuncs));
}
- private void searchTrainingProgram(RandomForestRowObject modelRow) {
- searchProgram(trainingSource, modelRow);
+ private void searchTrainingProgram(RandomForestRowObject modelRow, boolean useSelection) {
+ searchProgram(trainingSource, modelRow, useSelection);
}
private void searchOtherProgram(RandomForestRowObject modelRow) {
@@ -499,7 +534,7 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
" is not compatible with training source program " + trainingSource.getName());
return;
}
- searchProgram(p, modelRow);
+ searchProgram(p, modelRow, false);
}
private void showTestErrors(RandomForestRowObject modelRow) {
@@ -508,21 +543,32 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
addGeneralActions(provider, trainingSource);
}
- private void searchProgram(Program targetProgram, RandomForestRowObject modelRow) {
- GetAddressesToClassifyTask getTask =
- new GetAddressesToClassifyTask(targetProgram, plugin.getMinUndefinedRangeSize());
+ private void searchProgram(Program targetProgram, RandomForestRowObject modelRow,
+ boolean useSelection) {
+
+ GetAddressesToClassifyTask getTask = null;
+ if (useSelection) {
+ getTask =
+ new GetAddressesToClassifyTask(targetProgram, 1, plugin.getProgramSelection());
+ }
+ else {
+ getTask =
+ new GetAddressesToClassifyTask(targetProgram, minUndefRangeField.getLongValue());
+ }
+
//don't want to use the dialog's progress bar
TaskLauncher.launchModal("Gathering Addresses To Classify", getTask);
if (getTask.isCancelled()) {
return;
}
- AddressSet execNonFunc = null;
+ AddressSetView execNonFunc = null;
if (restrictBox.isSelected()) {
execNonFunc = getTask.getAddressesToClassify((long) modBox.getSelectedItem());
}
else {
execNonFunc = getTask.getAddressesToClassify();
}
+
FunctionStartTableProvider provider =
new FunctionStartTableProvider(plugin, targetProgram, execNonFunc, modelRow, false);
addGeneralActions(provider, targetProgram);
diff --git a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartTableModel.java b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartTableModel.java
index 1fc8aebe6c..79082c6b26 100644
--- a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartTableModel.java
+++ b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartTableModel.java
@@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -24,8 +24,9 @@ import ghidra.docking.settings.Settings;
import ghidra.framework.plugintool.PluginTool;
import ghidra.framework.plugintool.ServiceProvider;
import ghidra.program.model.address.Address;
-import ghidra.program.model.address.AddressSet;
+import ghidra.program.model.address.AddressSetView;
import ghidra.program.model.block.BasicBlockModel;
+import ghidra.program.model.listing.Instruction;
import ghidra.program.model.listing.Program;
import ghidra.util.datastruct.Accumulator;
import ghidra.util.exception.CancelledException;
@@ -38,7 +39,7 @@ import ghidra.util.task.TaskMonitor;
*/
public class FunctionStartTableModel extends AddressBasedTableModel {
private RandomForestRowObject modelRow;
- private AddressSet addressesToClassify;
+ private AddressSetView addressesToClassify;
private boolean debug;
private BasicBlockModel blockModel;
private Map addressToProbability;
@@ -54,7 +55,7 @@ public class FunctionStartTableModel extends AddressBasedTableModel {
+
+ @Override
+ public String getColumnName() {
+ return "Is Fallthrough Target";
+ }
+
+ @Override
+ public Boolean getValue(FunctionStartRowObject rowObject, Settings settings, Object data,
+ ServiceProvider services) throws IllegalArgumentException {
+ Instruction preInstr =
+ program.getListing().getInstructionBefore(rowObject.getAddress());
+ if (preInstr == null) {
+ return false;
+ }
+
+ if (!preInstr.hasFallthrough()) {
+ return false;
+ }
+
+ return preInstr.getFallThrough().equals(rowObject.getAddress());
+ }
+
+ }
+
private class DataReferencesTableColumn
extends AbstractDynamicTableColumn {
diff --git a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartTableProvider.java b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartTableProvider.java
index f83e1f59c4..81d2845a39 100644
--- a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartTableProvider.java
+++ b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartTableProvider.java
@@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -25,7 +25,7 @@ import javax.swing.*;
import ghidra.framework.model.DomainObjectChangedEvent;
import ghidra.framework.model.DomainObjectListener;
import ghidra.framework.plugintool.ComponentProviderAdapter;
-import ghidra.program.model.address.AddressSet;
+import ghidra.program.model.address.AddressSetView;
import ghidra.program.model.listing.Program;
import ghidra.util.HelpLocation;
import ghidra.util.table.*;
@@ -42,7 +42,7 @@ public class FunctionStartTableProvider extends ProgramAssociatedComponentProvid
private RandomForestFunctionFinderPlugin plugin;
private Program program;
private RandomForestRowObject modelRow;
- private AddressSet toClassify;
+ private AddressSetView toClassify;
private boolean debug;
private String subTitle;
private GhidraTable startTable;
@@ -59,7 +59,7 @@ public class FunctionStartTableProvider extends ProgramAssociatedComponentProvid
* @param debug whether to display debug version of table
*/
public FunctionStartTableProvider(RandomForestFunctionFinderPlugin plugin, Program program,
- AddressSet toClassify, RandomForestRowObject modelRow, boolean debug) {
+ AddressSetView toClassify, RandomForestRowObject modelRow, boolean debug) {
super(
debug ? "Debug: Test Set Errors in " + program.getDomainFile().getPathname()
: "Potential Functions in " + program.getDomainFile().getPathname(),
diff --git a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/GetAddressesToClassifyTask.java b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/GetAddressesToClassifyTask.java
index daec84300d..837f8b0bad 100644
--- a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/GetAddressesToClassifyTask.java
+++ b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/GetAddressesToClassifyTask.java
@@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -17,6 +17,7 @@ package ghidra.machinelearning.functionfinding;
import ghidra.program.model.address.*;
import ghidra.program.model.listing.*;
+import ghidra.program.util.ProgramSelection;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.Task;
import ghidra.util.task.TaskMonitor;
@@ -27,8 +28,25 @@ import ghidra.util.task.TaskMonitor;
public class GetAddressesToClassifyTask extends Task {
private Program prog;
- private AddressSet execNonFunc;
+ private AddressSetView execNonFunc;
private long minUndefinedRangeSize;
+ private ProgramSelection userSelection;
+
+ /**
+ * Creates a {@link Task} that creates a set of addresses to check for function starts. The
+ * {@code minUndefinedRangeSize} parameter determines how large a run of undefined bytes must be
+ * to be checked for function starts
+ * @param prog source program
+ * @param minUndefinedRangeSize minimum size of undefined range
+ * @param selection program selection used to filter addresses
+ */
+ public GetAddressesToClassifyTask(Program prog, long minUndefinedRangeSize,
+ ProgramSelection selection) {
+ super("Gathering Addresses to Classify", true, true, false, false);
+ this.prog = prog;
+ this.minUndefinedRangeSize = minUndefinedRangeSize;
+ this.userSelection = selection;
+ }
/**
* Creates a {@link Task} that creates a set of addresses to check for function starts. The
@@ -38,25 +56,29 @@ public class GetAddressesToClassifyTask extends Task {
* @param minUndefinedRangeSize minimum size of undefined range
*/
public GetAddressesToClassifyTask(Program prog, long minUndefinedRangeSize) {
- super("Gathering Addresses to Classify", true, true, false, false);
- this.prog = prog;
- this.minUndefinedRangeSize = minUndefinedRangeSize;
+ this(prog, minUndefinedRangeSize, null);
}
@Override
public void run(TaskMonitor monitor) throws CancelledException {
execNonFunc = new AddressSet();
- AddressSetView executable = prog.getMemory().getExecuteSet();
- AddressSetView initialized = prog.getMemory().getLoadedAndInitializedAddressSet();
- execNonFunc = executable.intersect(initialized);
- monitor.initialize(prog.getFunctionManager().getFunctionCount());
- FunctionIterator fIter = prog.getFunctionManager().getFunctions(true);
- while (fIter.hasNext()) {
- monitor.checkCancelled();
- monitor.incrementProgress(1);
- Function func = fIter.next();
- execNonFunc = execNonFunc.subtract(func.getBody());
+ if (userSelection != null) {
+ execNonFunc = userSelection;
}
+ else {
+ AddressSetView executable = prog.getMemory().getExecuteSet();
+ AddressSetView initialized = prog.getMemory().getLoadedAndInitializedAddressSet();
+ execNonFunc = executable.intersect(initialized);
+ monitor.initialize(prog.getFunctionManager().getFunctionCount());
+ FunctionIterator fIter = prog.getFunctionManager().getFunctions(true);
+ while (fIter.hasNext()) {
+ monitor.checkCancelled();
+ monitor.incrementProgress(1);
+ Function func = fIter.next();
+ execNonFunc = execNonFunc.subtract(func.getBody());
+ }
+ }
+
//remove small undefined ranges to avoid (for example) searching for
//function starts in an address range of length 3 between two known
//functions. "small" is controlled by a plugin option.
@@ -77,7 +99,7 @@ public class GetAddressesToClassifyTask extends Task {
* Returns the set of addresses to classify
* @return addresses
*/
- public AddressSet getAddressesToClassify() {
+ public AddressSetView getAddressesToClassify() {
return execNonFunc;
}
@@ -87,7 +109,7 @@ public class GetAddressesToClassifyTask extends Task {
* @param modulus alignment modulus
* @return aligned addresses
*/
- public AddressSet getAddressesToClassify(long modulus) {
+ public AddressSetView getAddressesToClassify(long modulus) {
AddressSet aligned = new AddressSet();
for (Address a : execNonFunc.getAddresses(true)) {
if (a.getOffset() % modulus == 0) {
diff --git a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/RandomSubsetUtils.java b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/RandomSubsetUtils.java
index 485e67b0c0..47d3bf6ffb 100644
--- a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/RandomSubsetUtils.java
+++ b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/RandomSubsetUtils.java
@@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -18,6 +18,7 @@ package ghidra.machinelearning.functionfinding;
import java.util.*;
import java.util.concurrent.ThreadLocalRandom;
+import ghidra.pcodeCPort.utils.MutableLong;
import ghidra.program.model.address.*;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.TaskMonitor;
@@ -43,25 +44,32 @@ public class RandomSubsetUtils {
* @return random subset of size k
* @throws CancelledException if monitor is canceled
*/
- public static AddressSet randomSubset(AddressSetView addresses, long k, TaskMonitor monitor)
- throws CancelledException {
- List sortedRandom = generateRandomIntegerSubset(addresses.getNumAddresses(), k);
- Collections.sort(sortedRandom);
+ public static AddressSet randomSubset(AddressSetView addresses, long k,
+ TaskMonitor monitor) throws CancelledException {
+ long[] sortedRandom = generateRandomIntegerSubset(addresses.getNumAddresses(), k);
+ Arrays.sort(sortedRandom);
AddressSet randomAddresses = new AddressSet();
- AddressIterator iter = addresses.getAddresses(true);
- int addressesAdded = 0;
- int addressesVisited = 0;
+
+ long addressesVisited = 0;
int listIndex = 0;
- while (iter.hasNext() && addressesAdded < k) {
- monitor.checkCancelled();
- Address addr = iter.next();
- if (sortedRandom.get(listIndex) == addressesVisited) {
+ for (AddressRange range : addresses) {
+ long rangeEnd = addressesVisited + range.getLength();
+ for (; listIndex < k; listIndex++) {
+ monitor.checkCancelled();
+ long next = sortedRandom[listIndex];
+ if (next >= rangeEnd) {
+ // Next sampled index falls beyond the end of this range; move on to the next range
+ break;
+ }
+ Address addr = range.getMinAddress().add(next - addressesVisited);
randomAddresses.add(addr);
- addressesAdded += 1;
- listIndex += 1;
}
- addressesVisited += 1;
+ if (listIndex == k) {
+ break;
+ }
+ addressesVisited += range.getLength();
}
+
return randomAddresses;
}
@@ -72,24 +80,34 @@ public class RandomSubsetUtils {
* @param k size of random subset (must be >= 0)
- * @return list of indices of elements in random subset
+ * @return array of indices of elements in random subset
*/
- public static List generateRandomIntegerSubset(long n, long k) {
+ public static long[] generateRandomIntegerSubset(long n, long k) {
if (n < 0) {
throw new IllegalArgumentException("n cannot be negative");
}
if (k < 0) {
throw new IllegalArgumentException("k cannot be negative");
}
+ if (k > Integer.MAX_VALUE) {
+ // k could probably just be an int: the result is returned in an array, whose length
+ // cannot exceed Integer.MAX_VALUE (the previous ArrayList version had the same limit)
+ throw new IllegalArgumentException("k cannot exceed bounds of integer");
+ }
if (n < k) {
throw new IllegalArgumentException(
"size of subset (" + k + ") cannot be larger than size of set (" + n + ")");
}
- Map permutation = new HashMap<>();
- for (long i = 0; i < k; ++i) {
+
+ Map permutation = new HashMap<>();
+
+ for (long i = 0; i < k; i++) {
swap(permutation, i, ThreadLocalRandom.current().nextLong(i, n));
}
- List random = new ArrayList<>();
- for (long i = 0; i < k; i++) {
- random.add(permutation.getOrDefault(i, i));
+
+ long[] random = new long[(int) k];
+
+ for (int i = 0; i < k; i++) {
+ random[i] =
+ permutation.computeIfAbsent(Long.valueOf(i), key -> new MutableLong(key)).get();
}
return random;
}
@@ -102,14 +120,15 @@ public class RandomSubsetUtils {
* @param i index
* @param j index
*/
- public static void swap(Map permutation, long i, long j) {
+ public static void swap(Map permutation, long i, long j) {
if (i == j) {
return;
}
- long ith = permutation.getOrDefault(i, i);
- long jth = permutation.getOrDefault(j, j);
- permutation.put(i, jth);
- permutation.put(j, ith);
+ MutableLong ith = permutation.computeIfAbsent(i, key -> new MutableLong(key));
+ MutableLong jth = permutation.computeIfAbsent(j, key -> new MutableLong(key));
+ long temp = ith.get();
+ ith.set(jth.get());
+ jth.set(temp);
}
}
diff --git a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/SimilarStartsTableModel.java b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/SimilarStartsTableModel.java
index 12b6f21219..85a740d360 100644
--- a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/SimilarStartsTableModel.java
+++ b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/SimilarStartsTableModel.java
@@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -20,6 +20,8 @@ import java.util.List;
import docking.widgets.table.AbstractDynamicTableColumn;
import docking.widgets.table.TableColumnDescriptor;
+import ghidra.app.util.PseudoDisassembler;
+import ghidra.app.util.PseudoInstruction;
import ghidra.docking.settings.Settings;
import ghidra.framework.plugintool.PluginTool;
import ghidra.framework.plugintool.ServiceProvider;
@@ -75,6 +77,7 @@ public class SimilarStartsTableModel extends AddressBasedTableModel {
+ @Override
+ public String getColumnName() {
+ return "Disassembly";
+ }
+
+ @Override
+ public String getValue(SimilarStartRowObject rowObject, Settings settings, Object data,
+ ServiceProvider services) throws IllegalArgumentException {
+ PseudoDisassembler disasm = new PseudoDisassembler(program);
+
+ StringBuilder sb1 = new StringBuilder();
+ try {
+ Address addr = rowObject.funcStart().subtract(randomForestRow.getNumPreBytes());
+
+ while (addr.compareTo(rowObject.funcStart()) < 0) {
+ PseudoInstruction instr = disasm.disassemble(addr);
+ if (instr.isValid()) {
+ sb1.append(instr.toString());
+ sb1.append(" ");
+ }
+ else {
+ sb1.append("? ");
+ }
+ addr = instr.getMaxAddress().add(1);
+ }
+ }
+ catch (Exception e) {
+ sb1 = new StringBuilder("?? ");
+ }
+
+ StringBuilder sb2 = new StringBuilder();
+ try {
+ Address addr = rowObject.funcStart();
+ while (addr.compareTo(
+ rowObject.funcStart().add(randomForestRow.getNumInitialBytes())) < 0) {
+ PseudoInstruction instr = disasm.disassemble(addr);
+ if (instr.isValid()) {
+ sb2.append(instr.toString());
+ sb2.append(" ");
+ }
+ else {
+ sb2.append("? ");
+ }
+ addr = instr.getMaxAddress().add(1);
+ }
+ }
+ catch (Exception e) {
+ sb2 = new StringBuilder("??");
+ }
+
+ return sb1.toString() + "* " + sb2.toString();
+ }
+ }
+
}
diff --git a/Ghidra/Extensions/MachineLearning/src/test/java/ghidra/machinelearning/functionfinding/RandomSubsetTest.java b/Ghidra/Extensions/MachineLearning/src/test/java/ghidra/machinelearning/functionfinding/RandomSubsetTest.java
index db4ea497c1..9ebd96c679 100644
--- a/Ghidra/Extensions/MachineLearning/src/test/java/ghidra/machinelearning/functionfinding/RandomSubsetTest.java
+++ b/Ghidra/Extensions/MachineLearning/src/test/java/ghidra/machinelearning/functionfinding/RandomSubsetTest.java
@@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -22,8 +22,8 @@ import java.util.*;
import org.junit.Test;
import generic.test.AbstractGenericTest;
-import ghidra.program.model.address.AddressSet;
-import ghidra.program.model.address.TestAddress;
+import ghidra.pcodeCPort.utils.MutableLong;
+import ghidra.program.model.address.*;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.TaskMonitor;
@@ -31,64 +31,88 @@ public class RandomSubsetTest extends AbstractGenericTest {
@Test
public void testGenerateTrivialSubsets() {
- List empty = RandomSubsetUtils.generateRandomIntegerSubset(10, 0);
- assertEquals(0, empty.size());
+ long[] empty = RandomSubsetUtils.generateRandomIntegerSubset(10, 0);
+ assertEquals(0, empty.length);
empty = RandomSubsetUtils.generateRandomIntegerSubset(0, 0);
- assertEquals(0, empty.size());
- List complete = RandomSubsetUtils.generateRandomIntegerSubset(1000000, 1000000);
- Collections.sort(complete);
- Iterator iter = complete.iterator();
- long current = 0;
- while (iter.hasNext()) {
- long elem = iter.next();
- assertEquals(current++, elem);
+ assertEquals(0, empty.length);
+ long[] complete = RandomSubsetUtils.generateRandomIntegerSubset(1000000, 1000000);
+ Arrays.sort(complete);
+ for (int current = 0; current < complete.length; current++) {
+ long elem = complete[current];
+ assertEquals(current, elem);
}
}
@Test
public void testBasicRandomSubsetOfAddresses() throws CancelledException {
+ // Check we are drawing unique addresses from the set
AddressSet addrs = new AddressSet();
for (long i = 0; i < 10000; ++i) {
addrs.add(new TestAddress(i));
}
AddressSet rand = RandomSubsetUtils.randomSubset(addrs, 9998, TaskMonitor.DUMMY);
assertEquals(9998, rand.getNumAddresses());
+
+ addrs.clear();
+
+ // Check we correctly draw from multiple non-contiguous ranges
+ for (long i = 0; i < 10000; i += 1000) {
+ if (i % 2000 != 0)
+ continue;
+ for (long j = i; j < i + 1000; j++) {
+ addrs.add(new TestAddress(j));
+ }
+ }
+ rand = RandomSubsetUtils.randomSubset(addrs, 4998, TaskMonitor.DUMMY);
+ assertEquals(4998, rand.getNumAddresses());
+
+ for (Address addr : rand.getAddresses(true)) {
+ assertTrue(addrs.contains(addr));
+ }
}
@Test
public void testSwap() {
- Map permuted = new HashMap<>();
+ Map permuted = new HashMap<>();
assertTrue(permuted.isEmpty());
//should do nothing
RandomSubsetUtils.swap(permuted, 1, 1);
assertTrue(permuted.isEmpty());
- permuted.put(0l, 5l);
- permuted.put(1l, 10l);
+ permuted.put(0l, new MutableLong(5l));
+ permuted.put(1l, new MutableLong(10l));
RandomSubsetUtils.swap(permuted, 0, 1);
assertEquals(2, permuted.size());
- assertEquals(Long.valueOf(5), permuted.get(1l));
- assertEquals(Long.valueOf(10), permuted.get(0l));
+ assertEquals(5l, permuted.get(1l).get());
+ assertEquals(10l, permuted.get(0l).get());
RandomSubsetUtils.swap(permuted, 100l, 200L);
assertEquals(4, permuted.size());
- assertEquals(Long.valueOf(100), permuted.get(200l));
- assertEquals(Long.valueOf(200), permuted.get(100l));
+ assertEquals(100l, permuted.get(200l).get());
+ assertEquals(200l, permuted.get(100l).get());
}
- /**
- @Test
- public void timingTest() throws CancelledException {
- AddressSet big = new AddressSet(new TestAddress(0), new TestAddress(999999));
-
- long start = System.nanoTime();
- List complete = RandomSubset.generateRandomIntegerSubset(1000000, 500000);
- long end = System.nanoTime();
- Msg.info(this, "choosing random subset of integers: " +
- (end - start) / RandomForestTrainingTask.NANOSECONDS_PER_SECOND);
- start = System.nanoTime();
- AddressSet random = RandomSubset.randomSubset(big, 500000, TaskMonitor.DUMMY);
- end = System.nanoTime();
- Msg.info(this, "choosing random subset of addresses: " +
- (end - start) / RandomForestTrainingTask.NANOSECONDS_PER_SECOND);
- }*/
+// @Test
+// public void timingTest() throws CancelledException, InterruptedException {
+// AddressSet big = new AddressSet(new TestAddress(0), new TestAddress(9999999));
+//
+// Thread.sleep(10000);
+// long start;
+// long end;
+//
+// for (int i = 0; i < 10; i++) {
+// start = System.nanoTime();
+// long[] complete = RandomSubsetUtils.generateRandomIntegerSubset(10000000, 5000000);
+// end = System.nanoTime();
+// Msg.info(this, "choosing random subset of integers: " +
+// (end - start) / RandomForestTrainingTask.NANOSECONDS_PER_SECOND);
+// }
+//
+// for (int i = 0; i < 10; i++) {
+// start = System.nanoTime();
+// AddressSet random1 = RandomSubsetUtils.randomSubset(big, 5000000, TaskMonitor.DUMMY);
+// end = System.nanoTime();
+// Msg.info(this, "choosing random subset of addresses: " +
+// (end - start) / RandomForestTrainingTask.NANOSECONDS_PER_SECOND);
+// }
+// }
}
From 168cbc7e7abc169cec90872d1d8be5f657545806 Mon Sep 17 00:00:00 2001
From: James <49045138+ghidracadabra@users.noreply.github.com>
Date: Tue, 29 Jul 2025 17:47:44 +0000
Subject: [PATCH 2/2] GP-4400 minor tweaks
---
.../FunctionStartRFParamsDialog.java | 5 ++-
.../RandomForestFunctionFinderPlugin.java | 24 ++++++------
.../SimilarStartsTableModel.java | 38 ++++++-------------
3 files changed, 27 insertions(+), 40 deletions(-)
diff --git a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartRFParamsDialog.java b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartRFParamsDialog.java
index e8b89eb6d9..801ad2d73b 100644
--- a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartRFParamsDialog.java
+++ b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartRFParamsDialog.java
@@ -132,10 +132,11 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
private static final String APPLY_MODEL_SELECTION_ACTION_NAME = "ApplyModelToSelection";
private static final String APPLY_MODEL_SELECTION_MENU_TEXT = "Apply Model To Selection";
private static final String DEBUG_MODEL_ACTION_NAME = "DebugModel";
- private static final String DEBUG_MODEL_MENU_TEXT = "DEBUG - Show test set errors";
+ private static final String DEBUG_MODEL_MENU_TEXT = "DEBUG - Show Test Set Errors";
private static final String ACTION_GROUP_APPLY_LOCAL = "A0_ApplyLocal";
private static final String ACTION_GROUP_APPLY_OTHER = "A1_ApplyOther";
+ private static final String ACTION_GROUP_DEBUG = "A2_Debug";
private JTextField initialBytesField;
private JTextField preBytesField;
@@ -335,7 +336,7 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
.popupWhen(c -> trainingSource != null)
.enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1)
.popupMenuPath(DEBUG_MODEL_MENU_TEXT)
- .popupMenuGroup(ACTION_GROUP_APPLY_OTHER)
+ .popupMenuGroup(ACTION_GROUP_DEBUG)
.inWindow(ActionBuilder.When.ALWAYS)
.onAction(c -> {
showTestErrors(tableModel.getLastSelectedObjects().get(0));
diff --git a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/RandomForestFunctionFinderPlugin.java b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/RandomForestFunctionFinderPlugin.java
index 143c8294e4..868e1f7cf7 100644
--- a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/RandomForestFunctionFinderPlugin.java
+++ b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/RandomForestFunctionFinderPlugin.java
@@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -205,16 +205,16 @@ public class RandomForestFunctionFinderPlugin extends ProgramPlugin
private void createActions() {
new ActionBuilder(ACTION_NAME, getName())
- .menuPath(ToolConstants.MENU_SEARCH, MENU_PATH_ENTRY)
- .menuGroup("search for", null)
- .description("Train models to search for function starts")
- .helpLocation(new HelpLocation(getName(), getName()))
- .withContext(NavigatableActionContext.class, true)
- .validContextWhen(c -> !(c instanceof RestrictedAddressSetContext))
- .onAction(c -> {
- displayDialog(c);
- })
- .buildAndInstall(tool);
+ .menuPath(ToolConstants.MENU_SEARCH, MENU_PATH_ENTRY)
+ .menuGroup("search for", null)
+ .description("Train models to search for function starts")
+ .helpLocation(new HelpLocation(getName(), getName()))
+ .withContext(NavigatableActionContext.class, true)
+ .validWhen(c -> !(c instanceof RestrictedAddressSetContext))
+ .onAction(c -> {
+ displayDialog(c);
+ })
+ .buildAndInstall(tool);
}
private void displayDialog(NavigatableActionContext c) {
diff --git a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/SimilarStartsTableModel.java b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/SimilarStartsTableModel.java
index 85a740d360..dff0a47aaa 100644
--- a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/SimilarStartsTableModel.java
+++ b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/SimilarStartsTableModel.java
@@ -167,57 +167,43 @@ public class SimilarStartsTableModel extends AddressBasedTableModel<SimilarStartRowObject> {
+
@Override
public String getColumnName() {
return "Disassembly";
}
+ @Override
+ public String getColumnDescription() {
+ return "Disassembly (ignoring pre-bytes)";
+ }
+
@Override
public String getValue(SimilarStartRowObject rowObject, Settings settings, Object data,
ServiceProvider services) throws IllegalArgumentException {
PseudoDisassembler disasm = new PseudoDisassembler(program);
- StringBuilder sb1 = new StringBuilder();
- try {
- Address addr = rowObject.funcStart().subtract(randomForestRow.getNumPreBytes());
-
- while (addr.compareTo(rowObject.funcStart()) < 0) {
- PseudoInstruction instr = disasm.disassemble(addr);
- if (instr.isValid()) {
- sb1.append(instr.toString());
- sb1.append(" ");
- }
- else {
- sb1.append("? ");
- }
- addr = instr.getMaxAddress().add(1);
- }
- }
- catch (Exception e) {
- sb1 = new StringBuilder("?? ");
- }
-
- StringBuilder sb2 = new StringBuilder();
+ StringBuilder sb = new StringBuilder();
try {
Address addr = rowObject.funcStart();
while (addr.compareTo(
rowObject.funcStart().add(randomForestRow.getNumInitialBytes())) < 0) {
PseudoInstruction instr = disasm.disassemble(addr);
if (instr.isValid()) {
- sb2.append(instr.toString());
- sb2.append(" ");
+ sb.append(instr.toString());
+ sb.append(" ");
}
else {
- sb2.append("? ");
+ sb.append("? ");
}
addr = instr.getMaxAddress().add(1);
}
}
catch (Exception e) {
- sb2 = new StringBuilder("??");
+ sb = new StringBuilder("??");
}
- return sb1.toString() + "* " + sb2.toString();
+ return sb.toString();
}
}