GP-4400: ML extension improvements

This commit is contained in:
ghintern 2025-06-24 16:14:40 +00:00 committed by James
parent 47bd5a50cb
commit efb837ef34
9 changed files with 316 additions and 109 deletions

View file

@ -24,8 +24,7 @@ import ghidra.app.cmd.disassemble.DisassembleCommand;
import ghidra.app.cmd.function.CreateFunctionCmd;
import ghidra.app.script.GhidraScript;
import ghidra.machinelearning.functionfinding.*;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSet;
import ghidra.program.model.address.*;
import ghidra.program.model.block.BasicBlockModel;
//NOTE: This script is referenced by name in the help for the
@ -131,7 +130,7 @@ public class FindFunctionsRFExampleScript extends GhidraScript {
new GetAddressesToClassifyTask(currentProgram, minUndefinedRange);
getAddressTask.run(monitor);
AddressSet toClassify = getAddressTask.getAddressesToClassify();
AddressSetView toClassify = getAddressTask.getAddressesToClassify();
Map<Address, Double> potentialStarts = classifier.classify(toClassify, monitor);

View file

@ -111,6 +111,11 @@
of Starts</A> field doesn't cause all starts to be used for training (leaving
none for testing).</P>
<H4><A name="MinimumUndefRangeSize"></A> Minimum Undefined Range Size </H4>
<P> This value is the minimum size of an undefined address range that will be considered when
applying the model to a program. Defaults to the value stored in the plugin options, see
<A href="#MinLengthUndefinedRange">Minimum Length of Undefined Ranges to Search</A>. </P>
<H4><A name="RestrictSearchToAlignedAddresses"></A> Restrict Search to Aligned Addresses </H4>
<P> If this is checked, only addresses which are zero modulo the value in the
<A href="#AlignmentModulus">Alignment Modulus</A> combo box are searched for function starts.
@ -139,12 +144,16 @@
options). The results are displayed in a
<A href="#FunctionStartTable"> Function Start Table</A>. </P>
<H4><A name="ApplyModelTo"></A> Apply Model To... Action </H4>
<H4><A name="ApplyModelToOtherProgram"></A> Apply Model To Other Program... Action </H4>
<P> This action will open a dialog to select another program in the current project and
then apply the model to it. Note that the only compatibility check performed is that any
context registers specified during training must also be present in the selected program.
</P>
<H4><A name="ApplyModelToSelection"></A> Apply Model To Selection Action </H4>
<P> This action will apply the model to the current selection in the program used to train it.
</P>
<H4><A name="DebugModel"></A> Debug Model Action </H4>
<P> This action will display a <A href="#DebugModelTable"> Debug Model Table</A>, which shows
all of the errors encountered when applying the model to its test set. </P>

View file

@ -36,7 +36,7 @@ import ghidra.app.services.ProgramManager;
import ghidra.framework.main.ProgramFileChooser;
import ghidra.framework.model.DomainFile;
import ghidra.framework.preferences.Preferences;
import ghidra.program.model.address.AddressSet;
import ghidra.program.model.address.AddressSetView;
import ghidra.program.model.listing.*;
import ghidra.util.HelpLocation;
import ghidra.util.Msg;
@ -97,6 +97,10 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
private static final String ALIGNMENT_MODULUS_TIP =
"Use to define the alignment for restricted search";
private static final String MIN_UNDEFINED_RANGE_SIZE_TEXT = "Minimum Undefined Range Size";
private static final String MIN_UNDEFINED_RANGE_SIZE_TIP =
"Minimum size of an undefined range of addresses to search over for function starts";
private static final String DEFAULT_INITIAL_BYTES = "8,16";
private static final String INITIAL_BYTES_PROPERTY = "functionStartRFParams_initialBytes";
private static final String DEFAULT_PRE_BYTES = "2,8";
@ -123,16 +127,22 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
private static final String APPLY_MODEL_ACTION_NAME = "ApplyModel";
private static final String APPLY_MODEL_MENU_TEXT = "Apply Model";
private static final String APPLY_MODEL_TO_ACTION_NAME = "ApplyModelTo";
private static final String APPLY_MODEL_TO_MENU_TEXT = "Apply Model To...";
private static final String APPLY_MODEL_TO_ACTION_NAME = "ApplyModelToOtherProgram";
private static final String APPLY_MODEL_TO_MENU_TEXT = "Apply Model To Other Program...";
private static final String APPLY_MODEL_SELECTION_ACTION_NAME = "ApplyModelToSelection";
private static final String APPLY_MODEL_SELECTION_MENU_TEXT = "Apply Model To Selection";
private static final String DEBUG_MODEL_ACTION_NAME = "DebugModel";
private static final String DEBUG_MODEL_MENU_TEXT = "DEBUG - Show test set errors";
private static final String ACTION_GROUP_APPLY_LOCAL = "A0_ApplyLocal";
private static final String ACTION_GROUP_APPLY_OTHER = "A1_ApplyOther";
private JTextField initialBytesField;
private JTextField preBytesField;
private JTextField factorField;
private IntegerTextField minimumSizeField;
private IntegerTextField maxStartsField;
private IntegerTextField minUndefRangeField;
private JTextField contextRegistersField;
private JLabel numFuncsField;
private JScrollPane tableScrollPane;
@ -284,19 +294,35 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
.popupWhen(c -> trainingSource != null)
.enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1)
.popupMenuPath(APPLY_MODEL_MENU_TEXT)
.popupMenuGroup(ACTION_GROUP_APPLY_LOCAL)
.inWindow(ActionBuilder.When.ALWAYS)
.onAction(c -> {
searchTrainingProgram(tableModel.getLastSelectedObjects().get(0));
searchTrainingProgram(tableModel.getLastSelectedObjects().get(0), false);
})
.build();
addAction(applyAction);
DockingAction applySelectionAction =
new ActionBuilder(APPLY_MODEL_SELECTION_ACTION_NAME, plugin.getName())
.description("Apply Model to Current Program Selection")
.popupWhen(c -> trainingSource != null)
.enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1)
.popupMenuPath(APPLY_MODEL_SELECTION_MENU_TEXT)
.popupMenuGroup(ACTION_GROUP_APPLY_LOCAL)
.inWindow(ActionBuilder.When.ALWAYS)
.onAction(c -> {
searchTrainingProgram(tableModel.getLastSelectedObjects().get(0), true);
})
.build();
addAction(applySelectionAction);
DockingAction applyToAction =
new ActionBuilder(APPLY_MODEL_TO_ACTION_NAME, plugin.getName())
.description("Choose Program and Apply Model to it")
.popupWhen(c -> trainingSource != null)
.enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1)
.popupMenuPath(APPLY_MODEL_TO_MENU_TEXT)
.popupMenuGroup(ACTION_GROUP_APPLY_OTHER)
.inWindow(ActionBuilder.When.ALWAYS)
.onAction(c -> {
searchOtherProgram(tableModel.getLastSelectedObjects().get(0));
@ -309,6 +335,7 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
.popupWhen(c -> trainingSource != null)
.enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1)
.popupMenuPath(DEBUG_MODEL_MENU_TEXT)
.popupMenuGroup(ACTION_GROUP_APPLY_OTHER)
.inWindow(ActionBuilder.When.ALWAYS)
.onAction(c -> {
showTestErrors(tableModel.getLastSelectedObjects().get(0));
@ -406,6 +433,14 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
updateNumFuncsField();
funcDataPanel.add(numFuncsField);
JLabel minUndefRangeLabel = new GDLabel(MIN_UNDEFINED_RANGE_SIZE_TEXT);
minUndefRangeLabel.setToolTipText(MIN_UNDEFINED_RANGE_SIZE_TIP);
funcDataPanel.add(minUndefRangeLabel);
minUndefRangeField = new IntegerTextField();
minUndefRangeField.setAllowNegativeValues(false);
minUndefRangeField.setValue(plugin.getMinUndefinedRangeSize());
funcDataPanel.add(minUndefRangeField.getComponent());
JLabel restrictLabel = new GDLabel(RESTRICT_SEARCH_TEXT);
restrictLabel.setToolTipText(RESTRICT_SEARCH_TIP);
funcDataPanel.add(restrictLabel);
@ -478,8 +513,8 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
numFuncsField.setText(Integer.toString(numFuncs));
}
private void searchTrainingProgram(RandomForestRowObject modelRow) {
searchProgram(trainingSource, modelRow);
private void searchTrainingProgram(RandomForestRowObject modelRow, boolean useSelection) {
searchProgram(trainingSource, modelRow, useSelection);
}
private void searchOtherProgram(RandomForestRowObject modelRow) {
@ -499,7 +534,7 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
" is not compatible with training source program " + trainingSource.getName());
return;
}
searchProgram(p, modelRow);
searchProgram(p, modelRow, false);
}
private void showTestErrors(RandomForestRowObject modelRow) {
@ -508,21 +543,32 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider
addGeneralActions(provider, trainingSource);
}
private void searchProgram(Program targetProgram, RandomForestRowObject modelRow) {
GetAddressesToClassifyTask getTask =
new GetAddressesToClassifyTask(targetProgram, plugin.getMinUndefinedRangeSize());
private void searchProgram(Program targetProgram, RandomForestRowObject modelRow,
boolean useSelection) {
GetAddressesToClassifyTask getTask = null;
if (useSelection) {
getTask =
new GetAddressesToClassifyTask(targetProgram, 1, plugin.getProgramSelection());
}
else {
getTask =
new GetAddressesToClassifyTask(targetProgram, minUndefRangeField.getLongValue());
}
//don't want to use the dialog's progress bar
TaskLauncher.launchModal("Gathering Addresses To Classify", getTask);
if (getTask.isCancelled()) {
return;
}
AddressSet execNonFunc = null;
AddressSetView execNonFunc = null;
if (restrictBox.isSelected()) {
execNonFunc = getTask.getAddressesToClassify((long) modBox.getSelectedItem());
}
else {
execNonFunc = getTask.getAddressesToClassify();
}
FunctionStartTableProvider provider =
new FunctionStartTableProvider(plugin, targetProgram, execNonFunc, modelRow, false);
addGeneralActions(provider, targetProgram);

View file

@ -24,8 +24,9 @@ import ghidra.docking.settings.Settings;
import ghidra.framework.plugintool.PluginTool;
import ghidra.framework.plugintool.ServiceProvider;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSet;
import ghidra.program.model.address.AddressSetView;
import ghidra.program.model.block.BasicBlockModel;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.listing.Program;
import ghidra.util.datastruct.Accumulator;
import ghidra.util.exception.CancelledException;
@ -38,7 +39,7 @@ import ghidra.util.task.TaskMonitor;
*/
public class FunctionStartTableModel extends AddressBasedTableModel<FunctionStartRowObject> {
private RandomForestRowObject modelRow;
private AddressSet addressesToClassify;
private AddressSetView addressesToClassify;
private boolean debug;
private BasicBlockModel blockModel;
private Map<Address, Double> addressToProbability;
@ -54,7 +55,7 @@ public class FunctionStartTableModel extends AddressBasedTableModel<FunctionStar
* @param modelRow trained model info
* @param debug is table displaying debug data
*/
public FunctionStartTableModel(PluginTool plugin, Program program, AddressSet toClassify,
public FunctionStartTableModel(PluginTool plugin, Program program, AddressSetView toClassify,
RandomForestRowObject modelRow, boolean debug) {
super(program.getName(), plugin, program, null, false);
this.modelRow = modelRow;
@ -100,6 +101,7 @@ public class FunctionStartTableModel extends AddressBasedTableModel<FunctionStar
descriptor.addVisibleColumn(new AddressTableColumn());
descriptor.addVisibleColumn(new ProbabilityTableColumn(), 0, false);
descriptor.addVisibleColumn(new InterpretationTableColumn());
descriptor.addVisibleColumn(new FallthroughTableColumn());
descriptor.addVisibleColumn(new DataReferencesTableColumn());
descriptor.addVisibleColumn(new UnconditionalFlowReferencesTableColumn());
descriptor.addVisibleColumn(new ConditionalFlowReferencesTableColumn());
@ -173,6 +175,32 @@ public class FunctionStartTableModel extends AddressBasedTableModel<FunctionStar
}
}
/**
 * Table column reporting whether a row's address is the fallthrough address of the
 * instruction that the listing returns as the one before that address.
 */
private class FallthroughTableColumn
		extends AbstractDynamicTableColumn<FunctionStartRowObject, Boolean, Object> {

	@Override
	public String getColumnName() {
		return "Is Fallthrough Target";
	}

	/**
	 * @return {@code true} only when an instruction precedes the row's address, that
	 * instruction has a fallthrough, and the fallthrough address equals the row's address
	 */
	@Override
	public Boolean getValue(FunctionStartRowObject rowObject, Settings settings, Object data,
			ServiceProvider services) throws IllegalArgumentException {
		Address candidate = rowObject.getAddress();
		Instruction previous = program.getListing().getInstructionBefore(candidate);
		// no preceding instruction, or one without a fallthrough => cannot be a target
		boolean fallsThrough = previous != null && previous.hasFallthrough();
		return fallsThrough && previous.getFallThrough().equals(candidate);
	}
}
private class DataReferencesTableColumn
extends AbstractDynamicTableColumn<FunctionStartRowObject, Integer, Object> {

View file

@ -25,7 +25,7 @@ import javax.swing.*;
import ghidra.framework.model.DomainObjectChangedEvent;
import ghidra.framework.model.DomainObjectListener;
import ghidra.framework.plugintool.ComponentProviderAdapter;
import ghidra.program.model.address.AddressSet;
import ghidra.program.model.address.AddressSetView;
import ghidra.program.model.listing.Program;
import ghidra.util.HelpLocation;
import ghidra.util.table.*;
@ -42,7 +42,7 @@ public class FunctionStartTableProvider extends ProgramAssociatedComponentProvid
private RandomForestFunctionFinderPlugin plugin;
private Program program;
private RandomForestRowObject modelRow;
private AddressSet toClassify;
private AddressSetView toClassify;
private boolean debug;
private String subTitle;
private GhidraTable startTable;
@ -59,7 +59,7 @@ public class FunctionStartTableProvider extends ProgramAssociatedComponentProvid
* @param debug whether to display debug version of table
*/
public FunctionStartTableProvider(RandomForestFunctionFinderPlugin plugin, Program program,
AddressSet toClassify, RandomForestRowObject modelRow, boolean debug) {
AddressSetView toClassify, RandomForestRowObject modelRow, boolean debug) {
super(
debug ? "Debug: Test Set Errors in " + program.getDomainFile().getPathname()
: "Potential Functions in " + program.getDomainFile().getPathname(),

View file

@ -17,6 +17,7 @@ package ghidra.machinelearning.functionfinding;
import ghidra.program.model.address.*;
import ghidra.program.model.listing.*;
import ghidra.program.util.ProgramSelection;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.Task;
import ghidra.util.task.TaskMonitor;
@ -27,8 +28,25 @@ import ghidra.util.task.TaskMonitor;
public class GetAddressesToClassifyTask extends Task {
private Program prog;
private AddressSet execNonFunc;
private AddressSetView execNonFunc;
private long minUndefinedRangeSize;
private ProgramSelection userSelection;
/**
* Creates a {@link Task} that creates a set of addresses to check for function starts. The
* {@code minUndefinedRangeSize} parameter determines how large a run of undefined bytes
* must be to be checked for function starts.
* @param prog source program
* @param minUndefinedRangeSize minimum size of undefined range
* @param selection when non-null, {@link #run(TaskMonitor)} starts from this selection
* instead of computing the program's executable, initialized, non-function addresses;
* may be {@code null}
*/
public GetAddressesToClassifyTask(Program prog, long minUndefinedRangeSize,
ProgramSelection selection) {
super("Gathering Addresses to Classify", true, true, false, false);
this.prog = prog;
this.minUndefinedRangeSize = minUndefinedRangeSize;
// may be null; run() checks for null before using it
this.userSelection = selection;
}
/**
* Creates a {@link Task} that creates a set of addresses to check for function starts. The
@ -38,25 +56,29 @@ public class GetAddressesToClassifyTask extends Task {
* @param minUndefinedRangeSize minimum size of undefined range
*/
public GetAddressesToClassifyTask(Program prog, long minUndefinedRangeSize) {
super("Gathering Addresses to Classify", true, true, false, false);
this.prog = prog;
this.minUndefinedRangeSize = minUndefinedRangeSize;
this(prog, minUndefinedRangeSize, null);
}
@Override
public void run(TaskMonitor monitor) throws CancelledException {
execNonFunc = new AddressSet();
AddressSetView executable = prog.getMemory().getExecuteSet();
AddressSetView initialized = prog.getMemory().getLoadedAndInitializedAddressSet();
execNonFunc = executable.intersect(initialized);
monitor.initialize(prog.getFunctionManager().getFunctionCount());
FunctionIterator fIter = prog.getFunctionManager().getFunctions(true);
while (fIter.hasNext()) {
monitor.checkCancelled();
monitor.incrementProgress(1);
Function func = fIter.next();
execNonFunc = execNonFunc.subtract(func.getBody());
if (userSelection != null) {
execNonFunc = userSelection;
}
else {
AddressSetView executable = prog.getMemory().getExecuteSet();
AddressSetView initialized = prog.getMemory().getLoadedAndInitializedAddressSet();
execNonFunc = executable.intersect(initialized);
monitor.initialize(prog.getFunctionManager().getFunctionCount());
FunctionIterator fIter = prog.getFunctionManager().getFunctions(true);
while (fIter.hasNext()) {
monitor.checkCancelled();
monitor.incrementProgress(1);
Function func = fIter.next();
execNonFunc = execNonFunc.subtract(func.getBody());
}
}
//remove small undefined ranges to avoid (for example) searching for
//function starts in an address range of length 3 between two known
//functions. "small" is controlled by a plugin option.
@ -77,7 +99,7 @@ public class GetAddressesToClassifyTask extends Task {
* Returns the set of addresses to classify
* @return addresses
*/
public AddressSet getAddressesToClassify() {
public AddressSetView getAddressesToClassify() {
return execNonFunc;
}
@ -87,7 +109,7 @@ public class GetAddressesToClassifyTask extends Task {
* @param modulus alignment modulus
* @return aligned addresses
*/
public AddressSet getAddressesToClassify(long modulus) {
public AddressSetView getAddressesToClassify(long modulus) {
AddressSet aligned = new AddressSet();
for (Address a : execNonFunc.getAddresses(true)) {
if (a.getOffset() % modulus == 0) {

View file

@ -18,6 +18,7 @@ package ghidra.machinelearning.functionfinding;
import java.util.*;
import java.util.concurrent.ThreadLocalRandom;
import ghidra.pcodeCPort.utils.MutableLong;
import ghidra.program.model.address.*;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.TaskMonitor;
@ -43,25 +44,32 @@ public class RandomSubsetUtils {
* @return random subset of size k
* @throws CancelledException if monitor is canceled
*/
public static AddressSet randomSubset(AddressSetView addresses, long k, TaskMonitor monitor)
throws CancelledException {
List<Long> sortedRandom = generateRandomIntegerSubset(addresses.getNumAddresses(), k);
Collections.sort(sortedRandom);
public static AddressSet randomSubset(AddressSetView addresses, long k,
TaskMonitor monitor) throws CancelledException {
long[] sortedRandom = generateRandomIntegerSubset(addresses.getNumAddresses(), k);
Arrays.sort(sortedRandom);
AddressSet randomAddresses = new AddressSet();
AddressIterator iter = addresses.getAddresses(true);
int addressesAdded = 0;
int addressesVisited = 0;
long addressesVisited = 0;
int listIndex = 0;
while (iter.hasNext() && addressesAdded < k) {
monitor.checkCancelled();
Address addr = iter.next();
if (sortedRandom.get(listIndex) == addressesVisited) {
for (AddressRange range : addresses) {
long rangeEnd = addressesVisited + range.getLength();
for (; listIndex < k; listIndex++) {
monitor.checkCancelled();
long next = sortedRandom[listIndex];
if (next >= rangeEnd) {
// Next address is outside of this range
break;
}
Address addr = range.getMinAddress().add(next - addressesVisited);
randomAddresses.add(addr);
addressesAdded += 1;
listIndex += 1;
}
addressesVisited += 1;
if (listIndex == k) {
break;
}
addressesVisited += range.getLength();
}
return randomAddresses;
}
@ -72,24 +80,34 @@ public class RandomSubsetUtils {
* @param k size of random subset (must be >= 0)
* @return list of indices of elements in random subset
*/
public static List<Long> generateRandomIntegerSubset(long n, long k) {
public static long[] generateRandomIntegerSubset(long n, long k) {
if (n < 0) {
throw new IllegalArgumentException("n cannot be negative");
}
if (k < 0) {
throw new IllegalArgumentException("k cannot be negative");
}
if (k > Integer.MAX_VALUE) {
// Could probably just switch k to an int. Since we were using ArrayList before
// that was already going to blow up if k > Integer.MAX_VALUE
throw new IllegalArgumentException("k cannot exceed bounds of integer");
}
if (n < k) {
throw new IllegalArgumentException(
"size of subset (" + k + ") cannot be larger than size of set (" + n + ")");
}
Map<Long, Long> permutation = new HashMap<>();
for (long i = 0; i < k; ++i) {
Map<Long, MutableLong> permutation = new HashMap<>();
for (long i = 0; i < k; i++) {
swap(permutation, i, ThreadLocalRandom.current().nextLong(i, n));
}
List<Long> random = new ArrayList<>();
for (long i = 0; i < k; i++) {
random.add(permutation.getOrDefault(i, i));
long[] random = new long[(int) k];
for (int i = 0; i < k; i++) {
random[i] =
permutation.computeIfAbsent(Long.valueOf(i), key -> new MutableLong(key)).get();
}
return random;
}
@ -102,14 +120,15 @@ public class RandomSubsetUtils {
* @param i index
* @param j index
*/
public static void swap(Map<Long, Long> permutation, long i, long j) {
public static void swap(Map<Long, MutableLong> permutation, long i, long j) {
if (i == j) {
return;
}
long ith = permutation.getOrDefault(i, i);
long jth = permutation.getOrDefault(j, j);
permutation.put(i, jth);
permutation.put(j, ith);
MutableLong ith = permutation.computeIfAbsent(i, key -> new MutableLong(key));
MutableLong jth = permutation.computeIfAbsent(j, key -> new MutableLong(key));
long temp = ith.get();
ith.set(jth.get());
jth.set(temp);
}
}

View file

@ -20,6 +20,8 @@ import java.util.List;
import docking.widgets.table.AbstractDynamicTableColumn;
import docking.widgets.table.TableColumnDescriptor;
import ghidra.app.util.PseudoDisassembler;
import ghidra.app.util.PseudoInstruction;
import ghidra.docking.settings.Settings;
import ghidra.framework.plugintool.PluginTool;
import ghidra.framework.plugintool.ServiceProvider;
@ -75,6 +77,7 @@ public class SimilarStartsTableModel extends AddressBasedTableModel<SimilarStart
descriptor.addVisibleColumn(new AddressTableColumn());
descriptor.addVisibleColumn(new SimilarityTableColumn(), 1, false);
descriptor.addVisibleColumn(new ByteStringTableColumn());
descriptor.addVisibleColumn(new DisassemblyTableColumn());
return descriptor;
}
@ -161,4 +164,61 @@ public class SimilarStartsTableModel extends AddressBasedTableModel<SimilarStart
return sb.toString();
}
}
/**
 * Table column showing a best-effort pseudo-disassembly of the addresses around a
 * function start: the addresses before the start, a {@code "* "} marker, then the
 * addresses beginning at the start.
 */
private class DisassemblyTableColumn
		extends AbstractDynamicTableColumn<SimilarStartRowObject, String, Object> {

	@Override
	public String getColumnName() {
		return "Disassembly";
	}

	/**
	 * Builds the cell text for a row.
	 * <p>
	 * The text before the marker covers the range from
	 * {@code funcStart - randomForestRow.getNumPreBytes()} up to (not including) the function
	 * start; the text after it covers the range from the function start up to (not
	 * including) {@code funcStart + randomForestRow.getNumInitialBytes()}. If anything goes
	 * wrong while rendering one of the two parts, that whole part is replaced by a
	 * placeholder ({@code "?? "} before the marker, {@code "??"} after it).
	 *
	 * @return the two rendered parts joined by {@code "* "}
	 */
	@Override
	public String getValue(SimilarStartRowObject rowObject, Settings settings, Object data,
			ServiceProvider services) throws IllegalArgumentException {
		PseudoDisassembler disasm = new PseudoDisassembler(program);

		String preText;
		try {
			Address funcStart = rowObject.funcStart();
			Address preStart = funcStart.subtract(randomForestRow.getNumPreBytes());
			preText = disassembleRange(disasm, preStart, funcStart);
		}
		catch (Exception e) {
			// best effort only: any failure collapses this part to a placeholder
			preText = "?? ";
		}

		String initialText;
		try {
			Address funcStart = rowObject.funcStart();
			Address initialEnd = funcStart.add(randomForestRow.getNumInitialBytes());
			initialText = disassembleRange(disasm, funcStart, initialEnd);
		}
		catch (Exception e) {
			// best effort only: any failure collapses this part to a placeholder
			initialText = "??";
		}

		return preText + "* " + initialText;
	}

	/**
	 * Pseudo-disassembles consecutive instructions from {@code start} until the current
	 * address is no longer below {@code end}, appending each valid instruction's text (or
	 * {@code "?"} for an invalid one) followed by a space.
	 *
	 * @param disasm disassembler to use
	 * @param start first address to disassemble
	 * @param end exclusive upper bound for the start of an instruction
	 * @return the concatenated instruction text
	 * @throws Exception anything raised while disassembling or advancing the address;
	 * callers translate this into their placeholder text
	 */
	private String disassembleRange(PseudoDisassembler disasm, Address start, Address end)
			throws Exception {
		StringBuilder sb = new StringBuilder();
		Address addr = start;
		while (addr.compareTo(end) < 0) {
			PseudoInstruction instr = disasm.disassemble(addr);
			if (instr.isValid()) {
				sb.append(instr.toString());
				sb.append(" ");
			}
			else {
				sb.append("? ");
			}
			// step past the instruction just handled, valid or not
			addr = instr.getMaxAddress().add(1);
		}
		return sb.toString();
	}
}
}

View file

@ -22,8 +22,8 @@ import java.util.*;
import org.junit.Test;
import generic.test.AbstractGenericTest;
import ghidra.program.model.address.AddressSet;
import ghidra.program.model.address.TestAddress;
import ghidra.pcodeCPort.utils.MutableLong;
import ghidra.program.model.address.*;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.TaskMonitor;
@ -31,64 +31,88 @@ public class RandomSubsetTest extends AbstractGenericTest {
@Test
public void testGenerateTrivialSubsets() {
List<Long> empty = RandomSubsetUtils.generateRandomIntegerSubset(10, 0);
assertEquals(0, empty.size());
long[] empty = RandomSubsetUtils.generateRandomIntegerSubset(10, 0);
assertEquals(0, empty.length);
empty = RandomSubsetUtils.generateRandomIntegerSubset(0, 0);
assertEquals(0, empty.size());
List<Long> complete = RandomSubsetUtils.generateRandomIntegerSubset(1000000, 1000000);
Collections.sort(complete);
Iterator<Long> iter = complete.iterator();
long current = 0;
while (iter.hasNext()) {
long elem = iter.next();
assertEquals(current++, elem);
assertEquals(0, empty.length);
long[] complete = RandomSubsetUtils.generateRandomIntegerSubset(1000000, 1000000);
Arrays.sort(complete);
for (int current = 0; current < complete.length; current++) {
long elem = complete[current];
assertEquals(current, elem);
}
}
@Test
public void testBasicRandomSubsetOfAddresses() throws CancelledException {
	// Drawing 9998 of 10000 contiguous addresses must yield 9998 distinct addresses
	AddressSet pool = new AddressSet();
	for (long offset = 0; offset < 10000; offset++) {
		pool.add(new TestAddress(offset));
	}
	AddressSet sample = RandomSubsetUtils.randomSubset(pool, 9998, TaskMonitor.DUMMY);
	assertEquals(9998, sample.getNumAddresses());

	// Rebuild the pool as disjoint 1000-address runs starting at 0, 2000, 4000, 6000, 8000
	pool.clear();
	for (long base = 0; base < 10000; base += 2000) {
		long limit = base + 1000;
		for (long offset = base; offset < limit; offset++) {
			pool.add(new TestAddress(offset));
		}
	}
	sample = RandomSubsetUtils.randomSubset(pool, 4998, TaskMonitor.DUMMY);
	assertEquals(4998, sample.getNumAddresses());
	// every drawn address must come from one of the runs
	for (Address drawn : sample.getAddresses(true)) {
		assertTrue(pool.contains(drawn));
	}
}
@Test
public void testSwap() {
Map<Long, Long> permuted = new HashMap<>();
Map<Long, MutableLong> permuted = new HashMap<>();
assertTrue(permuted.isEmpty());
//should do nothing
RandomSubsetUtils.swap(permuted, 1, 1);
assertTrue(permuted.isEmpty());
permuted.put(0l, 5l);
permuted.put(1l, 10l);
permuted.put(0l, new MutableLong(5l));
permuted.put(1l, new MutableLong(10l));
RandomSubsetUtils.swap(permuted, 0, 1);
assertEquals(2, permuted.size());
assertEquals(Long.valueOf(5), permuted.get(1l));
assertEquals(Long.valueOf(10), permuted.get(0l));
assertEquals(5l, permuted.get(1l).get());
assertEquals(10l, permuted.get(0l).get());
RandomSubsetUtils.swap(permuted, 100l, 200L);
assertEquals(4, permuted.size());
assertEquals(Long.valueOf(100), permuted.get(200l));
assertEquals(Long.valueOf(200), permuted.get(100l));
assertEquals(100l, permuted.get(200l).get());
assertEquals(200l, permuted.get(100l).get());
}
/**
@Test
public void timingTest() throws CancelledException {
AddressSet big = new AddressSet(new TestAddress(0), new TestAddress(999999));
long start = System.nanoTime();
List<Long> complete = RandomSubset.generateRandomIntegerSubset(1000000, 500000);
long end = System.nanoTime();
Msg.info(this, "choosing random subset of integers: " +
(end - start) / RandomForestTrainingTask.NANOSECONDS_PER_SECOND);
start = System.nanoTime();
AddressSet random = RandomSubset.randomSubset(big, 500000, TaskMonitor.DUMMY);
end = System.nanoTime();
Msg.info(this, "choosing random subset of addresses: " +
(end - start) / RandomForestTrainingTask.NANOSECONDS_PER_SECOND);
}*/
// @Test
// public void timingTest() throws CancelledException, InterruptedException {
// AddressSet big = new AddressSet(new TestAddress(0), new TestAddress(9999999));
//
// Thread.sleep(10000);
// long start;
// long end;
//
// for (int i = 0; i < 10; i++) {
// start = System.nanoTime();
// long[] complete = RandomSubsetUtils.generateRandomIntegerSubset(10000000, 5000000);
// end = System.nanoTime();
// Msg.info(this, "choosing random subset of integers: " +
// (end - start) / RandomForestTrainingTask.NANOSECONDS_PER_SECOND);
// }
//
// for (int i = 0; i < 10; i++) {
// start = System.nanoTime();
// AddressSet random1 = RandomSubsetUtils.randomSubset(big, 5000000, TaskMonitor.DUMMY);
// end = System.nanoTime();
// Msg.info(this, "choosing random subset of addresses: " +
// (end - start) / RandomForestTrainingTask.NANOSECONDS_PER_SECOND);
// }
// }
}