diff --git a/Ghidra/Extensions/MachineLearning/ghidra_scripts/FindFunctionsRFExampleScript.java b/Ghidra/Extensions/MachineLearning/ghidra_scripts/FindFunctionsRFExampleScript.java index f44bb261bb..36ae82d9c8 100644 --- a/Ghidra/Extensions/MachineLearning/ghidra_scripts/FindFunctionsRFExampleScript.java +++ b/Ghidra/Extensions/MachineLearning/ghidra_scripts/FindFunctionsRFExampleScript.java @@ -24,8 +24,7 @@ import ghidra.app.cmd.disassemble.DisassembleCommand; import ghidra.app.cmd.function.CreateFunctionCmd; import ghidra.app.script.GhidraScript; import ghidra.machinelearning.functionfinding.*; -import ghidra.program.model.address.Address; -import ghidra.program.model.address.AddressSet; +import ghidra.program.model.address.*; import ghidra.program.model.block.BasicBlockModel; //NOTE: This script is referenced by name in the help for the @@ -131,7 +130,7 @@ public class FindFunctionsRFExampleScript extends GhidraScript { new GetAddressesToClassifyTask(currentProgram, minUndefinedRange); getAddressTask.run(monitor); - AddressSet toClassify = getAddressTask.getAddressesToClassify(); + AddressSetView toClassify = getAddressTask.getAddressesToClassify(); Map potentialStarts = classifier.classify(toClassify, monitor); diff --git a/Ghidra/Extensions/MachineLearning/src/main/help/help/topics/RandomForestFunctionFinderPlugin/RandomForestFunctionFinderPlugin.htm b/Ghidra/Extensions/MachineLearning/src/main/help/help/topics/RandomForestFunctionFinderPlugin/RandomForestFunctionFinderPlugin.htm index d99f9b8772..334fd2bd80 100644 --- a/Ghidra/Extensions/MachineLearning/src/main/help/help/topics/RandomForestFunctionFinderPlugin/RandomForestFunctionFinderPlugin.htm +++ b/Ghidra/Extensions/MachineLearning/src/main/help/help/topics/RandomForestFunctionFinderPlugin/RandomForestFunctionFinderPlugin.htm @@ -110,6 +110,11 @@ this to ensure that the value in Maximum Number of Starts field doesn't cause all starts to be used for training (leaving none for testing).

+ +

Minimum Undefined Range Size

+

This value is the minimum size of an undefined address range that will be considered when + applying the model to a program. Defaults to the value stored in the plugin options, see + Minimum Length of Undefined Ranges to Search.

Restrict Search to Aligned Addresses

If this is checked, only addresses which are zero modulo the value in the @@ -139,11 +144,15 @@ options). The results are displayed in a Function Start Table.

-

Apply Model To... Action

+

Apply Model To Other Program... Action

This action will open a dialog to select another program in the current project and then apply the model to it. Note that the only check that the model is compatible with the selected program is that any context registers specified when training must be present in the selected program.

+ +

Apply Model To Selection Action

+

This action will apply the model to the current selection in the program used to train it. +

Debug Model Action

This action will display a Debug Model Table, which shows diff --git a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartRFParamsDialog.java b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartRFParamsDialog.java index 85900e10f1..801ad2d73b 100644 --- a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartRFParamsDialog.java +++ b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartRFParamsDialog.java @@ -36,7 +36,7 @@ import ghidra.app.services.ProgramManager; import ghidra.framework.main.ProgramFileChooser; import ghidra.framework.model.DomainFile; import ghidra.framework.preferences.Preferences; -import ghidra.program.model.address.AddressSet; +import ghidra.program.model.address.AddressSetView; import ghidra.program.model.listing.*; import ghidra.util.HelpLocation; import ghidra.util.Msg; @@ -97,6 +97,10 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider private static final String ALIGNMENT_MODULUS_TIP = "Use to define the alignment for restricted search"; + private static final String MIN_UNDEFINED_RANGE_SIZE_TEXT = "Minimum Undefined Range Size"; + private static final String MIN_UNDEFINED_RANGE_SIZE_TIP = + "Minimum size of an undefined range of addresses to search over for function starts"; + private static final String DEFAULT_INITIAL_BYTES = "8,16"; private static final String INITIAL_BYTES_PROPERTY = "functionStartRFParams_initialBytes"; private static final String DEFAULT_PRE_BYTES = "2,8"; @@ -123,16 +127,23 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider private static final String APPLY_MODEL_ACTION_NAME = "ApplyModel"; private static final String APPLY_MODEL_MENU_TEXT = "Apply Model"; - private static final String APPLY_MODEL_TO_ACTION_NAME = "ApplyModelTo"; - private static final String APPLY_MODEL_TO_MENU_TEXT = "Apply Model To..."; + private static final String APPLY_MODEL_TO_ACTION_NAME = "ApplyModelToOtherProgram"; + private static final String APPLY_MODEL_TO_MENU_TEXT = "Apply Model To Other Program..."; + private static final String APPLY_MODEL_SELECTION_ACTION_NAME = "ApplyModelToSelection"; + private static final String APPLY_MODEL_SELECTION_MENU_TEXT = "Apply Model To Selection"; private static final String DEBUG_MODEL_ACTION_NAME = "DebugModel"; - private static final String DEBUG_MODEL_MENU_TEXT = "DEBUG - Show test set errors"; + private static final String DEBUG_MODEL_MENU_TEXT = "DEBUG - Show Test Set Errors"; + + private static final String ACTION_GROUP_APPLY_LOCAL = "A0_ApplyLocal"; + private static final String ACTION_GROUP_APPLY_OTHER = "A1_ApplyOther"; + private static final String ACTION_GROUP_DEBUG = "A2_Debug"; private JTextField initialBytesField; private JTextField preBytesField; private JTextField factorField; private IntegerTextField minimumSizeField; private IntegerTextField maxStartsField; + private IntegerTextField minUndefRangeField; private JTextField contextRegistersField; private JLabel numFuncsField; private JScrollPane tableScrollPane; @@ -284,19 +295,35 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider .popupWhen(c -> trainingSource != null) .enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1) .popupMenuPath(APPLY_MODEL_MENU_TEXT) + .popupMenuGroup(ACTION_GROUP_APPLY_LOCAL) .inWindow(ActionBuilder.When.ALWAYS) .onAction(c -> { - searchTrainingProgram(tableModel.getLastSelectedObjects().get(0)); + searchTrainingProgram(tableModel.getLastSelectedObjects().get(0), false); }) .build(); addAction(applyAction); + DockingAction applySelectionAction = + new ActionBuilder(APPLY_MODEL_SELECTION_ACTION_NAME, plugin.getName()) + .description("Apply Model to Current Program Selection") + .popupWhen(c -> trainingSource != null) + .enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1) + .popupMenuPath(APPLY_MODEL_SELECTION_MENU_TEXT) + .popupMenuGroup(ACTION_GROUP_APPLY_LOCAL) + .inWindow(ActionBuilder.When.ALWAYS) + .onAction(c -> { + searchTrainingProgram(tableModel.getLastSelectedObjects().get(0), true); + }) + .build(); + addAction(applySelectionAction); + DockingAction applyToAction = new ActionBuilder(APPLY_MODEL_TO_ACTION_NAME, plugin.getName()) .description("Choose Program and Apply Model to it") .popupWhen(c -> trainingSource != null) .enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1) .popupMenuPath(APPLY_MODEL_TO_MENU_TEXT) + .popupMenuGroup(ACTION_GROUP_APPLY_OTHER) .inWindow(ActionBuilder.When.ALWAYS) .onAction(c -> { searchOtherProgram(tableModel.getLastSelectedObjects().get(0)); @@ -309,6 +336,7 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider .popupWhen(c -> trainingSource != null) .enabledWhen(c -> tableModel.getLastSelectedObjects().size() == 1) .popupMenuPath(DEBUG_MODEL_MENU_TEXT) + .popupMenuGroup(ACTION_GROUP_DEBUG) .inWindow(ActionBuilder.When.ALWAYS) .onAction(c -> { showTestErrors(tableModel.getLastSelectedObjects().get(0)); @@ -406,6 +434,14 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider updateNumFuncsField(); funcDataPanel.add(numFuncsField); + JLabel minUndefRangeLabel = new GDLabel(MIN_UNDEFINED_RANGE_SIZE_TEXT); + minUndefRangeLabel.setToolTipText(MIN_UNDEFINED_RANGE_SIZE_TIP); + funcDataPanel.add(minUndefRangeLabel); + minUndefRangeField = new IntegerTextField(); + minUndefRangeField.setAllowNegativeValues(false); + minUndefRangeField.setValue(plugin.getMinUndefinedRangeSize()); + funcDataPanel.add(minUndefRangeField.getComponent()); + JLabel restrictLabel = new GDLabel(RESTRICT_SEARCH_TEXT); restrictLabel.setToolTipText(RESTRICT_SEARCH_TIP); funcDataPanel.add(restrictLabel); @@ -478,8 +514,8 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider numFuncsField.setText(Integer.toString(numFuncs)); } - private void searchTrainingProgram(RandomForestRowObject modelRow) { - searchProgram(trainingSource, modelRow); + private void searchTrainingProgram(RandomForestRowObject modelRow, boolean useSelection) { + searchProgram(trainingSource, modelRow, useSelection); } private void searchOtherProgram(RandomForestRowObject modelRow) { @@ -499,7 +535,7 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider " is not compatible with training source program " + trainingSource.getName()); return; } - searchProgram(p, modelRow); + searchProgram(p, modelRow, false); } private void showTestErrors(RandomForestRowObject modelRow) { @@ -508,21 +544,32 @@ public class FunctionStartRFParamsDialog extends ReusableDialogComponentProvider addGeneralActions(provider, trainingSource); } - private void searchProgram(Program targetProgram, RandomForestRowObject modelRow) { - GetAddressesToClassifyTask getTask = - new GetAddressesToClassifyTask(targetProgram, plugin.getMinUndefinedRangeSize()); + private void searchProgram(Program targetProgram, RandomForestRowObject modelRow, + boolean useSelection) { + + GetAddressesToClassifyTask getTask = null; + if (useSelection) { + getTask = + new GetAddressesToClassifyTask(targetProgram, 1, plugin.getProgramSelection()); + } + else { + getTask = + new GetAddressesToClassifyTask(targetProgram, minUndefRangeField.getLongValue()); + } + //don't want to use the dialog's progress bar TaskLauncher.launchModal("Gathering Addresses To Classify", getTask); if (getTask.isCancelled()) { return; } - AddressSet execNonFunc = null; + AddressSetView execNonFunc = null; if (restrictBox.isSelected()) { execNonFunc = getTask.getAddressesToClassify((long) modBox.getSelectedItem()); } else { execNonFunc = getTask.getAddressesToClassify(); } + FunctionStartTableProvider provider = new FunctionStartTableProvider(plugin, targetProgram, execNonFunc, modelRow, false); addGeneralActions(provider, targetProgram); diff --git a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartTableModel.java b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartTableModel.java index 1fc8aebe6c..79082c6b26 100644 --- a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartTableModel.java +++ b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartTableModel.java @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -24,8 +24,9 @@ import ghidra.docking.settings.Settings; import ghidra.framework.plugintool.PluginTool; import ghidra.framework.plugintool.ServiceProvider; import ghidra.program.model.address.Address; -import ghidra.program.model.address.AddressSet; +import ghidra.program.model.address.AddressSetView; import ghidra.program.model.block.BasicBlockModel; +import ghidra.program.model.listing.Instruction; import ghidra.program.model.listing.Program; import ghidra.util.datastruct.Accumulator; import ghidra.util.exception.CancelledException; @@ -38,7 +39,7 @@ import ghidra.util.task.TaskMonitor; */ public class FunctionStartTableModel extends AddressBasedTableModel { private RandomForestRowObject modelRow; - private AddressSet addressesToClassify; + private AddressSetView addressesToClassify; private boolean debug; private BasicBlockModel blockModel; private Map addressToProbability; @@ -54,7 +55,7 @@ public class FunctionStartTableModel extends AddressBasedTableModel { + + @Override + public String getColumnName() { + return "Is Fallthrough Target"; + } + + @Override + public Boolean getValue(FunctionStartRowObject rowObject, Settings settings, Object data, + ServiceProvider services) throws IllegalArgumentException { + Instruction preInstr = + program.getListing().getInstructionBefore(rowObject.getAddress()); + if (preInstr == null) { + return false; + } + + if (!preInstr.hasFallthrough()) { + return false; + } + + return preInstr.getFallThrough().equals(rowObject.getAddress()); + } + + } + private class DataReferencesTableColumn extends AbstractDynamicTableColumn { diff --git a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartTableProvider.java b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartTableProvider.java index f83e1f59c4..81d2845a39 100644 --- a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartTableProvider.java +++ b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/FunctionStartTableProvider.java @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -25,7 +25,7 @@ import javax.swing.*; import ghidra.framework.model.DomainObjectChangedEvent; import ghidra.framework.model.DomainObjectListener; import ghidra.framework.plugintool.ComponentProviderAdapter; -import ghidra.program.model.address.AddressSet; +import ghidra.program.model.address.AddressSetView; import ghidra.program.model.listing.Program; import ghidra.util.HelpLocation; import ghidra.util.table.*; @@ -42,7 +42,7 @@ public class FunctionStartTableProvider extends ProgramAssociatedComponentProvid private RandomForestFunctionFinderPlugin plugin; private Program program; private RandomForestRowObject modelRow; - private AddressSet toClassify; + private AddressSetView toClassify; private boolean debug; private String subTitle; private GhidraTable startTable; @@ -59,7 +59,7 @@ public class FunctionStartTableProvider extends ProgramAssociatedComponentProvid * @param debug whether to display debug version of table */ public FunctionStartTableProvider(RandomForestFunctionFinderPlugin plugin, Program program, - AddressSet toClassify, RandomForestRowObject modelRow, boolean debug) { + AddressSetView toClassify, RandomForestRowObject modelRow, boolean debug) { super( debug ? "Debug: Test Set Errors in " + program.getDomainFile().getPathname() : "Potential Functions in " + program.getDomainFile().getPathname(), diff --git a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/GetAddressesToClassifyTask.java b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/GetAddressesToClassifyTask.java index daec84300d..837f8b0bad 100644 --- a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/GetAddressesToClassifyTask.java +++ b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/GetAddressesToClassifyTask.java @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -17,6 +17,7 @@ package ghidra.machinelearning.functionfinding; import ghidra.program.model.address.*; import ghidra.program.model.listing.*; +import ghidra.program.util.ProgramSelection; import ghidra.util.exception.CancelledException; import ghidra.util.task.Task; import ghidra.util.task.TaskMonitor; @@ -27,8 +28,25 @@ import ghidra.util.task.TaskMonitor; public class GetAddressesToClassifyTask extends Task { private Program prog; - private AddressSet execNonFunc; + private AddressSetView execNonFunc; private long minUndefinedRangeSize; + private ProgramSelection userSelection; + + /** + * Creates a {@link Task} that creates a set of addresses to check for function starts. The + * {code minUndefinedRangeSize} parameter determines how large a run of undefined bytes must be + * to be checked for function starts + * @param prog source program + * @param minUndefinedRangeSize minimum size of undefined range + * @param selection program selection used to filter addresses + */ + public GetAddressesToClassifyTask(Program prog, long minUndefinedRangeSize, + ProgramSelection selection) { + super("Gathering Addresses to Classify", true, true, false, false); + this.prog = prog; + this.minUndefinedRangeSize = minUndefinedRangeSize; + this.userSelection = selection; + } /** * Creates a {@link Task} that creates a set of addresses to check for function starts. The @@ -38,25 +56,29 @@ public class GetAddressesToClassifyTask extends Task { * @param minUndefinedRangeSize minimum size of undefined range */ public GetAddressesToClassifyTask(Program prog, long minUndefinedRangeSize) { - super("Gathering Addresses to Classify", true, true, false, false); - this.prog = prog; - this.minUndefinedRangeSize = minUndefinedRangeSize; + this(prog, minUndefinedRangeSize, null); } @Override public void run(TaskMonitor monitor) throws CancelledException { execNonFunc = new AddressSet(); - AddressSetView executable = prog.getMemory().getExecuteSet(); - AddressSetView initialized = prog.getMemory().getLoadedAndInitializedAddressSet(); - execNonFunc = executable.intersect(initialized); - monitor.initialize(prog.getFunctionManager().getFunctionCount()); - FunctionIterator fIter = prog.getFunctionManager().getFunctions(true); - while (fIter.hasNext()) { - monitor.checkCancelled(); - monitor.incrementProgress(1); - Function func = fIter.next(); - execNonFunc = execNonFunc.subtract(func.getBody()); + if (userSelection != null) { + execNonFunc = userSelection; } + else { + AddressSetView executable = prog.getMemory().getExecuteSet(); + AddressSetView initialized = prog.getMemory().getLoadedAndInitializedAddressSet(); + execNonFunc = executable.intersect(initialized); + monitor.initialize(prog.getFunctionManager().getFunctionCount()); + FunctionIterator fIter = prog.getFunctionManager().getFunctions(true); + while (fIter.hasNext()) { + monitor.checkCancelled(); + monitor.incrementProgress(1); + Function func = fIter.next(); + execNonFunc = execNonFunc.subtract(func.getBody()); + } + } + //remove small undefined ranges to avoid (for example) searching for //function starts in an address range of length 3 between two known //functions. "small" is controlled by a plugin option. @@ -77,7 +99,7 @@ public class GetAddressesToClassifyTask extends Task { * Returns the set of addresses to classify * @return addresses */ - public AddressSet getAddressesToClassify() { + public AddressSetView getAddressesToClassify() { return execNonFunc; } @@ -87,7 +109,7 @@ public class GetAddressesToClassifyTask extends Task { * @param modulus alignment modulus * @return aligned addresses */ - public AddressSet getAddressesToClassify(long modulus) { + public AddressSetView getAddressesToClassify(long modulus) { AddressSet aligned = new AddressSet(); for (Address a : execNonFunc.getAddresses(true)) { if (a.getOffset() % modulus == 0) { diff --git a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/RandomForestFunctionFinderPlugin.java b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/RandomForestFunctionFinderPlugin.java index 143c8294e4..868e1f7cf7 100644 --- a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/RandomForestFunctionFinderPlugin.java +++ b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/RandomForestFunctionFinderPlugin.java @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -205,16 +205,16 @@ public class RandomForestFunctionFinderPlugin extends ProgramPlugin private void createActions() { new ActionBuilder(ACTION_NAME, getName()) - .menuPath(ToolConstants.MENU_SEARCH, MENU_PATH_ENTRY) - .menuGroup("search for", null) - .description("Train models to search for function starts") - .helpLocation(new HelpLocation(getName(), getName())) - .withContext(NavigatableActionContext.class, true) - .validContextWhen(c -> !(c instanceof RestrictedAddressSetContext)) - .onAction(c -> { - displayDialog(c); - }) - .buildAndInstall(tool); + .menuPath(ToolConstants.MENU_SEARCH, MENU_PATH_ENTRY) + .menuGroup("search for", null) + .description("Train models to search for function starts") + .helpLocation(new HelpLocation(getName(), getName())) + .withContext(NavigatableActionContext.class, true) + .validWhen(c -> !(c instanceof RestrictedAddressSetContext)) + .onAction(c -> { + displayDialog(c); + }) + .buildAndInstall(tool); } private void displayDialog(NavigatableActionContext c) { diff --git a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/RandomSubsetUtils.java b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/RandomSubsetUtils.java index 485e67b0c0..47d3bf6ffb 100644 --- a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/RandomSubsetUtils.java +++ b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/RandomSubsetUtils.java @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -18,6 +18,7 @@ package ghidra.machinelearning.functionfinding; import java.util.*; import java.util.concurrent.ThreadLocalRandom; +import ghidra.pcodeCPort.utils.MutableLong; import ghidra.program.model.address.*; import ghidra.util.exception.CancelledException; import ghidra.util.task.TaskMonitor; @@ -43,25 +44,32 @@ public class RandomSubsetUtils { * @return random subset of size k * @throws CancelledException if monitor is canceled */ - public static AddressSet randomSubset(AddressSetView addresses, long k, TaskMonitor monitor) - throws CancelledException { - List sortedRandom = generateRandomIntegerSubset(addresses.getNumAddresses(), k); - Collections.sort(sortedRandom); + public static AddressSet randomSubset(AddressSetView addresses, long k, + TaskMonitor monitor) throws CancelledException { + long[] sortedRandom = generateRandomIntegerSubset(addresses.getNumAddresses(), k); + Arrays.sort(sortedRandom); AddressSet randomAddresses = new AddressSet(); - AddressIterator iter = addresses.getAddresses(true); - int addressesAdded = 0; - int addressesVisited = 0; + + long addressesVisited = 0; int listIndex = 0; - while (iter.hasNext() && addressesAdded < k) { - monitor.checkCancelled(); - Address addr = iter.next(); - if (sortedRandom.get(listIndex) == addressesVisited) { + for (AddressRange range : addresses) { + long rangeEnd = addressesVisited + range.getLength(); + for (; listIndex < k; listIndex++) { + monitor.checkCancelled(); + long next = sortedRandom[listIndex]; + if (next >= rangeEnd) { + // Next address is outside of this range + break; + } + Address addr = range.getMinAddress().add(next - addressesVisited); randomAddresses.add(addr); - addressesAdded += 1; - listIndex += 1; } - addressesVisited += 1; + if (listIndex == k) { + break; + } + addressesVisited += range.getLength(); } + return randomAddresses; } @@ -72,24 +80,34 @@ public class RandomSubsetUtils { * @param k size of random subset (must be >= 0) * @return list of indices of elements in random subset */ - public static List generateRandomIntegerSubset(long n, long k) { + public static long[] generateRandomIntegerSubset(long n, long k) { if (n < 0) { throw new IllegalArgumentException("n cannot be negative"); } if (k < 0) { throw new IllegalArgumentException("k cannot be negative"); } + if (k > Integer.MAX_VALUE) { + // Could probably just switch k to an int. Since we were using ArrayList before + // that was already going to blow up if k > Integer.MAX_VALUE + throw new IllegalArgumentException("k cannot exceed bounds of integer"); + } if (n < k) { throw new IllegalArgumentException( "size of subset (" + k + ") cannot be larger than size of set (" + n + ")"); } - Map permutation = new HashMap<>(); - for (long i = 0; i < k; ++i) { + + Map permutation = new HashMap<>(); + + for (long i = 0; i < k; i++) { swap(permutation, i, ThreadLocalRandom.current().nextLong(i, n)); } - List random = new ArrayList<>(); - for (long i = 0; i < k; i++) { - random.add(permutation.getOrDefault(i, i)); + + long[] random = new long[(int) k]; + + for (int i = 0; i < k; i++) { + random[i] = + permutation.computeIfAbsent(Long.valueOf(i), key -> new MutableLong(key)).get(); } return random; } @@ -102,14 +120,15 @@ public class RandomSubsetUtils { * @param i index * @param j index */ - public static void swap(Map permutation, long i, long j) { + public static void swap(Map permutation, long i, long j) { if (i == j) { return; } - long ith = permutation.getOrDefault(i, i); - long jth = permutation.getOrDefault(j, j); - permutation.put(i, jth); - permutation.put(j, ith); + MutableLong ith = permutation.computeIfAbsent(i, key -> new MutableLong(key)); + MutableLong jth = permutation.computeIfAbsent(j, key -> new MutableLong(key)); + long temp = ith.get(); + ith.set(jth.get()); + jth.set(temp); } } diff --git a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/SimilarStartsTableModel.java b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/SimilarStartsTableModel.java index 12b6f21219..dff0a47aaa 100644 --- a/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/SimilarStartsTableModel.java +++ b/Ghidra/Extensions/MachineLearning/src/main/java/ghidra/machinelearning/functionfinding/SimilarStartsTableModel.java @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -20,6 +20,8 @@ import java.util.List; import docking.widgets.table.AbstractDynamicTableColumn; import docking.widgets.table.TableColumnDescriptor; +import ghidra.app.util.PseudoDisassembler; +import ghidra.app.util.PseudoInstruction; import ghidra.docking.settings.Settings; import ghidra.framework.plugintool.PluginTool; import ghidra.framework.plugintool.ServiceProvider; @@ -75,6 +77,7 @@ public class SimilarStartsTableModel extends AddressBasedTableModel { + + @Override + public String getColumnName() { + return "Disassembly"; + } + + @Override + public String getColumnDescription() { + return "Disassembly (ignoring pre-bytes)"; + } + + @Override + public String getValue(SimilarStartRowObject rowObject, Settings settings, Object data, + ServiceProvider services) throws IllegalArgumentException { + PseudoDisassembler disasm = new PseudoDisassembler(program); + + StringBuilder sb = new StringBuilder(); + try { + Address addr = rowObject.funcStart(); + while (addr.compareTo( + rowObject.funcStart().add(randomForestRow.getNumInitialBytes())) < 0) { + PseudoInstruction instr = disasm.disassemble(addr); + if (instr.isValid()) { + sb.append(instr.toString()); + sb.append(" "); + } + else { + sb.append("? "); + } + addr = instr.getMaxAddress().add(1); + } + } + catch (Exception e) { + sb = new StringBuilder("??"); + } + + return sb.toString(); + } + } + } diff --git a/Ghidra/Extensions/MachineLearning/src/test/java/ghidra/machinelearning/functionfinding/RandomSubsetTest.java b/Ghidra/Extensions/MachineLearning/src/test/java/ghidra/machinelearning/functionfinding/RandomSubsetTest.java index db4ea497c1..9ebd96c679 100644 --- a/Ghidra/Extensions/MachineLearning/src/test/java/ghidra/machinelearning/functionfinding/RandomSubsetTest.java +++ b/Ghidra/Extensions/MachineLearning/src/test/java/ghidra/machinelearning/functionfinding/RandomSubsetTest.java @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -22,8 +22,8 @@ import java.util.*; import org.junit.Test; import generic.test.AbstractGenericTest; -import ghidra.program.model.address.AddressSet; -import ghidra.program.model.address.TestAddress; +import ghidra.pcodeCPort.utils.MutableLong; +import ghidra.program.model.address.*; import ghidra.util.exception.CancelledException; import ghidra.util.task.TaskMonitor; @@ -31,64 +31,88 @@ public class RandomSubsetTest extends AbstractGenericTest { @Test public void testGenerateTrivialSubsets() { - List empty = RandomSubsetUtils.generateRandomIntegerSubset(10, 0); - assertEquals(0, empty.size()); + long[] empty = RandomSubsetUtils.generateRandomIntegerSubset(10, 0); + assertEquals(0, empty.length); empty = RandomSubsetUtils.generateRandomIntegerSubset(0, 0); - assertEquals(0, empty.size()); - List complete = RandomSubsetUtils.generateRandomIntegerSubset(1000000, 1000000); - Collections.sort(complete); - Iterator iter = complete.iterator(); - long current = 0; - while (iter.hasNext()) { - long elem = iter.next(); - assertEquals(current++, elem); + assertEquals(0, empty.length); + long[] complete = RandomSubsetUtils.generateRandomIntegerSubset(1000000, 1000000); + Arrays.sort(complete); + for (int current = 0; current < complete.length; current++) { + long elem = complete[current]; + assertEquals(current, elem); } } @Test public void testBasicRandomSubsetOfAddresses() throws CancelledException { + // Check we are drawing unique addresses from the set AddressSet addrs = new AddressSet(); for (long i = 0; i < 10000; ++i) { addrs.add(new TestAddress(i)); } AddressSet rand = RandomSubsetUtils.randomSubset(addrs, 9998, TaskMonitor.DUMMY); assertEquals(9998, rand.getNumAddresses()); + + addrs.clear(); + + // Check we correctly draw from multiple non-contiguous ranges + for (long i = 0; i < 10000; i += 1000) { + if (i % 2000 != 0) + continue; + for (long j = i; j < i + 1000; j++) { + addrs.add(new TestAddress(j)); + } + } + rand = RandomSubsetUtils.randomSubset(addrs, 4998, TaskMonitor.DUMMY); + assertEquals(4998, rand.getNumAddresses()); + + for (Address addr : rand.getAddresses(true)) { + assertTrue(addrs.contains(addr)); + } } @Test public void testSwap() { - Map permuted = new HashMap<>(); + Map permuted = new HashMap<>(); assertTrue(permuted.isEmpty()); //should do nothing RandomSubsetUtils.swap(permuted, 1, 1); assertTrue(permuted.isEmpty()); - permuted.put(0l, 5l); - permuted.put(1l, 10l); + permuted.put(0l, new MutableLong(5l)); + permuted.put(1l, new MutableLong(10l)); RandomSubsetUtils.swap(permuted, 0, 1); assertEquals(2, permuted.size()); - assertEquals(Long.valueOf(5), permuted.get(1l)); - assertEquals(Long.valueOf(10), permuted.get(0l)); + assertEquals(5l, permuted.get(1l).get()); + assertEquals(10l, permuted.get(0l).get()); RandomSubsetUtils.swap(permuted, 100l, 200L); assertEquals(4, permuted.size()); - assertEquals(Long.valueOf(100), permuted.get(200l)); - assertEquals(Long.valueOf(200), permuted.get(100l)); + assertEquals(100l, permuted.get(200l).get()); + assertEquals(200l, permuted.get(100l).get()); } - /** - @Test - public void timingTest() throws CancelledException { - AddressSet big = new AddressSet(new TestAddress(0), new TestAddress(999999)); - - long start = System.nanoTime(); - List complete = RandomSubset.generateRandomIntegerSubset(1000000, 500000); - long end = System.nanoTime(); - Msg.info(this, "choosing random subset of integers: " + - (end - start) / RandomForestTrainingTask.NANOSECONDS_PER_SECOND); - start = System.nanoTime(); - AddressSet random = RandomSubset.randomSubset(big, 500000, TaskMonitor.DUMMY); - end = System.nanoTime(); - Msg.info(this, "choosing random subset of addresses: " + - (end - start) / RandomForestTrainingTask.NANOSECONDS_PER_SECOND); - }*/ +// @Test +// public void timingTest() throws CancelledException, InterruptedException { +// AddressSet big = new AddressSet(new TestAddress(0), new TestAddress(9999999)); +// +// Thread.sleep(10000); +// long start; +// long end; +// +// for (int i = 0; i < 10; i++) { +// start = System.nanoTime(); +// long[] complete = RandomSubsetUtils.generateRandomIntegerSubset(10000000, 5000000); +// end = System.nanoTime(); +// Msg.info(this, "choosing random subset of integers: " + +// (end - start) / RandomForestTrainingTask.NANOSECONDS_PER_SECOND); +// } +// +// for (int i = 0; i < 10; i++) { +// start = System.nanoTime(); +// AddressSet random1 = RandomSubsetUtils.randomSubset(big, 5000000, TaskMonitor.DUMMY); +// end = System.nanoTime(); +// Msg.info(this, "choosing random subset of addresses: " + +// (end - start) / RandomForestTrainingTask.NANOSECONDS_PER_SECOND); +// } +// } }