From e645d74a5fa862e9c7ffb5b0ec7b08eb9c396188 Mon Sep 17 00:00:00 2001 From: Dan <46821332+nsadeveloper789@users.noreply.github.com> Date: Mon, 5 Feb 2024 10:15:25 -0500 Subject: [PATCH] GP-4287: Format. Revise. Certify. --- .../WildcardAssembler/certification.manifest | 2 +- .../FindInstructionWithWildcard.java | 54 ++++----- .../WildSleighAssemblerInfo.java | 32 +++-- .../Wildcard_Assembler.html | 109 ++++++++++-------- .../java/ghidra/asm/wild/WildOperandInfo.java | 16 +++ .../ghidra/asm/wild/WildSleighAssembler.java | 6 + .../asm/wild/WildSleighAssemblerBuilder.java | 22 ++++ ...ildAssemblyFixedNumericStateGenerator.java | 1 - .../sem/WildAssemblyResolvedPatterns.java | 18 +++ .../sleigh/sem/AssemblyPatternBlock.java | 1 + 10 files changed, 160 insertions(+), 101 deletions(-) diff --git a/Ghidra/Features/WildcardAssembler/certification.manifest b/Ghidra/Features/WildcardAssembler/certification.manifest index 7815f11406..8f1aa79186 100644 --- a/Ghidra/Features/WildcardAssembler/certification.manifest +++ b/Ghidra/Features/WildcardAssembler/certification.manifest @@ -2,5 +2,5 @@ ##MODULE IP: FAMFAMFAM Icons - CC 2.5 ##MODULE IP: Oxygen Icons - LGPL 3.0 Module.manifest||GHIDRA||||END| -src/main/help/help/TOC_Source.xml||GHIDRA||reviewed||END| +src/main/help/help/TOC_Source.xml||GHIDRA||||END| src/main/help/help/topics/WildcardAssemblerModule/Wildcard_Assembler.html||GHIDRA||||END| diff --git a/Ghidra/Features/WildcardAssembler/ghidra_scripts/FindInstructionWithWildcard.java b/Ghidra/Features/WildcardAssembler/ghidra_scripts/FindInstructionWithWildcard.java index c69734de66..a1f220bfc4 100644 --- a/Ghidra/Features/WildcardAssembler/ghidra_scripts/FindInstructionWithWildcard.java +++ b/Ghidra/Features/WildcardAssembler/ghidra_scripts/FindInstructionWithWildcard.java @@ -29,35 +29,28 @@ // See the "WildSleighAssemblerInfo" script for a simpler use of the WildSleighAssembler. 
// @category Examples -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import ghidra.app.plugin.assembler.AssemblySelector; import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseResult; -import ghidra.app.plugin.assembler.sleigh.sem.AssemblyPatternBlock; -import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution; -import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolutionResults; +import ghidra.app.plugin.assembler.sleigh.sem.*; import ghidra.app.plugin.processors.sleigh.SleighLanguage; import ghidra.app.script.GhidraScript; -import ghidra.asm.wild.WildOperandInfo; -import ghidra.asm.wild.WildSleighAssembler; -import ghidra.asm.wild.WildSleighAssemblerBuilder; +import ghidra.asm.wild.*; import ghidra.asm.wild.sem.WildAssemblyResolvedPatterns; -import ghidra.program.model.mem.*; -import ghidra.program.model.address.*; +import ghidra.program.model.address.Address; +import ghidra.program.model.mem.Memory; +import ghidra.program.model.mem.MemoryAccessException; public class FindInstructionWithWildcard extends GhidraScript { + @Override public void run() throws Exception { - var instruction = askString("Instruction to search", - "Instruction to search for with wildcard (example is for x86_64, adjust if you are using a different architecture):", - "XOR R13D,`Q1/R1(2|3)D`"); + var instruction = askString("Instruction to search", """ + Instruction to search for with wildcard (example is for x86_64, adjust if you are \ + using a different architecture): \ + XOR R13D,`Q1/R1(2|3)D`"""); var allValidResults = getAllResolvedPatterns(instruction); var encodings = getMapOfUniqueInstructionEncodings(allValidResults); @@ -175,8 +168,7 @@ public class FindInstructionWithWildcard extends GhidraScript { * Returns true of the given value shares the same {@code maskedInstruction} and wildcard(s) * as this instance. 
* - * @param other - * Value to compare against + * @param other Value to compare against * @return True if both values share the same maskedInstruction and wildcard(s) */ boolean sameBaseEncoding(ReducedWildcardAssemblyResolvedPattern other) { @@ -194,7 +186,7 @@ public class FindInstructionWithWildcard extends GhidraScript { // Check all of other's WildOperandInfo for (WildOperandInfo otherInfo : other.parent.getOperandInfo()) { // Check if we have matching wildcards (names), expressions, and locations. - // Notice that we're *NOT* checking choice here, as we expect those to be different. + // We're *NOT* checking choice here, as we expect those to be different. if (info.wildcard().equals(otherInfo.wildcard()) && info.expression().equals(otherInfo.expression()) && info.location().equals(otherInfo.location())) { @@ -222,10 +214,8 @@ public class FindInstructionWithWildcard extends GhidraScript { * Does not currently print wildcard information about the search results, but this could be * added. * - * @param encodings - * HashMap of encodings to that encoding's possible WildOperandInfo values. - * @throws MemoryAccessException - * If we find bytes but can't read them + * @param encodings Map of encodings to that encoding's possible WildOperandInfo values. + * @throws MemoryAccessException If we find bytes but can't read them */ private void searchMemoryForEncodings( Map> encodings, @@ -274,12 +264,11 @@ public class FindInstructionWithWildcard extends GhidraScript { * NOTE: This is certainly not the highest performance way to do this, but it is reasonably * simple and shows what is possible. * - * @param matchAddress - * The address where our search hit occurred - * @param matchData - * The bytes found at matchAddress. Must include the entire matching instruction! 
- * @param allValidResolvedPatterns - * All resolved patterns which were searched from (used to find wildcard information) + * @param matchAddress The address where our search hit occurred + * @param matchData The bytes found at matchAddress. Must include the entire matching + * instruction! + * @param allValidResolvedPatterns All resolved patterns which were searched from (used to find + * wildcard information) */ private void printSearchHitInfo(Address matchAddress, byte[] matchData, List allValidResolvedPatterns) { @@ -321,8 +310,7 @@ public class FindInstructionWithWildcard extends GhidraScript { * Return all items from {@code results} which are instances of * {@link WildAssemblyResolvedPatterns} * - * @param results - * The results to return {@link WildAssemblyResolvePatterns} from + * @param results The results to return {@link WildAssemblyResolvePatterns} from * @return All {@link WildAssemblyResolvedPatterns} which were found in the input */ private List getValidResults(AssemblyResolutionResults results) { diff --git a/Ghidra/Features/WildcardAssembler/ghidra_scripts/WildSleighAssemblerInfo.java b/Ghidra/Features/WildcardAssembler/ghidra_scripts/WildSleighAssemblerInfo.java index 6a7ad8b435..30421549c2 100644 --- a/Ghidra/Features/WildcardAssembler/ghidra_scripts/WildSleighAssemblerInfo.java +++ b/Ghidra/Features/WildcardAssembler/ghidra_scripts/WildSleighAssemblerInfo.java @@ -24,7 +24,6 @@ // See the "FindInstructionWithWildcard" script for another example of using the WildSleighAssembler // @category Examples -import java.util.Arrays; import java.util.Collection; import java.util.List; import java.util.stream.Collectors; @@ -34,16 +33,17 @@ import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseResult; import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution; import ghidra.app.plugin.processors.sleigh.SleighLanguage; import ghidra.app.script.GhidraScript; -import ghidra.asm.wild.WildOperandInfo; -import ghidra.asm.wild.WildSleighAssembler; 
-import ghidra.asm.wild.WildSleighAssemblerBuilder; +import ghidra.asm.wild.*; import ghidra.asm.wild.sem.WildAssemblyResolvedPatterns; public class WildSleighAssemblerInfo extends GhidraScript { - List sampleInstructions = - Arrays.asList("MOV EAX,`Q1`", "MOV RDI,qword ptr [`Q1` + -0x30]", "Custom"); + List sampleInstructions = List.of( + "MOV EAX,`Q1`", + "MOV RDI,qword ptr [`Q1` + -0x30]", + "Custom"); + @Override public void run() throws Exception { String instruction = askChoice("Instruction to assemble", "Assemble this instruction:", @@ -61,8 +61,8 @@ public class WildSleighAssemblerInfo extends GhidraScript { /** * Use a {@link WildSleighAssembler} to assemble the given {@code wildcardedInstruction} * - * @param wildcardedInstruction - * String of the instruction to assemble, possibly including a wildcard + * @param wildcardedInstruction String of the instruction to assemble, possibly including a + * wildcard * @return All AssemblyParseResult produced from the given input */ private List getAllAssemblyResolutions( @@ -74,10 +74,10 @@ public class WildSleighAssemblerInfo extends GhidraScript { // correct architecture. if (sampleInstructions.contains(wildcardedInstruction) && !language.getLanguageID().toString().equals("x86:LE:64:default")) { - popup( - "The current program is not a \"x86:LE:64:default\" binary that the example was " + - "designed for. This script will continue and try anyway, but the results might " + - "not be as expected. Retry with a custom instruction in your architecture!"); + popup(""" + The current program is not a \"x86:LE:64:default\" binary that the example was \ + designed for. This script will continue and try anyway, but the results might \ + not be as expected. 
Retry with a custom instruction in your architecture!"""); } // Create a WildSleighAssembler that we'll use to assemble our wildcard-included instruction @@ -123,9 +123,8 @@ public class WildSleighAssemblerInfo extends GhidraScript { } if (errorCount > 0) { - println( - "Additionally " + errorCount + - " non-WildAssemblyResolvedPatterns were not printed"); + println("Additionally, " + errorCount + + " non-WildAssemblyResolvedPatterns were not printed"); } } @@ -134,8 +133,7 @@ public class WildSleighAssemblerInfo extends GhidraScript { * Print information about a single {@link WildAssemblyResolvedPatterns}, including information * about each of its wildcards. * - * @param x - * The value to print information about. + * @param x The value to print information about. */ private void printWildAssemblyResolvedPatterns(WildAssemblyResolvedPatterns x) { println("Instruction bits (including wildcard values): " + x.getInstruction()); diff --git a/Ghidra/Features/WildcardAssembler/src/main/help/help/topics/WildcardAssemblerModule/Wildcard_Assembler.html b/Ghidra/Features/WildcardAssembler/src/main/help/help/topics/WildcardAssemblerModule/Wildcard_Assembler.html index d908f7a8c7..7cfc4fba8a 100644 --- a/Ghidra/Features/WildcardAssembler/src/main/help/help/topics/WildcardAssemblerModule/Wildcard_Assembler.html +++ b/Ghidra/Features/WildcardAssembler/src/main/help/help/topics/WildcardAssemblerModule/Wildcard_Assembler.html @@ -2,6 +2,8 @@ + @@ -13,84 +15,93 @@

Wildcard Assembler Module

+

This feature is currently only available as an API for Ghidra scripts and plugins. For + an example of how to use the API, see the FindInstructionWithWildcard and + WildSleighAssemblerInfo scripts in the Script Manager.

-

This feature is currently only available as an API for Ghidra - scripts and plugins. For an example of how to use the API, see the - FindInstructionWithWildcard and WildSleighAssemblerInfo scripts in the - Script Manager.

- -

The Wildcard Assembler extends Ghidra's assembler to enable - assembling instructions with specific tokens replaced with wildcards.

+

The Wildcard Assembler extends Ghidra's assembler to enable assembling instructions + with specific tokens replaced with wildcards.

-

This assembler will return metadata for each wildcard in an assembled - instruction. This metadata includes details of which specific bits of an - assembled instruction are used to derive the value of the wildcarded token - and the expression used to derive the value.

+

This assembler will return metadata for each wildcard in an assembled instruction. This + metadata includes details of which specific bits of an assembled instruction are used to + derive the value of the wildcarded token and the expression used to derive the value.

Wildcard Syntax

-

Wildcards in instructions are specified by replacing the - to-be-wildcarded token with a wildcard name surrounded by backticks (e.g. - `Q1` where Q1 is an arbitrary wildcard name) and passing the - entire instruction to the Wildcard Assembler.

+

Wildcards in instructions are specified by replacing the to-be-wildcarded token with a + wildcard name surrounded by backticks (e.g. `Q1` where Q1 is an arbitrary + wildcard name) and passing the entire instruction to the Wildcard Assembler.

-

By default, the Wildcard Assembler will return metadata about all - possible values that a wildcarded token could take and all the encodings - of all these values. This behavior can be limited by filtering the - wildcard by appending specific syntax after the wildcard name:

+

By default, the Wildcard Assembler will return metadata about all possible values that a + wildcarded token could take and all the encodings of all these values. This behavior can be + limited by filtering the wildcard by appending specific syntax after the wildcard name:

    -
  • Numeric Filter: +
  • + Numeric Filter: +
      -
    • Appending [..] will constrain the wildcarded token - to only numeric values (and not registers or other strings).
    • -
    • Appending [0x0..0x100] (where 0x0 and 0x100 are - arbitrary hexadecimal values with the smaller number first) will - constrain the wildcarded token to only numeric values between the - two given values. This can be used to ensure that the returned - encodings can hold values of a desired size. Multiple non-contiguous - ranges can be specified by separating them with commas (e.g. - [0x0..0x5,0x1000-0x4000])
    • +
    • Appending [..] e.g., MOV RAX, `Q1[..]`, will constrain + the wildcarded token to only numeric values (and not registers or other strings).
    • + +
    • Appending [0x0..0x100] (where 0x0 and 0x100 are arbitrary hexadecimal + values with the smaller number first) will constrain the wildcarded token to only + numeric values between the two given values. This can be used to ensure that the + returned encodings can hold values of a desired size. Multiple non-contiguous ranges + can be specified by separating them with commas (e.g. + [0x0..0x5,0x1000..0x4000])
    -
  • -
  • Regex Filter: -
      -
    • Appending /ABCD where ABCD is an arbitrary - regular expression will constrain the wildcarded token to only be - string tokens matching the given regular expression. This is most - likely used for filtering register names; for example appending - /(sp)|(lr) to a wildcard in a register position in - ARM assembly will limit the wildcard results to only encodings - using the sp or lr registers in that - position.
    • -
    -
  • + + +
  • + Regex Filter: + +
      +
    • Appending /ABCD where ABCD is an arbitrary regular expression will + constrain the wildcarded token to only be string tokens matching the given regular + expression. This is most likely used for filtering register names; for example + appending /(sp)|(lr) to a wildcard in a register position in ARM assembly + will limit the wildcard results to only encodings using the sp or + lr registers in that position.
    • +
    +
-

Normally a wildcard will only match a single token. To allow a single - wildcard to match multiple related tokens: precede the wildcard name with a - ! character. For example, in a x86:LE:32:default binary:

+

Normally a wildcard will only match a single token. For example, in an x86:LE:32:default + binary:

+
No wildcard:
+
MOVSD.REP ES:EDI,ESI
Single token:
+
MOVSD.REP `Q1`:EDI,ESI
Single token:
+
MOVSD.REP ES:`Q2`,ESI
+
+
+ +

To allow a single wildcard to match multiple related tokens, precede the wildcard name + with a ! character:

+ +
+
+
Multi-token:
+ +
MOVSD.REP `!Q4`,ESI
Single token (Does NOT assemble):
-
MOVSD.REP `Q3`,ESI
-
Multi-token:
-
MOVSD.REP `!Q4`,ESI
+
MOVSD.REP `Q3`,ESI

Provided by: Wildcard Assembler Module

-
diff --git a/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/WildOperandInfo.java b/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/WildOperandInfo.java index bacec42be0..10da5f8ce9 100644 --- a/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/WildOperandInfo.java +++ b/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/WildOperandInfo.java @@ -21,9 +21,25 @@ import ghidra.app.plugin.assembler.sleigh.sem.AssemblyConstructorSemantic; import ghidra.app.plugin.assembler.sleigh.sem.AssemblyPatternBlock; import ghidra.app.plugin.processors.sleigh.expression.PatternExpression; +/** + * Information about an operand that was matched to a wildcard + * + * @param wildcard the name of the wildcard that matched the operand + * @param path the hierarchy of Sleigh constructors leading to the operand + * @param location the bit pattern giving the location of the operand's field(s) in the machine + * instruction + * @param expression the expression describing how to encode the operand in the field(s) + * @param choice if applicable, the value encoded in the result containing this information + */ public record WildOperandInfo(String wildcard, List path, AssemblyPatternBlock location, PatternExpression expression, Object choice) { + /** + * Copy this wildcard info, but with an increased shift amount + * + * @param amt the number of bits to shift (right) + * @return the copy + */ public WildOperandInfo shift(int amt) { return new WildOperandInfo(wildcard, path, location.shift(amt), expression, choice); } diff --git a/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/WildSleighAssembler.java b/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/WildSleighAssembler.java index 6117cdf71b..1569d18b4f 100644 --- a/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/WildSleighAssembler.java +++ b/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/WildSleighAssembler.java @@ -26,6 
+26,12 @@ import ghidra.asm.wild.sem.WildAssemblyTreeResolver; import ghidra.program.model.address.Address; import ghidra.program.model.listing.Program; +/** + * An assembler implementation that allows for wildcard operands + * + *

+ * Construct these using {@link WildSleighAssemblerBuilder}. + */ public class WildSleighAssembler extends AbstractSleighAssembler { protected WildSleighAssembler( diff --git a/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/WildSleighAssemblerBuilder.java b/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/WildSleighAssemblerBuilder.java index ed99170351..bf862a0278 100644 --- a/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/WildSleighAssemblerBuilder.java +++ b/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/WildSleighAssemblerBuilder.java @@ -19,6 +19,7 @@ import java.util.*; import ghidra.app.plugin.assembler.AssemblySelector; import ghidra.app.plugin.assembler.sleigh.AbstractSleighAssemblerBuilder; +import ghidra.app.plugin.assembler.sleigh.SleighAssemblerBuilder; import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar; import ghidra.app.plugin.assembler.sleigh.grammars.AssemblySentential; import ghidra.app.plugin.assembler.sleigh.sem.AbstractAssemblyResolutionFactory; @@ -33,11 +34,32 @@ import ghidra.asm.wild.sem.WildAssemblyResolvedPatterns; import ghidra.asm.wild.symbol.*; import ghidra.program.model.listing.Program; +/** + * The builder for wildcard-enabled assemblers. + * + *

+ * Ideally, only one of these is created and cached per language, to save on the cost of building + * the assembler. However, if heap space needs to be freed up, then the builder must be disposed. + * + *

+ * This is based on the same abstract class as {@link SleighAssemblerBuilder}. See its documentation + * for more information. + */ public class WildSleighAssemblerBuilder extends AbstractSleighAssemblerBuilder { protected final Map wildNTs = new HashMap<>(); + /** + * Construct a builder for the given language + * + *

+ * Once a builder is prepared for the given language, it can be used to build an assembler for + * any number of programs using that same language. Clients should take advantage of this to + * avoid re-incurring the steep cost of constructing an assembler for the same language. + * + * @param lang the language + */ public WildSleighAssemblerBuilder(SleighLanguage lang) { super(lang); } diff --git a/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/sem/WildAssemblyFixedNumericStateGenerator.java b/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/sem/WildAssemblyFixedNumericStateGenerator.java index 5433aa359a..bf779131db 100644 --- a/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/sem/WildAssemblyFixedNumericStateGenerator.java +++ b/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/sem/WildAssemblyFixedNumericStateGenerator.java @@ -20,7 +20,6 @@ import java.util.stream.Stream; import ghidra.app.plugin.assembler.sleigh.sem.*; import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol; import ghidra.asm.wild.tree.WildAssemblyParseToken; -import ghidra.asm.wild.tree.WildAssemblyParseToken.RegexWildcard; public class WildAssemblyFixedNumericStateGenerator extends AbstractAssemblyStateGenerator { diff --git a/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/sem/WildAssemblyResolvedPatterns.java b/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/sem/WildAssemblyResolvedPatterns.java index 3ab0fd5dc0..fd2e0ec96b 100644 --- a/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/sem/WildAssemblyResolvedPatterns.java +++ b/Ghidra/Features/WildcardAssembler/src/main/java/ghidra/asm/wild/sem/WildAssemblyResolvedPatterns.java @@ -22,10 +22,28 @@ import ghidra.app.plugin.assembler.sleigh.sem.*; import ghidra.app.plugin.processors.sleigh.expression.PatternExpression; import ghidra.asm.wild.WildOperandInfo; +/** + * The result of assembling an instruction with the wildcard 
assembler + */ public interface WildAssemblyResolvedPatterns extends AssemblyResolvedPatterns { + /** + * The information for wildcarded operands in this instruction + * + * @return the set of information + */ Set getOperandInfo(); + /** + * Create a copy of this result with added wildcard information + * + * @param wildcard see {@link WildOperandInfo} + * @param path see {@link WildOperandInfo} + * @param location see {@link WildOperandInfo} + * @param expression see {@link WildOperandInfo} + * @param choice see {@link WildOperandInfo} + * @return the copy + */ WildAssemblyResolvedPatterns withWildInfo(String wildcard, List path, AssemblyPatternBlock location, PatternExpression expression, Object choice); diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyPatternBlock.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyPatternBlock.java index c1b71284f2..0456fc591f 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyPatternBlock.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyPatternBlock.java @@ -157,6 +157,7 @@ public class AssemblyPatternBlock implements Comparable { * Convert a block from a disjoint pattern into an assembly pattern block * * @param pat the pattern to convert + * @param minLen the minimum byte length of the block * @param context true to select the context block, false to select the instruction block * @return the converted pattern block */