Merge remote-tracking branch 'origin/GP-587_FixMultilineAssemblyWithDelaySlots'

This commit is contained in:
ghidra1 2021-04-02 18:14:56 -04:00
commit f79f7e84da
5 changed files with 133 additions and 47 deletions

View file

@ -21,13 +21,14 @@ import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseResult;
import ghidra.app.plugin.assembler.sleigh.sem.*;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressOverflowException;
import ghidra.program.model.lang.InstructionBlock;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.listing.InstructionIterator;
import ghidra.program.model.mem.MemoryAccessException;
/**
* The primary interface for performing assembly in Ghidra.
*
* <p>
* Use the {@link Assemblers} class to obtain a suitable implementation for a given program or
* language.
*/
@ -35,26 +36,30 @@ public interface Assembler {
/**
* Assemble a sequence of instructions and place them at the given address.
*
* <p>
* This method is only valid if the assembler is bound to a program. An instance may optionally
* implement this method without a program binding. In that case, the returned instruction
* block will refer to pseudo instructions.
* implement this method without a program binding. In that case, the returned iterator will
* refer to pseudo instructions.
*
* <p>
* NOTE: There must be an active transaction on the bound program for this method to succeed.
*
* @param at the location where the resulting instructions should be placed
* @param listing a new-line separated or array sequence of instructions
* @return the block of resulting instructions
* @return an iterator over the resulting instructions
* @throws AssemblySyntaxException a textual instruction is not well-formed
* @throws AssemblySemanticException a well-formed instruction cannot be assembled
* @throws MemoryAccessException there is an issue writing the result to program memory
* @throws AddressOverflowException the resulting block is beyond the valid address range
*/
public InstructionBlock assemble(Address at, String... listing) throws AssemblySyntaxException,
public InstructionIterator assemble(Address at, String... listing)
throws AssemblySyntaxException,
AssemblySemanticException, MemoryAccessException, AddressOverflowException;
/**
* Assemble a line instruction at the given address.
*
* <p>
* This method is valid with or without a bound program. Even if bound, the program is not
* modified; however, the appropriate context information is taken from the bound program.
* Without a program, the language's default context is taken at the given location.
@ -71,6 +76,7 @@ public interface Assembler {
/**
* Assemble a line instruction at the given address, assuming the given context.
*
* <p>
* This method works like {@link #assembleLine(Address, String)} except that it allows you to
* override the assumed context at that location.
*
@ -87,15 +93,18 @@ public interface Assembler {
/**
* Parse a line instruction.
*
* <p>
* Generally, you should just use {@link #assembleLine(Address, String)}, but if you'd like
* access to the parse trees outside of an {@link AssemblySelector}, then this may be an
* acceptable option. Most notably, this is an excellent way to obtain suggestions for
* auto-completion.
*
* <p>
* Each item in the returned collection is either a complete parse tree, or a syntax error.
* Because all parse paths are attempted, it's possible to get many mixed results. For example,
* the input line may be a valid instruction; however, there may be suggestions to continue the
* line toward another valid instruction.
*
* @param line the line (or partial line) to parse
* @return the results of parsing
*/
@ -104,12 +113,15 @@ public interface Assembler {
/**
* Resolve a given parse tree at the given address, assuming the given context.
*
* <p>
* Each item in the returned collection is either a completely resolved instruction, or a
* semantic error. Because all resolutions are attempted, it's possible to get many mixed
* results.
*
* NOTE: The resolved instructions are given as masks and values. Where the mask does not
* cover, you can choose any value.
* <p>
* NOTE: The resolved instructions are given as masks and values. Where the mask does not cover,
* you can choose any value.
*
* @param parse a parse result giving a valid tree
* @param at the location of the start of the instruction
* @param ctx the context register value at the start of the instruction
@ -121,12 +133,15 @@ public interface Assembler {
/**
* Resolve a given parse tree at the given address.
*
* <p>
* Each item in the returned collection is either a completely resolved instruction, or a
* semantic error. Because all resolutions are attempted, it's possible to get many mixed
* results.
*
* NOTE: The resolved instructions are given as masks and values. Where the mask does not
* cover, you can choose any value.
* <p>
* NOTE: The resolved instructions are given as masks and values. Where the mask does not cover,
* you can choose any value.
*
* @param parse a parse result giving a valid tree
* @param at the location of the start of the instruction
* @return the results of semantic resolution
@ -136,8 +151,10 @@ public interface Assembler {
/**
* Assemble a line instruction at the given address.
*
* <p>
* This method works like {@link #resolveLine(Address, String, AssemblyPatternBlock)}, except
* that it derives the context using {@link #getContextAt(Address)}.
*
* @param at the location of the start of the instruction
* @param line the textual assembly code
* @return the collection of semantic resolution results
@ -149,9 +166,11 @@ public interface Assembler {
/**
* Assemble a line instruction at the given address, assuming the given context.
*
* <p>
* This method works like {@link #assembleLine(Address, String, AssemblyPatternBlock)}, except
* that it returns all possible resolutions for the parse trees that pass the
* {@link AssemblySelector}.
*
* @param at the location of the start of the instruction
* @param line the textual assembly code
* @param ctx the context register value at the start of the instruction
@ -164,8 +183,10 @@ public interface Assembler {
/**
* Place a resolved (and fully-masked) instruction into the bound program.
*
* This method is not valid without a program binding. Also, this method must be called during
* a program database transaction.
* <p>
* This method is not valid without a program binding. Also, this method must be called during a
* program database transaction.
*
* @param res the resolved and fully-masked instruction
* @param at the location of the start of the instruction
* @return the new {@link Instruction} code unit
@ -175,22 +196,27 @@ public interface Assembler {
throws MemoryAccessException;
/**
* Place an instruction into the bound program.
* Place instruction bytes into the bound program.
*
* <p>
* This method is not valid without a program binding. Also, this method must be called during a
* program database transaction.
*
* This method is not valid without a program binding. Also, this method must be called during
* a program database transaction.
* @param insbytes the instruction data
* @param at the location of the start of the instruction
* @return the new {@link Instruction} code unit
* @return an iterator over the disassembled instructions
* @throws MemoryAccessException there is an issue writing the result to program memory
*/
public Instruction patchProgram(byte[] insbytes, Address at) throws MemoryAccessException;
public InstructionIterator patchProgram(byte[] insbytes, Address at)
throws MemoryAccessException;
/**
* Get the context at a given address.
*
* <p>
* If there is a program binding, this will extract the actual context at the given address.
* Otherwise, it will obtain the default context at the given address for the language.
*
* @param addr the address
* @return the context
*/

View file

@ -28,16 +28,15 @@ import ghidra.program.model.listing.Program;
/**
* The primary class for obtaining an {@link Assembler} for a Ghidra-supported language.
*
* <p>
* The general flow is: First, obtain an assembler for a language or program. Second, call its
* {@link Assembler#assemble(Address, String...)} and related methods to perform assembly. More
* advanced uses pass an {@link AssemblySelector} to control certain aspects of assembly instruction
* selection, and to obtain advanced diagnostics, like detailed errors and code completion.
*
* <pre>
* {@code
* Assembler asm = Assemblers.getAssembler(currentProgram);
* asm.assemble(currentAddress, "ADD ...");
* }
* </pre>
*/
public final class Assemblers {
@ -45,6 +44,7 @@ public final class Assemblers {
/**
* Get a builder for the given language, possibly using a cached one.
*
* @param lang the language
* @return the builder for that language, if successful
*/
@ -64,10 +64,11 @@ public final class Assemblers {
/**
* Get an assembler for the given program.
*
* Provides an assembler suitable for the program's language, and bound to the program. Calls
* to its Assembler#assemble() function will cause modifications to the bound program. If this
* is the first time an assembler for the program's language has been requested, this function
* may take some time to build the assembler.
* <p>
* Provides an assembler suitable for the program's language, and bound to the program. Calls to
* its Assembler#assemble() function will cause modifications to the bound program. If this is
* the first time an assembler for the program's language has been requested, this function may
* take some time to build the assembler.
*
* @param selector a method to select a single result from many
* @param program the program for which an assembler is requested
@ -81,6 +82,7 @@ public final class Assemblers {
/**
* Get an assembler for the given language.
*
* <p>
* Provides a suitable assembler for the given language. Only calls to its
* Assembler#assembleLine() method are valid. If this is the first time a language has been
* requested, this function may take some time to build the assembler. Otherwise, it returns a

View file

@ -15,6 +15,8 @@
*/
package ghidra.app.plugin.assembler.sleigh;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.*;
import ghidra.app.plugin.assembler.*;
@ -25,7 +27,8 @@ import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.program.disassemble.Disassembler;
import ghidra.program.disassemble.DisassemblerMessageListener;
import ghidra.program.model.address.*;
import ghidra.program.model.lang.*;
import ghidra.program.model.lang.Register;
import ghidra.program.model.lang.RegisterValue;
import ghidra.program.model.listing.*;
import ghidra.program.model.mem.Memory;
import ghidra.program.model.mem.MemoryAccessException;
@ -98,22 +101,25 @@ public class SleighAssembler implements Assembler {
if (!res.getInstruction().isFullMask()) {
throw new AssemblySelectionError("Selected instruction must have a full mask.");
}
return patchProgram(res.getInstruction().getVals(), at);
return patchProgram(res.getInstruction().getVals(), at).next();
}
@Override
public Instruction patchProgram(byte[] insbytes, Address at) throws MemoryAccessException {
listing.clearCodeUnits(at, at.add(insbytes.length - 1), false);
public InstructionIterator patchProgram(byte[] insbytes, Address at)
throws MemoryAccessException {
Address end = at.add(insbytes.length - 1);
listing.clearCodeUnits(at, end, false);
memory.setBytes(at, insbytes);
dis.disassemble(at, new AddressSet(at));
return listing.getInstructionAt(at);
return listing.getInstructions(new AddressSet(at, end), true);
}
@Override
public InstructionBlock assemble(Address at, String... assembly) throws AssemblySyntaxException,
AssemblySemanticException, MemoryAccessException, AddressOverflowException {
InstructionBlock block = new InstructionBlock(at);
public InstructionIterator assemble(Address at, String... assembly)
throws AssemblySyntaxException, AssemblySemanticException, MemoryAccessException,
AddressOverflowException {
Address start = at;
ByteArrayOutputStream buf = new ByteArrayOutputStream();
for (String part : assembly) {
for (String line : part.split("\n")) {
RegisterValue rv = program.getProgramContext().getDisassemblyContext(at);
@ -124,13 +130,16 @@ public class SleighAssembler implements Assembler {
if (insbytes == null) {
return null;
}
Instruction ins = patchProgram(insbytes, at);
block.addInstruction(ins);
try {
buf.write(insbytes);
}
catch (IOException e) {
throw new AssertionError(e);
}
at = at.addNoWrap(insbytes.length);
}
}
return block;
return patchProgram(buf.toByteArray(), start);
}
@Override
@ -221,10 +230,11 @@ public class SleighAssembler implements Assembler {
/**
* A convenience to obtain a map of program label strings to long values
*
* @return the map
*
* {@literal TODO Use a Map<String, Address> instead so that, if possible, symbol values can be checked}
* lest they be an invalid substitution for a given operand.
* {@literal TODO Use a Map<String, Address> instead so that, if possible, symbol values can be checked}
* lest they be an invalid substitution for a given operand.
*/
protected Map<String, Long> getProgramLabels() {
Map<String, Long> labels = new HashMap<>();

View file

@ -24,12 +24,10 @@ import ghidra.program.model.listing.Program;
* This is meant to be used as an idiom in a try-with-resources block:
*
* <pre>
* {@code
* try (ProgramTransaction t = ProgramTransaction.open(program, "Demo")) {
* program.getMemory().....
* t.commit();
* }
* }
* </pre>
*
* <p>

View file

@ -15,36 +15,86 @@
*/
package ghidra.app.plugin.assembler.sleigh;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import java.util.ArrayList;
import java.util.List;
import org.junit.*;
import generic.test.AbstractGenericTest;
import ghidra.app.plugin.assembler.*;
import ghidra.app.plugin.processors.sleigh.SleighLanguageProvider;
import ghidra.program.database.ProgramDB;
import ghidra.program.database.util.ProgramTransaction;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressOverflowException;
import ghidra.program.model.lang.Language;
import ghidra.program.model.lang.LanguageID;
import ghidra.program.model.listing.*;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.TaskMonitor;
public class PublicAPITest extends AbstractGenericTest {
private Language x86;
Language x86;
Language toy;
Program program;
/**
 * Look up the languages the tests assemble for.
 *
 * <p>
 * Both languages are requested by ID from a freshly constructed
 * {@link SleighLanguageProvider}: {@code x86:LE:64:default} and {@code Toy:BE:64:default}.
 *
 * @throws Exception if a language cannot be obtained
 */
@Before
public void setUp() throws Exception {
	final SleighLanguageProvider languages = new SleighLanguageProvider();

	final LanguageID x86Id = new LanguageID("x86:LE:64:default");
	x86 = languages.getLanguage(x86Id);

	final LanguageID toyId = new LanguageID("Toy:BE:64:default");
	toy = languages.getLanguage(toyId);
}
/**
 * Release this test's hold on {@link #program}, if a test created one.
 *
 * @throws Exception declared for the test framework; nothing here is caught or translated
 */
@After
public void tearDown() throws Exception {
	if (program == null) {
		// The test never created a program, so there is nothing to release.
		return;
	}
	program.release(this);
}
/**
 * Assemble a single x86 instruction with an assembler obtained for the language alone (no
 * program), and check that a non-empty byte array comes back.
 *
 * @throws AssemblySyntaxException if the instruction text cannot be parsed
 * @throws AssemblySemanticException if the parsed instruction cannot be assembled
 */
@Test
public void testADD0() throws AssemblySyntaxException, AssemblySemanticException {
// Mostly just test that it doesn't crash
Assembler asm = Assemblers.getAssembler(x86);
byte[] b =
asm.assembleLine(x86.getDefaultSpace().getAddress(0x40000000), "ADD byte ptr [RBX],BL");
// NOTE(review): this call and the assertion below look like the removed and added sides of
// the same diff hunk shown together -- confirm which one belongs in the final file.
printArray(b);
assertNotEquals(0, b.length);
}
public static void printArray(byte[] arr) {
for (int i = 0; i < arr.length; i++) {
System.out.printf("%02x", arr[i]);
/**
 * Build an address in the default address space of {@link #program}.
 *
 * <p>
 * {@link #program} must already be assigned; this helper dereferences it unconditionally.
 *
 * @param offset the offset within the default address space
 * @return the address at that offset
 */
protected Address addr(long offset) {
	return program.getAddressFactory()
			.getDefaultAddressSpace()
			.getAddress(offset);
}
/**
 * Assemble two lines in one call on a Toy-language program and check that both resulting
 * instructions are returned by the iterator.
 *
 * <p>
 * The listing is a {@code brds} branch followed by an {@code add}. Going by the test name, the
 * second line is meant to occupy the branch's delay slot; the expected {@code _add} mnemonic is
 * presumably how a delay-slot instruction is reported (TODO confirm against the listing API).
 *
 * @throws Exception if the program, its memory block, or the assembly cannot be set up
 */
@Test
public void testAssembleWithDelaySlot() throws Exception {
	program = new ProgramDB("test", toy, toy.getDefaultCompilerSpec(), this);
	InstructionIterator it;
	try (ProgramTransaction tid = ProgramTransaction.open(program, "Test")) {
		// The assembler patches program memory, so the target range must exist and be
		// initialized before assembling into it.
		program.getMemory()
				.createInitializedBlock(".text", addr(0x00400000), 0x1000, (byte) 0,
					TaskMonitor.DUMMY, false);
		Assembler asm = Assemblers.getAssembler(program);

		it = asm.assemble(addr(0x00400000),
			"brds 0x00400004",
			"add r0, #6");

		tid.commit();
	}

	// Drain the iterator so the count and the individual mnemonics can both be checked.
	List<Instruction> result = new ArrayList<>();
	while (it.hasNext()) {
		result.add(it.next());
	}

	assertEquals(2, result.size());
	assertEquals("brds", result.get(0).getMnemonicString());
	assertEquals("_add", result.get(1).getMnemonicString());
}
}