GP-587: Assembling bytes into a side buffer, then patching all at once.

This commit is contained in:
Dan 2021-02-09 08:03:40 -05:00
parent ed3cc10344
commit 98331405b8
2 changed files with 49 additions and 28 deletions

View file

@ -21,8 +21,8 @@ import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseResult;
import ghidra.app.plugin.assembler.sleigh.sem.*; import ghidra.app.plugin.assembler.sleigh.sem.*;
import ghidra.program.model.address.Address; import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressOverflowException; import ghidra.program.model.address.AddressOverflowException;
import ghidra.program.model.lang.InstructionBlock;
import ghidra.program.model.listing.Instruction; import ghidra.program.model.listing.Instruction;
import ghidra.program.model.listing.InstructionIterator;
import ghidra.program.model.mem.MemoryAccessException; import ghidra.program.model.mem.MemoryAccessException;
/** /**
@ -36,8 +36,8 @@ public interface Assembler {
* Assemble a sequence of instructions and place them at the given address. * Assemble a sequence of instructions and place them at the given address.
* *
* This method is only valid if the assembler is bound to a program. An instance may optionally * This method is only valid if the assembler is bound to a program. An instance may optionally
* implement this method without a program binding. In that case, the returned instruction * implement this method without a program binding. In that case, the returned instruction block
* block will refer to pseudo instructions. * will refer to pseudo instructions.
* *
* NOTE: There must be an active transaction on the bound program for this method to succeed. * NOTE: There must be an active transaction on the bound program for this method to succeed.
* *
@ -49,7 +49,8 @@ public interface Assembler {
* @throws MemoryAccessException there is an issue writing the result to program memory * @throws MemoryAccessException there is an issue writing the result to program memory
* @throws AddressOverflowException the resulting block is beyond the valid address range * @throws AddressOverflowException the resulting block is beyond the valid address range
*/ */
public InstructionBlock assemble(Address at, String... listing) throws AssemblySyntaxException, public InstructionIterator assemble(Address at, String... listing)
throws AssemblySyntaxException,
AssemblySemanticException, MemoryAccessException, AddressOverflowException; AssemblySemanticException, MemoryAccessException, AddressOverflowException;
/** /**
@ -96,6 +97,7 @@ public interface Assembler {
* Because all parse paths are attempted, it's possible to get many mixed results. For example, * Because all parse paths are attempted, it's possible to get many mixed results. For example,
* The input line may be a valid instruction; however, there may be suggestions to continue the * The input line may be a valid instruction; however, there may be suggestions to continue the
* line toward another valid instruction. * line toward another valid instruction.
*
* @param line the line (or partial line) to parse * @param line the line (or partial line) to parse
* @return the results of parsing * @return the results of parsing
*/ */
@ -108,8 +110,9 @@ public interface Assembler {
* semantic error. Because all resolutions are attempted, it's possible to get many mixed * semantic error. Because all resolutions are attempted, it's possible to get many mixed
* results. * results.
* *
* NOTE: The resolved instructions are given as masks and values. Where the mask does not * NOTE: The resolved instructions are given as masks and values. Where the mask does not cover,
* cover, you can choose any value. * you can choose any value.
*
* @param parse a parse result giving a valid tree * @param parse a parse result giving a valid tree
* @param at the location of the start of the instruction * @param at the location of the start of the instruction
* @param ctx the context register value at the start of the instruction * @param ctx the context register value at the start of the instruction
@ -125,8 +128,9 @@ public interface Assembler {
* semantic error. Because all resolutions are attempted, it's possible to get many mixed * semantic error. Because all resolutions are attempted, it's possible to get many mixed
* results. * results.
* *
* NOTE: The resolved instructions are given as masks and values. Where the mask does not * NOTE: The resolved instructions are given as masks and values. Where the mask does not cover,
* cover, you can choose any value. * you can choose any value.
*
* @param parse a parse result giving a valid tree * @param parse a parse result giving a valid tree
* @param at the location of the start of the instruction * @param at the location of the start of the instruction
* @return the results of semantic resolution * @return the results of semantic resolution
@ -138,6 +142,7 @@ public interface Assembler {
* *
* This method works like {@link #resolveLine(Address, String, AssemblyPatternBlock)}, except * This method works like {@link #resolveLine(Address, String, AssemblyPatternBlock)}, except
* that it derives the context using {@link #getContextAt(Address)}. * that it derives the context using {@link #getContextAt(Address)}.
*
* @param at the location of the start of the instruction * @param at the location of the start of the instruction
* @param line the textual assembly code * @param line the textual assembly code
* @return the collection of semantic resolution results * @return the collection of semantic resolution results
@ -152,6 +157,7 @@ public interface Assembler {
* This method works like {@link #assembleLine(Address, String, AssemblyPatternBlock)}, except * This method works like {@link #assembleLine(Address, String, AssemblyPatternBlock)}, except
* that it returns all possible resolutions for the parse trees that pass the * that it returns all possible resolutions for the parse trees that pass the
* {@link AssemblySelector}. * {@link AssemblySelector}.
*
* @param at the location of the start of the instruction * @param at the location of the start of the instruction
* @param line the textual assembly code * @param line the textual assembly code
* @param ctx the context register value at the start of the instruction * @param ctx the context register value at the start of the instruction
@ -164,8 +170,9 @@ public interface Assembler {
/** /**
* Place a resolved (and fully-masked) instruction into the bound program. * Place a resolved (and fully-masked) instruction into the bound program.
* *
* This method is not valid without a program binding. Also, this method must be called during * This method is not valid without a program binding. Also, this method must be called during a
* a program database transaction. * program database transaction.
*
* @param res the resolved and fully-masked instruction * @param res the resolved and fully-masked instruction
* @param at the location of the start of the instruction * @param at the location of the start of the instruction
* @return the new {@link Instruction} code unit * @return the new {@link Instruction} code unit
@ -177,20 +184,23 @@ public interface Assembler {
/** /**
* Place an instruction into the bound program. * Place an instruction into the bound program.
* *
* This method is not valid without a program binding. Also, this method must be called during * This method is not valid without a program binding. Also, this method must be called during a
* a program database transaction. * program database transaction.
*
* @param insbytes the instruction data * @param insbytes the instruction data
* @param at the location of the start of the instruction * @param at the location of the start of the instruction
* @return the new {@link Instruction} code unit * @return the new {@link Instruction} code unit
* @throws MemoryAccessException there is an issue writing the result to program memory * @throws MemoryAccessException there is an issue writing the result to program memory
*/ */
public Instruction patchProgram(byte[] insbytes, Address at) throws MemoryAccessException; public InstructionIterator patchProgram(byte[] insbytes, Address at)
throws MemoryAccessException;
/** /**
* Get the context at a given address * Get the context at a given address
* *
* If there is a program binding, this will extract the actual context at the given address. * If there is a program binding, this will extract the actual context at the given address.
* Otherwise, it will obtain the default context at the given address for the language. * Otherwise, it will obtain the default context at the given address for the language.
*
* @param addr the address * @param addr the address
* @return the context * @return the context
*/ */

View file

@ -15,6 +15,8 @@
*/ */
package ghidra.app.plugin.assembler.sleigh; package ghidra.app.plugin.assembler.sleigh;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.*; import java.util.*;
import ghidra.app.plugin.assembler.*; import ghidra.app.plugin.assembler.*;
@ -25,7 +27,8 @@ import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.program.disassemble.Disassembler; import ghidra.program.disassemble.Disassembler;
import ghidra.program.disassemble.DisassemblerMessageListener; import ghidra.program.disassemble.DisassemblerMessageListener;
import ghidra.program.model.address.*; import ghidra.program.model.address.*;
import ghidra.program.model.lang.*; import ghidra.program.model.lang.Register;
import ghidra.program.model.lang.RegisterValue;
import ghidra.program.model.listing.*; import ghidra.program.model.listing.*;
import ghidra.program.model.mem.Memory; import ghidra.program.model.mem.Memory;
import ghidra.program.model.mem.MemoryAccessException; import ghidra.program.model.mem.MemoryAccessException;
@ -98,22 +101,26 @@ public class SleighAssembler implements Assembler {
if (!res.getInstruction().isFullMask()) { if (!res.getInstruction().isFullMask()) {
throw new AssemblySelectionError("Selected instruction must have a full mask."); throw new AssemblySelectionError("Selected instruction must have a full mask.");
} }
return patchProgram(res.getInstruction().getVals(), at); return patchProgram(res.getInstruction().getVals(), at).next();
} }
@Override @Override
public Instruction patchProgram(byte[] insbytes, Address at) throws MemoryAccessException { public InstructionIterator patchProgram(byte[] insbytes, Address at)
listing.clearCodeUnits(at, at.add(insbytes.length - 1), false); throws MemoryAccessException {
Address end = at.add(insbytes.length - 1);
listing.clearCodeUnits(at, end, false);
memory.setBytes(at, insbytes); memory.setBytes(at, insbytes);
dis.disassemble(at, new AddressSet(at)); dis.disassemble(at, new AddressSet(at));
return listing.getInstructionAt(at); List<Instruction> result = new ArrayList<>();
return listing.getInstructions(new AddressSet(at, end), true);
} }
@Override @Override
public InstructionBlock assemble(Address at, String... assembly) throws AssemblySyntaxException, public InstructionIterator assemble(Address at, String... assembly)
AssemblySemanticException, MemoryAccessException, AddressOverflowException { throws AssemblySyntaxException, AssemblySemanticException, MemoryAccessException,
InstructionBlock block = new InstructionBlock(at); AddressOverflowException {
Address start = at;
ByteArrayOutputStream buf = new ByteArrayOutputStream();
for (String part : assembly) { for (String part : assembly) {
for (String line : part.split("\n")) { for (String line : part.split("\n")) {
RegisterValue rv = program.getProgramContext().getDisassemblyContext(at); RegisterValue rv = program.getProgramContext().getDisassemblyContext(at);
@ -124,13 +131,16 @@ public class SleighAssembler implements Assembler {
if (insbytes == null) { if (insbytes == null) {
return null; return null;
} }
try {
Instruction ins = patchProgram(insbytes, at); buf.write(insbytes);
block.addInstruction(ins); }
catch (IOException e) {
throw new AssertionError(e);
}
at = at.addNoWrap(insbytes.length); at = at.addNoWrap(insbytes.length);
} }
} }
return block; return patchProgram(buf.toByteArray(), start);
} }
@Override @Override
@ -221,10 +231,11 @@ public class SleighAssembler implements Assembler {
/** /**
* A convenience to obtain a map of program labels strings to long values * A convenience to obtain a map of program labels strings to long values
*
* @return the map * @return the map
* *
* {@literal TODO Use a Map<String, Address> instead so that, if possible, symbol values can be checked} * {@literal TODO Use a Map<String, Address> instead so that, if possible, symbol values can be checked}
* lest they be an invalid substitution for a given operand. * lest they be an invalid substitution for a given operand.
*/ */
protected Map<String, Long> getProgramLabels() { protected Map<String, Long> getProgramLabels() {
Map<String, Long> labels = new HashMap<>(); Map<String, Long> labels = new HashMap<>();