Merge remote-tracking branch 'origin/GP-1208_Dan_emuSyscalls-4--SQUASHED'

This commit is contained in:
Ryan Kurtz 2022-05-20 13:53:48 -04:00
commit d428ecd97a
144 changed files with 12712 additions and 804 deletions

View file

@ -0,0 +1,211 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//An example emulation script that integrates well with the Debugger UI.
//It provides the set-up code and then demonstrates some use cases.
//It should work with any x64 program, but some snippets may require specific conditions.
//It should be easily ported to other platforms just by adjusting register names.
//@author
//@category Emulation
//@keybinding
//@menupath
//@toolbar
import java.nio.charset.Charset;
import java.util.List;
import ghidra.app.plugin.assembler.Assembler;
import ghidra.app.plugin.assembler.Assemblers;
import ghidra.app.plugin.core.debug.service.emulation.DebuggerTracePcodeEmulator;
import ghidra.app.plugin.core.debug.service.emulation.ProgramEmulationUtils;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.script.GhidraScript;
import ghidra.app.services.DebuggerTraceManagerService;
import ghidra.app.services.ProgramManager;
import ghidra.framework.plugintool.PluginTool;
import ghidra.pcode.emu.PcodeThread;
import ghidra.pcode.exec.*;
import ghidra.pcode.exec.trace.TraceSleighUtils;
import ghidra.pcode.utils.Utils;
import ghidra.program.database.ProgramDB;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.LanguageID;
import ghidra.program.model.listing.InstructionIterator;
import ghidra.program.model.listing.Program;
import ghidra.program.model.mem.Memory;
import ghidra.trace.model.Trace;
import ghidra.trace.model.thread.TraceThread;
import ghidra.trace.model.time.TraceSnapshot;
import ghidra.trace.model.time.TraceTimeManager;
import ghidra.util.database.UndoableTransaction;
public class DebuggerEmuExampleScript extends GhidraScript {
	private final static Charset UTF8 = Charset.forName("utf8");

	/**
	 * Builds a small x64 program, launches it in an emulation trace shown in the UI, and steps a
	 * custom emulator through it, writing a trace snapshot after every instruction.
	 *
	 * @throws Exception if any part of the set-up or emulation fails unexpectedly
	 */
	@Override
	protected void run() throws Exception {
		// Collect the tool, its services, and the language used throughout the script.
		PluginTool tool = state.getTool();
		ProgramManager programManager = tool.getService(ProgramManager.class);
		DebuggerTraceManagerService traceManager =
			tool.getService(DebuggerTraceManagerService.class);
		SleighLanguage language =
			(SleighLanguage) getLanguage(new LanguageID("x86:LE:64:default"));

		// A fresh program is generated so the user need not have anything specific open.
		Address entry;
		Address injectHere;
		Program program = null;
		try {
			program =
				new ProgramDB("emu_example", language, language.getDefaultCompilerSpec(), this);
			// The program is stored in the project so the trace's static mapping has a URL.
			tool.getProject()
					.getProjectData()
					.getRootFolder()
					.createFile("emu_example", program, monitor);
			try (UndoableTransaction tx = UndoableTransaction.start(program, "Init", true)) {
				AddressSpace defaultSpace = program.getAddressFactory().getDefaultAddressSpace();
				entry = defaultSpace.getAddress(0x00400000);
				Address dataEntry = defaultSpace.getAddress(0x00600000);
				Memory mem = program.getMemory();

				mem.createInitializedBlock(".text", entry, 0x1000, (byte) 0, monitor, false);
				Assembler assembler = Assemblers.getAssembler(program);
				InstructionIterator assembled = assembler.assemble(entry,
					"MOV RCX, 0x" + dataEntry,
					"MOV RAX, 1",
					"SYSCALL",
					"MOV RAX, 2",
					"SYSCALL");
				// Skip "MOV RCX"; the injection goes on the instruction that follows it.
				assembled.next();
				injectHere = assembled.next().getAddress();

				mem.createInitializedBlock(".data", dataEntry, 0x1000, (byte) 0, monitor, false);
				mem.setBytes(dataEntry, "Hello, World!\n".getBytes(UTF8));
			}
			program.save("Init", monitor);
			// Show the program in the UI
			programManager.openProgram(program);
		}
		finally {
			if (program != null) {
				program.release(this);
			}
		}

		/*
		 * Load the program into a trace using the emulation utility, giving it the program and
		 * the entry address, then show and activate that trace in the UI.
		 */
		Trace trace = null;
		try {
			trace = ProgramEmulationUtils.launchEmulationTrace(program, entry, this);
			traceManager.openTrace(trace);
			traceManager.activateTrace(trace);
		}
		finally {
			if (trace != null) {
				trace.release(this);
			}
		}

		// Take the first thread in the trace and make it the active one.
		TraceThread traceThread = trace.getThreadManager().getAllThreads().iterator().next();
		traceManager.activateThread(traceThread);

		/*
		 * Rather than the UI's emulator, build one here so it can carry the demo userop library.
		 */
		DebuggerTracePcodeEmulator emulator = new DebuggerTracePcodeEmulator(tool, trace, 0, null) {
			@Override
			protected PcodeUseropLibrary<byte[]> createUseropLibrary() {
				return new DemoPcodeUseropLibrary(language, DebuggerEmuExampleScript.this);
			}
		};
		// The emulator thread takes its name from the trace thread's path.
		PcodeThread<byte[]> thread = emulator.getThread(traceThread.getPath(), true);

		// Without this injection nothing in the assembled program would call print_utf8.
		emulator.inject(injectHere, List.of(
			"print_utf8(RCX);",
			"emu_exec_decoded();"));

		/*
		 * Step up to 10 instructions, snapshotting after each one. The expectation is that the
		 * second SYSCALL interrupts the emulator before all 10 steps complete.
		 */
		TraceTimeManager time = trace.getTimeManager();
		TraceSnapshot snapshot = time.getSnapshot(0, true);
		try (UndoableTransaction tx = UndoableTransaction.start(trace, "Emulate", true)) {
			int steps = 0;
			while (steps < 10) {
				println("Executing: " + thread.getCounter());
				thread.stepInstruction();
				snapshot = time.createSnapshot("Stepped to " + thread.getCounter());
				emulator.writeDown(trace, snapshot.getKey(), 0, false);
				steps++;
			}
			printerr("We should not have completed 10 steps!");
		}
		catch (InterruptPcodeExecutionException e) {
			println("Terminated via interrupt. Good.");
		}
		// Show the last recorded snapshot in the UI
		traceManager.activateSnap(snapshot.getKey());

		/*
		 * Inspect the machine: first by reading the register from the thread's state, then by
		 * evaluating compiled Sleigh expressions against the thread's executor.
		 */
		byte[] rcxFromState = thread.getState().getVar(language.getRegister("RCX"));
		println("RCX = " + asLong(rcxFromState, language));
		println("RCX = " + asLong(evaluateOnThread(language, thread, "RCX"), language));
		println("RCX+4 = " + asLong(evaluateOnThread(language, thread, "RCX+4"), language));

		// The same expression, evaluated against the trace at the final snapshot instead.
		println("RCX+4 (trace) = " +
			TraceSleighUtils.evaluate("RCX+4", trace, snapshot.getKey(), traceThread, 0));
	}

	/**
	 * Compiles a Sleigh expression and evaluates it using the given thread's executor
	 *
	 * @param language the language used to compile the expression
	 * @param thread the emulator thread whose executor evaluates it
	 * @param expression the Sleigh expression source
	 * @return the resulting value as bytes
	 */
	private static byte[] evaluateOnThread(SleighLanguage language, PcodeThread<byte[]> thread,
			String expression) {
		return SleighProgramCompiler.compileExpression(language, expression)
				.evaluate(thread.getExecutor());
	}

	/**
	 * Decodes 8 bytes into a long using the language's endianness
	 *
	 * @param value the bytes to decode
	 * @param language the language supplying the endianness
	 * @return the decoded value
	 */
	private static long asLong(byte[] value, SleighLanguage language) {
		return Utils.bytesToLong(value, 8, language.isBigEndian());
	}
}

View file

@ -0,0 +1,131 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.nio.charset.Charset;
import java.util.List;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.script.GhidraScript;
import ghidra.pcode.exec.*;
import ghidra.pcode.struct.StructuredSleigh;
import ghidra.pcode.utils.Utils;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.CompilerSpec;
import ghidra.program.model.pcode.Varnode;
/**
* A userop library for the emulator
*
* <p>
* If you do not have need of a custom userop library, use {@link PcodeUseropLibrary#NIL}. These
* libraries allow you to implement userop, including those declared by the language. Without these,
* the emulator must interrupt whenever a userop ({@code CALLOTHER}) is encountered. You can also
* define new userops, which can be invoked from Sleigh code injected into the emulator.
*
* <p>
* These libraries can have both Java-callback and p-code implementations of userops. If only using
* p-code implementations, the library can be parameterized with type {@code <T>} and just pass that
* over to {@link AnnotatedPcodeUseropLibrary}. Because this will demo a Java callback that assumes
* concrete bytes, we will fix the library's type to {@code byte[]}.
*
* <p>
* Methods in this class (not including those in its nested classes) are implemented as Java
* callbacks.
*/
public class DemoPcodeUseropLibrary extends AnnotatedPcodeUseropLibrary<byte[]> {
	private final static Charset UTF8 = Charset.forName("utf8");

	private final SleighLanguage language;
	private final GhidraScript script;
	private final AddressSpace space;

	/**
	 * Create the demo library
	 *
	 * @param language the language of the emulated machine; its default space is the one
	 *            dereferenced by {@link #print_utf8(PcodeExecutorStatePiece, byte[])}
	 * @param script the script whose console receives the printed strings
	 */
	public DemoPcodeUseropLibrary(SleighLanguage language, GhidraScript script) {
		this.language = language;
		this.script = script;
		this.space = language.getDefaultSpace();
		// Generate the Structured Sleigh userops of the nested class into this library's ops
		new DemoStructuredPart(language.getDefaultCompilerSpec()).generate(ops);
	}

	/**
	 * Treats the input as an offset to a C-style string and prints it to the console
	 *
	 * <p>
	 * Because we want to dereference start, we will need access to the emulator's state, so we
	 * employ the {@link OpState} annotation. {@code start} takes the one input we expect. Because
	 * its type is the value type rather than {@link Varnode}, we will get the input's value.
	 * Similarly, we can just return the resulting value, and the emulator will place that into the
	 * output variable for us.
	 *
	 * <p>
	 * The scan for the terminating zero byte is not bounded: it keeps reading from the state
	 * until a zero byte is found.
	 *
	 * @param state the calling thread's state
	 * @param start the offset of the first character
	 * @return the length of the string in bytes, encoded as a {@link Long#BYTES}-byte value in
	 *         the language's endianness
	 */
	@PcodeUserop
	public byte[] print_utf8(@OpState PcodeExecutorStatePiece<byte[], byte[]> state,
			byte[] start) {
		long offset = Utils.bytesToLong(start, start.length, language.isBigEndian());
		// Advance one byte at a time until the NUL terminator
		long end = offset;
		while (state.getVar(space, end, 1, true)[0] != 0) {
			end++;
		}
		// An empty string is handled separately so no zero-length read is issued
		if (end == offset) {
			script.println("");
			return Utils.longToBytes(0, Long.BYTES, language.isBigEndian());
		}
		byte[] bytes = state.getVar(space, offset, (int) (end - offset), true);
		String str = new String(bytes, UTF8);
		script.println(str);
		return Utils.longToBytes(end - offset, Long.BYTES, language.isBigEndian());
	}

	/**
	 * Methods in this class are implemented using p-code compiled from Structured Sleigh
	 */
	public class DemoStructuredPart extends StructuredSleigh {
		final Var RAX = lang("RAX", type("long"));
		// Bound to the RCX register, which holds the string pointer (was mistakenly "RAX")
		final Var RCX = lang("RCX", type("byte *"));
		final UseropDecl emu_swi = userop(type("void"), "emu_swi", List.of());

		protected DemoStructuredPart(CompilerSpec cs) {
			super(cs);
		}

		/**
		 * Not really a syscall dispatcher
		 *
		 * <p>
		 * In cases where the userop expects parameters, you would annotate them with {@link Param}
		 * and use them just like other {@link Var}s. See the javadocs.
		 *
		 * <p>
		 * This is just a cheesy demo: If RAX is 1, then this method computes the number of bytes in
		 * the C-style string pointed to by RCX and stores the result in RAX. Otherwise, interrupt
		 * the emulator. See {@link DemoSyscallLibrary} for actual system call simulation.
		 */
		@StructuredUserop
		public void syscall() {
			_if(RAX.eq(1), () -> {
				// Walk a cursor from the start of the string to its NUL terminator
				Var i = local("i", RCX);
				_while(i.deref().neq(0), () -> {
					i.inc();
				});
				// Length is the distance from the start pointer (RCX) to the terminator
				RAX.set(i.subi(RCX));
			})._else(() -> {
				emu_swi.call();
			});
		}
	}
}

View file

@ -0,0 +1,211 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.nio.charset.Charset;
import java.util.Collection;
import ghidra.app.script.GhidraScript;
import ghidra.pcode.emu.PcodeMachine;
import ghidra.pcode.emu.linux.EmuLinuxAmd64SyscallUseropLibrary;
import ghidra.pcode.emu.linux.EmuLinuxX86SyscallUseropLibrary;
import ghidra.pcode.emu.sys.AnnotatedEmuSyscallUseropLibrary;
import ghidra.pcode.emu.sys.EmuSyscallLibrary;
import ghidra.pcode.exec.*;
import ghidra.pcode.struct.StructuredSleigh;
import ghidra.pcode.utils.Utils;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.data.DataTypeManager;
import ghidra.program.model.lang.Register;
import ghidra.program.model.listing.Program;
/**
* A userop library that includes system call simulation
*
* <p>
* Such a library needs to implement {@link EmuSyscallLibrary}. Here we extend
* {@link AnnotatedEmuSyscallUseropLibrary}, which allows us to implement it using annotated
* methods. {@link EmuSyscallLibrary#syscall(PcodeExecutor, PcodeUseropLibrary)} is the system call
* dispatcher, and it requires that each system call implement {@link EmuSyscallDefinition}. System
* call libraries typically implement that interface by annotating p-code userops with
* {@link EmuSyscall}. This allows system calls to be implemented via Java callback or Structured
* Sleigh. Conventionally, the Java method names of system calls should be
* <em>platform</em>_<em>name</em>. This is to prevent name-space pollution of userops.
*
* <p>
* Stock implementations for a limited set of Linux system calls are provided for x86 and amd64 in
* {@link EmuLinuxX86SyscallUseropLibrary} and {@link EmuLinuxAmd64SyscallUseropLibrary},
* respectively. The type hierarchy is designed to facilitate the implementation of related systems
* without (too much) code duplication. Because they derive from the annotation-based
* implementations, you can add missing system calls by extending one and adding annotated methods
* as needed.
*
* <p>
* For demonstration, this will implement one from scratch for no particular operating system, but
* it will borrow many conventions from linux-amd64.
*/
public class DemoSyscallLibrary extends AnnotatedEmuSyscallUseropLibrary<byte[]> {
	private final static Charset UTF8 = Charset.forName("utf8");

	// Required plumbing comes first

	/**
	 * A minimal exception type for "user errors," i.e., errors reported back to the target
	 * program instead of interrupting the emulator. It carries only an errno.
	 */
	static class UserError extends PcodeExecutionException {
		private final int errno;

		public UserError(int errno) {
			super("errno: " + errno);
			this.errno = errno;
		}
	}

	private final Register regRAX;
	private final GhidraScript script;

	/**
	 * Construct the library for the given machine and program
	 *
	 * <p>
	 * The machine's language must define a register named RAX, since this library uses it for
	 * both the system call number and the error result.
	 *
	 * @param machine the emulator
	 * @param program the program being emulated
	 * @param script the script whose console receives written text
	 * @throws AssertionError if the machine's language has no RAX register
	 */
	public DemoSyscallLibrary(PcodeMachine<byte[]> machine, Program program, GhidraScript script) {
		super(machine, program);
		Register rax = machine.getLanguage().getRegister("RAX");
		if (rax == null) {
			throw new AssertionError("This library only works on x64 targets");
		}
		this.regRAX = rax;
		this.script = script;
	}

	/**
	 * Read the system call number for the dispatcher, taking it from RAX
	 *
	 * @param state the state to read RAX from
	 * @return the value of RAX decoded using the machine language's endianness
	 */
	@Override
	public long readSyscallNumber(PcodeExecutorStatePiece<byte[], byte[]> state) {
		byte[] raw = state.getVar(regRAX);
		boolean bigEndian = machine.getLanguage().isBigEndian();
		return Utils.bytesToLong(raw, regRAX.getNumBytes(), bigEndian);
	}

	/**
	 * Report a user error to the target by writing the negated errno into RAX
	 *
	 * @param executor the executor whose state receives the error value
	 * @param err the error raised by the system call
	 * @return true if the error was a {@link UserError} and was written to RAX; false otherwise
	 */
	@Override
	public boolean handleError(PcodeExecutor<byte[]> executor, PcodeExecutionException err) {
		if (!(err instanceof UserError)) {
			return false;
		}
		UserError userError = (UserError) err;
		byte[] encoded =
			executor.getArithmetic().fromConst(-userError.errno, regRAX.getNumBytes());
		executor.getState().setVar(regRAX, encoded);
		return true;
	}

	/**
	 * Supply the nested class holding the Structured Sleigh system calls
	 *
	 * @return a new {@link DemoStructuredPart}
	 */
	@Override
	protected StructuredPart newStructuredPart() {
		return new DemoStructuredPart();
	}

	/**
	 * Hook for adding platform-specific data type archives; this demo adds none and simply
	 * defers to the superclass.
	 */
	@Override
	protected Collection<DataTypeManager> getAdditionalArchives() {
		return super.getAdditionalArchives();
	}

	// System call implementations follow, starting with a Java callback

	/**
	 * Write a buffer of utf-8 characters to the console
	 *
	 * <p>
	 * The userop is named {@code demo_write}, while the {@link EmuSyscall} annotation gives the
	 * system call name {@code write}. The Structured Sleigh method
	 * {@link DemoStructuredPart#demo_console(StructuredSleigh.Var)} invokes this userop.
	 *
	 * @param str a pointer to the start of the buffer
	 * @param end a pointer to the end (exclusive) of the buffer
	 */
	@PcodeUserop
	@EmuSyscall("write")
	public void demo_write(byte[] str, byte[] end) {
		/*
		 * The pointers are made concrete through the machine's arithmetic rather than decoded
		 * directly from the bytes.
		 */
		PcodeArithmetic<byte[]> arith = machine.getArithmetic();
		long first = arith.toConcrete(str).longValue();
		long limit = arith.toConcrete(end).longValue();
		int length = (int) (limit - first);

		AddressSpace defaultSpace = machine.getLanguage().getDefaultSpace();
		byte[] raw = machine.getSharedState().getVar(defaultSpace, first, length, true);
		script.println(new String(raw, UTF8));
	}

	// Next, a Structured Sleigh example

	/**
	 * The nested class for system calls implemented using Structured Sleigh. Its methods carry
	 * the {@link EmuSyscall} annotation just as the Java callbacks do.
	 */
	public class DemoStructuredPart extends StructuredPart {
		UseropDecl write = userop(type("void"), "demo_write", types("char *", "char *"));

		/**
		 * Write a C-style string to the console
		 *
		 * @param str the null-terminated utf-8 string
		 */
		@StructuredUserop
		@EmuSyscall("console")
		public void demo_console(@Param(type = "char *") Var str) {
			// Advance a cursor to the terminator using an empty-bodied loop, then call write
			Var cursor = local("end", type("char *"));
			_for(cursor.set(str), cursor.deref().neq(0), cursor.inc(), () -> {
			});
			write.call(str, cursor);
		}
	}
}

View file

@ -0,0 +1,179 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//An example emulation script that uses a stand-alone emulator.
//It provides the set-up code and then demonstrates some use cases.
//@author
//@category Emulation
//@keybinding
//@menupath
//@toolbar
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;
import ghidra.app.plugin.assembler.*;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.script.GhidraScript;
import ghidra.pcode.emu.PcodeEmulator;
import ghidra.pcode.emu.PcodeThread;
import ghidra.pcode.exec.*;
import ghidra.pcode.utils.Utils;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.LanguageID;
public class StandAloneEmuExampleScript extends GhidraScript {
	private final static Charset UTF8 = Charset.forName("utf8");

	private SleighLanguage language;
	private PcodeEmulator emulator;

	/**
	 * Creates a stand-alone x64 emulator with the demo userop library, loads a small assembled
	 * program into it, runs until interrupted, and prints some machine state.
	 *
	 * @throws Exception if set-up, assembly, or emulation fails unexpectedly
	 */
	@Override
	protected void run() throws Exception {
		// Build the emulator with the demo library, then start a thread on it
		language = (SleighLanguage) getLanguage(new LanguageID("x86:LE:64:default"));
		emulator = new PcodeEmulator(language) {
			@Override
			protected PcodeUseropLibrary<byte[]> createUseropLibrary() {
				return new DemoPcodeUseropLibrary(language, StandAloneEmuExampleScript.this);
			}
			// Uncomment this to see instructions printed as they are decoded
			/*
			protected BytesPcodeThread createThread(String name) {
				return new BytesPcodeThread(name, this) {
					@Override
					protected SleighInstructionDecoder createInstructionDecoder(
							PcodeExecutorState<byte[]> sharedState) {
						return new SleighInstructionDecoder(language, sharedState) {
							@Override
							public Instruction decodeInstruction(Address address,
									RegisterValue context) {
								Instruction instruction = super.decodeInstruction(address, context);
								println("Decoded " + address + ": " + instruction);
								return instruction;
							}
						};
					}
				};
			}
			*/
		};
		PcodeThread<byte[]> thread = emulator.newThread();
		// Use the thread's library, which is what the emulator composed for it
		PcodeUseropLibrary<byte[]> library = thread.getUseropLibrary();
		AddressSpace dyn = language.getDefaultSpace();

		/*
		 * Assemble the test program one line at a time and copy it into the shared state. The
		 * SYSCALL instructions are here to exercise a language-defined userop, not to implement
		 * real system calls.
		 */
		Address entry = dyn.getAddress(0x00400000);
		CodeBuffer buffer = new CodeBuffer(Assemblers.getAssembler(language), entry);
		buffer.assemble("MOV RCX, 0xdeadbeef");
		// The injection point is the address right after the first instruction
		Address injectHere = buffer.getNext();
		buffer.assemble("MOV RAX, 1");
		buffer.assemble("SYSCALL");
		buffer.assemble("MOV RAX, 2"); // Induce the interrupt we need to terminate
		buffer.assemble("SYSCALL");
		byte[] code = buffer.getBytes();
		emulator.getSharedState().setVar(dyn, entry.getOffset(), code.length, true, code);

		/*
		 * Initialize the remaining emulator and thread state. Note the L suffix on 0xdeadbeefL:
		 * without it, Java will sign extend the (negative) int to a long.
		 */
		byte[] hello = "Hello, World!\n".getBytes(UTF8);
		emulator.getSharedState().setVar(dyn, 0xdeadbeefL, hello.length, true, hello);
		List<String> initSource = List.of(
			"RIP = 0x" + entry + ";",
			"RSP = 0x00001000;");
		PcodeProgram init =
			SleighProgramCompiler.compileProgram(language, "init", initSource, library);
		thread.getExecutor().execute(init, library);
		thread.overrideContextWithDefault();
		thread.reInitialize();

		// Without this injection nothing in the assembled program would call print_utf8.
		emulator.inject(injectHere, List.of(
			"print_utf8(RCX);",
			"emu_exec_decoded();"));

		/*
		 * Run: the expectation is an interrupt at the second SYSCALL, before 10 steps complete.
		 */
		try {
			thread.stepInstruction(10);
			printerr("We should not have completed 10 steps!");
		}
		catch (InterruptPcodeExecutionException e) {
			println("Terminated via interrupt. Good.");
		}

		/*
		 * Inspect the machine: first by reading the register from the thread's state, then by
		 * evaluating compiled Sleigh expressions against the thread's executor.
		 */
		boolean bigEndian = language.isBigEndian();
		byte[] rcxFromState = thread.getState().getVar(language.getRegister("RCX"));
		println("RCX = " + Utils.bytesToLong(rcxFromState, 8, bigEndian));

		byte[] rcxFromExpr = SleighProgramCompiler.compileExpression(language, "RCX")
				.evaluate(thread.getExecutor());
		println("RCX = " + Utils.bytesToLong(rcxFromExpr, 8, bigEndian));

		byte[] rcxPlus4 = SleighProgramCompiler.compileExpression(language, "RCX+4")
				.evaluate(thread.getExecutor());
		println("RCX+4 = " + Utils.bytesToLong(rcxPlus4, 8, bigEndian));
	}

	/**
	 * Accumulates the bytes of instructions assembled one line at a time, each placed directly
	 * after the previous one starting at a fixed entry address.
	 */
	public static class CodeBuffer {
		private final ByteArrayOutputStream baos = new ByteArrayOutputStream();
		private final Assembler asm;
		private final Address entry;

		/**
		 * @param asm the assembler used for each line
		 * @param entry the address of the first assembled byte
		 */
		public CodeBuffer(Assembler asm, Address entry) {
			this.asm = asm;
			this.entry = entry;
		}

		/**
		 * @return the entry address advanced by the number of bytes assembled so far
		 */
		public Address getNext() {
			return entry.add(baos.size());
		}

		/**
		 * Assemble one line at the next address and append its bytes to the buffer
		 *
		 * @param line the assembly source line
		 * @return the bytes produced for this line
		 * @throws AssemblySyntaxException as thrown by the assembler
		 * @throws AssemblySemanticException as thrown by the assembler
		 * @throws IOException as thrown when appending to the buffer
		 */
		public byte[] assemble(String line)
				throws AssemblySyntaxException, AssemblySemanticException, IOException {
			Address at = getNext();
			byte[] encoded = asm.assembleLine(at, line);
			baos.write(encoded);
			return encoded;
		}

		/**
		 * @return a copy of all bytes assembled so far
		 */
		public byte[] getBytes() {
			return baos.toByteArray();
		}
	}
}

View file

@ -0,0 +1,112 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//An example script for using Structured Sleigh stand alone
//@author
//@category Sleigh
//@keybinding
//@menupath
//@toolbar
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodHandles.Lookup;
import java.util.Map;
import java.util.stream.Collectors;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.script.GhidraScript;
import ghidra.pcode.exec.SleighPcodeUseropDefinition;
import ghidra.pcode.struct.StructuredSleigh;
import ghidra.program.model.lang.LanguageID;
public class StandAloneStructuredSleighScript extends GhidraScript {
	private SleighLanguage language;

	/**
	 * A base class for the anonymous Structured Sleigh class used in {@link #run()}. It supplies
	 * a method lookup created within this script and a no-argument constructor that uses the
	 * script language's default compiler spec.
	 */
	class LookupStructuredSleigh extends StructuredSleigh {
		protected LookupStructuredSleigh() {
			super(language.getDefaultCompilerSpec());
		}

		@Override
		protected Lookup getMethodLookup() {
			return MethodHandles.lookup();
		}
	}

	/**
	 * Generates two example userops from Structured Sleigh and prints their Sleigh source to the
	 * console.
	 *
	 * @throws Exception if the language cannot be obtained or generation fails
	 */
	@Override
	protected void run() throws Exception {
		// DATA offers a minimal context; substitute a real target language if you have one
		language = (SleighLanguage) getLanguage(new LanguageID("DATA:BE:64:default"));

		Map<String, SleighPcodeUseropDefinition<Object>> ops = new LookupStructuredSleigh() {
			/**
			 * Add two in-memory 16-element vectors and store the result in memory
			 *
			 * @param d pointer to the destination vector
			 * @param s1 pointer to the first operand vector
			 * @param s2 pointer to the second operand vector
			 */
			@StructuredUserop
			public void vector_add(
					@Param(name = "d", type = "int *") Var d,
					@Param(name = "s1", type = "int *") Var s1,
					@Param(name = "s2", type = "int *") Var s2) {
				/*
				 * A Java loop emits one statement per element, i.e., an unrolled loop. Element 0
				 * is indexed like all the others.
				 */
				for (int elem = 0; elem < 16; elem++) {
					d.index(elem)
							.deref()
							.set(s1.index(elem).deref().addi(s2.index(elem).deref()));
				}
			}

			/**
			 * Copy n bytes from s to d
			 *
			 * @param d pointer to the destination
			 * @param s pointer to the source
			 * @param n the number of bytes to copy
			 */
			@StructuredUserop
			public void memcpy(
					@Param(name = "d", type = "void *") Var d,
					@Param(name = "s", type = "void *") Var s,
					@Param(name = "n", type = "long") Var n) { // size_t is not built-in
				// View both pointers as byte pointers before indexing
				Var sb = s.cast(type("byte *"));
				Var db = d.cast(type("byte *"));
				Var i = local("i", type("long"));
				// The bound is only known at run time, so this has to be a Sleigh loop
				_for(i.set(0), i.ltiu(n), i.inc(), () -> {
					db.index(i).deref().set(sb.index(i).deref());
				});
			}
		}.generate();

		// Print each generated userop as: name(inputs) { lines }
		for (SleighPcodeUseropDefinition<?> userop : ops.values()) {
			print(userop.getName() + "(");
			print(String.join(",", userop.getInputs()));
			print(") {\n");
			for (String sourceLine : userop.getLines()) {
				print(sourceLine);
			}
			print("}\n\n");
		}
	}
}

View file

@ -0,0 +1,233 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//An example emulation script that uses a stand-alone emulator with syscalls.
//It provides the set-up code and then demonstrates some use cases.
//@author
//@category Emulation
//@keybinding
//@menupath
//@toolbar
import java.nio.charset.Charset;
import java.util.List;
import ghidra.app.plugin.assembler.Assembler;
import ghidra.app.plugin.assembler.Assemblers;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.script.GhidraScript;
import ghidra.pcode.emu.PcodeEmulator;
import ghidra.pcode.emu.PcodeThread;
import ghidra.pcode.emu.sys.EmuInvalidSystemCallException;
import ghidra.pcode.emu.sys.EmuSyscallLibrary;
import ghidra.pcode.exec.*;
import ghidra.pcode.utils.Utils;
import ghidra.program.database.ProgramDB;
import ghidra.program.model.address.*;
import ghidra.program.model.data.DataTypeConflictHandler;
import ghidra.program.model.data.PointerDataType;
import ghidra.program.model.lang.*;
import ghidra.program.model.listing.Program;
import ghidra.program.model.mem.Memory;
import ghidra.program.model.mem.MemoryBlock;
import ghidra.program.model.symbol.SourceType;
import ghidra.util.database.UndoableTransaction;
/**
 * Example script that runs a stand-alone p-code emulator with system call support.
 *
 * <p>
 * The script builds a scratch x86-64 program, assembles a short routine containing two
 * {@code SYSCALL} instructions, registers the system call names "write" (0) and "console" (1) in
 * the syscall block, copies the program's initialized memory into a {@link PcodeEmulator}, and
 * steps a single thread until the emulator rejects the unmapped system call number 20. It then
 * prints the value of RDI, read both directly from the thread state and via compiled Sleigh
 * expressions.
 */
public class StandAloneSyscallEmuExampleScript extends GhidraScript {
	/** Charset used to encode the greeting stored in the example's data block. */
	private static final Charset UTF8 = StandardCharsets.UTF_8;

	/**
	 * The scratch program created by {@link #run()}. It is a field (rather than a local) so the
	 * anonymous emulator subclass can hand it to the syscall library, and so the {@code finally}
	 * clause can release it.
	 */
	Program program = null;

	/**
	 * Build the scratch program, emulate it, and print the resulting machine state.
	 *
	 * @throws Exception if building the program, assembling, or emulating fails in any way other
	 *             than the expected invalid system call
	 */
	@Override
	protected void run() throws Exception {
		/*
		 * First, get all the services and stuff:
		 */
		SleighLanguage language = (SleighLanguage) getLanguage(new LanguageID("x86:LE:64:default"));

		/*
		 * I'll generate a new program, because I don't want to require the user to pick something
		 * specific. It won't be displayed, though, so we'll just release it when we're done.
		 */
		Address entry;
		try {
			/*
			 * "gcc" is the name of the compiler spec, but we're really interested in the Linux
			 * syscall calling conventions.
			 */
			program =
				new ProgramDB("syscall_example", language,
					language.getCompilerSpecByID(new CompilerSpecID("gcc")), this);
			try (UndoableTransaction tid = UndoableTransaction.start(program, "Init", true)) {
				AddressSpace space = program.getAddressFactory().getDefaultAddressSpace();
				entry = space.getAddress(0x00400000);
				Address dataEntry = space.getAddress(0x00600000);

				Memory memory = program.getMemory();
				memory.createInitializedBlock(".text", entry, 0x1000, (byte) 0, monitor, false);
				Assembler asm = Assemblers.getAssembler(program);
				asm.assemble(entry,
					"MOV RDI, 0x" + dataEntry,
					"MOV RAX, 1",
					"SYSCALL",
					"MOV RAX, 20",
					"SYSCALL");
				memory.createInitializedBlock(".data", dataEntry, 0x1000, (byte) 0, monitor, false);
				memory.setBytes(dataEntry, "Hello, World!\n".getBytes(UTF8));

				/*
				 * Because "pointer" is a built-in type, and the emulator does not modify the
				 * program, we must ensure it has been resolved on the program's data type manager.
				 */
				program.getDataTypeManager()
						.resolve(PointerDataType.dataType, DataTypeConflictHandler.DEFAULT_HANDLER);

				/*
				 * We must also populate the system call numbering map. Ordinarily, this would be done
				 * using the system call analyzer or another script. Here, we'll just fake it out.
				 */
				AddressSpace other =
					program.getAddressFactory().getAddressSpace(SpaceNames.OTHER_SPACE_NAME);
				MemoryBlock blockSyscall = program.getMemory()
						.createUninitializedBlock(EmuSyscallLibrary.SYSCALL_SPACE_NAME,
							other.getAddress(0), 0x1000, true);
				blockSyscall.setPermissions(true, false, true);

				AddressSpace syscall = program.getAddressFactory()
						.getAddressSpace(EmuSyscallLibrary.SYSCALL_SPACE_NAME);
				/*
				 * The system call names must match those from the EmuSyscall annotations in the
				 * system call library, in our case from DemoSyscallLibrary. Because the x64
				 * compiler specs define a "syscall" convention, we'll apply it. The syscall
				 * dispatcher will use that convention to fetch the parameters out of the machine
				 * state, pass them into the system call definition, and store the result back into
				 * the machine.
				 */
				// Map system call 0 to "write"
				program.getFunctionManager()
						.createFunction("write", syscall.getAddress(0),
							new AddressSet(syscall.getAddress(0)), SourceType.USER_DEFINED)
						.setCallingConvention(EmuSyscallLibrary.SYSCALL_CONVENTION_NAME);
				// Map system call 1 to "console"
				program.getFunctionManager()
						.createFunction("console", syscall.getAddress(1),
							new AddressSet(syscall.getAddress(1)), SourceType.USER_DEFINED)
						.setCallingConvention(EmuSyscallLibrary.SYSCALL_CONVENTION_NAME);
			}

			/*
			 * Create an emulator and start a thread
			 */
			PcodeEmulator emulator = new PcodeEmulator(language) {
				@Override
				protected PcodeUseropLibrary<byte[]> createUseropLibrary() {
					return new DemoSyscallLibrary(this, program,
						StandAloneSyscallEmuExampleScript.this);
				}

				// Uncomment this to see instructions printed as they are decoded
				/*
				@Override
				protected BytesPcodeThread createThread(String name) {
					return new BytesPcodeThread(name, this) {
						@Override
						protected SleighInstructionDecoder createInstructionDecoder(
								PcodeExecutorState<byte[]> sharedState) {
							return new SleighInstructionDecoder(language, sharedState) {
								@Override
								public Instruction decodeInstruction(Address address,
										RegisterValue context) {
									Instruction instruction = super.decodeInstruction(address, context);
									println("Decoded " + address + ": " + instruction);
									return instruction;
								}
							};
						}
					};
				}
				*/
			};
			PcodeThread<byte[]> thread = emulator.newThread();
			// The emulator composes the full library for each thread
			PcodeUseropLibrary<byte[]> library = thread.getUseropLibrary();

			/*
			 * The library has a reference to the program and uses it to derive types and the system
			 * call numbering. However, the emulator itself does not have access to the program. If we
			 * followed the pattern in DebuggerEmuExampleScript, the emulator would have its state bound
			 * (indirectly) to the program. We'll need to copy the bytes in. The blocks here are small,
			 * so we copy each one whole, sizing the buffer to the block so that neither truncation nor
			 * stale bytes from a previously copied block can reach the emulator. Ordinarily, you may
			 * want to copy in chunks rather than taking entire memory blocks at a time.
			 */
			for (MemoryBlock block : program.getMemory().getBlocks()) {
				if (!block.isInitialized()) {
					continue; // Skip the syscall/OTHER block
				}
				Address addr = block.getStart();
				// toIntExact fails loudly instead of silently truncating an oversized block
				byte[] data = new byte[Math.toIntExact(block.getSize())];
				block.getBytes(addr, data);
				emulator.getSharedState()
						.setVar(addr.getAddressSpace(), addr.getOffset(), data.length, true, data);
			}

			/*
			 * Initialize the thread
			 */
			PcodeProgram init = SleighProgramCompiler.compileProgram(language, "init", List.of(
				"RIP = 0x" + entry + ";",
				"RSP = 0x00001000;"),
				library);
			thread.getExecutor().execute(init, library);
			thread.overrideContextWithDefault();
			thread.reInitialize();

			/*
			 * Run the experiment: This should interrupt on the second SYSCALL, because we didn't
			 * provide a system call name in OTHER space for 20.
			 */
			try {
				thread.stepInstruction(10);
				printerr("We should not have completed 10 steps!");
			}
			catch (EmuInvalidSystemCallException e) {
				println("Terminated via invalid syscall. Good.");
			}

			/*
			 * Inspect the machine. You can always do this by accessing the state directly, but for
			 * anything other than simple variables, you may find compiling an expression more
			 * convenient.
			 */
			println("RDI = " +
				Utils.bytesToLong(thread.getState().getVar(language.getRegister("RDI")), 8,
					language.isBigEndian()));

			println("RDI = " + Utils.bytesToLong(
				SleighProgramCompiler.compileExpression(language, "RDI")
						.evaluate(thread.getExecutor()),
				8, language.isBigEndian()));

			println("RDI+4 = " +
				Utils.bytesToLong(SleighProgramCompiler.compileExpression(language, "RDI+4")
						.evaluate(thread.getExecutor()),
					8, language.isBigEndian()));
		}
		finally {
			// The program may be null if its construction failed before assignment
			if (program != null) {
				program.release(this);
			}
		}
	}
}

View file

@ -88,11 +88,11 @@ public abstract class DebuggerGoToTrait {
if (space == null) {
throw new IllegalArgumentException("No such address space: " + spaceName);
}
SleighExpression expr = SleighProgramCompiler.compileExpression(slang, expression);
PcodeExpression expr = SleighProgramCompiler.compileExpression(slang, expression);
return goToSleigh(space, expr);
}
public CompletableFuture<Boolean> goToSleigh(AddressSpace space, SleighExpression expression) {
public CompletableFuture<Boolean> goToSleigh(AddressSpace space, PcodeExpression expression) {
AsyncPcodeExecutor<byte[]> executor = TracePcodeUtils.executorForCoordinates(current);
CompletableFuture<byte[]> result = expression.evaluate(executor);
return result.thenApply(offset -> {

View file

@ -58,7 +58,7 @@ public class WatchRow {
private String typePath;
private DataType dataType;
private SleighExpression compiled;
private PcodeExpression compiled;
private TraceMemoryState state;
private Address address;
private AddressSet reads;
@ -208,7 +208,7 @@ public class WatchRow {
@Override
public PcodeFrame execute(PcodeProgram program,
SleighUseropLibrary<Pair<byte[], Address>> library) {
PcodeUseropLibrary<Pair<byte[], Address>> library) {
depsState.reset();
return super.execute(program, library);
}

View file

@ -36,8 +36,8 @@ public abstract class AbstractReadsTargetPcodeExecutorState
abstract class AbstractReadsTargetCachedSpace extends CachedSpace {
public AbstractReadsTargetCachedSpace(Language language, AddressSpace space,
TraceMemorySpace source, long snap) {
super(language, space, source, snap);
TraceMemorySpace backing, long snap) {
super(language, space, backing, snap);
}
protected abstract void fillUninitialized(AddressSet uninitialized);
@ -47,15 +47,15 @@ public abstract class AbstractReadsTargetPcodeExecutorState
}
protected AddressSet computeUnknown(AddressSet uninitialized) {
return uninitialized.subtract(source.getAddressesWithState(snap, uninitialized,
return uninitialized.subtract(backing.getAddressesWithState(snap, uninitialized,
s -> s != null && s != TraceMemoryState.UNKNOWN));
}
@Override
public byte[] read(long offset, int size) {
if (source != null) {
if (backing != null) {
AddressSet uninitialized =
addrSet(cache.getUninitialized(offset, offset + size - 1));
addrSet(bytes.getUninitialized(offset, offset + size - 1));
if (uninitialized.isEmpty()) {
return super.read(offset, size);
}
@ -63,7 +63,7 @@ public abstract class AbstractReadsTargetPcodeExecutorState
fillUninitialized(uninitialized);
AddressSet unknown =
computeUnknown(addrSet(cache.getUninitialized(offset, offset + size - 1)));
computeUnknown(addrSet(bytes.getUninitialized(offset, offset + size - 1)));
if (!unknown.isEmpty()) {
warnUnknown(unknown);
}

View file

@ -40,8 +40,8 @@ public class ReadsTargetMemoryPcodeExecutorState
protected class ReadsTargetMemoryCachedSpace extends AbstractReadsTargetCachedSpace {
public ReadsTargetMemoryCachedSpace(Language language, AddressSpace space,
TraceMemorySpace source, long snap) {
super(language, space, source, snap);
TraceMemorySpace backing, long snap) {
super(language, space, backing, snap);
}
@Override
@ -108,7 +108,7 @@ public class ReadsTargetMemoryPcodeExecutorState
" bytes");
}
// write(lower - shift, data, 0 ,read);
cache.putData(lower - shift, data, 0, read);
bytes.putData(lower - shift, data, 0, read);
}
catch (MemoryAccessException | AddressOutOfBoundsException e) {
throw new AssertionError(e);

View file

@ -28,7 +28,7 @@ import ghidra.program.model.pcode.Varnode;
* An executor which can perform (some of) its work asynchronously
*
* <p>
* Note that a future returned from, e.g., {@link #executeAsync(SleighProgram, SleighUseropLibrary)}
* Note that a future returned from, e.g., {@link #executeAsync(SleighProgram, PcodeUseropLibrary)}
* may complete before the computation has actually been performed. They complete when all of the
* operations have been scheduled, and the last future has been written into the state. (This
* typically happens when any branch conditions have completed). Instead, a caller should read from
@ -46,7 +46,7 @@ public class AsyncPcodeExecutor<T> extends PcodeExecutor<CompletableFuture<T>> {
}
public CompletableFuture<Void> stepOpAsync(PcodeOp op, PcodeFrame frame,
SleighUseropLibrary<CompletableFuture<T>> library) {
PcodeUseropLibrary<CompletableFuture<T>> library) {
if (op.getOpcode() == PcodeOp.CBRANCH) {
return executeConditionalBranchAsync(op, frame);
}
@ -55,7 +55,7 @@ public class AsyncPcodeExecutor<T> extends PcodeExecutor<CompletableFuture<T>> {
}
public CompletableFuture<Void> stepAsync(PcodeFrame frame,
SleighUseropLibrary<CompletableFuture<T>> library) {
PcodeUseropLibrary<CompletableFuture<T>> library) {
try {
return stepOpAsync(frame.nextOp(), frame, library);
}
@ -80,12 +80,12 @@ public class AsyncPcodeExecutor<T> extends PcodeExecutor<CompletableFuture<T>> {
}
public CompletableFuture<Void> executeAsync(PcodeProgram program,
SleighUseropLibrary<CompletableFuture<T>> library) {
PcodeUseropLibrary<CompletableFuture<T>> library) {
return executeAsync(program.code, program.useropNames, library);
}
protected CompletableFuture<Void> executeAsyncLoop(PcodeFrame frame,
SleighUseropLibrary<CompletableFuture<T>> library) {
PcodeUseropLibrary<CompletableFuture<T>> library) {
if (frame.isFinished()) {
return AsyncUtils.NIL;
}
@ -94,7 +94,7 @@ public class AsyncPcodeExecutor<T> extends PcodeExecutor<CompletableFuture<T>> {
}
public CompletableFuture<Void> executeAsync(List<PcodeOp> code,
Map<Integer, String> useropNames, SleighUseropLibrary<CompletableFuture<T>> library) {
Map<Integer, String> useropNames, PcodeUseropLibrary<CompletableFuture<T>> library) {
PcodeFrame frame = new PcodeFrame(language, code, useropNames);
return executeAsyncLoop(frame, library);
}

View file

@ -83,4 +83,9 @@ public class AsyncWrappedPcodeArithmetic<T> implements PcodeArithmetic<Completab
}
return arithmetic.toConcrete(cond.getNow(null), isContextreg);
}
/**
 * Compute the size of a value that may not be available yet.
 *
 * <p>
 * The computation is chained onto the given future: once it completes, its result is passed to
 * the wrapped {@code arithmetic}'s {@code sizeOf}.
 *
 * @param value the future whose eventual result is to be measured
 * @return a future which yields whatever the wrapped arithmetic reports for the result
 */
@Override
public CompletableFuture<T> sizeOf(CompletableFuture<T> value) {
	return value.thenApply(resolved -> {
		return arithmetic.sizeOf(resolved);
	});
}
}

View file

@ -169,7 +169,7 @@ public class DebuggerPcodeStepperProviderTest extends AbstractGhidraHeadedDebugg
protected List<PcodeRow> format(List<String> sleigh) {
SleighLanguage language = (SleighLanguage) getToyBE64Language();
PcodeProgram prog = SleighProgramCompiler.compileProgram(language, "test", sleigh,
SleighUseropLibrary.nil());
PcodeUseropLibrary.nil());
PcodeExecutor<byte[]> executor =
new PcodeExecutor<>(language, PcodeArithmetic.BYTES_BE, null);
PcodeFrame frame = executor.begin(prog);

View file

@ -55,7 +55,7 @@ public class TraceRecorderAsyncPcodeExecTest extends AbstractGhidraHeadedDebugge
Trace trace = recorder.getTrace();
SleighLanguage language = (SleighLanguage) trace.getBaseLanguage();
SleighExpression expr = SleighProgramCompiler
PcodeExpression expr = SleighProgramCompiler
.compileExpression(language, "r0 + r1");
Register r0 = language.getRegister("r0");
@ -99,7 +99,7 @@ public class TraceRecorderAsyncPcodeExecTest extends AbstractGhidraHeadedDebugge
SleighLanguage language = (SleighLanguage) trace.getBaseLanguage();
PcodeProgram prog = SleighProgramCompiler.compileProgram(language, "test",
List.of("r2 = r0 + r1;"), SleighUseropLibrary.NIL);
List.of("r2 = r0 + r1;"), PcodeUseropLibrary.NIL);
Register r0 = language.getRegister("r0");
Register r1 = language.getRegister("r1");
@ -119,7 +119,7 @@ public class TraceRecorderAsyncPcodeExecTest extends AbstractGhidraHeadedDebugge
AsyncPcodeExecutor<byte[]> executor = new AsyncPcodeExecutor<>(
language, AsyncWrappedPcodeArithmetic.forLanguage(language), asyncState);
waitOn(executor.executeAsync(prog, SleighUseropLibrary.nil()));
waitOn(executor.executeAsync(prog, PcodeUseropLibrary.nil()));
waitOn(asyncState.getVar(language.getRegister("r2")));
assertEquals(BigInteger.valueOf(11), new BigInteger(1, regs.regVals.get("r2")));