GP-4643: Add a JIT-accelerated p-code emulator (API/scripting only)

This commit is contained in:
Dan 2025-01-03 10:27:38 -05:00
parent 20285e267d
commit a8fae1fe5b
320 changed files with 32638 additions and 630 deletions

View file

@ -34,7 +34,7 @@ import ghidra.program.util.ProgramLocation;
import ghidra.trace.model.*;
import ghidra.trace.model.modules.*;
import ghidra.trace.model.program.TraceProgramView;
import ghidra.util.ComparatorMath;
import ghidra.util.MathUtilities;
import ghidra.util.Msg;
public enum DebuggerStaticMappingUtils {
@ -163,8 +163,8 @@ public enum DebuggerStaticMappingUtils {
private Address max = null;
public void consider(Address min, Address max) {
this.min = this.min == null ? min : ComparatorMath.cmin(this.min, min);
this.max = this.max == null ? max : ComparatorMath.cmax(this.max, max);
this.min = this.min == null ? min : MathUtilities.cmin(this.min, min);
this.max = this.max == null ? max : MathUtilities.cmax(this.max, max);
}
public void consider(AddressRange range) {

View file

@ -19,6 +19,7 @@ import ghidra.app.plugin.core.debug.stack.Sym.ConstSym;
import ghidra.pcode.exec.ConcretionError;
import ghidra.pcode.exec.PcodeArithmetic;
import ghidra.pcode.utils.Utils;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.*;
import ghidra.program.model.pcode.PcodeOp;
@ -70,14 +71,14 @@ class SymPcodeArithmetic implements PcodeArithmetic<Sym> {
}
@Override
public Sym modBeforeStore(int sizeout, int sizeinAddress, Sym inAddress,
int sizeinValue, Sym inValue) {
public Sym modBeforeStore(int sizeinOffset, AddressSpace space, Sym inOffset, int sizeinValue,
Sym inValue) {
return inValue;
}
@Override
public Sym modAfterLoad(int sizeout, int sizeinAddress, Sym inAddress,
int sizeinValue, Sym inValue) {
public Sym modAfterLoad(int sizeinOffset, AddressSpace space, Sym inOffset, int sizeinValue,
Sym inValue) {
return inValue;
}

View file

@ -505,33 +505,33 @@ public enum DebuggerPcodeUtils {
}
@Override
public WatchValue modBeforeStore(int sizeout, int sizeinAddress, WatchValue inAddress,
public WatchValue modBeforeStore(int sizeinOffset, AddressSpace space, WatchValue inOffset,
int sizeinValue, WatchValue inValue) {
return new WatchValue(
new PrettyBytes(inValue.bytes.bigEndian,
bytes.modBeforeStore(sizeout, sizeinAddress, inAddress.bytes.bytes,
sizeinValue, inValue.bytes.bytes)),
STATE.modBeforeStore(sizeout, sizeinAddress, inAddress.state,
sizeinValue, inValue.state),
location.modBeforeStore(sizeout, sizeinAddress, inAddress.location,
sizeinValue, inValue.location),
READS.modBeforeStore(sizeout, sizeinAddress, inAddress.reads,
sizeinValue, inValue.reads));
bytes.modBeforeStore(sizeinOffset, space, inOffset.bytes.bytes, sizeinValue,
inValue.bytes.bytes)),
STATE.modBeforeStore(sizeinOffset, space, inOffset.state, sizeinValue,
inValue.state),
location.modBeforeStore(sizeinOffset, space, inOffset.location, sizeinValue,
inValue.location),
READS.modBeforeStore(sizeinOffset, space, inOffset.reads, sizeinValue,
inValue.reads));
}
@Override
public WatchValue modAfterLoad(int sizeout, int sizeinAddress, WatchValue inAddress,
public WatchValue modAfterLoad(int sizeinOffset, AddressSpace space, WatchValue inOffset,
int sizeinValue, WatchValue inValue) {
return new WatchValue(
new PrettyBytes(getEndian().isBigEndian(),
bytes.modAfterLoad(sizeout, sizeinAddress, inAddress.bytes.bytes,
sizeinValue, inValue.bytes.bytes)),
STATE.modAfterLoad(sizeout, sizeinAddress, inAddress.state,
sizeinValue, inValue.state),
location.modAfterLoad(sizeout, sizeinAddress, inAddress.location,
sizeinValue, inValue.location),
READS.modAfterLoad(sizeout, sizeinAddress, inAddress.reads,
sizeinValue, inValue.reads));
bytes.modAfterLoad(sizeinOffset, space, inOffset.bytes.bytes, sizeinValue,
inValue.bytes.bytes)),
STATE.modAfterLoad(sizeinOffset, space, inOffset.state, sizeinValue,
inValue.state),
location.modAfterLoad(sizeinOffset, space, inOffset.location, sizeinValue,
inValue.location),
READS.modAfterLoad(sizeinOffset, space, inOffset.reads, sizeinValue,
inValue.reads));
}
@Override

View file

@ -19,6 +19,7 @@ import java.math.BigInteger;
import ghidra.pcode.exec.ConcretionError;
import ghidra.pcode.exec.PcodeArithmetic;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Endian;
import ghidra.trace.model.memory.TraceMemoryState;
@ -58,15 +59,15 @@ public enum TraceMemoryStatePcodeArithmetic implements PcodeArithmetic<TraceMemo
}
@Override
public TraceMemoryState modBeforeStore(int sizeout, int sizeinAddress,
TraceMemoryState inAddress, int sizeinValue, TraceMemoryState inValue) {
public TraceMemoryState modBeforeStore(int sizeinOffset, AddressSpace space,
TraceMemoryState inOffset, int sizeinValue, TraceMemoryState inValue) {
return inValue; // Shouldn't see STORE during Sleigh eval, anyway
}
@Override
public TraceMemoryState modAfterLoad(int sizeout, int sizeinAddress, TraceMemoryState inAddress,
int sizeinValue, TraceMemoryState inValue) {
if (inAddress == TraceMemoryState.KNOWN && inValue == TraceMemoryState.KNOWN) {
public TraceMemoryState modAfterLoad(int sizeinOffset, AddressSpace space,
TraceMemoryState inOffset, int sizeinValue, TraceMemoryState inValue) {
if (inOffset == TraceMemoryState.KNOWN && inValue == TraceMemoryState.KNOWN) {
return TraceMemoryState.KNOWN;
}
return TraceMemoryState.UNKNOWN;

View file

@ -24,8 +24,8 @@ import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSetView;
import ghidra.program.model.listing.*;
import ghidra.trace.model.memory.TraceMemoryRegion;
import ghidra.util.ComparatorMath;
import ghidra.util.LockHold;
import ghidra.util.MathUtilities;
import ghidra.util.exception.*;
public class DBTraceProgramViewRootModule implements ProgramModule {
@ -195,7 +195,7 @@ public class DBTraceProgramViewRootModule implements ProgramModule {
.getMinAddress();
}
// TODO: There has got to be a better way
return reduceRegions(TraceMemoryRegion::getMinAddress, ComparatorMath::cmin);
return reduceRegions(TraceMemoryRegion::getMinAddress, MathUtilities::cmin);
}
@Override
@ -206,7 +206,7 @@ public class DBTraceProgramViewRootModule implements ProgramModule {
.getMaxAddress();
}
// TODO: There has got to be a better way
return reduceRegions(TraceMemoryRegion::getMaxAddress, ComparatorMath::cmax);
return reduceRegions(TraceMemoryRegion::getMaxAddress, MathUtilities::cmax);
}
@Override

View file

@ -983,6 +983,7 @@ public class BytesTracePcodeEmulatorTest extends AbstractTracePcodeEmulatorTest
TraceSleighUtils.evaluate("r1", tb.trace, 1, thread, 0));
}
}
@Test
public void testITE_ContextFlow() throws Throwable {
try (ToyDBTraceBuilder tb = new ToyDBTraceBuilder("Test", "ARM:LE:32:v8T")) {

View file

@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -30,7 +30,7 @@ dependencies {
api project(':Utility')
api project(':Base') // Boo!: (Where to put DefaultEnumeratedColumnProgramTableModel?)
// TODO: Evaluate these dependencies
// api("com.google.auto.service:auto-service-annotations:$autoServiceVersion")
// annotationProcessor("com.google.auto.service:auto-service:$autoServiceVersion")

View file

@ -0,0 +1,232 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.util.opinion;
import java.io.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import generic.ULongSpan;
import ghidra.app.util.Option;
import ghidra.app.util.bin.ByteProvider;
import ghidra.app.util.importer.MessageLog;
import ghidra.framework.model.Project;
import ghidra.framework.store.LockException;
import ghidra.generic.util.datastruct.SemisparseByteArray;
import ghidra.program.database.function.OverlappingFunctionException;
import ghidra.program.model.address.*;
import ghidra.program.model.lang.*;
import ghidra.program.model.listing.CodeUnit;
import ghidra.program.model.listing.Program;
import ghidra.program.model.mem.MemoryAccessException;
import ghidra.program.model.mem.MemoryConflictException;
import ghidra.program.model.symbol.SourceType;
import ghidra.program.model.symbol.SymbolUtilities;
import ghidra.util.NumericUtilities;
import ghidra.util.exception.CancelledException;
import ghidra.util.exception.InvalidInputException;
import ghidra.util.task.TaskMonitor;
/**
 * A loader that reconstructs a program from the text of a JIT compilation log.
 *
 * <p>
 * The log is read line by line. A line matching {@link #PAT_METHOD} starts a new method; until the
 * next such line, lines matching {@link #PAT_COMMENT} contribute pre-comments and lines matching
 * {@link #PAT_BYTES} contribute machine-code bytes at the address given on the line. Lines before
 * the first method header, and lines matching none of the patterns, are ignored. After parsing,
 * one initialized memory block is created per contiguous address range, the bytes and comments are
 * written, and one function is created per method.
 *
 * <p>
 * NOTE(review): the exact JVM options that produce a compatible log are not visible here --
 * confirm before documenting them for users.
 */
public class JitLogLoader extends AbstractProgramLoader {

	public final static String JIT_LOG_NAME = "OpenJDK 17 JIT compilation log";

	/**
	 * Offer every available language/compiler spec pair, none of them preferred
	 *
	 * <p>
	 * The content of the provider is not inspected, so the user must choose the language.
	 *
	 * @param provider the bytes being imported (not examined)
	 * @return one load spec per known language/compiler spec pair, each with image base 0
	 */
	@Override
	public Collection<LoadSpec> findSupportedLoadSpecs(ByteProvider provider) throws IOException {
		return getLanguageService().getLanguageCompilerSpecPairs(
			new LanguageCompilerSpecQuery(null, null, null, null, null))
				.stream()
				.map(lcs -> new LoadSpec(this, 0, lcs, false))
				.toList();
	}

	@Override
	public String getName() {
		return JIT_LOG_NAME;
	}

	@Override
	public LoaderTier getTier() {
		return LoaderTier.UNTARGETED_LOADER;
	}

	@Override
	public int getTierPriority() {
		return 100;
	}

	/**
	 * Create a new program for the selected language and populate it from the log
	 *
	 * <p>
	 * If anything fails before the program is fully prepared, including the creation of the
	 * default memory blocks, the program is released so that it does not leak.
	 *
	 * @return a single-element list containing the loaded program
	 */
	@Override
	protected List<Loaded<Program>> loadProgram(ByteProvider provider, String loadedName,
			Project project, String projectFolderPath, LoadSpec loadSpec, List<Option> options,
			MessageLog log, Object consumer, TaskMonitor monitor)
			throws IOException, LoadException, CancelledException {
		LanguageCompilerSpecPair pair = loadSpec.getLanguageCompilerSpec();
		CompilerSpec cSpec = pair.getCompilerSpec();
		Language language = cSpec.getLanguage();

		Program program =
			createProgram(provider, loadedName, null, getName(), language, cSpec, consumer);
		boolean success = false;
		try {
			loadInto(provider, loadSpec, options, log, program, monitor);
			createDefaultMemoryBlocks(program, language, log);
			// Only claim success once every step that can throw has completed
			success = true;
		}
		finally {
			if (!success) {
				program.release(consumer);
			}
		}

		// Reaching this point implies success; a failure propagates out of the finally above
		List<Loaded<Program>> results = new ArrayList<>();
		results.add(new Loaded<>(program, loadedName, projectFolderPath));
		return results;
	}

	/**
	 * The bytes and comments collected for one method found in the log
	 */
	static class JitMethod {
		final String name;
		SemisparseByteArray bytes = new SemisparseByteArray();
		Map<Address, String> comments = new HashMap<>();

		public JitMethod(String name) {
			this.name = name;
		}

		/**
		 * Add a comment line at the given address, joining it to any existing comment there with a
		 * newline
		 *
		 * @param address the address the comment applies to
		 * @param line the comment text
		 */
		void appendComment(Address address, String line) {
			comments.compute(address, (a, c) -> c == null ? line : c + "\n" + line);
		}
	}

	/** The methods parsed during the current load, in the order they appear in the log */
	List<JitMethod> methods = new ArrayList<>();
	/** The union of all addresses for which any parsed method supplies bytes */
	AddressSet fullSet = new AddressSet();

	/** A method header: {@code # {method} {0x...} name} */
	static final Pattern PAT_METHOD =
		Pattern.compile("\\s*#\\s*\\{method\\}\\s*\\{0x[0-9A-Fa-f]+\\}(?<name>.*)");
	/** A comment line: {@code 0xADDR: ;comment} */
	static final Pattern PAT_COMMENT =
		Pattern.compile("\\s*0x(?<addrHex>[0-9A-Fa-f]+):\\s*;(?<comment>.*)");
	/** A bytes line: {@code 0xADDR: hex digits, optionally separated by whitespace or '|'} */
	static final Pattern PAT_BYTES =
		Pattern.compile("\\s*0x(?<addrHex>[0-9A-Fa-f]+):\\s*(?<bytes>[\\s\\|0-9A-Fa-f]+)");
	/** Every separator that {@link #PAT_BYTES} admits between hex digits */
	static final Pattern PAT_BYTE_SEPARATORS = Pattern.compile("[\\s|]+");

	/**
	 * Parse the log and populate the given program with blocks, bytes, comments, and functions
	 *
	 * @param provider the log text
	 * @param loadSpec the selected load spec (not consulted here)
	 * @param options the load options (not consulted here)
	 * @param log receives a message for every item that could not be created
	 * @param program the program to populate
	 * @param monitor for progress and cancellation
	 * @throws IOException if the log cannot be read
	 * @throws CancelledException if the user cancels
	 */
	@Override
	protected void loadProgramInto(ByteProvider provider, LoadSpec loadSpec,
			List<Option> options, MessageLog log, Program program, TaskMonitor monitor)
			throws IOException, CancelledException {
		// Discard anything accumulated by an earlier load on this same instance
		methods.clear();
		fullSet.clear();

		monitor.setMessage("Reading lines");
		readMethods(provider, log, program, monitor);

		// Progress advances once per block and once per method, so count ranges, not addresses
		monitor.setMaximum(fullSet.getNumAddressRanges() + methods.size());

		monitor.setMessage("Creating blocks");
		createBlocks(log, program, monitor);

		monitor.setMessage("Creating methods");
		AddressSpace space = program.getAddressFactory().getDefaultAddressSpace();
		for (JitMethod method : methods) {
			monitor.checkCanceled();
			createMethod(method, space, log, program);
			monitor.incrementProgress(1);
		}
	}

	/** Read the log line by line, collecting each method's bytes and comments. */
	private void readMethods(ByteProvider provider, MessageLog log, Program program,
			TaskMonitor monitor) throws IOException, CancelledException {
		AddressFactory factory = program.getAddressFactory();
		JitMethod curMethod = null;
		try (BufferedReader in =
			new BufferedReader(new InputStreamReader(provider.getInputStream(0)))) {
			String line;
			while ((line = in.readLine()) != null) {
				monitor.checkCanceled();

				Matcher matcher = PAT_METHOD.matcher(line);
				if (matcher.matches()) {
					putMethod(curMethod, program);
					// Names appear XML-escaped in the log; undo the escapes seen in practice
					curMethod = new JitMethod(matcher.group("name")
							.replace("&apos;", "'")
							.replace("&lt;", "<")
							.replace("&gt;", ">"));
					continue;
				}
				if (curMethod == null) {
					continue;
				}

				matcher = PAT_COMMENT.matcher(line);
				if (matcher.matches()) {
					Address address = parseAddress(factory, matcher.group("addrHex"), log);
					if (address != null) {
						curMethod.appendComment(address, matcher.group("comment"));
					}
					// A comment line can never also be a bytes line: ';' is not a byte character
					continue;
				}

				matcher = PAT_BYTES.matcher(line);
				if (matcher.matches()) {
					Address address = parseAddress(factory, matcher.group("addrHex"), log);
					if (address != null) {
						String hex =
							PAT_BYTE_SEPARATORS.matcher(matcher.group("bytes")).replaceAll("");
						curMethod.bytes.putData(address.getOffset(),
							NumericUtilities.convertStringToBytes(hex));
					}
				}
			}
		}
		putMethod(curMethod, program);
	}

	/** Parse a hex address, logging and returning {@code null} if the factory rejects it. */
	private static Address parseAddress(AddressFactory factory, String addrHex, MessageLog log) {
		Address address = factory.getAddress(addrHex);
		if (address == null) {
			log.appendMsg("Could not parse address: 0x" + addrHex);
		}
		return address;
	}

	/** Create one zero-filled initialized block per contiguous range of parsed addresses. */
	private void createBlocks(MessageLog log, Program program, TaskMonitor monitor)
			throws CancelledException {
		for (AddressRange range : fullSet) {
			monitor.checkCanceled();
			try {
				program.getMemory()
						.createInitializedBlock("block" + range.getMinAddress(),
							range.getMinAddress(), range.getLength(), (byte) 0, monitor, false);
			}
			catch (AddressOverflowException | LockException | IllegalArgumentException
					| MemoryConflictException e) {
				log.appendMsg("Could not create block " + range + ": " + e);
			}
			monitor.incrementProgress(1);
		}
	}

	/** Write one method's bytes and comments into the program, then create its function. */
	private void createMethod(JitMethod method, AddressSpace space, MessageLog log,
			Program program) {
		AddressSet body = new AddressSet();
		for (ULongSpan span : method.bytes.getInitialized(0, -1).spans()) {
			body.add(space.getAddress(span.min()), space.getAddress(span.max()));
			if (span.length() > Integer.MAX_VALUE) {
				log.appendMsg("Method too large: " + method.name);
				continue;
			}
			byte[] data = new byte[(int) span.length()];
			method.bytes.getData(span.min(), data);
			try {
				program.getMemory().setBytes(space.getAddress(span.min()), data);
			}
			catch (MemoryAccessException | AddressOutOfBoundsException e) {
				log.appendMsg("Could not write bytes " + span + ": " + e);
			}
		}

		for (Map.Entry<Address, String> ent : method.comments.entrySet()) {
			program.getListing().setComment(ent.getKey(), CodeUnit.PRE_COMMENT, ent.getValue());
		}

		if (body.isEmpty()) {
			// No bytes means no entry point; creating a function would fail on a null address
			log.appendMsg("Couldn't create function: " + method.name + ": no bytes in log");
			return;
		}
		try {
			program.getFunctionManager()
					.createFunction(SymbolUtilities.replaceInvalidChars(method.name, true),
						body.getMinAddress(), body, SourceType.IMPORTED);
		}
		catch (InvalidInputException | OverlappingFunctionException e) {
			log.appendMsg("Couldn't create function: " + method.name + ": " + e);
		}
	}

	/**
	 * Record a completely-parsed method and add its initialized spans to {@link #fullSet}
	 *
	 * @param method the method to record, or {@code null} to do nothing
	 * @param program the program whose default address space locates the spans
	 */
	void putMethod(JitMethod method, Program program) {
		if (method == null) {
			return;
		}
		AddressSpace space = program.getAddressFactory().getDefaultAddressSpace();
		methods.add(method);
		for (ULongSpan span : method.bytes.getInitialized(0, -1).spans()) {
			fullSet.add(space.getAddress(span.min()), space.getAddress(span.max()));
		}
	}
}

View file

@ -43,7 +43,7 @@ public abstract class ArithmeticVarnodeEvaluator<T> extends AbstractVarnodeEvalu
* SLEIGH: {@code shift} the left piece then {@code or} it with the right piece.
*
* @param <T> the type of values
* @param arithmetic the p-code arithmetic for values of type {@link T}
* @param arithmetic the p-code arithmetic for values of type {@code T}
* @param sizeTotal the expected output size in bytes
* @param upper the value of the left (more significant) piece
* @param lower the value of the right (less significant) piece
@ -143,8 +143,6 @@ public abstract class ArithmeticVarnodeEvaluator<T> extends AbstractVarnodeEvalu
T offset = evaluateVarnode(program, inOffset, already);
Varnode outVar = op.getOutput(); // Only for measuring size
T out = evaluateAbstract(program, space, offset, outVar.getSize(), already);
return arithmetic.modAfterLoad(outVar.getSize(),
inOffset.getSize(), offset,
outVar.getSize(), out);
return arithmetic.modAfterLoad(op, space, offset, out);
}
}

View file

@ -15,8 +15,8 @@
*/
package ghidra.program.model.address;
import static ghidra.util.ComparatorMath.cmax;
import static ghidra.util.ComparatorMath.cmin;
import static ghidra.util.MathUtilities.cmax;
import static ghidra.util.MathUtilities.cmin;
import java.util.Iterator;

View file

@ -15,8 +15,8 @@
*/
package ghidra.util;
import static ghidra.util.ComparatorMath.cmax;
import static ghidra.util.ComparatorMath.cmin;
import static ghidra.util.MathUtilities.cmax;
import static ghidra.util.MathUtilities.cmin;
import ghidra.program.model.address.*;

View file

@ -15,8 +15,8 @@
*/
package ghidra.util;
import static ghidra.util.ComparatorMath.cmax;
import static ghidra.util.ComparatorMath.cmin;
import static ghidra.util.MathUtilities.cmax;
import static ghidra.util.MathUtilities.cmin;
import java.util.Iterator;
import java.util.Map.Entry;

View file

@ -15,8 +15,8 @@
*/
package ghidra.util;
import static ghidra.util.ComparatorMath.cmax;
import static ghidra.util.ComparatorMath.cmin;
import static ghidra.util.MathUtilities.cmax;
import static ghidra.util.MathUtilities.cmin;
import java.util.Collection;
import java.util.Iterator;

View file

@ -15,8 +15,8 @@
*/
package ghidra.util;
import static ghidra.util.ComparatorMath.cmax;
import static ghidra.util.ComparatorMath.cmin;
import static ghidra.util.MathUtilities.cmax;
import static ghidra.util.MathUtilities.cmin;
import java.util.Arrays;
import java.util.Collection;

View file

@ -19,6 +19,7 @@ import java.util.Objects;
import ghidra.pcode.exec.ConcretionError;
import ghidra.pcode.exec.PcodeArithmetic;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Endian;
import ghidra.program.model.lang.Language;
import ghidra.program.model.pcode.PcodeOp;
@ -83,7 +84,7 @@ public enum TaintPcodeArithmetic implements PcodeArithmetic<TaintVec> {
* <p>
* We can't just naively return {@code in1}, because each unary op may mix the bytes of the
* operand a little differently. For {@link PcodeOp#COPY}, we can, since no mixing happens at
* all. This is also the case of both {@link NEGATE} operations ("negate" is a bit of a
* all. This is also the case of both {@code NEGATE} operations ("negate" is a bit of a
* misnomer, as they merely invert the bits.) For {@link PcodeOp#INT_ZEXT}, we append empties
* to the correct end of the vector. Similarly, we replicate the most-significant element and
* append for {@link PcodeOp#INT_SEXT}. For {@link PcodeOp#INT_2COMP} (which negates an integer
@ -183,9 +184,9 @@ public enum TaintPcodeArithmetic implements PcodeArithmetic<TaintVec> {
* Here we handle indirect taint for indirect writes
*/
@Override
public TaintVec modBeforeStore(int sizeout, int sizeinAddress, TaintVec inAddress,
public TaintVec modBeforeStore(int sizeinOffset, AddressSpace space, TaintVec inOffset,
int sizeinValue, TaintVec inValue) {
return inValue.tagIndirectWrite(inAddress);
return inValue.tagIndirectWrite(inOffset);
}
/**
@ -195,9 +196,9 @@ public enum TaintPcodeArithmetic implements PcodeArithmetic<TaintVec> {
* Here we handle indirect taint for indirect reads
*/
@Override
public TaintVec modAfterLoad(int sizeout, int sizeinAddress, TaintVec inAddress,
public TaintVec modAfterLoad(int sizeinOffset, AddressSpace space, TaintVec inOffset,
int sizeinValue, TaintVec inValue) {
return inValue.tagIndirectRead(inAddress);
return inValue.tagIndirectRead(inOffset);
}
/**

View file

@ -39,7 +39,7 @@ public class PCodeDfgGraphTask extends Task {
private GraphDisplayBroker graphService;
protected HighFunction hfunction;
private AttributedGraph graph;
protected AttributedGraph graph;
private PluginTool tool;
public PCodeDfgGraphTask(PluginTool tool, GraphDisplayBroker graphService,

View file

@ -2,11 +2,6 @@ MODULE FILE LICENSE: lib/dex-ir-2.4.24.jar Apache License 2.0
MODULE FILE LICENSE: lib/dex-reader-2.4.24.jar Apache License 2.0
MODULE FILE LICENSE: lib/dex-reader-api-2.4.24.jar Apache License 2.0
MODULE FILE LICENSE: lib/dex-translator-2.4.24.jar Apache License 2.0
MODULE FILE LICENSE: lib/asm-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/asm-analysis-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/asm-commons-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/asm-tree-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/asm-util-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/baksmali-2.5.2.jar BSD-3-GRUVER
MODULE FILE LICENSE: lib/dexlib2-2.5.2.jar BSD-3-GRUVER
MODULE FILE LICENSE: lib/util-2.5.2.jar BSD-3-GRUVER

View file

@ -29,17 +29,13 @@ dependencies {
api project(':Base')
api project(':Recognizers')
api project(':PDB')
// Used by "Android DEX to JAR" file system
// dex2jar depends on asm-9.7.1, which is declared in Framework/Emulation
api 'de.femtopedia.dex2jar:dex-ir:2.4.24'
api 'de.femtopedia.dex2jar:dex-reader:2.4.24'
api 'de.femtopedia.dex2jar:dex-reader-api:2.4.24'
api 'de.femtopedia.dex2jar:dex-translator:2.4.24'
api 'org.ow2.asm:asm:9.7.1'
api 'org.ow2.asm:asm-analysis:9.7.1'
api 'org.ow2.asm:asm-commons:9.7.1'
api 'org.ow2.asm:asm-tree:9.7.1'
api 'org.ow2.asm:asm-util:9.7.1'
// Used by "Android DEX to SMALI" file system
api 'org.smali:baksmali:2.5.2' // requires guava-27.1-android or later

View file

@ -5,7 +5,6 @@
##MODULE IP: BSD-3-GRUVER
##MODULE IP: Copyright Distribution Permitted
##MODULE IP: Creative Commons Attribution 2.5
##MODULE IP: INRIA License
##MODULE IP: Jython License
##MODULE IP: LGPL 2.1
##MODULE IP: Public Domain

View file

@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -16,6 +16,7 @@
package ghidra.pcode.emu.sys;
import java.io.*;
import java.lang.reflect.Method;
import java.util.*;
import java.util.Map.Entry;
import java.util.stream.Collectors;
@ -132,8 +133,8 @@ public interface EmuSyscallLibrary<T> extends PcodeUseropLibrary<T> {
* Derive a syscall number to calling convention map by scraping functions in the program's
* "syscall" space.
*
* @param program
* @return
* @param program the program whose "syscall" space to scrape
* @return the map of syscall number to calling convention
*/
public static Map<Long, PrototypeModel> loadSyscallConventionMap(Program program) {
return loadSyscallFunctionMap(program).entrySet()
@ -169,6 +170,37 @@ public interface EmuSyscallLibrary<T> extends PcodeUseropLibrary<T> {
Varnode outVar, List<Varnode> inVars) {
syslib.syscall(executor, library);
}
/**
 * {@inheritDoc}
 * <p>
 * The syscall userop always reports {@code false} here.
 * NOTE(review): presumably because its result depends on machine state rather than only on its
 * operands -- confirm against the interface's contract.
 */
@Override
public boolean isFunctional() {
return false;
}
/**
 * {@inheritDoc}
 * <p>
 * The syscall userop always reports {@code true} here.
 * NOTE(review): presumably because the dispatched system call may modify machine state -- confirm
 * against the interface's contract.
 */
@Override
public boolean hasSideEffects() {
return true;
}
/**
 * {@inheritDoc}
 * <p>
 * The syscall userop always reports {@code false} here.
 * NOTE(review): presumably because it is implemented by a Java callback rather than by p-code --
 * confirm against the interface's contract.
 */
@Override
public boolean canInlinePcode() {
return false;
}
/**
 * {@inheritDoc}
 * <p>
 * Returns the syscall library this userop was created for, i.e., the same library whose
 * {@code syscall} method is invoked when the userop executes.
 */
@Override
public PcodeUseropLibrary<?> getDefiningLibrary() {
return syslib;
}
/**
 * {@inheritDoc}
 * <p>
 * Reflectively looks up the public {@code syscall(PcodeExecutor, PcodeUseropLibrary)} method on
 * the runtime class of the backing syscall library.
 *
 * @return the reflected {@code syscall} method
 * @throws AssertionError if the method cannot be found or reflection is denied
 */
@Override
public Method getJavaMethod() {
	Class<?> libraryClass = syslib.getClass();
	try {
		return libraryClass.getMethod("syscall", PcodeExecutor.class,
			PcodeUseropLibrary.class);
	}
	catch (NoSuchMethodException | SecurityException e) {
		// The library type is expected to declare this method, so treat failure as a bug
		throw new AssertionError(e);
	}
}
}
/**
@ -199,7 +231,7 @@ public interface EmuSyscallLibrary<T> extends PcodeUseropLibrary<T> {
*/
default PcodeUseropDefinition<T> getSyscallUserop() {
return new SyscallPcodeUseropDefinition<>(this);
};
}
/**
* Retrieve the desired system call number according to the emulated system's conventions

View file

@ -0,0 +1,5 @@
MODULE FILE LICENSE: lib/asm-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/asm-analysis-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/asm-commons-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/asm-tree-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/asm-util-9.7.1.jar INRIA License

View file

@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -26,4 +26,10 @@ dependencies {
api project(':SoftwareModeling')
api project(':Generic')
api project(':Utility')
api 'org.ow2.asm:asm:9.7.1'
api 'org.ow2.asm:asm-analysis:9.7.1'
api 'org.ow2.asm:asm-commons:9.7.1'
api 'org.ow2.asm:asm-tree:9.7.1'
api 'org.ow2.asm:asm-util:9.7.1'
}

View file

@ -1,4 +1,5 @@
##VERSION: 2.0
##MODULE IP: INRIA License
Module.manifest||GHIDRA||||END|
README.md||GHIDRA||||END|
src/test/resources/mock.cspec||GHIDRA||||END|

View file

@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -20,6 +20,7 @@ import generic.ULongSpan.ULongSpanSet;
import ghidra.app.emulator.memory.MemoryLoadImage;
import ghidra.app.emulator.state.RegisterState;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.util.PseudoInstruction;
import ghidra.lifecycle.Transitional;
import ghidra.pcode.emu.*;
import ghidra.pcode.emu.PcodeMachine.SwiMode;
@ -35,7 +36,6 @@ import ghidra.pcode.utils.Utils;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.*;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.util.Msg;
import ghidra.util.exception.CancelledException;
@ -134,7 +134,7 @@ public class AdaptedEmulator implements Emulator {
PcodeExecutorState<byte[]> sharedState) {
return new SleighInstructionDecoder(language, sharedState) {
@Override
public Instruction decodeInstruction(Address address, RegisterValue context) {
public PseudoInstruction decodeInstruction(Address address, RegisterValue context) {
try {
isDecoding = true;
return super.decodeInstruction(address, context);
@ -147,8 +147,7 @@ public class AdaptedEmulator implements Emulator {
}
}
record StateBacking(MemoryFaultHandler faultHandler, MemoryLoadImage loadImage) {
}
record StateBacking(MemoryFaultHandler faultHandler, MemoryLoadImage loadImage) {}
class AdaptedBytesPcodeExecutorState extends BytesPcodeExecutorState {
public AdaptedBytesPcodeExecutorState(Language language, StateBacking backing) {

View file

@ -114,6 +114,15 @@ public class SemisparseByteArray {
getData(loc, data, 0, data.length);
}
/**
 * Get the array stored for the block beginning at the given offset, allocating a zero-filled
 * block first if none is stored yet
 *
 * <p>
 * The returned array is the very instance held by this object, not a copy.
 * NOTE(review): nothing here updates any record of which bytes are initialized; callers that
 * write through the returned array presumably must account for that themselves -- confirm.
 *
 * @param loc the offset of the block, treated as unsigned; must be a multiple of the block size
 * @return the block's array, of length {@code BLOCK_SIZE}
 * @throws IllegalArgumentException if {@code loc} is not at a block boundary
 */
public synchronized byte[] getDirect(final long loc) {
	// Reject misaligned offsets before touching the block map
	if (Long.remainderUnsigned(loc, BLOCK_SIZE) != 0) {
		throw new IllegalArgumentException("Offset must be at block boundary");
	}
	final long blockIndex = Long.divideUnsigned(loc, BLOCK_SIZE);
	return blocks.computeIfAbsent(blockIndex, ignored -> new byte[BLOCK_SIZE]);
}
/**
* Copy a range of data from the semisparse array into a portion of the given byte array
*

View file

@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -273,13 +273,8 @@ public abstract class AbstractPcodeMachine<T> implements PcodeMachine<T> {
return suspended;
}
/**
* Check for a p-code injection (override) at the given address
*
* @param address the address, usually the program counter
* @return the injected program, most likely {@code null}
*/
protected PcodeProgram getInject(Address address) {
@Override
public PcodeProgram getInject(Address address) {
return injects.get(address);
}

View file

@ -47,6 +47,8 @@ import ghidra.util.Msg;
* This class implements the control-flow logic of the target machine, cooperating with the p-code
* program flow implemented by the {@link PcodeExecutor}. This implementation exists primarily in
* {@link #beginInstructionOrInject()} and {@link #advanceAfterFinished()}.
*
* @param <T> the type of variables in the emulator
*/
public class DefaultPcodeThread<T> implements PcodeThread<T> {
@ -122,7 +124,7 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
*
* @see PcodeMachine#addBreakpoint(Address, String)
*/
@PcodeUserop
@PcodeUserop(functional = true)
public void emu_swi() {
thread.swi();
}
@ -136,7 +138,7 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
* calls to this p-code op. Then, only if and when an erroneous inject is encountered will
* the client be notified.
*/
@PcodeUserop
@PcodeUserop(functional = true)
public void emu_injection_err() {
throw new InjectionErrorPcodeExecutionException(null, null);
}
@ -148,6 +150,8 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
* <p>
* This executor checks for thread suspension and updates the program counter register upon
* execution of (external) branches.
*
* @param <T> the type of variables in the emulator
*/
public static class PcodeThreadExecutor<T> extends PcodeExecutor<T> {
volatile boolean suspended = false;
@ -192,7 +196,7 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
}
@Override
protected void branchToAddress(Address target) {
protected void branchToAddress(PcodeOp op, Address target) {
thread.branchToAddress(target);
}
@ -249,7 +253,7 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
this.arithmetic = machine.arithmetic;
PcodeExecutorState<T> sharedState = machine.getSharedState();
PcodeExecutorState<T> localState = machine.createLocalState(this);
this.state = new ThreadPcodeExecutorState<>(sharedState, localState);
this.state = createThreadState(sharedState, localState);
this.decoder = createInstructionDecoder(sharedState);
this.library = createUseropLibrary();
@ -269,6 +273,18 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
this.reInitialize();
}
/**
 * Build the state for this thread by combining the machine's shared state with this thread's
 * local state
 *
 * <p>
 * Subclasses may override this to supply a specialized {@link ThreadPcodeExecutorState}.
 * NOTE(review): this appears to be invoked while the thread is still being constructed, so an
 * override should presumably not depend on fields the subclass initializes -- confirm.
 *
 * @param sharedState the portion of the state shared among threads
 * @param localState the portion of the state private to this thread
 * @return the combined state
 */
protected ThreadPcodeExecutorState<T> createThreadState(PcodeExecutorState<T> sharedState,
		PcodeExecutorState<T> localState) {
	ThreadPcodeExecutorState<T> combined =
		new ThreadPcodeExecutorState<>(sharedState, localState);
	return combined;
}
/**
* A factory method for the instruction decoder
*
@ -465,8 +481,9 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
}
}
protected RegisterValue getContextAfterCommits() {
public static RegisterValue getContextAfterCommits(Instruction instruction, long counter) {
PseudoInstruction pins = (PseudoInstruction) instruction;
Language language = instruction.getPrototype().getLanguage();
try {
SleighParserContext parserCtx = (SleighParserContext) pins.getParserContext();
var procCtx = new DisassemblerContextAdapter() {
@ -477,7 +494,8 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
if (!value.getRegister().isProcessorContext()) {
return;
}
if (!address.equals(counter)) {
if (address.getOffset() != counter &&
!Objects.equals(pins.getAddress(), address)) {
Msg.warn(this, "Context applied somewhere other than the counter.");
return;
}
@ -492,6 +510,10 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
}
}
/**
 * Compute the context after commits for this thread's current instruction
 *
 * <p>
 * This is a convenience that delegates to the static
 * {@code getContextAfterCommits(Instruction, long)} using this thread's current instruction and
 * the offset of its program counter.
 *
 * @return whatever the static overload returns for the current instruction and counter
 */
protected RegisterValue getContextAfterCommits() {
	long counterOffset = counter.getOffset();
	return getContextAfterCommits(instruction, counterOffset);
}
/**
* Resolve a finished instruction, advancing the program counter if necessary
*/

View file

@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,7 +15,9 @@
*/
package ghidra.pcode.emu;
import ghidra.app.util.PseudoInstruction;
import ghidra.program.model.address.Address;
import ghidra.program.model.lang.Language;
import ghidra.program.model.lang.RegisterValue;
import ghidra.program.model.listing.Instruction;
@ -23,6 +25,13 @@ import ghidra.program.model.listing.Instruction;
* A means of decoding machine instructions from the bytes contained in the machine state
*/
public interface InstructionDecoder {
/**
* Get the language for this decoder
*
* @return the language
*/
Language getLanguage();
/**
* Decode the instruction starting at the given address using the given context
*
@ -33,7 +42,7 @@ public interface InstructionDecoder {
* @param context the disassembler/decode context
* @return the instruction
*/
Instruction decodeInstruction(Address address, RegisterValue context);
PseudoInstruction decodeInstruction(Address address, RegisterValue context);
/**
* Inform the decoder that the emulator thread just branched

View file

@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -46,6 +46,8 @@ import ghidra.util.Msg;
* TODO: "State modifiers" are a feature of the older {@link Emulator}. They are crudely
* incorporated into threads extended from this abstract class, so that they do not yet need to be
* ported to this emulator.
*
* @param <T> the type of variables in the emulator
*/
public class ModifiedPcodeThread<T> extends DefaultPcodeThread<T> {

View file

@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -234,7 +234,7 @@ public interface PcodeMachine<T> {
* <p>
* This will attempt to compile the given source against this machine's userop library and then
* inject it at the given address. The resulting p-code <em>replaces</em> that which would be
* executed by decoding the instruction at the given address. The means the machine will not
* executed by decoding the instruction at the given address. That means the machine will not
* decode, nor advance its counter, unless the Sleigh causes it. In most cases, the Sleigh will
* call {@link PcodeEmulationLibrary#emu_exec_decoded()} to cause the machine to decode and
* execute the overridden instruction.
@ -254,6 +254,14 @@ public interface PcodeMachine<T> {
*/
void inject(Address address, String source);
/**
* Check for a p-code injection (override) at the given address
*
* @param address the address, usually the program counter
* @return the injected program, most likely {@code null}
*/
PcodeProgram getInject(Address address);
/**
* Remove the inject, if present, at the given address
*

View file

@ -15,8 +15,6 @@
*/
package ghidra.pcode.emu;
import java.util.Objects;
import ghidra.app.util.PseudoInstruction;
import ghidra.pcode.emulate.InstructionDecodeException;
import ghidra.pcode.exec.DecodePcodeExecutionException;
@ -58,6 +56,7 @@ public class SleighInstructionDecoder implements InstructionDecoder {
/**
* Construct a Sleigh instruction decoder
*
* @see DefaultPcodeThread#createInstructionDecoder(PcodeExecutorState)
* @param language the language to decode
* @param state the state containing the target program, probably the shared state of the p-code
* machine. It must be possible to obtain concrete buffers on this state.
@ -75,6 +74,11 @@ public class SleighInstructionDecoder implements InstructionDecoder {
Disassembler.getDisassembler(language, addrFactory, TaskMonitor.DUMMY, listener);
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This returns the value of this decoder's {@code language} field.
 */
@Override
public Language getLanguage() {
	return this.language;
}
protected boolean useCachedInstruction(Address address, RegisterValue context) {
if (block == null) {
return false;
@ -100,7 +104,7 @@ public class SleighInstructionDecoder implements InstructionDecoder {
}
@Override
public Instruction decodeInstruction(Address address, RegisterValue context) {
public PseudoInstruction decodeInstruction(Address address, RegisterValue context) {
lastMsg = DEFAULT_ERROR;
if (!useCachedInstruction(address, context)) {
parseNewBlock(address, context);
@ -116,7 +120,7 @@ public class SleighInstructionDecoder implements InstructionDecoder {
* However, if the cached instruction's context does not match the desired one, assume we're
* starting a new block. That check will have to wait for the decode call, though.
*/
if (block.getInstructionAt(address) == null) {
if (block == null || block.getInstructionAt(address) == null) {
block = null;
}
}

View file

@ -149,8 +149,8 @@ public class ThreadPcodeExecutorState<T> implements PcodeExecutorState<T> {
*
* <p>
* This will only clear the thread's local state, lest we invoke clear on the shared state for
* every thread. Instead, if necessary, the machine should clear its local state then clear each
* thread's local state.
* every thread. Instead, if necessary, the machine should clear its shared state then clear
* each thread's local state.
*/
@Override
public void clear() {

View file

@ -0,0 +1,37 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorStatePiece.JitBytesPcodeExecutorStateSpace;
import ghidra.pcode.exec.PcodeExecutorState;
import ghidra.program.model.address.AddressSpace;
/**
* The run-time executor state for the JIT-accelerated p-code emulator
*
* @see JitDefaultBytesPcodeExecutorState
* @see JitBytesPcodeExecutorStatePiece
* @see JitBytesPcodeExecutorStateSpace
*/
public interface JitBytesPcodeExecutorState extends PcodeExecutorState<byte[]> {
/**
* Get the per-space state object for the given address space
*
* <p>
* This exists for generated code to side-step the space lookup: a translated passage can
* obtain the {@link JitBytesPcodeExecutorStateSpace} up front and then use it directly,
* rather than resolving the space again for each access.
*
* @param space the address space
* @return the state space for the given address space
*/
JitBytesPcodeExecutorStateSpace getForSpace(AddressSpace space);
}

View file

@ -0,0 +1,131 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorStatePiece.JitBytesPcodeExecutorStateSpace;
import ghidra.pcode.exec.AbstractBytesPcodeExecutorStatePiece;
import ghidra.pcode.exec.BytesPcodeExecutorStateSpace;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Language;
import ghidra.program.model.pcode.PcodeOp;
/**
* The state piece for {@link JitDefaultBytesPcodeExecutorState}
*
* <p>
* This provides access to the internals so that translated passages can pre-fetch certain objects
* to optimize state accesses.
*/
public class JitBytesPcodeExecutorStatePiece
		extends AbstractBytesPcodeExecutorStatePiece<JitBytesPcodeExecutorStateSpace> {

	/**
	 * Create the state piece for the given emulation target
	 *
	 * @param language the emulation target language
	 */
	public JitBytesPcodeExecutorStatePiece(Language language) {
		super(language);
	}

	@Override
	protected AbstractSpaceMap<JitBytesPcodeExecutorStateSpace> newSpaceMap() {
		return new JitBytesSpaceMap();
	}

	/**
	 * Not supported by this piece
	 *
	 * @throws UnsupportedOperationException always
	 */
	@Override
	public void clear() {
		throw new UnsupportedOperationException();
	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * This override exists only to widen access to public, because the constructors of
	 * JIT-generated classes must be able to call it.
	 */
	@Override
	public JitBytesPcodeExecutorStateSpace getForSpace(AddressSpace space, boolean toWrite) {
		return super.getForSpace(space, toWrite);
	}

	/**
	 * The per-{@link AddressSpace} portion of the state
	 */
	public class JitBytesPcodeExecutorStateSpace extends BytesPcodeExecutorStateSpace<Void> {

		/**
		 * Create the state for one address space
		 *
		 * @param language the emulation target language
		 * @param space the address space
		 * @param backing any extra read-through backing (not used)
		 */
		public JitBytesPcodeExecutorStateSpace(Language language, AddressSpace space,
				Void backing) {
			super(language, space, backing);
		}

		/**
		 * Get the backing byte array of the block (page) that contains the given offset
		 *
		 * <p>
		 * The constructor of a translated passage typically calls this a few times, once for
		 * each block holding a variable (ram, register, or unique) that the passage accesses
		 * directly, i.e., at a fixed offset. The generated code then reads and writes those
		 * variables straight from/to the returned array.
		 *
		 * @param offset the {@link Address#getOffset() offset} within this address space.
		 * @return the byte array for the containing block
		 */
		public byte[] getDirect(long offset) {
			byte[] block = bytes.getDirect(offset);
			return block;
		}

		/**
		 * Read a variable, at an offset known only at run time, from this state space
		 *
		 * <p>
		 * For variables with a dynamic offset, usually those accessed by {@link PcodeOp#LOAD}
		 * and {@link PcodeOp#STORE}, the constructor of a translated passage typically
		 * pre-fetches this space using
		 * {@link JitBytesPcodeExecutorStatePiece#getForSpace(AddressSpace, boolean)}. The
		 * generated code then calls this method (and
		 * {@link #write(long, byte[], int, int) write}) with the computed offset.
		 *
		 * @param offset the offset (known at runtime)
		 * @param size the size of the variable
		 * @return the value of the variable as a byte array
		 */
		public byte[] read(long offset, int size) {
			final Reason reason = Reason.EXECUTE_READ;
			return read(offset, size, reason);
		}
	}

	/**
	 * The space map for this piece, which supplies a new
	 * {@link JitBytesPcodeExecutorStateSpace} for an {@link AddressSpace} when asked
	 */
	class JitBytesSpaceMap extends SimpleSpaceMap<JitBytesPcodeExecutorStateSpace> {
		@Override
		protected JitBytesPcodeExecutorStateSpace newSpace(AddressSpace space) {
			// There is no read-through backing for JIT state spaces
			Void noBacking = null;
			return new JitBytesPcodeExecutorStateSpace(language, space, noBacking);
		}
	}
}

View file

@ -0,0 +1,276 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodHandles.Lookup;
import java.util.EnumSet;
import org.objectweb.asm.ClassWriter;
import ghidra.pcode.emu.jit.analysis.*;
import ghidra.pcode.emu.jit.decode.JitPassageDecoder;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassageClass;
import ghidra.pcode.exec.PcodeExecutorState;
/**
* The Just-in-Time (JIT) translation engine that powers the {@link JitPcodeEmulator}.
*
* <p>
* This is the translation engine from "any" machine language into JVM bytecode. The same caveats
* that apply to interpretation-based p-code emulation apply to JIT-accelerated emulation: Ghidra
* must have a Sleigh specification for the emulation target language, there must be userop
* libraries (built-in or user-provided) defining any userops encountered during the course of
* execution, all dependent code must be loaded or stubbed out, etc.
*
* <p>
* A passage is decoded at a desired entry point using the {@link JitPassageDecoder}. This compiler
* then translates the passage into bytecode. It will produce a classfile which is then loaded and
* returned to the emulator (or other client). The provided class will have three principal methods,
* not counting getters: 1) The class initializer, which initializes static fields; 2) The
* constructor, which takes a thread and initializes instance fields, and 3) The
* {@link JitCompiledPassage#run(int) run} method, which comprises the actual translation. A static
* field {@code ENTRIES} describes each entry point generated by the compiler. To execute the
* passage starting at a given entry point, the emulation thread must retrieve the index of the
* appropriate entry (i.e., address and contextreg value), instantiate the class, and then invoke
* the run method, passing it the entry index. The translated passage will read variables from the
* thread's {@link JitBytesPcodeExecutorState state} as needed, perform the equivalent operations as
* expressed in the source p-code, and then write the resulting variables back into the state.
* Memory variables are treated similarly, but without scope-based optimizations. In this manner,
* execution of the translated passage produces exactly the same effect on the emulation state as
* interpretation of the same p-code passage. The run method returns the next entry point to execute
* or {@code null} when the emulator must look up the next entry point.
*
* <p>
* Translation of a passage takes place in distinct phases. See each respective class for details of
* its design and implementation:
*
* <ol>
* <li>Control Flow Analysis: {@link JitControlFlowModel}</li>
* <li>Data Flow Analysis: {@link JitDataFlowModel}</li>
* <li>Variable Scope Analysis: {@link JitVarScopeModel}</li>
* <li>Type Assignment: {@link JitTypeModel}</li>
* <li>Variable Allocation: {@link JitAllocationModel}</li>
* <li>Operation Elimination: {@link JitOpUseModel}</li>
* <li>Code Generation: {@link JitCodeGenerator}</li>
* </ol>
*
* <h2>Control Flow Analysis</h2>
* <p>
* Some rudimentary control flow analysis is performed during decode, but the output of decode is a
* passage, i.e., collection of <em>strides</em>, not basic blocks. The control flow analysis breaks
* each stride down into basic blocks at the p-code level. Note that a single instruction's p-code
* (as well as any user instrumentation on that instruction's address) may have complex control
* flow. Additionally, branches that leave an instruction preclude execution of its remaining
* p-code. Thus, p-code basic blocks do not coincide precisely with instruction-level basic blocks.
* See {@link JitControlFlowModel}.
*
* <h2>Data Flow Analysis</h2>
* <p>
* Most every following step consumes the control flow analysis. Data flow analysis interprets each
* basic block independently using an abstraction that produces a use-def graph. A varnode that is
* read before it is written produces a "missing" variable. Those missing variables are converted to
* <em>phi</em> nodes and later resolved during inter-block analysis. The graph is also able to
* consider aliasing, partial accesses, overlapping accesses, etc., by synthesizing operations to
* model those effects. See {@link JitDataFlowModel}.
*
* <h2>Variable Scope Analysis</h2>
* <p>
* Because accessing {@link PcodeExecutorState} is expensive (relative to accessing a JVM local
* variable), the translation seeks to minimize such accesses. This is generally not recommended for
* memory accesses, as there is no telling in multi-threaded applications whether a given memory
* variable is shared/volatile or not. However, for registers and uniques, we can allocate the
* variables as JVM locals. Then we only "birth" them (read them in) when they come into scope and
* "retire" them (write them out) when they leave scope. This analyzer determines which variables
* are in scope (alive) in which basic blocks. See {@link JitVarScopeModel}.
*
* <h2>Type Assignment</h2>
* <p>
* For those variables we allocate as JVM locals, we have to choose a type, because the JVM requires
* it. We have essentially 4 to choose from. (Though we could also choose a <em>reference</em> type,
* depending on the strategy we eventually choose for multi-precision arithmetic.) Those four are
* the JVM primitives: int, float, long, and double. For those more familiar with Java but not the
* JVM, the smaller integral primitives are all represented by JVM ints. The JVM does not permit
* type confusion, e.g., the application of float addition {@code FADD} to int variables. However,
* the emulation target may permit type confusion. (Those familiar with the constant 0x5f3759df may
* appreciate intentional type confusion.) When this happens, we must explicitly convert by calling,
* e.g., {@link Float#floatToRawIntBits(float)}, which is essentially just a bit cast. Nevertheless,
* we seek to reduce the number of such calls we encode into the translation. See
* {@link JitTypeModel}.
*
* <h2>Variable Allocation</h2>
* <p>
* Once we've decided the type of each use-def variable node, we allocate JVM locals and assign
* their types accordingly. To keep things simple and fast, we just allocate variables by varnode.
* Partial/overlapping accesses are coalesced to the containing varnode and cause the type to be a
* JVM int (to facilitate shifting and masking). Otherwise, types are assigned according to the most
* common use of the varnode, i.e., by taking a vote among the use-def variable nodes sharing that
* varnode. See {@link JitAllocationModel}.
*
* <h2>Operation Elimination</h2>
* <p>
* Each instruction typically produces several p-code ops, the outputs of which may not actually be
* used by any subsequent op. This analysis seeks to identify such p-code ops and remove them. Since
* many ISAs employ "flags," which are set by nearly every arithmetic instruction, such ops are
* incredibly common. Worse yet, their computation is very expensive, because the JVM does not have
* comparable flag registers, nor does it provide opcodes for producing comparable values. We have
* to emit the bit banging operations ourselves. Thus, performing this elimination stands to improve
* execution speed significantly. However, eliminating these operations may lead to confusing
* results if execution is interrupted and the state inspected by a user. The effects of the
* eliminated operations will be missing. Even though they do not (or should not) matter, the user
* may expect to see them. Thus, this step can be toggled by
* {@link JitConfiguration#removeUnusedOperations()}. See {@link JitOpUseModel}.
*
* <h2>Code Generation</h2>
* <p>
* For simplicity, we seek to generate JVM bytecode in the same order as the source p-code ops.
* There are several details given the optimizations informed by all the preceding analysis. For
* example, the transfer of control to the requested entry point, the placement of variable birth
* and retirement on control flow edges (including fall-through).... We take an object-oriented
* approach to the translation of each p-code op, the handling of each variable's allocation and
* access, the conversion of types, etc. This phase outputs the final classfile bytes, which are
* then loaded as a hidden class. See {@link JitCodeGenerator}.
*
* @implNote There are static fields in this class for configuring diagnostics. They are meant to be
* modified only temporarily by developers seeking to debug issues in the translation
* engine.
*/
public class JitCompiler {

	/**
	 * Switches for the developer diagnostics this compiler can emit
	 */
	public enum Diag {
		/** Dump every passage (its instructions and p-code ops) prior to translating it */
		PRINT_PASSAGE,
		/** Dump the p-code of every basic block, plus the flows/branches connecting them */
		PRINT_CFM,
		/** Dump every basic block's ops in (roughly) SSA form */
		PRINT_DFM,
		/** Dump the live variables of every basic block */
		PRINT_VSM,
		/** Dump every synthesized operation, e.g., catenation, subpiece, phi */
		PRINT_SYNTH,
		/** Dump every op that was eliminated */
		PRINT_OUM,
		/** Turn on ASM's trace for every classfile generated */
		TRACE_CLASS,
		/** Write every generated {@code .class} file to disk so it can be examined offline */
		DUMP_CLASS;
	}

	/**
	 * The diagnostics currently switched on
	 *
	 * <p>
	 * This should be empty in production.
	 */
	public static final EnumSet<Diag> ENABLE_DIAGNOSTICS = EnumSet.noneOf(Diag.class);

	/**
	 * An address offset to exclude from ASM's {@link ClassWriter#COMPUTE_MAXS} and
	 * {@link ClassWriter#COMPUTE_FRAMES}
	 *
	 * <p>
	 * When ASM fails to compute frames and maxes automatically, it gives little diagnostic
	 * help: it usually just crashes with an NPE or an AIOOBE and, worse, emits none of the
	 * classfile trace. To work around this, a developer may determine the address of the passage
	 * seed that triggers the failure and set this variable to its offset. ASM then skips the
	 * computation for that passage, so that the trace is at least printed and the classfile
	 * dumped to disk (provided those {@link Diag}nostics are enabled).
	 *
	 * <p>
	 * After obtaining the trace/classfile, restore this to -1 and add debug prints to the
	 * crashing method. As that is probably inside the ASM library, the prints will need to be
	 * injected using an IDE / debugger. In Eclipse, set a "conditional breakpoint" whose
	 * condition prints the value and returns false, so that execution continues. This still
	 * slows execution considerably, so consider another conditional breakpoint that catches the
	 * moment the troublesome passage is translated. The most helpful value to print is probably
	 * the bytecode offset of each basic block ASM processes while computing frames. After the
	 * crash, look up the last couple of printed offsets in the dumped classfile.
	 */
	public static final long EXCLUDE_MAXS = -1L;

	/**
	 * The configuration of the JIT emulator
	 */
	private final JitConfiguration config;

	/**
	 * Create a translator from p-code to JVM bytecode.
	 *
	 * <p>
	 * Generally, only the JIT emulator and its test suite should need this.
	 *
	 * @param config the configuration
	 */
	public JitCompiler(JitConfiguration config) {
		this.config = config;
	}

	/**
	 * Check whether a diagnostic is switched on in {@link #ENABLE_DIAGNOSTICS}
	 *
	 * @param toggle the diagnostic to check
	 * @return true if enabled
	 */
	private static boolean isEnabled(Diag toggle) {
		return ENABLE_DIAGNOSTICS.contains(toggle);
	}

	/**
	 * Compile a decoded passage into a loadable class, using the given lookup
	 *
	 * <p>
	 * The analyses are constructed in a fixed order, each consuming the results of those before
	 * it, and the enabled diagnostics are dumped between them. Finally, the code generator
	 * produces and loads the class.
	 *
	 * @param lookup a lookup that can access everything the passage may need, e.g., userop
	 *            libraries. Likely, this should come from the emulator, which may be in a script.
	 *            If you are unsure what to use here, use {@link MethodHandles#lookup()}. If you see
	 *            errors about accessing stuff during the compilation, ensure everything the
	 *            emulator needs is accessible from the method calling
	 *            {@link MethodHandles#lookup()}.
	 * @param passage the decoded passage to compile
	 * @return the compiled class, not instantiated for any particular thread
	 */
	public JitCompiledPassageClass compilePassage(Lookup lookup, JitPassage passage) {
		if (isEnabled(Diag.PRINT_PASSAGE)) {
			System.err.println(passage);
		}

		JitAnalysisContext analysisContext = new JitAnalysisContext(config, passage);

		JitControlFlowModel controlFlow = new JitControlFlowModel(analysisContext);
		if (isEnabled(Diag.PRINT_CFM)) {
			controlFlow.dumpResult();
		}

		JitDataFlowModel dataFlow = new JitDataFlowModel(analysisContext, controlFlow);
		if (isEnabled(Diag.PRINT_DFM)) {
			dataFlow.dumpResult();
		}

		JitVarScopeModel varScope = new JitVarScopeModel(controlFlow, dataFlow);
		if (isEnabled(Diag.PRINT_VSM)) {
			varScope.dumpResult();
		}

		JitTypeModel types = new JitTypeModel(dataFlow);
		JitAllocationModel allocation =
			new JitAllocationModel(analysisContext, dataFlow, varScope, types);
		JitOpUseModel opUse = new JitOpUseModel(analysisContext, controlFlow, dataFlow, varScope);

		// The synthetic ops are dumped only after all of the analyses above have run
		if (isEnabled(Diag.PRINT_SYNTH)) {
			dataFlow.dumpSynth();
		}
		if (isEnabled(Diag.PRINT_OUM)) {
			opUse.dumpResult();
		}

		return new JitCodeGenerator(lookup, analysisContext, controlFlow, dataFlow, varScope,
			types, allocation, opUse).load();
	}

	/**
	 * Get the configuration this compiler was constructed with
	 *
	 * @return the configuration
	 */
	public JitConfiguration getConfiguration() {
		return config;
	}
}

View file

@ -0,0 +1,52 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
/**
 * The tunable settings of a JIT-accelerated emulator.
 *
 * @param maxPassageInstructions The (soft) limit on the number of instructions decoded into a
 *            single translated passage. A passage may comprise several basic blocks connected
 *            by control flow. The decoder follows contiguous, fall-through streams of
 *            instructions (called <em>strides</em>) and adds a seed at each branch it
 *            encounters. It never stops in the middle of a stride; instead, it checks the
 *            instruction count before moving on to another seed and stops if the count exceeds
 *            this max.
 * @param maxPassageOps The (soft) limit on the number of p-code ops. This works like
 *            {@link #maxPassageInstructions}, except that it counts generated p-code ops.
 *            <b>NOTE:</b> The JVM limits each method to 65,535 total bytes of bytecode, and the
 *            ASM library throws an exception if that limit is exceeded. When that happens, the
 *            compiler retries the whole process with this parameter halved.
 * @param maxPassageStrides The maximum number of strides to include.
 * @param removeUnusedOperations Whether the JIT compiler removes p-code ops whose outputs are
 *            never used later. Flags computed from arithmetic operations are one common case.
 * @param emitCounters Whether the translator emits a call to
 *            {@link JitPcodeThread#count(int, int)} at the start of each basic block.
 */
public record JitConfiguration(
		int maxPassageInstructions,
		int maxPassageOps,
		int maxPassageStrides,
		boolean removeUnusedOperations,
		boolean emitCounters) {

	/** The default for {@link #maxPassageInstructions()} */
	private static final int DEFAULT_MAX_PASSAGE_INSTRUCTIONS = 1000;
	/** The default for {@link #maxPassageOps()} */
	private static final int DEFAULT_MAX_PASSAGE_OPS = 5000;
	/** The default for {@link #maxPassageStrides()} */
	private static final int DEFAULT_MAX_PASSAGE_STRIDES = 10;
	/** The default for {@link #removeUnusedOperations()} */
	private static final boolean DEFAULT_REMOVE_UNUSED_OPERATIONS = true;
	/** The default for {@link #emitCounters()} */
	private static final boolean DEFAULT_EMIT_COUNTERS = true;

	/**
	 * Create a configuration in which every setting has its default value
	 */
	public JitConfiguration() {
		this(DEFAULT_MAX_PASSAGE_INSTRUCTIONS, DEFAULT_MAX_PASSAGE_OPS,
			DEFAULT_MAX_PASSAGE_STRIDES, DEFAULT_REMOVE_UNUSED_OPERATIONS,
			DEFAULT_EMIT_COUNTERS);
	}
}

View file

@ -0,0 +1,66 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorStatePiece.JitBytesPcodeExecutorStateSpace;
import ghidra.pcode.emu.jit.analysis.JitDataFlowState;
import ghidra.pcode.exec.BytesPcodeArithmetic;
import ghidra.pcode.exec.DefaultPcodeExecutorState;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Language;
/**
* The default implementation of {@link JitBytesPcodeExecutorState}.
*
* <p>
* <b>NOTE</b>: This is distinct from {@link JitDataFlowState}, which is used during the
* interpretation and analysis of the passage to translate. This state, in contrast, is the concrete
* state of the emulation target, but accessible in special ways to the translation output. In
* particular, the constructor of each translation is permitted direct access to some of this
* state's internals, so that it can pre-fetch, e.g., backing arrays for direct memory access
* operations.
*
* <p>
* This is just an extension of {@link DefaultPcodeExecutorState} that wraps the corresponding
* {@link JitBytesPcodeExecutorStatePiece}.
*/
public class JitDefaultBytesPcodeExecutorState extends DefaultPcodeExecutorState<byte[]>
		implements JitBytesPcodeExecutorState {

	/**
	 * Create a state for the given emulation target
	 *
	 * <p>
	 * The state wraps a new {@link JitBytesPcodeExecutorStatePiece} and uses the bytes
	 * arithmetic for the language.
	 *
	 * @param language the emulation target language
	 */
	public JitDefaultBytesPcodeExecutorState(Language language) {
		super(new JitBytesPcodeExecutorStatePiece(language),
			BytesPcodeArithmetic.forLanguage(language));
	}

	/**
	 * Get the wrapped piece, cast to the type given to it at construction
	 *
	 * @return the piece
	 */
	protected JitBytesPcodeExecutorStatePiece getPiece() {
		JitBytesPcodeExecutorStatePiece jitPiece = (JitBytesPcodeExecutorStatePiece) this.piece;
		return jitPiece;
	}

	@Override
	public JitBytesPcodeExecutorStateSpace getForSpace(AddressSpace space) {
		// Pass toWrite=true to the piece's lookup
		JitBytesPcodeExecutorStatePiece jitPiece = getPiece();
		return jitPiece.getForSpace(space, true);
	}
}

View file

@ -0,0 +1,119 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import java.lang.reflect.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.reflect.TypeLiteral;
import org.objectweb.asm.ClassVisitor;
/**
* Some utilities for generating type signatures, suitable for use with
* {@link ClassVisitor#visitField(int, String, String, String, Object)}.
*
* <p>
* <b>WARNING:</b> It seems to me, the internal representation of signatures as accepted by the ASM
* API is not fixed from version to version. In the future, these utilities may need to be updated
* to work with multiple versions, if the representation changes in a newer classfile format.
* Hopefully, the upcoming classfile API will obviate the need for any of this.
*/
public enum JitJvmTypeUtils {
	;

	/**
	 * Get the internal name of a class as in {@link org.objectweb.asm.Type#getInternalName(Class)}.
	 *
	 * @param cls the class
	 * @return the internal name
	 */
	public static String classToInternalName(Class<?> cls) {
		return org.objectweb.asm.Type.getInternalName(cls);
	}

	/**
	 * Presume the given type is a {@link Class} and get its internal name
	 *
	 * @param type the type
	 * @return the internal name
	 * @throws ClassCastException if the given type is not actually a {@link Class}
	 */
	public static String rawToInternalName(Type type) {
		return classToInternalName((Class<?>) type);
	}

	/**
	 * Get the signature of the given wildcard type
	 *
	 * <ul>
	 * <li>{@code sig(?) = *}</li>
	 * <li>{@code sig(? super MyType) = -sig(MyType)}</li>
	 * <li>{@code sig(? extends MyType) = +sig(MyType)}</li>
	 * </ul>
	 *
	 * <p>
	 * Only the first lower bound (if any) and the first upper bound are considered.
	 *
	 * @param wt the type
	 * @return the signature
	 * @throws UnsupportedOperationException if the wildcard has both a lower bound and an upper
	 *             bound other than {@link Object}
	 */
	public static String wildToSignature(WildcardType wt) {
		Type[] lowerBounds = wt.getLowerBounds();
		Type lower = lowerBounds.length == 0 ? null : lowerBounds[0];
		Type upper = wt.getUpperBounds()[0];
		if (lower == null && upper == Object.class) {
			return "*";
		}
		if (lower == null) {
			return "+" + typeToSignature(upper);
		}
		if (upper == Object.class) {
			return "-" + typeToSignature(lower);
		}
		throw new UnsupportedOperationException(
			"Wildcard with both lower and upper bounds: " + wt);
	}

	/**
	 * Get the signature of the given type
	 *
	 * <p>
	 * For the use case this supports, probably the best way to obtain a {@link Type} is via
	 * {@link TypeLiteral}.
	 *
	 * <p>
	 * As of the JVM 21, internal type signatures are derived as:
	 *
	 * <ul>
	 * <li>{@code sig(my.MyType) = Lmy/MyType;}</li>
	 * <li>Primitive classes use their one-letter descriptor, e.g., {@code sig(int) = I}</li>
	 * <li>{@code sig(my.MyType[]) = [sig(my.MyType)}</li>
	 * <li>{@code sig(my.MyType<Yet, Another, ...>) = Lmy/MyType<sig(Yet)sig(Another)...>;}. Note
	 * that the type arguments are concatenated <em>without</em> any separator.</li>
	 * <li>Wildcard types as in {@link #wildToSignature(WildcardType)}</li>
	 * <li>Type variables are not supported by these utilities</li>
	 * </ul>
	 *
	 * @param type the type
	 * @return the signature
	 * @throws UnsupportedOperationException if the type, or a type nested in it, is of a kind not
	 *             supported, e.g., a type variable
	 */
	public static String typeToSignature(Type type) {
		return switch (type) {
			/**
			 * The descriptor handles primitives and array classes correctly. For an ordinary class
			 * or interface, it is the same as wrapping the internal name in "L" and ";".
			 */
			case Class<?> cls -> org.objectweb.asm.Type.getDescriptor(cls);
			case GenericArrayType arr -> "[" + typeToSignature(arr.getGenericComponentType());
			// Type arguments must be joined with no separator; a comma is not valid here
			case ParameterizedType pt -> "L" + rawToInternalName(pt.getRawType()) + "<" +
				Stream.of(pt.getActualTypeArguments())
						.map(JitJvmTypeUtils::typeToSignature)
						.collect(Collectors.joining()) +
				">;";
			case WildcardType wt -> wildToSignature(wt);
			default -> throw new UnsupportedOperationException(
				"Unsupported type for signature: " + type);
		};
	}
}

View file

@ -0,0 +1,849 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import java.math.BigInteger;
import java.util.*;
import java.util.stream.Collectors;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.util.PseudoInstruction;
import ghidra.pcode.emu.PcodeMachine;
import ghidra.pcode.emu.PcodeThread;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.BlockSplitter;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitDataFlowModel;
import ghidra.pcode.emu.jit.decode.JitPassageDecoder;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.gen.op.OpGen;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.exec.*;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressOverflowException;
import ghidra.program.model.lang.*;
import ghidra.program.model.listing.ContextChangeException;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.mem.ByteMemBufferImpl;
import ghidra.program.model.pcode.*;
import ghidra.program.util.ProgramContextImpl;
/**
* A selection of instructions decoded from an emulation target, the generated p-code ops, and
* associated metadata.
*
* <p>
* Note that the generated p-code ops include those injected by the emulator's client using
* {@link PcodeMachine#inject(Address, String)} and {@link PcodeThread#inject(Address, String)},
* which also includes breakpoints, i.e., {@link PcodeMachine#addBreakpoint(Address, String)}.
*
* @see JitPassageDecoder Passage decoding
*/
public class JitPassage extends PcodeProgram {
/**
* Check if a given p-code op could fall through
*
* <p>
* Conditional branches and non-branching ops are the only ones that can fall through. Note that
* for JIT purposes, a {@link PcodeOp#CALL CALL} op <em>does not</em> fall through! For
* decompilation, it hints that it's branching to a subroutine that <em>usually</em> returns
* back to the caller, but the JIT compiler does not take that hint. 1) There's no guarantee it
* will actually return. 2) Even if it did, it would be via a {@link PcodeOp#RETURN}, which is
* an <em>indirect</em> branch. An indirect branch is not sufficient to join two strides in the
* same passage. Thus, we have little to gain by falling through a call, and the more likely
* outcome is the JIT and/or ASM library will eliminate the code following the call.
*
* @param op the op to consider
* @return true if the op does or could fall through
*/
public static boolean hasFallthrough(PcodeOp op) {
    // A synthetic nop is built with the UNIMPLEMENTED opcode, so it has to be recognized by its
    // type before the opcode is consulted.
    if (op instanceof NopPcodeOp) {
        return true;
    }
    int opcode = op.getOpcode();
    // Unconditional and indirect branches, calls, returns, and unimplemented ops never proceed to
    // the next op. Everything else, including CBRANCH, does or at least may.
    boolean neverFalls = opcode == PcodeOp.BRANCH || opcode == PcodeOp.BRANCHIND ||
        opcode == PcodeOp.CALL || opcode == PcodeOp.CALLIND || opcode == PcodeOp.RETURN ||
        opcode == PcodeOp.UNIMPLEMENTED;
    return !neverFalls;
}
/**
* An address-context pair
*
* <p>
* Because decode is sensitive to the contextreg value, we have to consider that visiting the
* same address with a different context could produce a completely different stride. Thus, we
* subsume the context value in a sense as part of the address when seeding the passage decoder,
* when referring to the "location" of p-code ops, when exiting a translated passage, etc.
*/
public static final class AddrCtx implements Comparable<AddrCtx> {
    /**
     * The address-context pair given to synthetic p-code ops: no context value paired with
     * {@link Address#NO_ADDRESS}
     *
     * <p>
     * This is currently used in probing an instruction (possibly instrumented) for fall
     * through, and in testing.
     */
    public static final AddrCtx NOWHERE = new AddrCtx(null, Address.NO_ADDRESS);

    /**
     * The contextreg value as a big integer
     *
     * <p>
     * This is 0 when the language does not have a context register
     */
    public final BigInteger biCtx;

    /**
     * The contextreg as a register value
     *
     * <p>
     * This is {@code null} when the language does not have a context register
     */
    public final RegisterValue rvCtx;

    /**
     * The address
     */
    public final Address address;

    /**
     * Construct an address-context pair
     *
     * @param ctx the contextreg value, or {@code null} for none
     * @param address the address, which must not be {@code null}
     */
    public AddrCtx(RegisterValue ctx, Address address) {
        if (ctx == null) {
            this.biCtx = BigInteger.ZERO;
        }
        else {
            this.biCtx = ctx.getUnsignedValue();
        }
        this.rvCtx = ctx;
        this.address = Objects.requireNonNull(address);
    }

    /**
     * Derive the address-context pair from an instruction's context
     *
     * @param insCtx the context
     * @return the address and input decode context of the instruction whose context was given
     */
    public static AddrCtx fromInstructionContext(InstructionContext insCtx) {
        RegisterValue inCtx = getInCtx(insCtx);
        return new AddrCtx(inCtx, insCtx.getAddress());
    }

    /**
     * Derive the address-context pair from an instruction
     *
     * @param instruction the instruction
     * @return the instruction's address and input decode context
     */
    public static AddrCtx fromInstruction(Instruction instruction) {
        InstructionContext insCtx = instruction.getInstructionContext();
        return fromInstructionContext(insCtx);
    }

    @Override
    public String toString() {
        return String.format("AddrCtx[ctx=%s,addr=%s]", rvCtx, address);
    }

    @Override
    public int hashCode() {
        // Uses the same two fields that equals compares
        return Objects.hash(biCtx, address);
    }

    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        return obj instanceof AddrCtx that && biCtx.equals(that.biCtx) &&
            address.equals(that.address);
    }

    @Override
    public int compareTo(AddrCtx that) {
        // The context value takes precedence; the address only breaks ties
        int byCtx = this.biCtx.compareTo(that.biCtx);
        return byCtx != 0 ? byCtx : this.address.compareTo(that.address);
    }
}
/**
* Derive the decode context value from the given instruction context
*
* @param insCtx the context
* @return the input decode context from the instruction whose context was given
*/
protected static RegisterValue getInCtx(InstructionContext insCtx) {
    ProcessorContextView view = insCtx.getProcessorContext();
    Register base = view.getBaseContextRegister();
    // No context register means there is no decode context to report
    return base == Register.NO_CONTEXT ? null : view.getRegisterValue(base);
}
/**
* Derive the decode context value from the given instruction
*
* @param instruction the instruction
* @return the input decode context from the instruction
*/
protected static RegisterValue getInCtx(Instruction instruction) {
    InstructionContext insCtx = instruction.getInstructionContext();
    return getInCtx(insCtx);
}
/**
* A branch in the p-code
*/
public interface Branch {
/**
* The op performing the branch
*
* @return the "from" op
*/
PcodeOp from();
/**
* Indicates whether this branch represents a fall-through case.
*
* <p>
* Note that the {@link #from()} may not be an actual branching p-code op when
* {@code isFall} is true. A "fall-through" branch happens in two cases. First, and most
* obvious, is to describe the fall-through case of a {@link PcodeOp#CBRANCH conditional
* branch}. Second is for a p-code op that immediately precedes the target of some other
* branch. That branch causes a split in basic blocks, and so to encode the fall through
* from that op into the basic block immediately after, a fall-through branch is added.
*
* <p>
* Unless overridden, this returns false.
*
* @return true if this branch is the fall-through case.
*/
default boolean isFall() {
return false;
}
/**
* Get a string description of the branch target
*
* <p>
* Unless overridden, this is just {@link Object#toString()} of the branch.
*
* @return the description
*/
default String describeTo() {
return toString();
}
}
/**
* A branch to another p-code op in the same passage
*
* <p>
* The {@link JitCodeGenerator} translates internal branches into JVM bytecodes for the
* equivalent branch to the translation of the target p-code op. Thus, we remain executing
* inside the {@link JitCompiledPassage#run(int) run} method. This branch type incurs the least
* run-time cost.
*
* @param from see {@link #from()}
* @param to the target p-code op
* @param isFall see {@link #isFall()}
*/
public record IntBranch(PcodeOp from, PcodeOp to, boolean isFall) implements Branch {
// The isFall component's accessor takes the place of Branch#isFall()'s default (false), so
// fall-through is decided per instance.
}
/**
* A branch to an address (and context value) not in the same passage
*
* <p>
* When execution encounters this branch, the {@link JitCompiledPassage#run(int) run} method
* sets the emulator's program counter and context to the {@link #to() branch target} and
* returns the appropriate entry point for further execution.
*
* <p>
* Note that this branch type is used by the decoder to track queued decode seeds as well.
* External branches that get decoded are changed into internal branches.
*
* @param from see {@link #from()}
* @param to the target address-context pair
*/
public record ExtBranch(PcodeOp from, AddrCtx to) implements Branch {
// Declares no isFall component, so Branch#isFall()'s default (false) applies.
}
/**
* A branch to a dynamic address
*
* <p>
* When execution encounters this branch, the {@link JitCompiledPassage#run(int) run} method
* will set the emulator's program counter to the computed address and its context to
* {@link #flowCtx()}, then return the appropriate entry point for further execution.
*
* <p>
* TODO: Some analysis may be possible to narrow the possible addresses to a known few and then
* treat this as several {@link IntBranch}es; however, I worry this is too expensive for what it
* gets us. This will be necessary if we are to JIT, e.g., a switch table.
*
* @param from see {@link #from()}
* @param flowCtx the decode context after the branch is taken
*/
public record IndBranch(PcodeOp from, RegisterValue flowCtx) implements Branch {
// Declares no isFall component, so Branch#isFall()'s default (false) applies.
}
/**
* A "branch" representing an error
*
* <p>
* When execution encounters this branch, the {@link JitCompiledPassage#run(int) run} method
* throws an exception. This branch is used to encode error conditions that may not actually be
* encountered at run time. Some cases are:
*
* <ul>
* <li>An instruction decode error &mdash; synthesized as a {@link DecodeErrorPcodeOp}</li>
* <li>An {@link PcodeOp#UNIMPLEMENTED unimplemented} instruction</li>
* <li>A {@link PcodeOp#CALLOTHER call} to an undefined userop</li>
* </ul>
*
* <p>
* The decoder and translator may encounter such an error, but unless execution actually reaches
* the error, the emulator need not crash. Thus, we note the error and generate code that will
* actually throw it in the translation, only if it's actually encountered.
*
* <p>
* Note that the {@link OpGen} for the specific p-code op generating the error will decide what
* exception type to throw.
*
* @param from see {@link #from()}
* @param message the error message for the exception
*/
public record ErrBranch(PcodeOp from, String message) implements Branch {
// Declares no isFall component, so Branch#isFall()'s default (false) applies.
}
/**
* An extension of {@link PcodeOp} that carries along with it the address and decode context
* where it occurred.
*
* <p>
* There is a difference between {@link #at}'s {@link AddrCtx#address address} vs.
* {@link #getSeqnum() seqnum}'s {@link SequenceNumber#getTarget() target}. The former is
* determined by the {@link JitPassageDecoder} and applied to all p-code ops generated at that
* address (and context value), including those from injected Sleigh. The latter is determined
* by the {@link Instruction} (or injected {@link PcodeProgram}), which have less information
* about their origins. There are also {@link DecodeErrorPcodeOp} and {@link NopPcodeOp}, which
* are synthesized by the {@link JitPassageDecoder} without an instruction or inject. This
* information is required for bookkeeping, esp., when updating the emulator's program counter
* and decode context when a p-code op produces an unexpected run-time error.
*/
public static class DecodedPcodeOp extends PcodeOp {
    private final AddrCtx at;

    /**
     * Construct a new p-code op, decoded by the {@link JitPassageDecoder}
     *
     * @param at the address and context value where the op was produced
     * @param seqnum the p-code op sequence number
     * @param opcode the p-code opcode
     * @param inputs the input varnodes
     * @param output the output varnode, or {@code null} if none or not applicable
     */
    DecodedPcodeOp(AddrCtx at, SequenceNumber seqnum, int opcode, Varnode[] inputs,
            Varnode output) {
        super(seqnum, opcode, inputs, output);
        this.at = at;
    }

    /**
     * Re-write a p-code op including its address and context value
     *
     * <p>
     * The sequence number, opcode, inputs, and output are taken from the given original p-code
     * op. Only {@link #getAt() the address-context pair} is new.
     *
     * @param at the address and context value where the op was produced
     * @param original the original p-code op
     */
    public DecodedPcodeOp(AddrCtx at, PcodeOp original) {
        this(at,
            original.getSeqnum(),
            original.getOpcode(),
            original.getInputs(),
            original.getOutput());
    }

    /**
     * Get the address and context value where this op was produced
     *
     * @return the address-context pair
     */
    public AddrCtx getAt() {
        return this.at;
    }

    /**
     * Get the address where this op was produced
     *
     * @return the address
     */
    public Address getCounter() {
        return this.at.address;
    }

    /**
     * Get the decode context where this op was produced
     *
     * @return the decode context
     */
    public RegisterValue getContext() {
        return this.at.rvCtx;
    }

    /**
     * Check if this op represents the start of an instruction
     *
     * <p>
     * If this p-code op was produced by an inject, this will return false! It only returns true
     * for an op that is genuinely the first op in the result of {@link Instruction#getPcode()}.
     * <b>WARNING:</b> This should <em>not</em> be used for branching purposes, because branches
     * to a given address are meant to target any injections there, too. Currently, this is used
     * only to count the number of instructions actually executed.
     *
     * @see JitBlock#instructionCount()
     * @see JitCompiledPassage#count(int, int)
     * @see JitPcodeThread#count(int, int)
     * @return true if this op is the first of an instruction
     */
    public boolean isInstructionStart() {
        SequenceNumber seqnum = getSeqnum();
        if (seqnum.getTime() != 0) {
            return false;
        }
        // First in its sequence: it starts the instruction only if that sequence is anchored at
        // the very address where the decoder says this op was produced.
        return seqnum.getTarget().equals(at.address);
    }
}
/**
* A synthetic p-code op that represents a return from the {@link JitCompiledPassage#run(int)}
* method.
*
* <p>
* When execution encounters this op (and the corresponding {@link ExtBranch}), the emulator's
* program counter and context values are set to the {@link ExtBranch#to() branch target}, and
* the appropriate entry point is returned.
*
* <p>
* This is used in a few ways: The simplest, though perhaps not obvious, way is when the decoder
* encounters an existing entry point. We avoid re-translating the same instructions by forcing
* the stride to end. However, the last instruction in that stride would have fall through,
* causing dangling control flow. To mitigate that, we append a synthetic exit op to return the
* existing entry point. The emulator can then resume execution accordingly.
*
* <p>
* The next is even less obvious. When the emulation client (or user) injects Sleigh, a common
* mistake is to forget control flow. The decoder detects this when "falling through" does not
* actually advance the program counter. In this case, we append this synthetic op to exit the
* translated passage. While it still results in an endless loop (just like the
* interpretation-based emulator), it's easier to interrupt and diagnose when we exit the
* translation between each "iteration."
*
* <p>
* The last is a small hack: The decoder needs to know whether each instruction (possibly
* instrumented by an inject) falls through. To do this, it appends an exit op to the very end
* of the instruction's (and inject's) ops and performs rudimentary control flow analysis (see
* {@link BlockSplitter}). It then seeks a path from start to exit. If one is found, it has fall
* through. This "probe" op is <em>not</em> included in the decoded stride.
*
*/
public static class ExitPcodeOp extends PcodeOp {
/**
* Construct a synthetic exit op
*
* <p>
* The op is an unconditional {@link PcodeOp#BRANCH} at sequence 0 of the given address, whose
* single input is that same address as a zero-size varnode. It has no output. Only the
* address of {@code at} is encoded in the op itself.
*
* @param at the address and context value to set on the emulator when exiting the
* {@link JitCompiledPassage#run(int)} method
*/
public ExitPcodeOp(AddrCtx at) {
super(new SequenceNumber(at.address, 0), PcodeOp.BRANCH, new Varnode[] {
new Varnode(at.address, 0) }, null);
}
}
/**
* A synthetic op representing the initial seed of a decoded passage.
*
* <p>
* Because we use a queue of {@link ExtBranch}es as the seed queue, and the initial seed has no
* real {@link Branch#from()}, we synthesize a {@link PcodeOp#BRANCH branch op} from the entry
* address to itself. This synthetic op is <em>not</em> included in the decoded stride.
*/
public static class EntryPcodeOp extends PcodeOp {
/**
* Construct the passage entry p-code op.
*
* <p>
* The op itself is placed at {@link Address#NO_ADDRESS}, sequence 0. Its single input is the
* entry address as a zero-size varnode. Only the address of {@code entry} is encoded in the
* op itself.
*
* @param entry the target address and decode context of the passage seed
*/
public EntryPcodeOp(AddrCtx entry) {
super(Address.NO_ADDRESS, 0, PcodeOp.BRANCH, new Varnode[] {
new Varnode(entry.address, 0) });
}
}
/**
* A synthetic p-code op meant to encode "no operation"
*
* <p>
* P-code does not have a NOP opcode, because there's usually no reason to produce such. A NOP
* machine instruction just produces an empty list of p-code ops, denoting "no operation."
* However, for bookkeeping purposes in our JIT translator, we occasionally need some op to hold
* an important place, but that op needs to do nothing. We use this in two situations:
*
* <ul>
* <li>An instruction (possibly because of an inject) that does nothing. Yes, essentially a NOP
* machine instruction. Because another op may target this instruction, and {@link Branch}es
* need to target a p-code op, we synthesize a p-code "nop" to hold that position. The
* alternative is to figure out what op immediately follows the branch target, but such an op
* may not have been decoded, yet. It's easier just to synthesize the nop.</li>
* <li>A p-code branch to the end of an instruction. Most often a slaspec author that means to
* skip the remainder of an instruction will use {@code goto inst_next}; however, because of
* sub-table structuring and/or personal preferences, sometimes we see {@code goto <end>;} where
* {@code <end>} is at the end of the instruction, and thus, no p-code op actually follows it.
* We essentially have the same situation as with the NOP machine instruction, where we can either
* synthesize a placeholder nop, or else we have to figure out what op does (or will) actually
* follow the label.</li>
* </ul>
*/
public static class NopPcodeOp extends DecodedPcodeOp {
/**
* Construct a synthetic p-code "nop"
*
* <p>
* The op borrows the {@link PcodeOp#UNIMPLEMENTED} opcode, with no inputs and no output.
* It is therefore distinguished from a genuinely unimplemented op by its type, as
* {@link JitPassage#hasFallthrough(PcodeOp)} does.
*
* @param at the address-context pair where the op was generated
* @param seq the sequence where the nop is inserted. For machine-code NOP, this should be
* 0. For a branch to the end of an instruction, this should be the next sequence
* number (so that the branch targets this nop)
*/
public NopPcodeOp(AddrCtx at, int seq) {
super(at, new SequenceNumber(at.address, seq), PcodeOp.UNIMPLEMENTED, new Varnode[] {},
null);
}
}
/**
* A synthetic p-code op denoting a decode error
*
* <p>
* The decoder may encounter several decode errors as it selects and decodes the passage. An
* instruction is selected because the JIT believes it <em>may</em> be executed by the emulator.
* (Predicting this and making good selections is a matter of further research.) Encountering a
* decode error along a possible path is not cause to throw an exception. However, if the
* emulator does in fact attempt to execute the bytes which it can't decode, then we do throw
* the exception. This p-code op is synthesized where such decode errors occur, and the
* translator will generate code that actually throws the exception. Note that the error message
* is placed in the corresponding {@link ErrBranch}.
*/
public static class DecodeErrorPcodeOp extends DecodedPcodeOp {
/**
* Construct a p-code op representing an instruction decode error.
*
* <p>
* The op uses the {@link PcodeOp#UNIMPLEMENTED} opcode at sequence 0 of the given address,
* with no inputs and no output.
*
* @param at the address and decode context where the error occurred
*/
public DecodeErrorPcodeOp(AddrCtx at) {
super(at, new SequenceNumber(at.address, 0), PcodeOp.UNIMPLEMENTED, new Varnode[] {},
null);
}
}
/**
* An instruction denoting a decode error
*
* <p>
* The Sleigh disassembler normally denotes this with a {@link PseudoInstruction} having an
* {@link InvalidPrototype}. We essentially do the same here, but with custom types that are
* simpler to identify. Additionally, the types contain additional information, e.g., the error
* message. We also need the prototype to produce a single {@link DecodeErrorPcodeOp}.
*/
public static class DecodeErrorInstruction extends PseudoInstruction {
/**
* The prototype for the decode error instruction
*/
static class DecodeErrorPrototype extends InvalidPrototype {
public DecodeErrorPrototype(Language language) {
super(language);
}
@Override
public PcodeOp[] getPcode(InstructionContext context, PcodeOverride override) {
return new PcodeOp[] {
new DecodeErrorPcodeOp(AddrCtx.fromInstructionContext(context)) };
}
}
/**
* An implementation of {@link ProcessorContext} to satisfy the requirements of the
* {@link PseudoInstruction}.
*
* <p>
* This need do little more than provide the decode context register value.
*/
static class DecodeErrorProcessorContext implements ProcessorContext {
private final Language language;
private final RegisterValue ctx;
public DecodeErrorProcessorContext(Language language, RegisterValue ctx) {
this.language = language;
this.ctx = ctx;
}
@Override
public Register getBaseContextRegister() {
return language.getContextBaseRegister();
}
@Override
public List<Register> getRegisters() {
return language.getRegisters();
}
@Override
public Register getRegister(String name) {
return language.getRegister(name);
}
@Override
public BigInteger getValue(Register register, boolean signed) {
if (register == language.getContextBaseRegister()) {
return signed ? ctx.getSignedValue() : ctx.getUnsignedValue();
}
return null;
}
@Override
public RegisterValue getRegisterValue(Register register) {
if (register == language.getContextBaseRegister()) {
return ctx;
}
return null;
}
@Override
public boolean hasValue(Register register) {
return register == language.getContextBaseRegister();
}
@Override
public void setValue(Register register, BigInteger value)
throws ContextChangeException {
}
@Override
public void setRegisterValue(RegisterValue value)
throws ContextChangeException {
}
@Override
public void clearRegister(Register register) throws ContextChangeException {
}
}
private final String message;
/**
* Construct an instruction to indicate a decode error
*
* @param language the emulation target langauge
* @param address the address where decode was attempted
* @param ctx the input decode context
* @param message a message for the {@link DecodePcodeExecutionException} if the emulator
* attempts to execute this instruction
* @throws AddressOverflowException never
*/
public DecodeErrorInstruction(Language language, Address address, RegisterValue ctx,
String message) throws AddressOverflowException {
super(address, new DecodeErrorPrototype(language),
new ByteMemBufferImpl(address, new byte[] { 0 }, language.isBigEndian()),
new DecodeErrorProcessorContext(language, ctx));
this.message = message;
}
/**
* Get the message for the exception, should this instruction be "executed"
*
* @return the error message
*/
public String getMessage() {
return message;
}
}
/**
* Create an instruction to indicate a decode error
*
* <p>
* The resulting instruction will produce a single {@link DecodeErrorPcodeOp}. The translator
* will generate code that throws a {@link DecodePcodeExecutionException} should execution reach
* it.
*
* @param language the emulation target language
* @param address the address where decode was attempted
* @param ctx the input decode context
* @param message a message for the {@link DecodePcodeExecutionException}
* @return the new "instruction"
*/
public static DecodeErrorInstruction decodeError(Language language, Address address,
        RegisterValue ctx, String message) {
    DecodeErrorInstruction instruction;
    try {
        instruction = new DecodeErrorInstruction(language, address, ctx, message);
    }
    catch (AddressOverflowException e) {
        // The constructor documents that it never actually throws this
        throw new AssertionError(e);
    }
    return instruction;
}
// The decoded instructions, exactly as given to the constructor
private final List<Instruction> instructions;
// The initial seed of the passage
private final AddrCtx entry;
// The userop library used during decode; also supplies the symbols given to the superclass
private final PcodeUseropLibrary<Object> decodeLibrary;
// Branch records, keyed by the op each branches from
private final Map<PcodeOp, Branch> branches;
// For each op recorded as an entry, the address-context pair it enters
private final Map<PcodeOp, AddrCtx> entries;
// The language's base context register; Register.NO_CONTEXT if it has none
private final Register contextreg;
// The language's context settings applied to a fresh context; null if there is no contextreg
private final ProgramContextImpl defaultContext;
/**
* Construct a new passage
*
* @param language the translation source language, i.e., the emulation target language. See
* {@link #getLanguage()}
* @param entry see {@link #getEntry()}
* @param code the p-code ops, grouped by stride. Within each stride, they are ordered as
* decoded and produced by their instructions. The strides are sorted by seed, with
* precedence to the decode context value. See {@link #getInstructions()}. See
* {@link #getCode()}.
* @param decodeLibrary see {@link #getDecodeLibrary()}
* @param instructions see {@link #getInstructions()}
* @param branches see {@link #getBranches()}
* @param entries see {@link #getOpEntry(PcodeOp)}
*/
public JitPassage(SleighLanguage language, AddrCtx entry, List<PcodeOp> code,
PcodeUseropLibrary<Object> decodeLibrary, List<Instruction> instructions,
Map<PcodeOp, Branch> branches, Map<PcodeOp, AddrCtx> entries) {
super(language, code, decodeLibrary.getSymbols(language));
this.entry = entry;
this.decodeLibrary = decodeLibrary;
this.instructions = instructions;
this.branches = branches;
this.entries = entries;
this.contextreg = language.getContextBaseRegister();
if (contextreg != Register.NO_CONTEXT) {
defaultContext = new ProgramContextImpl(language);
language.applyContextSettings(defaultContext);
}
else {
defaultContext = null;
}
}
/**
* Get all of the instructions in the passage.
*
* <p>
* These are grouped by stride. Within each stride, the instructions are listed in decode order.
* The strides are ordered by seed address-context pair, with context value taking precedence.
*
* @return the list of instructions
*/
public List<Instruction> getInstructions() {
// This is the very list given at construction; no copy is made
return instructions;
}
/**
* {@inheritDoc}
*
* <p>
* Conventionally, the first instruction of the program is the entry. Note this might
* <em>not</em> be the initial seed. If the decoded passage contains a branch to an address
* preceding the seed, and a stride results from it, then that stride's p-code will occur
* earlier in the list. This is not a problem. The code generator will export many entry points,
* and the seed must be among them. "Entering" at that seed is achieved using a switch table at
* the start of the generated bytecode.
*/
@Override
public List<PcodeOp> getCode() {
// Pure delegation: nothing is added to or changed in the inherited behavior
return super.getCode();
}
/**
* Get the initial seed of this passage.
*
* <p>
* This is informational only. It should be used in naming things and/or in diagnostics.
*
* @return the address-context pair
*/
public AddrCtx getEntry() {
// The seed exactly as given at construction
return entry;
}
/**
* Get the userop library that was used during decode of the passage
*
* <p>
* This often wraps the emulator's userop library. Downstream components, namely the
* {@link JitDataFlowModel}, will need this when translating {@link PcodeOp#CALLOTHER calls} to
* userops.
*
* @return the library
*/
public PcodeUseropLibrary<Object> getDecodeLibrary() {
// The same library whose symbols were passed to the superclass at construction
return decodeLibrary;
}
/**
* Get all of the (non-fall-through) branches in the passage
*
* @return the branches, keyed by {@link Branch#from()}.
*/
public Map<PcodeOp, Branch> getBranches() {
// This is the very map given at construction; no copy is made
return branches;
}
@Override
public String toString() {
    // One line per instruction: its input decode context, its address, then its text
    String listing = instructions.stream()
            .map(ins -> "(" + getInCtx(ins) + ") " + ins.getAddressString(false, true) + " " +
                ins.toString())
            .collect(Collectors.joining("\n "));
    // The instruction listing is followed by the formatted p-code
    return "<" + getClass().getSimpleName() + ":\n " + listing + "\n>\n" + format(true);
}
/**
* Check if a given p-code op is the first of an instruction.
*
* <p>
* <b>NOTE</b>: If an instruction is at an address with an inject, then the first op produced by
* the inject is considered the "entry" to the instruction. This is to ensure that any control
* flow to the injected address executes the injected code, not just the instruction's code.
*
* @param op the op to check.
* @return the address-context pair that generated the op, if it is the first there, or
* {@code null}
*/
public AddrCtx getOpEntry(PcodeOp op) {
// A plain map lookup in the entries given at construction
return entries.get(op);
}
/**
* If the given p-code op is known to cause an error, e.g., an unimplemented instruction, get
* the error message.
*
* @param op the p-code op causing the error
* @return the message for the error caused
*/
public String getErrorMessage(PcodeOp op) {
    Branch branch = branches.get(op);
    if (branch == null) {
        throw new AssertionError("No branch record for op: " + op);
    }
    if (branch instanceof ErrBranch err) {
        return err.message();
    }
    // Any other kind of branch means the caller asked about an op that is not an error
    throw new AssertionError("Wrong branch type " + branch + " for op: " + op);
}
}

View file

@ -0,0 +1,413 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import java.lang.invoke.MethodHandles.Lookup;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.objectweb.asm.MethodTooLargeException;
import ghidra.pcode.emu.PcodeEmulator;
import ghidra.pcode.emu.PcodeThread;
import ghidra.pcode.emu.jit.JitPassage.AddrCtx;
import ghidra.pcode.emu.jit.analysis.JitDataFlowModel;
import ghidra.pcode.emu.jit.analysis.JitDataFlowUseropLibrary;
import ghidra.pcode.emu.jit.decode.JitPassageDecoder;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage.EntryPointPrototype;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassageClass;
import ghidra.pcode.emu.jit.var.JitVal;
import ghidra.pcode.exec.*;
import ghidra.pcode.exec.PcodeExecutorStatePiece.Reason;
import ghidra.program.model.address.AddressRange;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Language;
import ghidra.program.model.pcode.Varnode;
import ghidra.util.Msg;
/**
* An extension of {@link PcodeEmulator} that applies Just-in-Time (JIT) translation to accelerate
* execution.
*
* <p>
* This is meant as a near drop-in replacement for the class it extends. Aside from some additional
* configuration, and some annotations you might add to a {@link PcodeUseropLibrary}, if applicable,
* you can simply replace {@code new PcodeEmulator()} with {@code new JitPcodeEmulator(...)}.
*
* <h1>A JIT-Accelerated P-code Emulator for the Java Virtual Machine</h1>
*
* <p>
* There are two major tasks to achieving JIT-accelerated p-code emulation: 1) The translation of
* p-code to a suitable target's machine language, and 2) The selection, decoding, and cache
* management of passages of machine code translations. For our purposes, the target language is JVM
* bytecode, which introduces some restrictions which make the translation process substantially
* different than targeting native machine language.
*
* <h2>Terminology</h2>
*
* <p>
* Because of the potential for confusion of terms with similar meanings from similar disciplines,
* and to distinguish our particular use of the terms, we establish some definitions up front:
*
* <ul>
*
* <li><b>Basic block</b>: A block of <em>p-code</em> ops for which there are no branches into or
* from, except at its top and bottom. Note that this definition pertains only to p-code ops in the
* same passage. Branches into a block from ops generated elsewhere in the translation source need
* not be considered. Note also that p-code basic blocks might not coincide with machine-code basic
* blocks.</li>
*
* <li><b>Bytecode</b>: Shorthand for "JVM bytecode." Others sometimes use this to mean any machine
* code, but for us "bytecode" only refers to the JVM's machine code.</li>
*
* <li><b>Decode context</b>: The input contextreg value for decoding an instruction. This is often
* paired with an address to seed passages, identify an instruction's "location," and identify an
* entry point.</li>
*
* <li><b>Emulation host</b>: The machine or environment on which the emulation target is being
* hosted. This is usually also the <b>translation target</b>. For our purposes, this is the JVM,
* often the same JVM executing Ghidra.</li>
*
* <li><b>Emulation target</b>: The machine being emulated. As opposed to the <b>translation
* target</b> or <b>emulation host</b>. While this can include many aspects of a target platform, we
* often just mean the Instruction Set Architecture (ISA, or <b>language</b>) of the machine.</li>
*
* <li><b>Entry point</b>: An address (and contextreg value) by which execution may enter a passage.
* In addition to the decode seed, the translator may expose many entries into a given passage,
* usually at branch targets or the start of each basic block coinciding with an instruction.</li>
*
* <li><b>Instruction</b>: A single machine-code instruction.</li>
*
* <li><b>Machine code</b>: The sequence of bytes and/or decoded instructions executed by a
* machine.</li>
*
* <li><b>Passage</b>: A collection of strides connected by branches. Often each stride begins at
* the target of some branch in another stride.</li>
*
* <li><b>P-code</b>: An intermediate representation used by Ghidra in much of its analysis and
* execution modeling. For our purposes, we mean "low p-code," which is the common language into
* which the source machine code is translated before final translation to bytecode.</li>
*
* <li><b>P-code op</b>: A single p-code operation. A single instruction usually generates several
* p-code ops.</li>
*
* <li><b>Stride</b>: A contiguous sequence of instructions (and their emitted p-code) connected by
* fall-through. Note that conditional branches may appear in the middle of the stride. So long as
* fall-through is possible, the stride may continue.</li>
*
* <li><b>Translation source</b>: The machine code of the <b>emulation target</b> that is being
* translated and subsequently executed by the <b>emulation host</b>.</li>
*
* <li><b>Translation target</b>: The target of the JIT translation, usually the <b>emulation
* host</b>. For our purposes, this is always JVM bytecode.</li>
*
* <li><b>Varnode</b>: The triple (space,offset,size) giving the address and size of a variable in
* the emulation target's machine state. This is distinct from a variable node (see {@link JitVal})
* in the {@link JitDataFlowModel use-def} graph. The name "{@link Varnode}" is an unfortunate
* inheritance from the Ghidra API, where they <em>can</em> represent genuine variable nodes in the
* "high p-code" returned by the decompiler. However, the emulator consumes the "low p-code" where
* varnodes are mere triples, which is how we use the term.</li>
*
* </ul>
*
* <h2>Just-in-Time Translation</h2>
* <p>
* For details of the translation process, see {@link JitCompiler}.
*
* <h2>Translation Cache</h2>
* <p>
* This class, aside from overriding and replacing the state and thread objects with respective
* extensions, manages a part of the translation cache. For reasons discussed in the translation
* section, there are two levels of caching. Once a passage is translated into a classfile, it must
* be loaded as a class and then instantiated for the thread executing it. Thus, at the machine (or
* emulator) level, each translated passage's class is cached. Then, each thread caches its instance
* of that class. When a thread encounters an address (and contextreg value) that it has not yet
* translated, it requests that the emulator perform that translation. The details of this check are
* described in {@link #getEntryPrototype(AddrCtx, JitPassageDecoder)} and
* {@link JitPcodeThread#getEntry(AddrCtx)}.
*/
public class JitPcodeEmulator extends PcodeEmulator {
	/**
	 * The compiler which translates passages into JVM classes
	 */
	protected final JitCompiler compiler;
	/**
	 * A lookup to access non-public things
	 */
	private final Lookup lookup;
	/**
	 * This emulator's cache of passage translations, incl. all entry points.
	 *
	 * <p>
	 * Each value is a future so that a placeholder can be entered while a translation is still in
	 * progress. Accesses in this class are performed while synchronized on the map itself.
	 *
	 * <p>
	 * TODO: Invalidation of entries. One possible complication is any thread may still have an
	 * instance of one, and could possibly be executing it. Perhaps this could be a weak hash map,
	 * and they'll stay alive by virtue of the instances pointing to their classes? Still, we might
	 * like to impose a total size max, which would have to be implemented among the threads. Other
	 * reasons we may need to invalidate include:
	 *
	 * <ol>
	 * <li>Self-modifying code (we'll probably want to provide a configuration toggle given how
	 * expensive that may become).</li>
	 * <li>Changes to the memory map. At the moment, however, the p-code emulator does not provide a
	 * memory management unit (MMU).</li>
	 * <li>Addition of a new inject by the user or script. This one's actually pretty likely. For
	 * now, we might just document that injects should not be changed once execution starts.</li>
	 * </ol>
	 */
	protected final Map<AddrCtx, CompletableFuture<EntryPointPrototype>> codeCache =
		new HashMap<>();

	/**
	 * Create a JIT-accelerated p-code emulator
	 *
	 * @param language the emulation target language
	 * @param config configuration options for this emulator
	 * @param lookup a lookup in case the emulator (or its target) needs access to non-public
	 *            elements, e.g., to access a nested {@link PcodeUseropLibrary}.
	 */
	public JitPcodeEmulator(Language language, JitConfiguration config, Lookup lookup) {
		super(language);
		this.compiler = new JitCompiler(config);
		this.lookup = lookup;
	}

	@Override
	protected PcodeExecutorState<byte[]> createSharedState() {
		return new JitDefaultBytesPcodeExecutorState(language);
	}

	@Override
	protected PcodeExecutorState<byte[]> createLocalState(PcodeThread<byte[]> thread) {
		return new JitDefaultBytesPcodeExecutorState(language);
	}

	@Override
	protected JitPcodeThread createThread(String name) {
		return new JitPcodeThread(name, this);
	}

	@Override
	public JitPcodeThread newThread() {
		return (JitPcodeThread) super.newThread();
	}

	@Override
	public JitPcodeThread newThread(String name) {
		return (JitPcodeThread) super.newThread(name);
	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * Userops can be optimized by the JIT translator under certain circumstances. To read more, see
	 * {@link JitDataFlowUseropLibrary}. DO NOT extend that library. The internals use it to wrap
	 * the library you provide here, but its documentation describes when and how the JIT translator
	 * optimizes invocations to your userops.
	 *
	 * <p>
	 * <b>WARNING</b>: Userops that accept floating point types via direct invocation should be
	 * careful that the sizes match exactly. That is, if you pass a {@code float} argument to a
	 * {@code double} parameter, you may have problems. This <em>does not</em> imply a conversion of
	 * floating point type. Instead, it will simply zero-fill the upper bits (as if zero-extending
	 * an integer) and reinterpret the resulting bits as a double. This is almost certainly
	 * <em>not</em> what you want. Until/unless we resolve this, the userop implementor must accept
	 * the proper types. It's possible multiple versions of the userop must be provided (overloading
	 * is not supported) to accept types of various sizes.
	 */
	@Override
	protected PcodeUseropLibrary<byte[]> createUseropLibrary() {
		return super.createUseropLibrary();
	}

	/**
	 * Check if the emulator already has translated a given entry point.
	 *
	 * <p>
	 * This is used by the decoder to detect if it should end a stride before reaching its natural
	 * end (i.e., a non-fall-through instruction.) This was a design decision to reduce
	 * re-translation of the same machine code. Terminating the stride will cause execution to exit
	 * the translated passage, but it will then immediately enter the existing translated passage.
	 *
	 * <p>
	 * An entry whose translation is still in progress (a placeholder that is not yet done) is
	 * reported as absent.
	 *
	 * @param pcCtx the program counter and contextreg value to check
	 * @return true if the emulator has a translation which can be entered at the given pcCtx.
	 */
	public boolean hasEntryPrototype(AddrCtx pcCtx) {
		/**
		 * TODO: Investigate ignoring synchronization and instead catching the CME. This would be to
		 * avoid locking on every instruction decode. If we think there's not an entry, and it
		 * turns out we just lost a race, it's little loss.
		 *
		 * I don't think in the grand scheme of things, this is the most expensive operation of the
		 * translation. Nevertheless, it'll be hit a lot, so worth investigating.
		 */
		synchronized (codeCache) {
			CompletableFuture<EntryPointPrototype> proto = codeCache.get(pcCtx);
			return proto != null && proto.isDone();
		}
	}

	/**
	 * Translate a new passage starting at the given seed.
	 *
	 * <p>
	 * The passage is decoded using the configured maximum number of p-code ops. If the compiler
	 * reports that the generated method is too large, the limit is halved and the passage is
	 * decoded and compiled again. This repeats until compilation succeeds or the limit reaches 0.
	 *
	 * <p>
	 * Note the compiler must provide an entry to the resulting passage at the requested seed. It
	 * and any additional entry points are placed into the code cache by the caller. Each thread
	 * executing the passage must still create (and ought to cache) an instance of the translation.
	 *
	 * @param pcCtx the seed address and contextreg value for decoding and selecting a passage
	 * @param decoder the passage decoder, provided by the thread
	 * @return the class that is the translation of the passage, and information about its entry
	 *         points.
	 * @throws AssertionError if no limit greater than 0 yields a method small enough to compile.
	 *             The last {@link MethodTooLargeException}, if any, is attached as the cause.
	 */
	protected JitCompiledPassageClass compileWithMaxOpsBackoff(AddrCtx pcCtx,
			JitPassageDecoder decoder) {
		final int configuredMaxOps = getConfiguration().maxPassageOps();
		int maxOps = configuredMaxOps;
		// Remembered so the final failure can report why every attempt was rejected
		MethodTooLargeException lastFailure = null;
		while (maxOps > 0) {
			JitPassage decoded = decoder.decodePassage(pcCtx, maxOps);
			try {
				return compiler.compilePassage(lookup, decoded);
			}
			catch (MethodTooLargeException e) {
				lastFailure = e;
				Msg.warn(this, "Method too large for " + pcCtx + " with maxOps=" + maxOps +
					". Retrying with half.");
				maxOps >>= 1;
			}
		}
		/**
		 * This would be caused by an exceptionally large stride, perhaps with a good bit of
		 * instrumentation.
		 *
		 * TODO: If this happens, we'll need to be willing to stop decoding mid-stride. I think it's
		 * easily doable, as we already do this when we hit an address with an existing entry point.
		 *
		 * NOTE: We still need to treat each instruction, along with any instrumentation on it, as
		 * an atomic unit. I can't imagine a single instruction maxing out the Java method size,
		 * though.
		 */
		throw new AssertionError("Could not translate a passage at " + pcCtx +
			" within the JVM method size limit (configured maxPassageOps=" + configuredMaxOps +
			")", lastFailure);
	}

	/**
	 * Get the entry prototype for a given address and contextreg value.
	 *
	 * <p>
	 * An <b>entry prototype</b> is a class representing a translated passage and an index
	 * identifying the point at which to enter the passage. The compiler numbers each entry point it
	 * generates and provides those indices via a static field in the output class. Those entry
	 * point indices are entered into the code cache for each translated passage. If no entry point
	 * exists for the requested address and contextreg value, the emulator will decode and translate
	 * a new passage at the requested seed.
	 *
	 * <p>
	 * It's a bit odd to take the thread's decoder for a machine-level thing; however, all thread
	 * decoders ought to have the same behavior. The particular thread's decoder will have better
	 * cached instruction block state for decoding in the vicinity of its past execution, though.
	 *
	 * <p>
	 * If translation fails, or the compiler does not provide an entry for the requested seed, the
	 * cached future is completed exceptionally, so that neither this caller nor any other thread
	 * waiting on the same entry blocks indefinitely.
	 *
	 * @param pcCtx the counter and decoder context
	 * @param decoder the thread's decoder needing this entry point prototype
	 * @return the entry point prototype
	 * @throws AssertionError if the calling thread is interrupted while waiting for another
	 *             thread's translation. The thread's interrupt status is restored first.
	 * @see JitPcodeThread#getEntry(AddrCtx)
	 */
	public EntryPointPrototype getEntryPrototype(AddrCtx pcCtx, JitPassageDecoder decoder) {
		/**
		 * NOTE: It is possible for a race condition, still, if (very likely) the passage provides
		 * multiple entry points. It's not ideal, but still correct, I think, if this happens.
		 */
		CompletableFuture<EntryPointPrototype> proto;
		boolean wasAbsent;
		synchronized (codeCache) {
			proto = codeCache.get(pcCtx);
			wasAbsent = proto == null;
			if (wasAbsent) {
				proto = new CompletableFuture<>();
				codeCache.put(pcCtx, proto);
				// Won't know to put other entry points, yet
			}
		}
		/**
		 * TODO: I'm not sure it makes sense to do this computation without the lock.
		 *
		 * On the one hand, it allows threads to avoid stalling on every translation, and instead
		 * only on translations for the same entry point. However, if we do keep the lock, then we
		 * can avoid the race condition on alternative entry points.
		 */
		if (wasAbsent) {
			/**
			 * Go ahead and use this thread instead of spawning another, because this one can't
			 * proceed until compilation is completed, anyway.
			 */
			try {
				JitCompiledPassageClass compiled = compileWithMaxOpsBackoff(pcCtx, decoder);
				synchronized (codeCache) {
					for (Entry<AddrCtx, EntryPointPrototype> ent : compiled.getBlockEntries()
							.entrySet()) {
						if (ent.getKey().equals(pcCtx)) {
							proto.complete(ent.getValue());
						}
						else {
							codeCache.put(ent.getKey(),
								CompletableFuture.completedFuture(ent.getValue()));
						}
					}
				}
				if (!proto.isDone()) {
					/**
					 * The compiler broke its contract to provide an entry at the seed. Fail the
					 * placeholder; otherwise, the get() below (and that of any other thread
					 * waiting on this entry) would never return.
					 */
					proto.completeExceptionally(new AssertionError(
						"Compiler provided no entry point for the requested seed " + pcCtx));
				}
			}
			catch (Throwable t) {
				proto.completeExceptionally(t);
			}
		}
		try {
			return proto.get();
		}
		catch (InterruptedException e) {
			// Preserve the interrupt for code further up the stack
			Thread.currentThread().interrupt();
			throw new AssertionError(e);
		}
		catch (ExecutionException e) {
			return ExceptionUtils.rethrow(e);
		}
	}

	/**
	 * Get the configuration for this emulator.
	 *
	 * @return the configuration
	 */
	public JitConfiguration getConfiguration() {
		return compiler.getConfiguration();
	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * <b>TODO</b>: The JIT-accelerated emulator does not currently implement access breakpoints.
	 * Furthermore, because JIT generated code is granted direct access to the emulator's state
	 * internals, it is not sufficient to override
	 * {@link PcodeExecutorStatePiece#getVar(AddressSpace, Object, int, boolean, Reason) getVar} and
	 * related.
	 *
	 * @throws UnsupportedOperationException always
	 */
	@Override
	public void addAccessBreakpoint(AddressRange range, AccessKind kind) {
		throw new UnsupportedOperationException();
	}
}

View file

@ -0,0 +1,278 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import java.util.HashMap;
import java.util.Map;
import ghidra.lifecycle.Internal;
import ghidra.pcode.emu.*;
import ghidra.pcode.emu.jit.JitPassage.AddrCtx;
import ghidra.pcode.emu.jit.decode.JitPassageDecoder;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage.EntryPoint;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage.EntryPointPrototype;
import ghidra.pcode.exec.*;
import ghidra.program.model.address.Address;
import ghidra.program.model.listing.ProgramContext;
/**
* A JIT-accelerated thread of p-code emulation
*
* <p>
* This class implements the actual JIT-accelerated execution loop. In contrast to the normal
* per-instruction Fetch-Execute-Store loop inherited from {@link DefaultPcodeThread}, this thread's
* {@link #run()} method implements a per-<em>passage</em> Fetch-Decode-Translate-Execute loop.
*
*
* <h2>Fetch</h2>
* <p>
* The Fetch step involves checking the code cache for an existing translation at the thread's
* current counter and decode context. Cache entries are keyed by <em>passage entry point</em>, that
* is an address (and context reg value, if applicable) within a passage where execution is
* permitted to enter. This typically consists of the passage's seed as well as each branch target
* in the same passage. If one is found, we skip the Decode and Translate steps, and proceed
* directly to Execute.
*
* <h2>Decode</h2>
* <p>
* The Decode step involves decoding and selecting several instructions into a <em>passage</em>. A
* passage may comprise several instructions connected by control flow. Often it is a few long
* strides of instructions connected by a few branches. The decoder will avoid selecting
* instructions that are already included in an existing translated passage. The reason for this
* complexity is that JVM bytecode cannot be rewritten or patched once loaded. For more details, see
* {@link JitPassageDecoder}.
*
* <h2>Translate</h2>
* <p>
* The Translate step involves translating the selected passage of instructions. The result of this
* translation implements {@link JitCompiledPassage}. For details of this translation process, see
* {@link JitCompiler}. The compiled passage provides a list of its entry points. Each is added to
* the emulator's code cache. Among those should be the seed required by this iteration of the
* execution loop, and so that entry point is chosen.
*
* <h2>Execute</h2>
* <p>
* The chosen entry point is then executed. This step is as simple as invoking the
* {@link EntryPoint#run()} method. This, in turn, invokes {@link JitCompiledPassage#run(int)},
* providing the entry point's index as an argument. The index identifies to the translated passage
* the desired address of entry, and so it jumps directly to the corresponding translation. That
* translation performs all the equivalent operations of the selected instructions, adhering to any
* control flow within. When control flow exits the passage, the method returns, and the loop
* repeats.
*/
public class JitPcodeThread extends BytesPcodeThread {
	/**
	 * The decoder that selects passages for this thread. It is built atop this thread's
	 * {@link #getDecoder() instruction decoder}.
	 */
	protected final JitPassageDecoder passageDecoder;
	/**
	 * The translations that have been instantiated for this thread, keyed by entry point.
	 *
	 * <p>
	 * The translator generates classes which pre-fetch portions of the thread's state, as an
	 * optimization. Consequently, each thread wishing to execute a translation needs its own
	 * instance of the class.
	 *
	 * <p>
	 * TODO: Invalidation of entries. An entry may need to be invalidated for several reasons:
	 * Expiration, eviction, or perhaps because the {@link EntryPointPrototype} (from the emulator)
	 * was invalidated.
	 */
	protected final Map<AddrCtx, EntryPoint> codeCache = new HashMap<>();

	/**
	 * Construct a JIT-accelerated thread
	 *
	 * <p>
	 * Only the emulator and its test suites should invoke this.
	 *
	 * @param name the name of the thread
	 * @param machine the machine creating the thread
	 */
	public JitPcodeThread(String name, JitPcodeEmulator machine) {
		super(name, machine);
		this.passageDecoder = createPassageDecoder();
	}

	@Override
	protected ThreadPcodeExecutorState<byte[]> createThreadState(
			PcodeExecutorState<byte[]> sharedState, PcodeExecutorState<byte[]> localState) {
		// Both portions are cast to the JIT state type
		JitDefaultBytesPcodeExecutorState jitShared =
			(JitDefaultBytesPcodeExecutorState) sharedState;
		JitDefaultBytesPcodeExecutorState jitLocal =
			(JitDefaultBytesPcodeExecutorState) localState;
		return new JitThreadBytesPcodeExecutorState(jitShared, jitLocal);
	}

	/**
	 * Factory for this thread's passage decoder
	 *
	 * <p>
	 * Extensions may override this to substitute a further-extended decoder.
	 *
	 * @return the new passage decoder
	 */
	protected JitPassageDecoder createPassageDecoder() {
		return new JitPassageDecoder(this);
	}

	@Override
	public JitPcodeEmulator getMachine() {
		return (JitPcodeEmulator) super.getMachine();
	}

	@Override
	public JitThreadBytesPcodeExecutorState getState() {
		return (JitThreadBytesPcodeExecutorState) super.getState();
	}

	@Internal
	@Override
	public PcodeProgram getInject(Address address) {
		return super.getInject(address);
	}

	/**
	 * Expose this thread's instruction decoder to the passage decoder.
	 *
	 * @return the decoder
	 */
	@Internal
	public InstructionDecoder getDecoder() {
		return decoder;
	}

	/**
	 * Expose the language's default program context to the passage decoder.
	 *
	 * @return the context
	 */
	@Internal
	public ProgramContext getDefaultContext() {
		return defaultContext;
	}

	@Override
	public void inject(Address address, String source) {
		/**
		 * TODO: Should the code cache be flushed here? The alternative is a convention that
		 * injects cannot change during the life cycle. I don't like that solution. It is workable,
		 * I think, but the user would have to add state to a library in order to configure/toggle
		 * each injection.
		 *
		 * Would it suffice to find the passages containing the address and remove just those? I
		 * think so. The only nuance I can think of is that the inject may change the block
		 * structure, i.e., new entries are possible, but I don't think that matters terribly. The
		 * caching algorithm should work that out.
		 */
		super.inject(address, source);
	}

	/**
	 * Check if the <em>emulator</em> has an entry prototype for the given address and contextreg
	 * value.
	 *
	 * <p>
	 * This is a pass-through to the emulator. Whether this particular thread has instantiated the
	 * prototype is irrelevant: If any thread has caused the emulator to translate the given entry,
	 * this returns true.
	 *
	 * @see JitPcodeEmulator#hasEntryPrototype(AddrCtx)
	 * @param pcCtx the address and contextreg to check
	 * @return true if the emulator has a translation which can be entered at the given pcCtx.
	 */
	public boolean hasEntry(AddrCtx pcCtx) {
		JitPcodeEmulator emulator = getMachine();
		return emulator.hasEntryPrototype(pcCtx);
	}

	/**
	 * Get the translated and instantiated entry point for the given address and contextreg value.
	 *
	 * <p>
	 * An <b>entry point</b> is an instance of a class representing a translated passage, paired
	 * with an index identifying where to enter the passage. Put another way, it is this thread's
	 * instance of an <b>entry prototype</b>.
	 *
	 * <p>
	 * This thread's cache is consulted first. On a miss, the request is delegated to the emulator,
	 * which consults its own cache of translations. If the emulator has one, it is instantiated for
	 * this thread. Otherwise, the emulator translates a new passage at the given seed, and that is
	 * instantiated for this thread.
	 *
	 * @see JitPcodeEmulator#getEntryPrototype(AddrCtx, JitPassageDecoder)
	 * @param pcCtx the counter and decoder context
	 * @return the entry point
	 */
	public EntryPoint getEntry(AddrCtx pcCtx) {
		/**
		 * NOTE: Placeholders are needed at the machine level, not here at the thread level.
		 */
		return codeCache.computeIfAbsent(pcCtx, seed -> {
			EntryPointPrototype prototype = getMachine().getEntryPrototype(seed, passageDecoder);
			return prototype.createInstance(this);
		});
	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * This is the only method overridden to accelerate execution using JIT translation. From an
	 * efficiency standpoint, implementing single stepping via JIT makes little sense. Stepping via
	 * interpretation (as inherited) remains supported, and the two execution paradigms may be
	 * mixed. However, running with JIT after a few single steps incurs some waste, since the JIT
	 * translates an otherwise uncommon entry point. Because of caching, the effect on overall
	 * efficiency may vary with circumstances and the order of operations.
	 */
	@Override
	public void run() {
		setSuspended(false);
		if (frame != null) {
			finishInstruction();
		}
		EntryPoint entry = null;
		while (!isSuspended()) {
			if (entry == null) {
				// No entry was handed back by the previous passage: look one up at the counter
				AddrCtx here = new AddrCtx(getContext(), getCounter());
				entry = getEntry(here);
			}
			try {
				entry = entry.run();
			}
			catch (SuspendedPcodeExecutionException ignored) {
				// Deliberately ignored. The loop condition decides whether to continue.
			}
		}
	}

	/**
	 * This is called before each basic block is executed.
	 *
	 * <p>
	 * It is an opportunity for the thread to track and control execution, if desired. The arguments
	 * give the number of instructions and additional p-code ops about to be completed. Should the
	 * counts exceed a desired schedule, or should the thread be suspended, this method may throw an
	 * exception to interrupt execution. This implementation ignores the counts and only checks for
	 * suspension. This can be toggled in the emulator's configuration.
	 *
	 * @see JitConfiguration#emitCounters()
	 * @param instructions the number of instruction about to be completed
	 * @param trailingOps the number of ops of a final partial instruction about to be completed. If
	 *            the block does not complete any instruction, this is the number of ops continuing
	 *            in the current (partial) instruction.
	 */
	public void count(int instructions, int trailingOps) {
		if (!isSuspended()) {
			return;
		}
		throw new SuspendedPcodeExecutionException(null, null);
	}
}

View file

@ -0,0 +1,57 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import ghidra.pcode.emu.ThreadPcodeExecutorState;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorStatePiece.JitBytesPcodeExecutorStateSpace;
import ghidra.program.model.address.AddressSpace;
/**
* The equivalent to {@link ThreadPcodeExecutorState} that multiplexes shared and local state for
* the JIT-accelerated p-code emulator
*/
public class JitThreadBytesPcodeExecutorState extends ThreadPcodeExecutorState<byte[]>
		implements JitBytesPcodeExecutorState {

	/**
	 * Create the state for one thread from its two portions
	 *
	 * @param sharedState the shared portion (e.g., ram space)
	 * @param localState the local portion (i.e., register, unique spaces)
	 */
	public JitThreadBytesPcodeExecutorState(JitDefaultBytesPcodeExecutorState sharedState,
			JitDefaultBytesPcodeExecutorState localState) {
		super(sharedState, localState);
	}

	@Override
	public JitDefaultBytesPcodeExecutorState getSharedState() {
		return (JitDefaultBytesPcodeExecutorState) super.getSharedState();
	}

	@Override
	public JitDefaultBytesPcodeExecutorState getLocalState() {
		return (JitDefaultBytesPcodeExecutorState) super.getLocalState();
	}

	@Override
	public JitBytesPcodeExecutorStateSpace getForSpace(AddressSpace space) {
		// Thread-local spaces are served by the local portion; all others by the shared portion
		JitDefaultBytesPcodeExecutorState owner =
			isThreadLocalSpace(space) ? getLocalState() : getSharedState();
		return owner.getForSpace(space);
	}
}

View file

@ -0,0 +1,944 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import static ghidra.pcode.emu.jit.analysis.JitVarScopeModel.maxAddr;
import static ghidra.pcode.emu.jit.analysis.JitVarScopeModel.overlapsLeft;
import static org.objectweb.asm.Opcodes.*;
import java.math.BigInteger;
import java.util.*;
import java.util.Map.Entry;
import org.apache.commons.collections4.iterators.ReverseListIterator;
import org.objectweb.asm.*;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorState;
import ghidra.pcode.emu.jit.JitCompiler;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.type.TypeConversions;
import ghidra.pcode.emu.jit.gen.var.VarGen;
import ghidra.pcode.emu.jit.var.*;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressFactory;
import ghidra.program.model.lang.Endian;
import ghidra.program.model.pcode.Varnode;
/**
* Type variable allocation phase for JIT-accelerated emulation.
*
* <p>
* This implements the Variable Allocation phase of the {@link JitCompiler} using a very simple
* placement and another "voting" algorithm to decide the allocated JVM variable types. We place/map
* variables by their storage varnodes, coalescing them as needed. Coalescing is performed for
* overlapping, but not abutting varnodes. This allocation is anticipated by the
* {@link JitVarScopeModel}, which performs the actual coalescing. Because multiple SSA variables
* will almost certainly occupy the same varnode, we employ another voting system. For example, the
* register {@code RAX} may be re-used many times within a passage. In some cases, it might be used
* to return a floating-point value. In others (and <em>probably</em> more commonly) it will be used
* to return an integral value. The more common case in the passage determines the JVM type of the
* local variable allocated for {@code RAX}. Note that variables which occupy only part of a
* coalesced varnode always vote for a JVM {@code int}, because of the shifting and masking required
* to extract that part.
*
* <p>
* The allocation process is very simple, presuming successful type assignment:
*
* <ol>
* <li>Vote Tabulation</li>
* <li>Index Reservation</li>
* <li>Handler Creation</li>
* </ol>
*
* <h2>Vote Tabulation</h2>
* <p>
* Every SSA variable (excluding constants and memory variables) contributes a vote for the type of
* its allocated local. If the varnode matches exactly, the vote is for the JVM type of the
* variable's assigned p-code type. The type mapping is simple: For integral types, we allocate
* using the smaller JVM type that fits the p-code type. For floating-point types, we allocate using
* the JVM type that exactly matches the p-code type. If the varnode is larger, i.e., because it's
* the result of coalescing, then the vote is for the smaller JVM integer type that fits the full
* varnode. Consider the following p-code:
*
* <pre>
* 1. RAX = FLOAT_ADD RCX, RDX
* 2. EAX = FLOAT_ADD EBX, 0x3f800000:4 # 1.0f
* </pre>
*
* <p>
* Several values and variables are at play here. We tabulate the type assignments and resulting
* votes:
*
* <p>
* <table border="1">
* <tr>
* <th>SSA Var</th>
* <th>Type</th>
* <th>Varnode</th>
* <th>Vote</th>
* </tr>
* <tr>
* <td>{@code RCX}<sub>in</sub></td>
* <td>{@link DoubleJitType#F8 float8}</td>
* <td>{@code RCX}</td>
* <td>{@code double}</td>
* </tr>
* <tr>
* <td>{@code RDX}<sub>in</sub></td>
* <td>{@link DoubleJitType#F8 float8}</td>
* <td>{@code RDX}</td>
* <td>{@code double}</td>
* </tr>
* <tr>
* <td>{@code RAX}<sub>1</sub></td>
* <td>{@link DoubleJitType#F8 float8}</td>
* <td>{@code RAX}</td>
* <td>{@code double}</td>
* </tr>
* <tr>
* <td>{@code EBX}<sub>in</sub></td>
* <td>{@link FloatJitType#F4 float4}</td>
* <td>{@code EBX}</td>
* <td>{@code float}</td>
* </tr>
* <tr>
* <td>{@code 0x3f800000:4}</td>
* <td>{@link FloatJitType#F4 float4}</td>
* </tr>
* <tr>
* <td>{@code EAX}<sub>2</sub></td>
* <td>{@link FloatJitType#F4 float4}</td>
* <td>{@code RAX}</td>
* <td>{@code long}</td>
* </tr>
* </table>
*
* <p>
* The registers {@code RCX}, {@code RDX}, and {@code EBX} are trivially allocated as locals of JVM
* types {@code double}, {@code double}, and {@code float}, respectively. It is also worth noting
* that {@code 0x3f800000} is allocated as a {@code float} constant in the classfile's constant
* pool. Now, we consider {@code RAX}. The varnodes for {@code RAX}<sub>1</sub> and
* {@code EAX}<sub>2</sub> are coalesced to {@code RAX}. {@code RAX}<sub>1</sub> casts its vote for
* {@code double}; whereas, {@code EAX}<sub>2</sub> casts its vote for {@code long}. This is because
* placing {@code EAX}<sub>2</sub>'s value into the larger varnode requires bitwise operators, which
* on the JVM, require integer operands. Thus the votes result in a tie, and favoring integral
* types, we allocate {@code RAX} in a JVM {@code long}.
*
* <h2>Index Reservation</h2>
* <p>
* After all the votes have been tabulated, we go through the results in address order, reserving
* JVM local indices and assigning types. Note that we must reserve two indices for every variable
* of type {@code long} or {@code double}, as specified by the JVM. Each of these reservations is
* tracked in a {@link JvmLocal}. Note that index 0 is already reserved by the JVM for the
* {@code this} ref, so we start our counting at 1. Also, some portions of the code generator may
* need to allocate additional temporary locals, so we must allow access to the next free index
* after all reservations are complete.
*
* <h2>Handler Creation</h2>
* <p>
* This actually extends a little beyond allocation, but this is a suitable place for it: All SSA
* values are assigned a handler, including constants and memory variables. Variables which access
* the same varnode get the same handler. For varnodes that are allocated in a JVM local, we create
* a handler that generates loads and stores to that local, e.g., {@link Opcodes#ILOAD iload}. For
* constant varnodes, we create a handler that generates {@link Opcodes#LDC ldc} instructions. For
* memory varnodes, we create a handler that generates a sequence of method invocations on the
* {@link JitBytesPcodeExecutorState state}. The code generator will delegate to these handlers in
* order to generate reads and writes of the corresponding variables, as well as to prepare any
* resources to facilitate access, e.g., pre-fetching items from the
* {@link JitBytesPcodeExecutorState state} in the generated constructor.
*
* @implNote There are many artifacts below that anticipate supporting p-code types greater than 8
* bytes in size. One method to support that is to allocate multiple JVM locals per p-code
* varnode. Consider a 16-byte (128-bit) integer. We could allocate 4 JVM {@code int}
* locals and then emit bytecode that performs the gradeschool-style arithmetic. I suspect
* this would perform better than just using refs to {@link BigInteger}, because it avoids
* heap pollution, and also may avoid some unnecessary arithmetic, esp., for the more
* significant portions that get dropped.
* @implNote <b>TODO</b>: It would be nice to detect varnode re-use under a different type and
* generate the appropriate declarations and handlers. This doesn't seem terribly complex,
* and it stands to spare us some casts. What's not clear is whether this offers any real
* run-time benefit.
*/
public class JitAllocationModel {
/**
 * A JVM local variable reserved to hold the value of a varnode
 *
 * @param index the JVM local variable index reserved for this local
 * @param name the human-readable name for this local
 * @param type the type of this local
 * @param vn the varnode whose value this local holds
 */
public record JvmLocal(int index, String name, SimpleJitType type, Varnode vn) {
    /**
     * Emit this local's contribution to the class constructor.
     *
     * <p>
     * This delegates to {@link VarGen} for the varnode held by this local.
     *
     * @param gen the code generator
     * @param iv the visitor for the class constructor
     */
    public void generateInitCode(JitCodeGenerator gen, MethodVisitor iv) {
        VarGen.generateValInitCode(gen, vn());
    }

    /**
     * Declare this local in the {@link JitCompiledPassage#run(int) run} method.
     *
     * <p>
     * The declaration spans from {@code start} to {@code end}, which the caller is expected to
     * place at the top and bottom of the method, respectively.
     *
     * @param gen the code generator
     * @param start a label at the top of the method
     * @param end a label at the end of the method
     * @param rv the visitor for the run method
     */
    public void generateDeclCode(JitCodeGenerator gen, Label start, Label end,
            MethodVisitor rv) {
        String descriptor = Type.getDescriptor(type().javaType());
        rv.visitLocalVariable(name(), descriptor, null, start, end, index());
    }

    /**
     * Emit bytecode to push this local's value onto the JVM stack.
     *
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     */
    public void generateLoadCode(MethodVisitor rv) {
        int opcode = type().opcodeLoad();
        rv.visitVarInsn(opcode, index());
    }

    /**
     * Emit bytecode to pop the value on top of the JVM stack into this local.
     *
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     */
    public void generateStoreCode(MethodVisitor rv) {
        int opcode = type().opcodeStore();
        rv.visitVarInsn(opcode, index());
    }

    /**
     * Emit bytecode to bring this varnode into scope.
     *
     * <p>
     * The value is read from the {@link JitBytesPcodeExecutorState state} and then stored into
     * this local.
     *
     * @param gen the code generator
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     */
    public void generateBirthCode(JitCodeGenerator gen, MethodVisitor rv) {
        // state -> stack
        VarGen.generateValReadCodeDirect(gen, type(), vn(), rv);
        // stack -> local
        rv.visitVarInsn(type().opcodeStore(), index());
    }

    /**
     * Emit bytecode to take this varnode out of scope.
     *
     * <p>
     * The value is loaded from this local and then written back into the
     * {@link JitBytesPcodeExecutorState state}.
     *
     * @param gen the code generator
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     */
    public void generateRetireCode(JitCodeGenerator gen, MethodVisitor rv) {
        // local -> stack
        rv.visitVarInsn(type().opcodeLoad(), index());
        // stack -> state
        VarGen.generateValWriteCodeDirect(gen, type(), vn(), rv);
    }
}
/**
* A handler that knows how to load and store variable values onto and from the JVM stack.
*
* <p>
* Each method emits bytecode through the given visitor; nothing is executed at the time of the
* call.
*/
public interface VarHandler {
/**
* Get the p-code type of the variable this handler handles.
*
* @return the type
*/
JitType type();
/**
* Emit bytecode into the class constructor.
*
* @param gen the code generator
* @param iv the visitor for the class constructor
*/
void generateInitCode(JitCodeGenerator gen, MethodVisitor iv);
/**
* If needed, emit bytecode at the top of the {@link JitCompiledPassage#run(int) run}
* method.
*
* @param gen the code generator
* @param start a label at the top of the method
* @param end a label at the end of the method
* @param rv the visitor for the run method
*/
void generateDeclCode(JitCodeGenerator gen, Label start, Label end, MethodVisitor rv);
/**
* Emit bytecode to load the varnode's value onto the JVM stack.
*
* @param gen the code generator
* @param type the p-code type of the value expected on the JVM stack by the subsequent
* bytecode
* @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
*/
void generateLoadCode(JitCodeGenerator gen, JitType type, MethodVisitor rv);
/**
* Emit bytecode to store the value on the JVM stack into the varnode.
*
* @param gen the code generator
* @param type the p-code type of the value produced on the JVM stack by the preceding
* bytecode
* @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
*/
void generateStoreCode(JitCodeGenerator gen, JitType type, MethodVisitor rv);
}
/**
 * A handler for a p-code variable that lives entirely in one JVM local variable.
 *
 * <p>
 * Loads and stores go through the {@link #local() local}, with a type conversion between the
 * handler's own {@link #type() type} and the type requested by the caller.
 */
public interface OneLocalVarHandler extends VarHandler {
    /**
     * Get the local variable into which this p-code variable is allocated
     *
     * @return the local
     */
    JvmLocal local();

    @Override
    default void generateInitCode(JitCodeGenerator gen, MethodVisitor iv) {
        // Intentionally empty: the generator initializes the locals directly
    }

    @Override
    default void generateDeclCode(JitCodeGenerator gen, Label start, Label end,
            MethodVisitor rv) {
        // Intentionally empty: the generator declares the locals directly
    }

    @Override
    default void generateLoadCode(JitCodeGenerator gen, JitType type, MethodVisitor rv) {
        JvmLocal source = local();
        source.generateLoadCode(rv);
        // Convert from the type of this variable to the type the caller wants on the stack
        JitType from = this.type();
        TypeConversions.generate(gen, from, type, rv);
    }

    @Override
    default void generateStoreCode(JitCodeGenerator gen, JitType type, MethodVisitor rv) {
        // Convert from the type the caller left on the stack to the type of this variable
        JitType to = this.type();
        TypeConversions.generate(gen, type, to, rv);
        JvmLocal target = local();
        target.generateStoreCode(rv);
    }
}
/**
* The handler for a p-code variable allocated in one JVM {@code int}.
*
* <p>
* All code generation is inherited from {@link OneLocalVarHandler}.
*
* @param local the JVM local
* @param type the p-code type
*/
public record IntVarAlloc(JvmLocal local, IntJitType type) implements OneLocalVarHandler {}
/**
* The handler for a p-code variable allocated in one JVM {@code long}.
*
* <p>
* All code generation is inherited from {@link OneLocalVarHandler}.
*
* @param local the JVM local
* @param type the p-code type
*/
public record LongVarAlloc(JvmLocal local, LongJitType type) implements OneLocalVarHandler {}
/**
* The handler for a p-code variable allocated in one JVM {@code float}.
*
* <p>
* All code generation is inherited from {@link OneLocalVarHandler}.
*
* @param local the JVM local
* @param type the p-code type
*/
public record FloatVarAlloc(JvmLocal local, FloatJitType type) implements OneLocalVarHandler {}
/**
* The handler for a p-code variable allocated in one JVM {@code double}.
*
* <p>
* All code generation is inherited from {@link OneLocalVarHandler}.
*
* @param local the JVM local
* @param type the p-code type
*/
public record DoubleVarAlloc(JvmLocal local, DoubleJitType type)
implements OneLocalVarHandler {}
/**
* A portion of a multi-local variable handler.
*
* <p>
* This portion is allocated in a JVM local. The shift is measured in bytes, and its sign selects
* the direction: When loading with a positive shift, the value is shifted to the right to place it
* into position; with a negative shift, it is shifted to the left. Storing applies the opposite
* direction. A shift of zero emits no shift instruction.
*
* @param local the local variable allocated to this part
* @param shift the number of bytes and direction to shift
*/
public record MultiLocalPart(JvmLocal local, int shift) {
/**
* Choose the larger of two types, by size.
*
* @param t1 the first type
* @param t2 the second type
* @return {@code t1} if it is strictly larger than {@code t2}, otherwise {@code t2}
*/
private JitType chooseLargerType(JitType t1, JitType t2) {
return t1.size() > t2.size() ? t1 : t2;
}
/**
* Emit bytecode to load the value from this local and position it in a value on the JVM
* stack.
*
* <p>
* If multiple parts are to be combined, the caller should emit a bitwise or after all loads
* but the first.
*
* @param gen the code generator
* @param type the p-code type of the value expected on the stack by the subsequent
* bytecode, which may be to load additional parts
* @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
*
* @implNote We must keep temporary values in a variable of the larger of the local's or the
* expected type, otherwise bits may get dropped while positioning the value.
*/
public void generateLoadCode(JitCodeGenerator gen, JitType type, MethodVisitor rv) {
local.generateLoadCode(rv);
// Widen (if needed) before shifting, so that no bits fall off during positioning
JitType tempType = chooseLargerType(local.type, type);
TypeConversions.generate(gen, local.type, tempType, rv);
if (shift > 0) {
// Positive shift: move the value right by the given number of bytes (unsigned)
switch (tempType) {
case IntJitType t -> {
rv.visitLdcInsn(shift * Byte.SIZE);
rv.visitInsn(IUSHR);
}
case LongJitType t -> {
rv.visitLdcInsn(shift * Byte.SIZE);
rv.visitInsn(LUSHR);
}
default -> throw new AssertionError();
}
}
else if (shift < 0) {
// Negative shift: move the value left by the given number of bytes
switch (tempType) {
case IntJitType t -> {
rv.visitLdcInsn(-shift * Byte.SIZE);
rv.visitInsn(ISHL);
}
case LongJitType t -> {
rv.visitLdcInsn(-shift * Byte.SIZE);
rv.visitInsn(LSHL);
}
default -> throw new AssertionError();
}
}
// Narrow (if needed) to the type the caller expects on the stack
TypeConversions.generate(gen, tempType, type, rv);
}
/**
* Emit bytecode to extract this part from the value on the JVM stack and store it in the
* local variable.
*
* <p>
* If multiple parts are to be stored, the caller should emit a {@link Opcodes#DUP dup} or
* {@link Opcodes#DUP2 dup2} before all stores but the last.
*
* <p>
* Only the bits covered by the incoming value are replaced. The remaining bits of the local
* are preserved by masking and or-ing with its current value.
*
* @param gen the code generator
* @param type the p-code type of the value on the stack, as produced by the preceding
* bytecode
* @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
*
* @implNote We must keep temporary values in a variable of the larger of the local's or the
* expected type, otherwise bits may get dropped while positioning the value.
*/
public void generateStoreCode(JitCodeGenerator gen, JitType type, MethodVisitor rv) {
JitType tempType = chooseLargerType(local.type, type);
TypeConversions.generate(gen, type, tempType, rv);
// Position the value within the local: the opposite direction of the load
switch (tempType) {
case IntJitType t -> {
if (shift > 0) {
rv.visitLdcInsn(shift * Byte.SIZE);
rv.visitInsn(ISHL);
}
else if (shift < 0) {
rv.visitLdcInsn(-shift * Byte.SIZE);
rv.visitInsn(IUSHR);
}
}
case LongJitType t -> {
if (shift > 0) {
rv.visitLdcInsn(shift * Byte.SIZE);
rv.visitInsn(LSHL);
}
else if (shift < 0) {
rv.visitLdcInsn(-shift * Byte.SIZE);
rv.visitInsn(LUSHR);
}
}
default -> throw new AssertionError();
}
TypeConversions.generate(gen, tempType, local.type, rv);
// Blend into the local: (positioned & mask) | (local & ~mask)
switch (local.type) {
case IntJitType t -> {
// Ones as wide as the incoming type, moved the same way the value was positioned
int mask = -1 >>> (Integer.SIZE - Byte.SIZE * type.size());
if (shift > 0) {
mask <<= shift * Byte.SIZE;
}
else {
mask >>>= -shift * Byte.SIZE;
}
rv.visitLdcInsn(mask);
rv.visitInsn(IAND);
local.generateLoadCode(rv);
rv.visitLdcInsn(~mask);
rv.visitInsn(IAND);
rv.visitInsn(IOR);
local.generateStoreCode(rv);
}
case LongJitType t -> {
// Ones as wide as the incoming type, moved the same way the value was positioned
long mask = -1L >>> (Long.SIZE - Byte.SIZE * type.size());
if (shift > 0) {
mask <<= shift * Byte.SIZE;
}
else {
mask >>>= -shift * Byte.SIZE;
}
rv.visitLdcInsn(mask);
rv.visitInsn(LAND);
local.generateLoadCode(rv);
rv.visitLdcInsn(~mask);
rv.visitInsn(LAND);
rv.visitInsn(LOR);
local.generateStoreCode(rv);
}
default -> throw new AssertionError();
}
}
}
/**
 * The handler for a variable allocated in a composition of locals
 *
 * <p>
 * This can also handle a varnode that is a subpiece of a local variable allocated for a larger
 * varnode. For example, this may handle {@code EAX}, when we have allocated a {@code long} to
 * hold all of {@code RAX}.
 *
 * <p>
 * A load positions every part and combines them with bitwise or. A store duplicates the value
 * for every part but the first, storing the parts from last to first.
 *
 * @param parts the parts describing how the locals are composed
 * @param type the p-code type of the (whole) variable
 */
public record MultiLocalVarHandler(List<MultiLocalPart> parts, JitType type)
        implements VarHandler {
    @Override
    public void generateInitCode(JitCodeGenerator gen, MethodVisitor iv) {
        // Intentionally empty: the generator initializes the locals directly
    }

    @Override
    public void generateDeclCode(JitCodeGenerator gen, Label start, Label end,
            MethodVisitor rv) {
        // Intentionally empty: the generator declares the locals directly
    }

    @Override
    public void generateLoadCode(JitCodeGenerator gen, JitType type, MethodVisitor rv) {
        JitType whole = this.type;
        parts.get(0).generateLoadCode(gen, whole, rv);
        List<MultiLocalPart> rest = parts.subList(1, parts.size());
        for (int i = 0; i < rest.size(); i++) {
            rest.get(i).generateLoadCode(gen, whole, rv);
            emitOr(whole, rv);
        }
        TypeConversions.generate(gen, whole, type, rv);
    }

    @Override
    public void generateStoreCode(JitCodeGenerator gen, JitType type, MethodVisitor rv) {
        JitType whole = this.type;
        TypeConversions.generate(gen, type, whole, rv);
        List<MultiLocalPart> rest = parts.subList(1, parts.size());
        for (int i = rest.size() - 1; i >= 0; i--) {
            // Each store consumes the value, so keep a copy for the parts still to come
            emitDup(whole, rv);
            rest.get(i).generateStoreCode(gen, whole, rv);
        }
        parts.get(0).generateStoreCode(gen, whole, rv);
    }

    /**
     * Emit the bitwise or suited to the given type
     *
     * @param t the type of the two operands on the stack
     * @param rv the visitor for the run method
     */
    private static void emitOr(JitType t, MethodVisitor rv) {
        switch (t) {
            case IntJitType it -> rv.visitInsn(IOR);
            case LongJitType lt -> rv.visitInsn(LOR);
            default -> throw new AssertionError();
        }
    }

    /**
     * Emit the duplicate instruction suited to the given type
     *
     * @param t the type of the value on top of the stack
     * @param rv the visitor for the run method
     */
    private static void emitDup(JitType t, MethodVisitor rv) {
        switch (t) {
            case IntJitType it -> rv.visitInsn(DUP);
            case LongJitType lt -> rv.visitInsn(DUP2);
            default -> throw new AssertionError();
        }
    }
}
/**
* A dummy handler for values/variables that are not allocated in JVM locals
*
* <p>
* It has no type, contributes nothing to the constructor or declarations, and rejects any
* attempt to generate a load or store with an {@link AssertionError}.
*/
public enum NoHandler implements VarHandler {
/** Singleton */
INSTANCE;
@Override
public JitType type() {
// There is no local, and so no type
return null;
}
@Override
public void generateInitCode(JitCodeGenerator gen, MethodVisitor iv) {
// Nothing to initialize
}
@Override
public void generateDeclCode(JitCodeGenerator gen, Label start, Label end,
MethodVisitor rv) {
// Nothing to declare
}
@Override
public void generateLoadCode(JitCodeGenerator gen, JitType type, MethodVisitor rv) {
// This handler must never be asked to access a variable
throw new AssertionError();
}
@Override
public void generateStoreCode(JitCodeGenerator gen, JitType type, MethodVisitor rv) {
// This handler must never be asked to access a variable
throw new AssertionError();
}
}
/**
 * The descriptor of a p-code variable
 *
 * <p>
 * This is just a logical grouping of a varnode (broken down into its address space id, offset,
 * and size) and its assigned p-code type.
 *
 * @param spaceId the id of the varnode's address space
 * @param offset the varnode's offset within that space
 * @param size the varnode's size in bytes
 * @param type the p-code type assigned to the variable
 */
private record VarDesc(int spaceId, long offset, int size, JitType type) {
    /**
     * Describe the given varnode, pairing it with the given type
     *
     * @param vn the varnode
     * @param type the p-code type
     * @return the descriptor
     */
    static VarDesc fromVarnode(Varnode vn, JitType type) {
        int space = vn.getSpace();
        long off = vn.getOffset();
        int length = vn.getSize();
        return new VarDesc(space, off, length, type);
    }

    /**
     * Derive a name for this variable, to use in the name of allocated local(s)
     *
     * <p>
     * The name encodes the space id, the offset (in hexadecimal), the size, and the type's name.
     *
     * @return the name
     */
    public String name() {
        return String.format("s%d_%x_%d_%s", spaceId, offset, size, type.nm());
    }

    /**
     * Convert this descriptor back to a varnode
     *
     * @param factory the address factory for the emulation target language
     * @return the varnode
     */
    public Varnode toVarnode(AddressFactory factory) {
        Address start = factory.getAddressSpace(spaceId).getAddress(offset);
        return new Varnode(start, size);
    }
}
private final JitDataFlowModel dfm;
private final JitVarScopeModel vsm;
private final JitTypeModel tm;
private final SleighLanguage language;
private final Endian endian;
private int nextLocal = 2; // 0:this, 1:blockId in run(int blockId)
private final Map<JitVal, VarHandler> handlers = new HashMap<>();
private final Map<Varnode, VarHandler> handlersPerVarnode = new HashMap<>();
private final NavigableMap<Address, JvmLocal> locals = new TreeMap<>();
/**
* Construct the allocation model.
*
* <p>
* The allocation is performed immediately, as the last step of construction.
*
* @param context the analysis context
* @param dfm the data flow model
* @param vsm the variable scope model
* @param tm the type model
*/
public JitAllocationModel(JitAnalysisContext context, JitDataFlowModel dfm,
JitVarScopeModel vsm, JitTypeModel tm) {
this.dfm = dfm;
this.vsm = vsm;
this.tm = tm;
this.endian = context.getEndian();
this.language = context.getLanguage();
// Must come last: allocation reads every field assigned above
allocate();
}
/**
 * Reserve (allocate) one local for the given p-code variable
 *
 * <p>
 * A {@code long} or {@code double} local consumes two consecutive JVM local indices; any other
 * type consumes one.
 *
 * @param name the name of the JVM local
 * @param type the p-code type represented by the local
 * @param desc the variable's descriptor
 * @return the allocated JVM local
 */
private JvmLocal genFreeLocal(String name, SimpleJitType type, VarDesc desc) {
    final int index = nextLocal;
    Class<?> javaType = type.javaType();
    boolean twoSlots = javaType == long.class || javaType == double.class;
    nextLocal += twoSlots ? 2 : 1;
    Varnode vn = desc.toVarnode(language.getAddressFactory());
    return new JvmLocal(index, name, type, vn);
}
/**
* Get the next free local index without reserving it
*
* <p>
* This should be used by operator code generators <em>after</em> all the
* {@link JitBytesPcodeExecutorState state} bypassing local variables have been allocated. The
* variables should be scoped to that operator only, so that the ids used are freed for the next
* operator.
*
* <p>
* Because nothing is reserved, repeated calls return the same index until another local is
* allocated by this model.
*
* @return the next id
*/
public int nextFreeLocal() {
return nextLocal;
}
/**
 * Reserve (allocate) several locals for the given p-code variable
 *
 * <p>
 * The given types are consumed in list order for a big-endian language and in reverse order for
 * a little-endian one. Each local is assigned the next range of offsets, starting at the
 * variable's offset, and is named with the given prefix and its position.
 *
 * @param name a prefix to name each JVM local
 * @param types a p-code type that describes what each local stores
 * @param desc the (whole) variable's descriptor
 * @return the allocated JVM locals, in order of increasing offset
 */
private List<JvmLocal> genFreeLocals(String name, List<SimpleJitType> types,
        VarDesc desc) {
    final boolean bigEndian = language.isBigEndian();
    final int count = types.size();
    List<JvmLocal> legs = new ArrayList<>(count);
    long legOffset = desc.offset;
    for (int i = 0; i < count; i++) {
        SimpleJitType legType = types.get(bigEndian ? i : count - 1 - i);
        VarDesc legDesc = new VarDesc(desc.spaceId, legOffset, legType.size(), legType);
        legs.add(genFreeLocal(name + "_" + i, legType, legDesc));
        legOffset += legType.size();
    }
    return List.copyOf(legs);
}
/**
 * A contest for assigning a type to a varnode
 *
 * <p>
 * Because several SSA variables can share one varnode, we let each cast a vote to determine the
 * JVM type of the local(s) allocated to it.
 *
 * @param map the tally of votes received per (extended) type
 *
 * @implNote <b>TODO</b>: This type contest could receive more detailed information from the
 *           type model, but perhaps that's more work than it's worth. I would have to
 *           communicate all votes, not just the winner....
 */
record TypeContest(Map<JitType, Integer> map) {
    /**
     * Start a new contest, with no votes cast
     */
    public TypeContest() {
        this(new HashMap<>());
    }

    /**
     * Cast a vote for the given type
     *
     * <p>
     * The vote is tallied under the type's {@link JitType#ext() extension}.
     *
     * @param type the type
     */
    public void vote(JitType type) {
        map.merge(type.ext(), 1, Integer::sum);
    }

    /**
     * Choose the winner: the type with the most votes, breaking ties by
     * {@link JitType#pref() preference}
     *
     * @return the winning type
     */
    public JitType winner() {
        final int most = map.values().stream().mapToInt(Integer::intValue).max().getAsInt();
        return map.entrySet()
                .stream()
                .filter(e -> e.getValue() == most)
                .map(Map.Entry::getKey)
                .min(Comparator.comparing(JitType::pref))
                .get();
    }
}
private final Map<Varnode, TypeContest> typeContests = new HashMap<>();
/**
* Create a handler for the variable stored by the one given local
*
* <p>
* The kind of handler is selected by the type of the local: {@code int}, {@code long},
* {@code float}, or {@code double}. Any other type is an assertion failure.
*
* @param local the local
* @return the handler
*/
private OneLocalVarHandler createOneLocalHandler(JvmLocal local) {
return switch (local.type) {
case IntJitType t -> new IntVarAlloc(local, t);
case LongJitType t -> new LongVarAlloc(local, t);
case FloatJitType t -> new FloatVarAlloc(local, t);
case DoubleJitType t -> new DoubleVarAlloc(local, t);
default -> throw new AssertionError();
};
}
/**
 * Create a handler for a multi-part or subpiece varnode
 *
 * <p>
 * The locals involved are those from the one at or immediately left of the varnode's address,
 * through the varnode's maximum address. Each becomes one {@link MultiLocalPart}, whose shift is
 * the number of bytes needed to position <em>that</em> local relative to the varnode: For
 * little-endian, the distance from the local's start to the varnode's start. For big-endian, the
 * distance from the varnode's end to the local's end. The shift is positive when the local's
 * value must be moved right on load, and negative when it must be moved left.
 *
 * @param vn the varnode
 * @return a handler to access the value of the given varnode, as allocated in one or more
 *         locals.
 */
private VarHandler createComplicatedHandler(Varnode vn) {
    Entry<Address, JvmLocal> leftEntry = locals.floorEntry(vn.getAddress());
    assert overlapsLeft(leftEntry.getValue().vn, vn);
    Address min = leftEntry.getKey();
    NavigableMap<Address, JvmLocal> sub = locals.subMap(min, true, maxAddr(vn), true);
    List<MultiLocalPart> parts = new ArrayList<>();
    for (JvmLocal local : sub.values()) {
        /**
         * The shift must be computed per part. Using the leftmost local for every part would
         * position them all identically, so or-ing them together could not reconstruct the
         * value. For the leftmost local itself, this is the same computation as before.
         */
        int shift = (int) switch (endian) {
            case BIG -> maxAddr(local.vn).subtract(maxAddr(vn));
            case LITTLE -> vn.getAddress().subtract(local.vn.getAddress());
        };
        parts.add(new MultiLocalPart(local, shift));
    }
    return new MultiLocalVarHandler(parts, JitTypeBehavior.INTEGER.type(vn.getSize()));
}
/**
 * Get (creating if necessary) the handler for the given variable's varnode.
 *
 * <p>
 * Handlers are cached per varnode, so variables sharing a varnode share a handler.
 *
 * @param vv the variable
 * @return the handler
 */
private VarHandler getOrCreateHandlerForVarnodeVar(JitVarnodeVar vv) {
    return handlersPerVarnode.computeIfAbsent(vv.varnode(), this::newHandlerForVarnode);
}

/**
 * Create the handler for a varnode not yet having one
 *
 * @param vn the varnode
 * @return a one-local handler if a local is allocated for exactly this varnode; otherwise a
 *         handler composed from the overlapping local(s)
 */
private VarHandler newHandlerForVarnode(Varnode vn) {
    JvmLocal exact = locals.get(vn.getAddress());
    boolean isWholeLocal = exact != null && exact.vn.equals(vn);
    return isWholeLocal ? createOneLocalHandler(exact) : createComplicatedHandler(vn);
}
/**
 * Create the handler for the given value
 *
 * <p>
 * Constants and memory variables are not allocated in JVM locals, so they get the dummy handler.
 * The memory check must precede the varnode check, since it is tested first here and excluded
 * explicitly during allocation.
 *
 * @param v the value
 * @return a handler for the value's varnode, if it is a register or unique; otherwise, the
 *         dummy handler
 */
private VarHandler createHandler(JitVal v) {
    if (v instanceof JitConstVal || v instanceof JitMemoryVar) {
        return NoHandler.INSTANCE;
    }
    if (!(v instanceof JitVarnodeVar vv)) {
        throw new AssertionError();
    }
    return getOrCreateHandlerForVarnodeVar(vv);
}
/**
 * Perform the actual allocations
 *
 * <p>
 * This proceeds in three passes: 1) Every non-memory varnode variable votes on the type of its
 * coalesced varnode. 2) In order of address, each coalesced varnode is allocated one local (for
 * a simple type) or one local per leg (for a multi-precision integer). 3) Every value in the
 * data flow model is assigned its handler.
 */
private void allocate() {
    // Pass 1: Hold the type contests
    for (JitVal v : dfm.allValues()) {
        if (!(v instanceof JitVarnodeVar vv) || v instanceof JitMemoryVar) {
            continue;
        }
        Varnode vn = vv.varnode();
        Varnode coalesced = vsm.getCoalesced(vn);
        TypeContest contest =
            typeContests.computeIfAbsent(coalesced, __ -> new TypeContest());
        // A variable covering only part of the coalesced varnode votes for a plain integer
        JitType ballot = vn.equals(coalesced)
                ? tm.typeOf(v)
                : JitTypeBehavior.INTEGER.type(coalesced.getSize());
        contest.vote(ballot);
    }

    // Pass 2: Allocate locals to the winners, in order of address
    List<Map.Entry<Varnode, TypeContest>> byAddress =
        new ArrayList<>(typeContests.entrySet());
    byAddress.sort(Comparator.comparing(e -> e.getKey().getAddress()));
    for (Map.Entry<Varnode, TypeContest> contested : byAddress) {
        Varnode vn = contested.getKey();
        VarDesc desc = VarDesc.fromVarnode(vn, contested.getValue().winner());
        switch (desc.type()) {
            case SimpleJitType t -> {
                locals.put(vn.getAddress(), genFreeLocal(desc.name(), t, desc));
            }
            case MpIntJitType t -> {
                for (JvmLocal leg : genFreeLocals(desc.name(), t.legTypes(), desc)) {
                    locals.put(leg.vn.getAddress(), leg);
                }
            }
            default -> throw new AssertionError();
        }
    }

    // Pass 3: Assign a handler to every value
    for (JitVal v : dfm.allValuesSorted()) {
        handlers.put(v, createHandler(v));
    }
}
/**
* Get the handler for the given value (constant or variable in the use-def graph)
*
* <p>
* This is a plain map lookup, so it returns {@code null} for a value that was not assigned a
* handler during allocation.
*
* @param v the value
* @return the handler
*/
public VarHandler getHandler(JitVal v) {
return handlers.get(v);
}
/**
* Get all of the locals allocated
*
* <p>
* The returned collection is a view of this model's address-sorted map, not a copy.
*
* @return the locals
*/
public Collection<JvmLocal> allLocals() {
return locals.values();
}
/**
 * Get all of the locals allocated for the given varnode
 *
 * <p>
 * The range searched starts at the nearest allocated address at or below the varnode's address
 * (or at the varnode's address, if there is none), and extends through the varnode's maximum
 * address.
 *
 * @implNote This is used by the code generator to birth and retire the local variables, given
 *           that scope is analyzed in terms of varnodes.
 * @param vn the varnode
 * @return the locals
 */
public Collection<JvmLocal> localsForVn(Varnode vn) {
    Address start = vn.getAddress();
    Address floor = locals.floorKey(start);
    Address from = floor != null ? floor : start;
    return locals.subMap(from, true, maxAddr(vn), true).values();
}
}

View file

@ -0,0 +1,105 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.pcode.emu.jit.*;
import ghidra.pcode.emu.jit.JitPassage.AddrCtx;
import ghidra.program.model.lang.Endian;
import ghidra.program.model.pcode.PcodeOp;
/**
 * A collection of state that is shared among several phases of the translation process.
 *
 * <p>
 * The language and endianness are derived once from the passage at construction.
 *
 * @see JitCompiler
 */
public class JitAnalysisContext {
    private final JitConfiguration config;
    private final JitPassage passage;
    private final SleighLanguage language;
    private final Endian endian;

    /**
     * Construct a new context, starting with the given configuration and source passage
     *
     * @param config the JIT compiler's configuration
     * @param passage the passage selected for translation
     */
    public JitAnalysisContext(JitConfiguration config, JitPassage passage) {
        this.config = config;
        this.passage = passage;
        SleighLanguage lang = passage.getLanguage();
        this.language = lang;
        if (lang.isBigEndian()) {
            this.endian = Endian.BIG;
        }
        else {
            this.endian = Endian.LITTLE;
        }
    }

    /**
     * Get the JIT compiler configuration
     *
     * @return the configuration given at construction
     */
    public JitConfiguration getConfiguration() {
        return this.config;
    }

    /**
     * Get the source passage
     *
     * @return the passage given at construction
     */
    public JitPassage getPassage() {
        return this.passage;
    }

    /**
     * Get the translation source (i.e., emulation target) language
     *
     * @return the language of the passage
     */
    public SleighLanguage getLanguage() {
        return this.language;
    }

    /**
     * Get the endianness of the translation source, i.e., emulation target.
     *
     * @return the endianness of the passage's language
     */
    public Endian getEndian() {
        return this.endian;
    }

    /**
     * Check if the given p-code op is the first of an instruction.
     *
     * <p>
     * This delegates to the passage.
     *
     * @param op the op to check
     * @return the address-context pair
     * @see JitPassage#getOpEntry(PcodeOp)
     */
    public AddrCtx getOpEntry(PcodeOp op) {
        return this.passage.getOpEntry(op);
    }

    /**
     * Get the error message for a given p-code op
     *
     * <p>
     * This delegates to the passage.
     *
     * @param op the p-code op generating the error
     * @return the message
     * @see JitPassage#getErrorMessage(PcodeOp)
     */
    public String getErrorMessage(PcodeOp op) {
        return this.passage.getErrorMessage(op);
    }
}

View file

@ -0,0 +1,586 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.util.*;
import ghidra.pcode.emu.jit.*;
import ghidra.pcode.emu.jit.JitCompiler.Diag;
import ghidra.pcode.emu.jit.JitPassage.*;
import ghidra.pcode.emu.jit.decode.DecoderForOneStride;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.exec.PcodeProgram;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.SequenceNumber;
/**
* The control flow analysis for JIT-accelerated emulation.
*
* <p>
* This implements the Control Flow Analysis phase of the {@link JitCompiler}. Some rudimentary
* analysis is performed during passage decoding &mdash; note the {@link BlockSplitter} is exported
* for use in {@link DecoderForOneStride}. This is necessary to evaluate whether an instruction
* (especially an inject-instrumented instruction) has fall-through. Without that information, the
* decoder cannot know whether it has reached the end of its stride. Note that the decoder records
* all the branches it encounters and includes them as metadata in the passage. Because branches
* need to record the source and target p-code op, the decoder is well suited. Additionally, it has
* to compute these anyway, and we'd rather avoid duplicative work by this analyzer.
*
* <p>
* The decoded passage contains a good deal of information, but the primary inputs at this point are
* the ordered list of p-code ops and the branches. This model's primary responsibility is to break
* the passage down into basic blocks at the p-code level. Even though the p-code ops have all been
* concatenated together when constructing the passage, we know, by definition, that each stride
* will end with an unconditional branch (or else a synthesized {@link ExitPcodeOp}). Note also that
* {@link JitPassage#getBranches()} only includes the non-fall-through branches, because these are
* all that are recorded by the decoder. Thus, it is also this model's responsibility to create the
* fall-through branches. These will occur to represent the "false" case of any conditional
* branches, and to represent "unconditional fall through."
*
* <p>
* The algorithm for this is fairly straightforward and has been implemented primarily in
* {@link BlockSplitter}. Most everything else in this class is data management and the types
* representing the model.
*
* <p>
* <b>NOTE:</b> It is technically possible for a userop to branch, but this analysis does not
* consider that. Instead, the emulator will decide how to handle those. Conventionally, I'd rather
* a userop <em>never</em> perform control flow. Instead, I'd rather see things like
* <code>pc = my_control_op(); goto [pc];</code>.
*/
public class JitControlFlowModel {
/**
* An exception thrown when control flow might run off the edge of the passage.
*
* <p>
* By definition a passage is a collection of strides, and each stride is terminated by some op
* without fall through (or else a synthesized {@link ExitPcodeOp}). In particular, the last
* stride cannot end in fall through. If it did, there would be no op for it to fall through to.
* While this should never happen, it is easy in the course of development to allow it by
* accident. The control flow analysis can detect this as it finishes splitting the passage into
* blocks. If the final block has fall through, the passage is said to have "unterminated flow,"
* and this exception is thrown. We do not wait until execution of the passage to throw this. It
* is thrown during translation, as it represents an assertion failure in the translation
* process. That is, the decoder produced an unsound passage.
*/
public static class UnterminatedFlowException extends IllegalArgumentException {
/**
* Construct the exception, with a fixed message
*/
public UnterminatedFlowException() {
super("Final block cannot fall through");
}
}
/**
* A flow from one block to another
*
* <p>
* This is just a wrapper around an {@link IntBranch} that allows us to quickly identify what
* two blocks it connects. Note that to connect two blocks in the passage, the branch must by
* definition be an {@link IntBranch}.
*
* <p>
* If this flow represents entry into the passage, then {@link #from()} and {@link #branch()}
* are both {@code null}.
*
* @param from the block from which execution flows. In the case of a non-fall-through branch,
* the block should end with the branching p-code op. For conditional fall-through,
* it should end with the {@link PcodeOp#CBRANCH} op. For unconditional fall-through,
* it could end with any op having fall through.
* @param to the block to which execution flows. The block must start with the
* {@link IntBranch#to() target op} of the branch.
* @param branch the branch effecting the flow of execution
*/
public record BlockFlow(JitBlock from, JitBlock to, IntBranch branch) {
/**
* Create an entry flow to the given block
*
* <p>
* The resulting flow has neither a source block nor a branch.
*
* @param to the block to which execution flows
* @return the flow
*/
public static BlockFlow entry(JitBlock to) {
return new BlockFlow(null, to, null);
}
}
/**
* A basic block of p-code
*
* <p>
* This follows the formal definition of a basic block, but at the p-code level. All flows into
* the block enter at its first op, and all flows out of the block exit at its last op. The
* block also contains information about these flows as well as branches out of the passage via
* this block.
*/
public static class JitBlock extends PcodeProgram {
private Map<IntBranch, BlockFlow> flowsFrom = new HashMap<>();
private Map<IntBranch, BlockFlow> flowsTo = new HashMap<>();
private List<IntBranch> branchesFrom = new ArrayList<>();
private List<IntBranch> branchesTo = new ArrayList<>();
private List<Branch> branchesOut = new ArrayList<>();
private final int instructions;
private final int trailingOps;
/**
 * Construct a new block
 *
 * <p>
 * This also tallies the block's instruction count and trailing op count. Only
 * {@link DecodedPcodeOp}s are considered: One that starts an instruction increments the
 * instruction count and resets the trailing count. Any other increments the trailing count.
 *
 * @param program the program (i.e., passage) from which this block is derived
 * @param code the subset of ops, in execution order, comprising this block
 */
public JitBlock(PcodeProgram program, List<PcodeOp> code) {
    super(program, List.copyOf(code));
    int started = 0;
    int sinceLastStart = 0;
    for (PcodeOp op : code) {
        if (!(op instanceof DecodedPcodeOp decoded)) {
            // Not produced by decoding an instruction, so it counts toward neither tally
            continue;
        }
        if (decoded.isInstructionStart()) {
            started++;
            sinceLastStart = 0;
        }
        else {
            sinceLastStart++;
        }
    }
    this.instructions = started;
    this.trailingOps = sinceLastStart;
}
/**
* {@inheritDoc}
*
* <p>
* This appends the sequence number of the block's first op to the inherited head.
*/
@Override
protected String getHead() {
return super.getHead() + "[start=" + start() + "]";
}
/**
* Describe this block by its head only, i.e., without listing its ops.
*/
@Override
public String toString() {
return getHead();
}
/**
* Get the first p-code op in this block
*
* <p>
* By the definition of a basic block, this is the op at which all flows into the block enter.
*
* @return the first p-code op
*/
public PcodeOp first() {
return code.getFirst();
}
/**
* Get the sequence number of the first op
*
* <p>
* This is used for display and testing purposes only. It is also included in the block's
* {@link #toString() string representation}.
*
* @return the sequence number
*/
public SequenceNumber start() {
return code.getFirst().getSeqnum();
}
/**
* Get the sequence number of the last op
*
* <p>
* This is used for display and testing purposes only.
*
* @return the sequence number of the final op in this block
*/
public SequenceNumber end() {
return code.getLast().getSeqnum();
}
/**
* Convert our collections to immutable ones
*
* <p>
* Each flow map and branch list is replaced by an unmodifiable view of itself, so after this
* call, the accessors hand out collections that callers cannot modify.
*/
private void cook() {
flowsFrom = Collections.unmodifiableMap(flowsFrom);
flowsTo = Collections.unmodifiableMap(flowsTo);
branchesFrom = Collections.unmodifiableList(branchesFrom);
branchesTo = Collections.unmodifiableList(branchesTo);
branchesOut = Collections.unmodifiableList(branchesOut);
}
/**
* Get (internal) flows leaving this block
*
* <p>
* The map itself is returned, not a copy.
*
* @return the flows, keyed by branch
*/
public Map<IntBranch, BlockFlow> flowsFrom() {
return flowsFrom;
}
/**
* Get (internal) flows entering this block
*
* <p>
* The map itself is returned, not a copy.
*
* @return the flows, keyed by branch
*/
public Map<IntBranch, BlockFlow> flowsTo() {
return flowsTo;
}
/**
* Get internal branches leaving this block
*
* <p>
* The list itself is returned, not a copy.
*
* @return the list of branches
*/
public List<IntBranch> branchesFrom() {
return branchesFrom;
}
/**
 * Get the internal branches that enter this block
 *
 * @return the list of incoming internal branches
 */
public List<IntBranch> branchesTo() {
    return this.branchesTo;
}
/**
 * Get the branches from this block that leave the passage
 *
 * @return the list of branches out of the passage
 */
public List<Branch> branchesOut() {
    return this.branchesOut;
}
/**
 * Find the block into which this block falls through, if any
 *
 * <p>
 * The outgoing flows are searched for one whose branch is a fall-through.
 *
 * @return the target of the fall-through flow, or {@code null} if there is none
 */
public JitBlock getFallFrom() {
    for (BlockFlow flow : flowsFrom.values()) {
        if (flow.branch.isFall()) {
            return flow.to;
        }
    }
    return null;
}
/**
 * Check whether any internal branch other than a fall-through targets this block
 *
 * <p>
 * The {@link JitCodeGenerator} uses this to decide whether or not the bytecode for a block
 * needs a label.
 *
 * @return true if some incoming flow is not a fall-through
 */
public boolean hasJumpTo() {
    for (BlockFlow incoming : flowsTo.values()) {
        if (!incoming.branch.isFall()) {
            return true;
        }
    }
    return false;
}
/**
 * Get the target block for the given internal branch, assuming it's from this block
 *
 * <p>
 * The branch is looked up among the flows leaving this block. If this block has no flow for the
 * given branch, e.g., because the branch actually leaves a different block, the result is
 * {@code null} rather than a {@link NullPointerException}.
 *
 * @param branch the branch
 * @return the target block, or {@code null} if no flow from this block uses the branch
 */
public JitBlock getTargetBlock(IntBranch branch) {
    BlockFlow flow = flowsFrom.get(branch);
    // Honor the documented "or null" contract instead of dereferencing a missing flow
    return flow == null ? null : flow.to;
}
/**
 * Get the number of instructions counted for this block
 *
 * <p>
 * Blocks are not necessarily split on instruction boundaries, so this count is approximate in
 * that sense. The aim is to count instructions executed at runtime, so that an execution can
 * be replayed, stepped in reverse, etc. The value is the number of ops in this block that are
 * the first op produced by a decoded instruction.
 *
 * @see JitCompiledPassage#count(int, int)
 * @see JitPcodeThread#count(int, int)
 * @return the instruction count
 */
public int instructionCount() {
    return this.instructions;
}
/**
 * Get the number of ops following the last instruction start in this block
 *
 * <p>
 * A block may represent only part of an instruction's execution. While
 * {@link #instructionCount()} includes that partial instruction, this value tells how far into
 * it the block proceeds. Together, they should allow replaying an execution to exactly the
 * same p-code op step.
 *
 * @return the trailing op count
 */
public int trailingOpCount() {
    return this.trailingOps;
}
}
/**
* A class that splits a sequence of ops and associated branches into basic blocks.
*
* <p>
* This is the kernel of control flow analysis. It first indexes the branches by source and
* target op. Note that only non-fall-through branches are known at this point. Then, it
* traverses the list of ops. A split occurs following an op that is a branch source and/or
* preceding an op that is a branch target. A block is constructed when such a split point is
* encountered. In the case of a branch source, the branch is added to the newly constructed
* block. As traversal proceeds to the next op, it checks if the immediately-preceding block
* should have fall through (conditional or unconditional) by examining its last op. It adds a
* new fall-through branch if so. The end of the p-code op list is presumed a split point. If
* that final block "should have" fall through, an {@link UnterminatedFlowException} is thrown.
*
* <p>
* Once all the splitting is done, we have the blocks and all the branches (internal or
* external) that leave each block. We then compute all the branches (internal) that enter each
* block and the associated flows in both directions.
*/
public static class BlockSplitter {
private final PcodeProgram program;
// All known branches, indexed by their source op
private final Map<PcodeOp, Branch> branches = new HashMap<>();
// Internal branches, indexed by their target op. A later branch to the same target replaces an
// earlier one; the analysis only checks whether an entry is present.
private final Map<PcodeOp, IntBranch> branchesByTarget = new HashMap<>();
// The completed blocks, in order of creation, keyed by each block's first op
private final SequencedMap<PcodeOp, JitBlock> blocks = new LinkedHashMap<>();
// The ops accumulated so far for the block under construction
private List<PcodeOp> partialBlock = new ArrayList<>();
// The most recently completed block still awaiting its fall-through check, or null
private JitBlock lastBlock = null;
/**
* Construct a new block splitter to process the given program
*
* <p>
* No analysis is performed in the constructor. The client must call
* {@link #addBranches(Collection)} and then {@link #splitBlocks()}.
*
* @param program the program, i.e., list of p-code ops
*/
public BlockSplitter(PcodeProgram program) {
this.program = program;
}
/**
* Notify the splitter of the given branches before analysis
*
* <p>
* The splitter immediately indexes the given branches by source and target op. Only
* {@link IntBranch internal} branches are indexed by target.
*
* @param branches the branches
*/
public void addBranches(Collection<? extends Branch> branches) {
for (Branch b : branches) {
this.branches.put(b.from(), b);
if (b instanceof IntBranch ib) {
this.branchesByTarget.put(ib.to(), ib);
}
}
}
/**
* Complete the block under construction, if it has any ops
*
* <p>
* The new block becomes {@link #lastBlock}, is entered into {@link #blocks} keyed by its first
* op, and a fresh list is started for the next block.
*
* @return the new block, or {@code null} if no ops had been accumulated
*/
private JitBlock makeBlock() {
if (!partialBlock.isEmpty()) {
lastBlock = new JitBlock(program, partialBlock);
partialBlock = new ArrayList<>();
blocks.put(lastBlock.first(), lastBlock);
return lastBlock;
}
return null;
}
/**
* Check whether execution can proceed from the given block into the op following it
*
* <p>
* A block with no branch recorded always needs fall through. A block with exactly one branch
* needs it if {@link JitPassage#hasFallthrough(PcodeOp)} says so for that branch's source op.
* Any other combination is unexpected at the points where this is called.
*
* @param block the block to examine
* @return true if the block needs a fall-through branch
* @throws AssertionError if the block's recorded branches match none of the expected cases
*/
private boolean needsFallthrough(JitBlock block) {
if (block.branchesFrom.isEmpty() && block.branchesOut.isEmpty()) {
return true;
}
if (block.branchesFrom.size() == 1) {
return JitPassage.hasFallthrough(block.branchesFrom.getFirst().from());
}
if (block.branchesOut.size() == 1) {
return JitPassage.hasFallthrough(block.branchesOut.getFirst().from());
}
throw new AssertionError();
}
/**
* If there is a just-completed block that needs fall through, add a fall-through branch from
* its last op to the given op
*
* <p>
* In any case, {@link #lastBlock} is cleared, so each completed block is checked at most once.
*
* @param op the op immediately following the just-completed block
*/
private void checkForFallthrough(PcodeOp op) {
if (lastBlock == null) {
return;
}
if (needsFallthrough(lastBlock)) {
lastBlock.branchesFrom.add(new IntBranch(lastBlock.getCode().getLast(), op, true));
}
lastBlock = null;
}
/**
* Compute the incoming branches and the flows (in both directions) for every block
*
* <p>
* For each internal branch leaving a block, the target block is found by the branch's target
* op. It is an error (a {@link NullPointerException}) if no block starts at that op.
*/
private void fillFlows() {
for (JitBlock from : blocks.values()) {
for (Branch branch : from.branchesFrom) {
if (branch instanceof IntBranch ib) {
JitBlock to = Objects.requireNonNull(blocks.get(ib.to()));
to.branchesTo.add(ib);
BlockFlow flow = new BlockFlow(from, to, ib);
from.flowsFrom.put(ib, flow);
to.flowsTo.put(ib, flow);
}
}
}
}
/**
* Make the collections of every block unmodifiable
*/
private void cook() {
for (JitBlock block : blocks.values()) {
block.cook();
}
}
/**
* Look up an internal branch targeting the given op
*
* @param to the candidate target op
* @return a branch to the op, or {@code null} if the op is not a known branch target
*/
private IntBranch getBranchTo(PcodeOp to) {
return branchesByTarget.get(to);
}
/**
* Look up the branch whose source is the given op
*
* @param from the candidate source op
* @return the branch from the op, or {@code null} if the op is not a known branch source
*/
private Branch getBranchFrom(PcodeOp from) {
return branches.get(from);
}
/**
* Split the program into blocks, then compute their flows and freeze their collections
*
* @throws IllegalArgumentException if the program has no ops
* @throws UnterminatedFlowException if the final block would fall through the end of the
* program
*/
private void doWork() {
if (program.getCode().isEmpty()) {
throw new IllegalArgumentException("No code to analyze");
}
for (PcodeOp op : program.getCode()) {
// This op would be after the block from the last iteration
checkForFallthrough(op);
IntBranch branchTo = getBranchTo(op);
if (branchTo != null) {
makeBlock();
// This op would be after the block we just made
checkForFallthrough(op);
}
partialBlock.add(op);
Branch branchFrom = getBranchFrom(op);
if (branchFrom != null) {
makeBlock();
// NB. lastBlock cannot be null, we just added the op
if (branchFrom instanceof IntBranch ib) {
lastBlock.branchesFrom.add(ib);
}
else {
lastBlock.branchesOut.add(branchFrom);
}
/**
* Do not checkForFallthrough, because the current op is already in the block
*/
}
}
// The end of the program is a split point: complete whatever remains
makeBlock();
if (needsFallthrough(lastBlock)) {
/**
* I'm making it the decoder's responsibility to provide a sane program. We can
* catch missing control flow at the very end, but we cannot do so at the end of
* other blocks. If they have fall-through, they'll (perhaps erroneously) fall
* through to the next block that happens to be there. Thus, it is up to the
* decoder, if it decodes any incomplete strides, that it must synthesize the
* appropriate control-flow ops.
*/
throw new UnterminatedFlowException();
}
fillFlows();
cook();
}
/**
* Get the blocks produced so far
*
* @return the blocks, keyed by each block's first op
*/
private SequencedMap<PcodeOp, JitBlock> getBlocks() {
return blocks;
}
/**
* Perform the actual analysis
*
* @return the resulting split blocks, keyed by {@link JitBlock#first()}
*/
public SequencedMap<PcodeOp, JitBlock> splitBlocks() {
doWork();
return getBlocks();
}
}
private final JitPassage passage;
private final SequencedMap<PcodeOp, JitBlock> blocks;
/**
 * Construct the control flow model for the passage of the given context.
 *
 * <p>
 * The analysis runs during construction, so the blocks are available as soon as this returns.
 *
 * @param context the analysis context
 */
public JitControlFlowModel(JitAnalysisContext context) {
    // NOTE: The passage must be assigned first, because analyze() reads it
    this.passage = context.getPassage();
    this.blocks = this.analyze();
}
/**
 * Split the passage into basic blocks.
 *
 * <p>
 * The passage's branches are given to a new {@link BlockSplitter}, which then does the work.
 *
 * @return the resulting blocks, keyed by {@link JitBlock#first()}
 */
protected SequencedMap<PcodeOp, JitBlock> analyze() {
    BlockSplitter blockSplitter = new BlockSplitter(this.passage);
    blockSplitter.addBranches(this.passage.getBranches().values());
    SequencedMap<PcodeOp, JitBlock> result = blockSplitter.splitBlocks();
    return result;
}
/**
 * Get the basic blocks found by the analysis
 *
 * @return the collection of blocks
 */
public Collection<JitBlock> getBlocks() {
    return this.blocks.values();
}
/**
 * For diagnostics: Print the blocks, with their branches and flows, to stderr
 *
 * <p>
 * For each block, the incoming branches and flows are printed first, then the block's p-code,
 * then the outgoing branches and flows, and finally the branches leaving the passage.
 *
 * @see Diag#PRINT_CFM
 */
public void dumpResult() {
    final var err = System.err;
    err.println("STAGE: ControlFlow");
    for (JitBlock block : blocks.values()) {
        err.println("");
        err.println("Block: " + block);

        err.println("Branches to:");
        block.branchesTo.forEach(b -> err.println(" " + b));
        err.println("Flows to:");
        block.flowsTo.values().forEach(f -> err.println(" " + f));

        err.println(block.format(true));

        err.println("Branches from:");
        block.branchesFrom.forEach(b -> err.println(" " + b));
        err.println("Flows from:");
        block.flowsFrom.values().forEach(f -> err.println(" " + f));

        err.println("Branches out:");
        block.branchesOut.forEach(b -> err.println(" " + b));
    }
}
}

View file

@ -0,0 +1,398 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;
import ghidra.pcode.emu.jit.analysis.JitDataFlowState.MiniDFState;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.*;
import ghidra.pcode.exec.ConcretionError;
import ghidra.pcode.exec.PcodeArithmetic;
import ghidra.pcode.opbehavior.OpBehaviorFactory;
import ghidra.pcode.opbehavior.OpBehaviorSubpiece;
import ghidra.pcode.utils.Utils;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Endian;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.Varnode;
/**
 * A p-code arithmetic for interpreting p-code and constructing a use-def graph
 *
 * <p>
 * This is used for intra-block data flow analysis. We leverage the same API as is used for concrete
 * p-code interpretation, but we use it for an abstraction. The type of the interpretation is
 * {@code T:=}{@link JitVal}, which can consist of constants and variables in the use-def graph. The
 * arithmetic must be provided to the {@link JitDataFlowExecutor}. The intra-block portions of the
 * use-def graph are populated as each block is interpreted by the executor.
 *
 * <p>
 * The general strategy for each of the arithmetic operations is to 1) generate the output SSA
 * variable for the op, 2) generate the op node for the generated output and given inputs, 3) enter
 * the op into the use-def graph as the definition of its output, 4) record the inputs as used by
 * the new op, and finally 5) return the generated output.
 *
 * <p>
 * There should only need to be one of these per data flow model, not per block.
 */
public class JitDataFlowArithmetic implements PcodeArithmetic<JitVal> {
    private static final OpBehaviorSubpiece OB_SUBPIECE =
        (OpBehaviorSubpiece) OpBehaviorFactory.getOpBehavior(PcodeOp.SUBPIECE);

    private final JitDataFlowModel dfm;
    private final Endian endian;

    /**
     * Construct the arithmetic
     *
     * @param context the analysis context
     * @param dfm the owning data flow model
     */
    public JitDataFlowArithmetic(JitAnalysisContext context, JitDataFlowModel dfm) {
        this.dfm = dfm;
        this.endian = context.getEndian();
    }

    @Override
    public Endian getEndian() {
        return endian;
    }

    /**
     * Remove {@code amt} bytes from the right of the <em>varnode</em>.
     *
     * <p>
     * "Right" is considered with respect to the machine endianness. If it is little endian, then
     * the bytes are shaved from the <em>left</em> of the value. This should be used when getting
     * values from the state to remove pieces from off-cut values. It should be applied before the
     * pieces are ordered according to machine endianness.
     *
     * @param in1Vn the varnode representing the input
     * @param amt the number of bytes to remove
     * @param in1 the input (really a value read from the state)
     * @return the resulting value
     */
    public JitVal truncFromRight(Varnode in1Vn, int amt, JitVal in1) {
        Varnode outVn = new Varnode(in1Vn.getAddress(), in1Vn.getSize() - amt);
        return subpiece(outVn, endian.isBigEndian() ? amt : 0, in1);
    }

    /**
     * Remove {@code amt} bytes from the left of the <em>varnode</em>.
     *
     * <p>
     * "Left" is considered with respect to the machine endianness. If it is little endian, then the
     * bytes are shaved from the <em>right</em> of the value. This should be used when getting
     * values from the state to remove pieces from off-cut values. It should be applied before the
     * pieces are ordered according to machine endianness.
     *
     * @param in1Vn the varnode representing the input
     * @param amt the number of bytes to remove
     * @param in1 the input (really a value read from the state)
     * @return the resulting value
     */
    public JitVal truncFromLeft(Varnode in1Vn, int amt, JitVal in1) {
        Varnode outVn = new Varnode(in1Vn.getAddress().add(amt), in1Vn.getSize() - amt);
        return subpiece(outVn, endian.isBigEndian() ? 0 : amt, in1);
    }

    /**
     * Remove a total of {@code offset} bytes from the right end of the given list of parts
     *
     * <p>
     * Whole parts are removed from the end of the list until at least {@code offset} bytes are
     * gone. If the last part removed was larger than what remained to be removed, the bytes that
     * should have been kept are restored by shaving only the excess from the right of that part and
     * appending the result. The list is modified in place.
     *
     * @param parts the (mutable) list of parts, whose last element is the rightmost
     * @param offset the number of bytes to remove
     */
    private void removeOffsetFromRight(List<JitVal> parts, int offset) {
        if (offset == 0) {
            // Nothing to remove. Without this, the loop below would needlessly take off a part.
            return;
        }
        JitVal p;
        do {
            p = parts.remove(parts.size() - 1);
            offset -= p.size();
        }
        while (offset > 0);
        if (offset < 0) {
            /*
             * We removed -offset bytes too many. Of p, only p.size() + offset bytes (recall offset
             * is negative) were to be removed, so shave exactly that many and keep the rest.
             */
            JitVal np = shaveFromRight(p.size() + offset, p);
            parts.add(np);
            offset += np.size();
            assert offset == 0;
        }
    }

    /**
     * Remove bytes from the left end of the given list of parts until {@code size} bytes remain
     *
     * <p>
     * Parts are counted from the end of the list toward the front until at least {@code size}
     * bytes are covered. If the leftmost counted part pushes the total over, its excess is shaved
     * from its left and it is replaced in place. All parts further left are removed. The list is
     * modified in place.
     *
     * @param parts the (mutable) list of parts, whose first element is the leftmost
     * @param size the number of bytes to keep
     */
    private void removeFromLeftToSize(List<JitVal> parts, int size) {
        int actualSize = 0;
        JitVal p;
        int i = parts.size();
        do {
            p = parts.get(--i);
            actualSize += p.size();
        }
        while (actualSize < size);
        if (actualSize > size) {
            // p, at index i, has actualSize - size bytes too many on its left
            JitVal np = shaveFromLeft(actualSize - size, p);
            parts.set(i, np);
            actualSize -= p.size();
            actualSize += np.size();
            assert actualSize == size;
        }
        // Drop everything to the left of the (possibly shaved) part at index i
        while (i > 0) {
            parts.remove(--i);
        }
    }

    /**
     * Try to produce a simplified {@link JitSynthSubPieceOp} or {@link JitCatenateOp}
     *
     * <p>
     * This takes an input, subpiece offset, and output variable. If the input variable is the
     * result of another subpiece, the result can be a single simplified subpiece. Similarly, if the
     * input is the result of a catenation, then the result can be a simplified catenation, or
     * possibly subpiece.
     *
     * <p>
     * If either of these situations applies, and simplification is possible, this returns a
     * non-null result, and that result is added to the use-def graph specifying the given output
     * variable as the simplified output. Otherwise, the result is null and the caller should create
     * a new subpiece op.
     *
     * @param out the output variable
     * @param offset the subpiece offset (number of bytes shifted right)
     * @param v the input value
     * @return the output variable, as the result of the simplified sub-graph, or {@code null}
     */
    private JitVal trySimplifiedSubPiece(JitOutVar out, int offset, JitVal v) {
        if (!(v instanceof JitOutVar vOut)) {
            return null;
        }
        if (vOut.definition() instanceof JitSynthSubPieceOp subsub) {
            // A subpiece of a subpiece: add the offsets and take it from the original input
            subsub.unlink();
            return dfm
                    .notifyOp(new JitSynthSubPieceOp(out, offset + subsub.offset(), subsub.v()))
                    .out();
        }
        if (vOut.definition() instanceof JitCatenateOp cat) {
            // A subpiece of a catenation: trim the list of parts on both ends
            cat.unlink();
            List<JitVal> newParts = new ArrayList<>(cat.parts());
            removeOffsetFromRight(newParts, offset);
            removeFromLeftToSize(newParts, out.size());
            assert !newParts.isEmpty();
            if (newParts.size() == 1) {
                // Context should already be notified
                return newParts.get(0);
            }
            return dfm.notifyOp(new JitCatenateOp(out, newParts)).out();
        }
        return null;
    }

    /**
     * Construct the result of taking the subpiece
     *
     * <p>
     * If the input is another subpiece or a catenation, the result may be simplified. In
     * particular, the subpiece of a catenation may be a smaller catenation. No matter the case, the
     * given output variable is made the output of the subpiece result, and the use-def graph is
     * updated accordingly.
     *
     * @param outVn the output variable
     * @param offset the subpiece offset (number of bytes shifted right)
     * @param v the input value
     * @return the output variable, as the result of the simplified sub-graph
     */
    private JitVal subpiece(Varnode outVn, int offset, JitVal v) {
        JitOutVar out = dfm.generateOutVar(outVn);
        JitVal simplified = trySimplifiedSubPiece(out, offset, v);
        if (simplified != null) {
            return simplified;
        }
        return dfm.notifyOp(new JitSynthSubPieceOp(out, offset, v)).out();
    }

    /**
     * Compute the varnode occupied by a subpiece of the given varnode
     *
     * @param size the size of the subpiece in bytes
     * @param offset the subpiece offset (number of bytes shifted right)
     * @param whole the varnode of the whole value
     * @return the varnode of the subpiece, according to the machine endianness
     */
    private Varnode subPieceVn(int size, int offset, Varnode whole) {
        if (endian.isBigEndian()) {
            return new Varnode(whole.getAddress().add(whole.getSize() - offset - size), size);
        }
        return new Varnode(whole.getAddress().add(offset), size);
    }

    /**
     * Remove {@code amt} bytes from the right of the value.
     *
     * <p>
     * The value is unaffected by the machine endianness, except to designate the output varnode.
     *
     * @param amt the number of bytes to remove
     * @param in1 the input
     * @return the output
     */
    public JitVal shaveFromRight(int amt, JitVal in1) {
        return subpiece(in1.size() - amt, amt, in1);
    }

    /**
     * Remove {@code amt} bytes from the left of the value.
     *
     * <p>
     * The value is unaffected by the machine endianness, except to designate the output varnode.
     *
     * @param amt the number of bytes to remove
     * @param in1 the input
     * @return the output
     */
    public JitVal shaveFromLeft(int amt, JitVal in1) {
        return subpiece(in1.size() - amt, 0, in1);
    }

    /**
     * Compute the subpiece of a value.
     *
     * <p>
     * The result is added to the use-def graph. The output varnode is computed from the input
     * varnode and the subpiece parameters. This is used to handle variable retrieval when an access
     * only includes parts of a value previously written. Consider the x86 assembly:
     *
     * <pre>
     * MOV RAX, qword ptr [...]
     * MOV dword ptr [...], EAX
     * </pre>
     *
     * <p>
     * The second line reads {@code EAX}, which consists of only the lower part of {@code RAX}.
     * Thus, we synthesize a subpiece op. These are distinct from an actual {@link PcodeOp#SUBPIECE}
     * op, since we sometimes need to filter out synthetic ops.
     *
     * @param size the size of the output variable in bytes
     * @param offset the subpiece offset (number of bytes shifted right)
     * @param v the input value
     * @return the output value
     * @throws UnsupportedOperationException if the input is neither a constant nor a varnode
     *             variable
     */
    public JitVal subpiece(int size, int offset, JitVal v) {
        if (v instanceof JitConstVal c) {
            // Constants are folded immediately; nothing enters the use-def graph
            return new JitConstVal(size,
                OB_SUBPIECE.evaluateBinary(size, v.size(), c.value(), BigInteger.valueOf(offset)));
        }
        if (v instanceof JitVarnodeVar vv) {
            Varnode inVn = vv.varnode();
            Varnode outVn = subPieceVn(size, offset, inVn);
            return subpiece(outVn, offset, v);
        }
        throw new UnsupportedOperationException("unsupported subpiece of " + v);
    }

    /**
     * Construct the catenation of the given values to form the given output varnode.
     *
     * <p>
     * The result is added to the use-def graph. This is used to handle variable retrieval when the
     * pattern of accesses indicates catenation. Consider the x86 assembly:
     *
     * <pre>
     * MOV AH, byte ptr [...]
     * MOV AL, byte ptr [...]
     * MOV word ptr [...], AX
     * </pre>
     *
     * <p>
     * On the third line, the value in {@code AX} is the catenation of whatever values were written
     * into {@code AH} and {@code AL}. Thus, we synthesize a catenation op node in the use-def
     * graph.
     *
     * @param outVn the output varnode
     * @param parts the list of values to catenate, ordered by machine endianness
     * @return the output value
     * @see MiniDFState#getDefinitions(AddressSpace, long, int)
     */
    public JitVal catenate(Varnode outVn, List<JitVal> parts) {
        return dfm.notifyOp(new JitCatenateOp(dfm.generateOutVar(outVn), parts)).out();
    }

    @Override
    public JitVal unaryOp(PcodeOp op, JitVal in1) {
        return dfm.notifyOp(JitOp.unOp(op, dfm.generateOutVar(op.getOutput()), in1)).out();
    }

    @Override
    public JitVal unaryOp(int opcode, int sizeout, int sizein1, JitVal in1) {
        // Only the form taking the whole op is supported
        throw new AssertionError();
    }

    @Override
    public JitVal binaryOp(PcodeOp op, JitVal in1, JitVal in2) {
        return dfm.notifyOp(JitOp.binOp(op, dfm.generateOutVar(op.getOutput()), in1, in2)).out();
    }

    @Override
    public JitVal binaryOp(int opcode, int sizeout, int sizein1, JitVal in1, int sizein2,
            JitVal in2) {
        // Only the form taking the whole op is supported
        throw new AssertionError();
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * We override this to record the {@link JitStoreOp store} op into the use-def graph. As
     * "output" we just return {@code inValue}. The executor will call
     * {@link JitDataFlowState#setVar(AddressSpace, JitVal, int, boolean, JitVal) setVal}, but the
     * state will just ignore it, because it will be an indirect memory write.
     */
    @Override
    public JitVal modBeforeStore(PcodeOp op, AddressSpace space, JitVal inOffset, JitVal inValue) {
        return dfm.notifyOp(new JitStoreOp(op, space, inOffset, inValue)).value();
    }

    @Override
    public JitVal modBeforeStore(int sizeinOffset, AddressSpace space, JitVal inOffset,
            int sizeinValue, JitVal inValue) {
        // Only the form taking the whole op is supported
        throw new AssertionError();
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * We override this to record the {@link JitLoadOp load} op into the use-def graph. For our
     * {@code inValue}, the {@link JitDataFlowState state} will have just returned the
     * {@link JitIndirectMemoryVar#INSTANCE dummy indirect} variable definition. We must not "use"
     * this. Instead, we must take our other parameters to construct the load op and return its
     * output.
     */
    @Override
    public JitVal modAfterLoad(PcodeOp op, AddressSpace space, JitVal inOffset, JitVal inValue) {
        return dfm.notifyOp(new JitLoadOp(
            op, dfm.generateOutVar(op.getOutput()), space, inOffset)).out();
    }

    @Override
    public JitVal modAfterLoad(int sizeinOffset, AddressSpace space, JitVal inOffset,
            int sizeinValue, JitVal inValue) {
        // Only the form taking the whole op is supported
        throw new AssertionError();
    }

    @Override
    public JitVal fromConst(byte[] value) {
        BigInteger bigVal =
            Utils.bytesToBigInteger(value, value.length, endian.isBigEndian(), false);
        return JitVal.constant(value.length, bigVal);
    }

    @Override
    public byte[] toConcrete(JitVal value, Purpose purpose) {
        // Only constants have a concrete value at analysis time
        if (value instanceof JitConstVal c) {
            return Utils.bigIntegerToBytes(c.value(), c.size(), endian.isBigEndian());
        }
        throw new ConcretionError("Cannot concretize " + value, purpose);
    }

    @Override
    public long sizeOf(JitVal value) {
        return value.size();
    }
}

View file

@ -0,0 +1,219 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.util.*;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.BlockFlow;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.op.JitPhiOp;
import ghidra.pcode.emu.jit.var.JitMissingVar;
import ghidra.pcode.emu.jit.var.JitVal;
import ghidra.pcode.exec.PcodeExecutor;
import ghidra.pcode.exec.PcodeExecutorStatePiece.Reason;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Register;
import ghidra.program.model.pcode.Varnode;
/**
 * The per-block portion of the data flow analysis performed by {@link JitDataFlowModel}
 *
 * <p>
 * The model creates one analyzer for each basic block in the passage. An analyzer performs the
 * intra-block analysis for its block and takes part in the inter-block analysis. It also holds,
 * and provides access to, some of the analytic results.
 *
 * @see JitDataFlowModel#getAnalyzer(JitBlock)
 */
public class JitDataFlowBlockAnalyzer {
    private final JitAnalysisContext context;
    private final JitDataFlowModel dfm;
    private final JitBlock block;

    private final JitDataFlowArithmetic arithmetic;
    private final JitDataFlowUseropLibrary library;
    private final JitDataFlowState state;

    private final boolean isEntry;

    JitDataFlowBlockAnalyzer(JitAnalysisContext context, JitDataFlowModel dfm, JitBlock block) {
        this.context = context;
        this.dfm = dfm;
        this.block = block;

        this.arithmetic = dfm.getArithmetic();
        this.library = dfm.getLibrary();
        this.state = new JitDataFlowState(context, dfm, block);

        // The block is an entry if the context has an entry for its first op
        this.isEntry = context.getOpEntry(block.first()) != null;
    }

    /**
     * Run the intra-block analysis for this block
     *
     * <p>
     * The block's p-code is simply run through the analytic interpreter. See the section on
     * intra-block analysis in {@link JitDataFlowModel}.
     */
    void doIntrablock() {
        PcodeExecutor<JitVal> executor = new JitDataFlowExecutor(context, dfm, state);
        executor.execute(block, library);
    }

    /**
     * Begin the recursive search for the options of the given phi op
     *
     * <p>
     * See the section on inter-block analysis in {@link JitDataFlowModel}. The given phi op is
     * modified in place: each option found is added to it. There are more details here than in the
     * data flow model's documentation. Keep in mind that a varnode may be only partially defined,
     * e.g., when reading {@code RAX}, perhaps only {@code EAX} has been defined. In such cases, we
     * must catenate in the same manner as when reading the varnode during intra-block analysis.
     * The portions lacking a definition generate corresponding phi nodes, which are treated
     * recursively.
     *
     * @param phi the phi op for which we seek options
     */
    void fillPhiFromDeps(JitPhiOp phi) {
        Set<JitBlock> visited = new HashSet<>();
        fillPhiFromDeps(phi, visited);
    }

    /**
     * Add options to the given phi op
     *
     * <p>
     * If this block is an entry, the input is added as an option. <em>In addition</em>, every
     * upstream block (dependency) is considered as an option. There is nothing to consider when
     * the block has no inward flows.
     *
     * @param phi the phi op for which we seek options
     * @param visited the blocks which have already been visited during recursion
     */
    private void fillPhiFromDeps(JitPhiOp phi, Set<JitBlock> visited) {
        if (isEntry) {
            phi.addInputOption();
        }
        for (BlockFlow inward : block.flowsTo().values()) {
            JitDataFlowBlockAnalyzer upstream = dfm.getOrCreateAnalyzer(inward.from());
            upstream.fillPhiFromBlock(phi, inward, visited);
        }
    }

    /**
     * Consider the given flow, which leaves this block, as an option for the given phi op
     *
     * <p>
     * If this block was already visited, nothing is done. Otherwise, this block's output state is
     * examined for definitions of the phi op's varnode. Where there are gaps (possibly the entire
     * varnode sought), new phi nodes are generated and queued to be filled later.
     *
     * @param phi the phi op for which we seek an option
     * @param flow the flow from the block to consider
     * @param visited the blocks which have already been visited during recursion
     */
    private void fillPhiFromBlock(JitPhiOp phi, BlockFlow flow, Set<JitBlock> visited) {
        if (!visited.add(block)) {
            /*
             * NOTE: There is no need to remove the block before returning. If it was not found by
             * this path, it certainly won't be found from here by another path.
             */
            return;
        }
        Varnode sought = phi.out().varnode();
        List<JitVal> found = state.getDefinitions(sought);
        if (found.size() == 1) {
            JitVal sole = found.get(0);
            if (sole instanceof JitMissingVar missing) {
                // Require the chain to have a node in this block
                JitPhiOp chained = missing.generatePhi(dfm, block);
                dfm.phiQueue.add(chained);
                state.setVar(missing.varnode(), chained.out());
                // The new phi node will get filled on a subsequent round
                phi.addOption(flow, chained.out());
            }
            else {
                phi.addOption(flow, sole);
            }
            return;
        }
        /*
         * The definition comes in pieces. New phi nodes will be picked up in the next round of
         * filling. Since parts are smaller than the whole, the size of such nodes should shrink
         * until a singular definition is found.
         */
        List<JitVal> pieces = state.generatePhis(found, dfm.phiQueue);
        JitVal catenated = arithmetic.catenate(sought, pieces);
        phi.addOption(flow, catenated);
    }

    /**
     * Get a complete catalog of all varnodes read, including overlapping, subregs, etc.
     *
     * @return the set of varnodes read
     */
    public Set<Varnode> getVarnodesRead() {
        return this.state.getVarnodesRead();
    }

    /**
     * Get a complete catalog of all varnodes written, including overlapping, subregs, etc.
     *
     * @return the set of varnodes written
     */
    public Set<Varnode> getVarnodesWritten() {
        return this.state.getVarnodesWritten();
    }

    /**
     * Get the values, in order, that make up the latest definition of the given varnode.
     *
     * @see JitDataFlowState#getDefinitions(Varnode)
     * @param varnode the varnode whose definition(s) to retrieve
     * @return the list of values
     */
    public List<JitVal> getOutput(Varnode varnode) {
        return this.state.getDefinitions(varnode);
    }

    /**
     * Get the values, in order, that make up the latest definition of the given register.
     *
     * @see JitDataFlowState#getDefinitions(Register)
     * @param register the register whose definition(s) to retrieve
     * @return the list of values
     */
    public List<JitVal> getOutput(Register register) {
        return this.state.getDefinitions(register);
    }

    /**
     * Get the latest definition of the given varnode, synthesizing ops if required.
     *
     * <p>
     * NOTE: This may produce phi nodes that need additional inter-block analysis.
     *
     * @see JitDataFlowModel#analyzeInterblock(Collection)
     * @see JitDataFlowState#getVar(AddressSpace, JitVal, int, boolean, Reason)
     * @param vn the varnode
     * @return the latest definition for the block analyzed
     */
    public JitVal getVar(Varnode vn) {
        return this.state.getVar(vn, Reason.EXECUTE_READ);
    }
}

View file

@ -0,0 +1,165 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.util.Map;
import java.util.Objects;
import ghidra.pcode.emu.jit.JitPassage.Branch;
import ghidra.pcode.emu.jit.JitPassage.IndBranch;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.JitVal;
import ghidra.pcode.exec.*;
import ghidra.pcode.exec.PcodeExecutorStatePiece.Reason;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.Varnode;
/**
* A modification to {@link PcodeExecutor} that is specialized for the per-block data flow analysis.
*
* <p>
* Normally, the p-code executor follows all of the control-flow branching, as you would expect in
* the interpretation-based p-code emulator. For analysis, we do not intend to actually follow
* branches. These should only ever occur at the end of a basic block, anyway.
*
* <p>
* We do record the branch ops into the graph as {@link JitOp op nodes}. A conditional branch
* naturally participates in the data flow, as it uses the definition of its predicate varnode.
* Similarly, indirect branches use the definitions of their target varnodes. Direct branch
* operations are also added to the use-def graph, even though they do not use any variable
* definition. Architecturally, the code generator emits JVM bytecode from the op nodes in the
* use-def graph. For that to work, every p-code op must be entered into it. For bookkeeping, and
* because the code generator will need them, we look up the {@link Branch} records created by the
* passage decoder and store them in their respective branch op nodes.
*
* <p>
* This is all accomplished by overriding {@link #executeBranch(PcodeOp, PcodeFrame)} and similar
* branch execution methods. Additionally, we override {@link #badOp(PcodeOp)} and
* {@link #onMissingUseropDef(PcodeOp, PcodeFrame, String, PcodeUseropLibrary)}, because the
* inherited implementations will throw exceptions. We need not throw an exception until/unless we
* reach such bad code at run time. So, we enter them into the use-def graph as op nodes from which
* we later generate the code to throw the exception.
*/
class JitDataFlowExecutor extends PcodeExecutor<JitVal> {
    /** The model whose use-def graph receives every op node recorded by this executor */
    private final JitDataFlowModel dfm;
    /** The branch records obtained from the passage, keyed by the p-code op that branches */
    private final Map<PcodeOp, Branch> branches;

    /**
     * Create an executor for data flow interpretation
     *
     * @param context the analysis context, which supplies the language and the passage whose
     *            decoder-recorded branches are looked up here
     * @param dfm the data-flow model whose arithmetic is used and whose use-def graph is
     *            populated
     * @param state the executor state, which tracks varnode definitions during execution
     */
    protected JitDataFlowExecutor(JitAnalysisContext context, JitDataFlowModel dfm,
            PcodeExecutorState<JitVal> state) {
        super(context.getLanguage(), dfm.getArithmetic(), state, Reason.EXECUTE_READ);
        this.branches = context.getPassage().getBranches();
        this.dfm = dfm;
    }

    /**
     * Enter a direct branch or call op into the use-def graph
     *
     * <p>
     * The target is not computed here. The passage decoder already did that, and it is better
     * placed to do so because it has the original p-code (injects and inlined userops made
     * earlier attempts to compute it here fail). We simply fetch the {@link Branch} the decoder
     * recorded for this op.
     *
     * @param op the op
     * @throws NullPointerException if no branch record exists for the op
     */
    protected void recordBranch(PcodeOp op) {
        dfm.notifyOp(new JitBranchOp(op, Objects.requireNonNull(branches.get(op))));
    }

    /**
     * Enter a conditional branch op into the use-def graph
     *
     * <p>
     * The {@link Branch} target is looked up just as for an unconditional branch, but the
     * predicate is a genuine use: its current definition is read from the state and attached
     * to the op node.
     *
     * @param op the op
     * @throws NullPointerException if no branch record exists for the op
     */
    protected void recordConditionalBranch(PcodeOp op) {
        // Look up the record first, so a missing record fails before the state is consulted
        Branch target = Objects.requireNonNull(branches.get(op));
        JitVal predicate = state.getVar(getConditionalBranchPredicate(op), reason);
        dfm.notifyOp(new JitCBranchOp(op, target, predicate));
    }

    /**
     * Enter an indirect branch op into the use-def graph
     *
     * <p>
     * The {@link IndBranch} carries the target decode context, but the address itself is
     * dynamic, so the definition of the target varnode is read from the state and used.
     *
     * @param op the op
     * @throws NullPointerException if no branch record exists for the op
     * @throws ClassCastException if the recorded branch is not an {@link IndBranch}
     */
    protected void recordIndirectBranch(PcodeOp op) {
        // The state is read before the record is looked up, as reading registers a use
        JitVal target = state.getVar(getIndirectBranchTarget(op), reason);
        Branch recorded = Objects.requireNonNull(branches.get(op));
        dfm.notifyOp(new JitBranchIndOp(op, target, (IndBranch) recorded));
    }

    /** Record the branch rather than follow it. */
    @Override
    public void executeBranch(PcodeOp op, PcodeFrame frame) {
        recordBranch(op);
    }

    /** Record the conditional branch, including its predicate, rather than follow it. */
    @Override
    public void executeConditionalBranch(PcodeOp op, PcodeFrame frame) {
        recordConditionalBranch(op);
    }

    /** Record the indirect branch, including its target value, rather than follow it. */
    @Override
    public void executeIndirectBranch(PcodeOp op, PcodeFrame frame) {
        recordIndirectBranch(op);
    }

    /** Record the call exactly as a direct branch; the library is not consulted. */
    @Override
    public void executeCall(PcodeOp op, PcodeFrame frame, PcodeUseropLibrary<JitVal> library) {
        recordBranch(op);
    }

    /** Record the indirect call exactly as an indirect branch. */
    @Override
    public void executeIndirectCall(PcodeOp op, PcodeFrame frame) {
        recordIndirectBranch(op);
    }

    /** Record the return exactly as an indirect branch. */
    @Override
    public void executeReturn(PcodeOp op, PcodeFrame frame) {
        recordIndirectBranch(op);
    }

    /** Enter a stub node for the op into the use-def graph instead of failing the analysis. */
    @Override
    protected void badOp(PcodeOp op) {
        dfm.notifyOp(JitOp.stubOp(op));
    }

    /** Enter a "missing userop" node, carrying the userop's name, instead of failing. */
    @Override
    protected void onMissingUseropDef(PcodeOp op, PcodeFrame frame, String opName,
            PcodeUseropLibrary<JitVal> library) {
        dfm.notifyOp(new JitCallOtherMissingOp(op, opName));
    }
}

View file

@ -0,0 +1,647 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.io.*;
import java.util.*;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.lifecycle.Internal;
import ghidra.pcode.emu.jit.JitCompiler;
import ghidra.pcode.emu.jit.JitCompiler.Diag;
import ghidra.pcode.emu.jit.JitPassage;
import ghidra.pcode.emu.jit.JitPassage.DecodedPcodeOp;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.*;
import ghidra.pcode.emu.jit.var.JitVal.ValUse;
import ghidra.pcode.exec.PcodeExecutorState;
import ghidra.pcode.exec.PcodeExecutorStatePiece.Reason;
import ghidra.pcode.exec.PcodeProgram;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Register;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.Varnode;
/**
* The data flow analysis for JIT-accelerated emulation.
*
* <p>
* This implements the Data Flow Analysis phase of the {@link JitCompiler}. The result is a use-def
* graph. The graph follows Static Single Assignment (SSA) form, in that each definition of a
* variable, even if it's at the same address as a previous definition, is given a unique
* identifier. The graph is bipartite with {@link JitOp ops} on one side and {@link JitVal values}
* on the other. Please note the distinction between a <em>varnode</em> and a <em>variable</em> in
* this context. A <em>varnode</em> refers to the address and size in the machine's state. For
* better or for worse, this is often referred to as a "variable" in other contexts. A
* <em>variable</em> in the SSA sense is a unique "instance" of a varnode with precisely one
* <em>definition</em>. Consider the following x86 assembly:
*
* <pre>
* MOV RAX, qword ptr [...]
* ADD RAX, RDX
* MOV qword ptr [...], RAX
* </pre>
*
* <p>
* Ignoring RAM, there are two varnodes at play, named for the registers they represent: {@code RAX}
* and {@code RDX}. However, there are three variables. The first is an instance of {@code RAX},
* defined by the first {@code MOV} instruction. The second is an instance of {@code RDX}, which is
* implicitly defined as an input to the passage. The third is another instance of {@code RAX},
* defined by the {@code ADD} instruction. These could be given unique names
* {@code RAX}<sub>1</sub>, {@code RDX}<sub>in</sub>, and {@code RAX}<sub>2</sub>, respectively.
* Thus, the {@code ADD} instruction uses {@code RAX}<sub>1</sub> and {@code RDX}<sub>in</sub>, to
* define {@code RAX}<sub>2</sub>. The last {@code MOV} instruction uses {@code RAX}<sub>2</sub>. If
* we plot each instruction and variable in a graph, drawing edges for each use and definition, we
* get a use-def graph.
*
* <p>
* Our analysis produces a use-def graph for the passage's p-code (not instructions) in two steps:
* First, we analyze each basic block independently. There are a lot of nuts and bolts in the
* implementation, but the analysis is achieved by straightforward interpretation of each block's
* p-code ops. Second, we connect the blocks' use-def graphs together using phi nodes where
* appropriate, according to the control flow.
*
* <h2>Intra-block analysis</h2>
* <p>
* For each block, we create a p-code interpreter consisting of a {@link JitDataFlowState} and
* {@link JitDataFlowExecutor}. Both are given this model's {@link JitDataFlowArithmetic}, which
* populates the use-def graph. We then feed the block's p-code into the executor. The block gets a
* fresh {@link JitDataFlowState}, so that its result has no dependency on the interpretation of any
* other block, except in the numbering of variable identifiers; those must be unique across the
* model.
*
* <p>
* During interpretation, varnode accesses generate value nodes. When a constant varnode is
* accessed, it simply creates a {@link JitConstVal}. When an op produces an output, it generates a
* {@link JitOutVar} and places it into the interpreter's {@link JitDataFlowState state} for its
* varnode. When a varnode is read, the interpreter examines its state for the last definition. If
* one is found, the variable is returned, its use noted, and nothing new is generated. Otherwise, a
* {@link JitMissingVar} is generated. Note that the interpreter does not track memory variables in
* its state, because the JIT translator does not seek to optimize these. At run time, such accesses
* will affect the emulator's state immediately. Registers and Sleigh uniques, on the other hand,
* are allocated as JVM locals, so we must know how they are used and defined. Direct memory
* accesses generate {@link JitDirectMemoryVar} and {@link JitMemoryOutVar}. Indirect memory
* accesses are denoted by the {@link JitLoadOp load} and {@link JitStoreOp store} op nodes, not as
* variables. There is a dummy {@link JitIndirectMemoryVar} singleton, so that the state can return
* something when the memory address is not fixed.
*
* <h2>Inter-block analysis</h2>
* <p>
* Up to this point, each block's use-def sub-graph is disconnected from the others'. We define each
* {@link JitMissingVar missing} variable generated during block interpretation as a {@link JitPhiOp
* phi} op. A phi op is said to belong to the block that generated the missing variable. We seek
* options for the phi op by examining the block's inward flows. For each source block, we check the
* most recent definition of the sought varnode. If one is present, the option is added to the phi
* op. Otherwise, we create an option by generating another phi op and taking its output. The new
* phi op belongs to the source block, and we recurse to seek its options. If a cycle is
* encountered, or we encounter a block with no inward flows, we do not recurse. An
* {@link JitInputVar input} variable is generated whenever we encounter a passage entry, indicating
* the variable could be defined outside the passage.
*
* <p>
* Note that the resulting phi ops may not adhere precisely to the formal definition of <em>phi
* node</em>. A phi op may have only one option. The recursive part of the option seeking algorithm
* generates chains of phi ops such that an option must come from an immediately upstream block,
* even if that block does not offer a direct definition. This may produce long chains when a
* varnode use is several block flows removed from a possible definition. We had considered
* simplifying/removing single-option phi ops afterward, but we found it too onerous, and the output
* bytecode is not improved. We do not generate bytecode for phi ops; they are synthetic and only
* used for analysis.
*/
public class JitDataFlowModel {
/**
 * Build a list holding one {@link JitTypeBehavior#ANY ANY} per given value.
 *
 * @param inVals the values, e.g., of each parameter to a userop; only the count matters
 * @return an unmodifiable list of {@code ANY}, the same length as {@code inVals}
 */
static List<JitTypeBehavior> allAny(List<JitVal> inVals) {
    return Collections.nCopies(inVals.size(), JitTypeBehavior.ANY);
}
// The analysis context given at construction
private final JitAnalysisContext context;
// The control flow model given at construction; supplies the blocks to analyze
private final JitControlFlowModel cfm;
// The passage taken from the context
private final JitPassage passage;
// The language taken from the context
private final SleighLanguage language;
// The arithmetic handed out by getArithmetic()
private final JitDataFlowArithmetic arithmetic;
// The userop library handed out by getLibrary()
private final JitDataFlowUseropLibrary library;
// The next identifier returned by nextVarId(); identifiers start at 1
private int nextVarId = 1;
// Every phi op passed to notifyOp, in arrival order
private final List<JitPhiOp> phiNodes = new ArrayList<>();
// Every synthetic op (phi ops included) passed to notifyOp, in arrival order
private final List<JitSyntheticOp> synthNodes = new ArrayList<>();
// The op node for each p-code op; populated by notifyOp for non-synthetic ops
private final Map<PcodeOp, JitOp> ops = new HashMap<>();
// The per-block analyzers, created on demand by getOrCreateAnalyzer
private final Map<JitBlock, JitDataFlowBlockAnalyzer> analyzers = new HashMap<>();
// Phi ops awaiting processing by analyzeInterblock; insertion-ordered without duplicates
final SequencedSet<JitPhiOp> phiQueue = new LinkedHashSet<>();
/**
* Construct the data flow model.
*
* <p>
* Analysis is performed as part of constructing the model: the last statement invokes
* {@link #analyze()}, so the use-def graph is populated by the time the constructor returns.
*
* @param context the analysis context, from which the passage and language are taken
* @param cfm the control flow model
*/
public JitDataFlowModel(JitAnalysisContext context, JitControlFlowModel cfm) {
this.context = context;
this.cfm = cfm;
this.passage = context.getPassage();
this.language = context.getLanguage();
// NOTE: "this" is handed to the arithmetic and library before analysis has run
this.arithmetic = new JitDataFlowArithmetic(context, this);
this.library = new JitDataFlowUseropLibrary(context, this);
// Must come last: analysis relies on all of the fields assigned above
analyze();
}
/**
 * Get the arithmetic created for this model, which places p-code ops into the use-def graph
 *
 * @return the arithmetic
 */
public JitDataFlowArithmetic getArithmetic() {
    return this.arithmetic;
}
/**
 * Get the wrapper library created for this model, which places userop calls into the use-def
 * graph
 *
 * @return the library
 */
public JitDataFlowUseropLibrary getLibrary() {
    return this.library;
}
/**
 * Get every phi node in the use-def graph.
 *
 * @return the model's own (live, mutable) list of phi nodes
 */
public List<JitPhiOp> phiNodes() {
    return this.phiNodes;
}
/**
 * Get every synthetic op node in the use-def graph, phi nodes included.
 *
 * @return the model's own (live, mutable) list of synthetic op nodes
 */
public List<JitSyntheticOp> synthNodes() {
    return this.synthNodes;
}
/**
 * Allocate the next variable identifier, which is unique within this model
 *
 * @return the allocated identifier
 */
private int nextVarId() {
    int id = nextVarId;
    nextVarId = id + 1;
    return id;
}
/**
 * Create a fresh output variable for an op, for eventual placement in the use-def graph
 *
 * <p>
 * Register and unique varnodes yield a {@link JitLocalOutVar}; anything else yields a
 * {@link JitMemoryOutVar}. Either way, the variable receives a newly allocated identifier.
 *
 * @param out the varnode describing the corresponding {@link PcodeOp}'s
 *            {@link PcodeOp#getOutput() output}.
 * @return the generated variable
 * @see JitDataFlowModel
 */
public JitOutVar generateOutVar(Varnode out) {
    boolean isLocal = out.isRegister() || out.isUnique();
    int id = nextVarId();
    if (!isLocal) {
        return new JitMemoryOutVar(id, out);
    }
    return new JitLocalOutVar(id, out);
}
/**
 * Create a fresh variable representing a direct memory access
 *
 * @param vn the varnode, which ought to be neither register nor unique
 * @return the variable, carrying a newly allocated identifier
 */
public JitDirectMemoryVar generateDirectMemoryVar(Varnode vn) {
    int id = nextVarId();
    return new JitDirectMemoryVar(id, vn);
}
/**
 * Obtain the variable representing an indirect memory access
 *
 * @param space the address space containing the variable, which ought to be neither register
 *            nor unique
 * @param offset another variable describing the (dynamic) offset of the variable in the given
 *            space
 * @param size the number of bytes in the variable
 * @param quantize true if the offset should be quantized (as in
 *            {@link PcodeExecutorState#getVar(AddressSpace, Object, int, boolean, Reason)
 *            getVar}).
 * @return the variable
 * @see JitIndirectMemoryVar
 * @see JitLoadOp
 * @see JitStoreOp
 * @implNote None of the parameters is consulted. Because the load and store ops already
 *           encode these details (except maybe {@code quantize}), this always returns the
 *           shared dummy instance.
 */
public JitIndirectMemoryVar generateIndirectMemoryVar(AddressSpace space, JitVal offset,
        int size, boolean quantize) {
    JitIndirectMemoryVar dummy = JitIndirectMemoryVar.INSTANCE;
    return dummy;
}
/**
 * Enter the given {@link JitOp} into the use-def graph
 *
 * <p>
 * The op is linked first. Synthetic ops are then appended to the list of synthetic nodes (phi
 * ops additionally to the list of phi nodes); all other ops are indexed by their p-code op.
 *
 * @param <T> the type of the node
 * @param op the op
 * @return the same op
 * @throws NullPointerException if a non-synthetic op has no p-code op
 * @see JitDataFlowModel
 */
public <T extends JitOp> T notifyOp(T op) {
    op.link();
    if (op instanceof JitSyntheticOp synth) {
        // Synthetic ops never reach the .op() call below
        if (synth instanceof JitPhiOp phi) {
            phiNodes.add(phi);
        }
        synthNodes.add(synth);
        return op;
    }
    PcodeOp key = Objects.requireNonNull(op.op());
    ops.put(key, op);
    return op;
}
/**
 * Look up the use-def op node recorded for the given p-code op
 *
 * <p>
 * NOTE: When used in testing, if the passage is manufactured from a {@link PcodeProgram}, the
 * decoder will re-write the p-code ops as {@link DecodedPcodeOp}s. Be sure to pass an op that
 * comes from the resulting {@link JitPassage}, not from the original program; otherwise, this
 * method will certainly return {@code null}.
 *
 * @param op the p-code op from the source passage
 * @return the node from the use-def graph, or {@code null} if none is recorded for the op
 */
public JitOp getJitOp(PcodeOp op) {
    JitOp node = ops.get(op);
    return node;
}
/**
 * Collect all the op nodes, whether from a p-code op or synthesized.
 *
 * @return a new set holding the nodes for p-code ops followed by the synthetic nodes
 * @see JitDataFlowModel
 */
Collection<JitOp> allOps() {
    Set<JitOp> combined = new LinkedHashSet<>(ops.values());
    combined.addAll(synthNodes);
    return combined;
}
/**
 * A set of values that fills itself, upon construction, by an upward traversal of the use-def
 * graph.
 *
 * <p>
 * The traversal starts at the op node of every p-code op in the passage, and additionally at
 * the output of every op node that defines one.
 *
 * @see JitAnalysisContext#allValues()
 * @see JitAnalysisContext#allValuesSorted()
 */
protected class ValCollector extends HashSet<JitVal> implements JitOpUpwardVisitor {
    public ValCollector() {
        for (PcodeOp pcodeOp : passage.getCode()) {
            JitOp node = getJitOp(pcodeOp);
            visitOp(node);
            if (node instanceof JitDefOp def) {
                visitVal(def.out());
            }
        }
    }

    /**
     * Add the value to this set, and continue the traversal only if it was not yet present
     */
    @Override
    public void visitVal(JitVal v) {
        if (add(v)) {
            JitOpUpwardVisitor.super.visitVal(v);
        }
    }
}
/**
 * Collect all values (and variables) in the use-def graph
 *
 * @return a newly computed set of values
 */
public Set<JitVal> allValues() {
    ValCollector collected = new ValCollector();
    return collected;
}
/**
 * Compute the sort key of a given value: a variable's own ID, or -2 for anything else, i.e.,
 * constants.
 *
 * @param v the value
 * @return the sort key
 */
int idOfVal(JitVal v) {
    if (v instanceof JitVar variable) {
        return variable.id();
    }
    return -2;
}
/**
 * Same as {@link #allValues()}, but sorted by {@link #idOfVal(JitVal) sort key}, which puts
 * constants at the top
 *
 * @return the sorted, unmodifiable list of values
 */
public List<JitVal> allValuesSorted() {
    Comparator<JitVal> byId = Comparator.comparingInt(this::idOfVal);
    return allValues().stream().sorted(byId).toList();
}
/**
 * Get the per-block data flow analyzer for the given basic block, creating and remembering
 * one if none exists yet
 *
 * @param block the block
 * @return the existing or newly created analyzer
 */
protected JitDataFlowBlockAnalyzer getOrCreateAnalyzer(JitBlock block) {
    return analyzers.computeIfAbsent(block,
        key -> new JitDataFlowBlockAnalyzer(context, this, key));
}
/**
 * Look up the per-block data flow analyzer for the given basic block
 *
 * @param block the block
 * @return the analyzer, or {@code null} if none has been created for the block
 */
public JitDataFlowBlockAnalyzer getAnalyzer(JitBlock block) {
    JitDataFlowBlockAnalyzer analyzer = analyzers.get(block);
    return analyzer;
}
/**
 * Construct the use-def graph: first per block, then across blocks
 */
protected void analyze() {
    /*
     * Pass 1: Visit the blocks in whatever order the control flow model lists them. Each
     * block uses input placeholders, which are glued together in the second pass.
     *
     * I considered unrolling each loop at least once to avoid certain multi-equals stuff. I
     * don't think that'll be necessary. If we pre-load the registers into local variables,
     * then we'll always be reading and writing to those locals, so no worries about
     * multi-equals.
     */
    for (JitBlock blk : cfm.getBlocks()) {
        JitDataFlowBlockAnalyzer analyzer = getOrCreateAnalyzer(blk);
        analyzer.doIntrablock();
    }

    // Pass 2: Work out the inter-block flows for the phi nodes generated so far.
    analyzeInterblock(phiNodes);
}
/**
 * Perform the inter-block analysis.
 *
 * <p>
 * This is called by {@link #analyze()} after intra-block analysis. The given phi ops are
 * added to the queue, and then the queue is drained from the front, asking the analyzer of
 * each phi op's block to fill it. The loop runs until the queue is empty, so anything added
 * to the queue in the meantime is processed as well.
 *
 * @param phis the phi ops to enqueue
 *
 * @implNote This may be called a second time by the {@link JitOpUseModel}, since a variable's
 *           definition may be several block flows removed from its retirement, which counts
 *           as a use.
 *
 * @see JitVarScopeModel
 * @see JitOpUseModel
 */
void analyzeInterblock(Collection<JitPhiOp> phis) {
    phiQueue.addAll(phis);
    while (!phiQueue.isEmpty()) {
        JitPhiOp next = phiQueue.removeFirst();
        getOrCreateAnalyzer(next.block()).fillPhiFromDeps(next);
    }
}
/**
 * For testing: Get the value(s) in (or intersecting) the given register defined by the given
 * block
 *
 * @param block the block whose p-code to consider; an analyzer must already exist for it
 * @param register the register to examine
 * @return the list of values (usually variables)
 */
@Internal
List<JitVal> getOutput(JitBlock block, Register register) {
    JitDataFlowBlockAnalyzer analyzer = getAnalyzer(block);
    return analyzer.getOutput(register);
}
/**
 * For diagnostics: Print the analysis result to stderr, i.e., each block followed by the op
 * node recorded for each of its p-code ops
 *
 * @see Diag#PRINT_DFM
 */
public void dumpResult() {
    System.err.println("STAGE: DataFlow");
    for (JitBlock blk : cfm.getBlocks()) {
        System.err.println(" Block: " + blk);
        for (PcodeOp pcodeOp : blk.getCode()) {
            String line = " %s: %s".formatted(pcodeOp.getSeqnum(), getJitOp(pcodeOp));
            System.err.println(line);
        }
    }
}
/**
 * For diagnostics: Print every synthetic op to stderr, one per line
 *
 * @see Diag#PRINT_SYNTH
 */
public void dumpSynth() {
    System.err.println("SYNTHETIC OPS");
    for (JitSyntheticOp synth : synthNodes) {
        String line = " " + synth;
        System.err.println(line);
    }
}
/**
* A diagnostic tool for visualizing the use-def graph.
*
* <p>
* NOTE: This is only as complete as it needed to be for me to diagnose whatever issue I was
* having at the time.
*
* <p>
* All of the output is produced by the constructor. The writer is closed by the time the
* constructor returns, so the visit methods are of no use afterward.
*
* @see JitAnalysisContext#exportGraphviz(File)
*/
protected class GraphvizExporter implements JitOpUpwardVisitor {
// The writer for the .dot file; only open while the constructor is running
final PrintWriter out;
// The variables already written, so that each is written at most once
final Set<JitVar> vars = new HashSet<>();
// The ops already written (possibly including null), so that each is written at most once
final Set<JitOp> ops = new HashSet<>();
/**
* Write the whole graph to the given file
*
* <p>
* The traversal starts at every p-code op in the passage: at the output value for ops that
* define one, and at the op node itself otherwise.
*
* @param outFile the file to write
* @throws RuntimeException wrapping any {@link IOException} thrown while opening or closing
*             the file
*/
public GraphvizExporter(File outFile) {
// NOTE(review): PrintWriter reports write failures only via checkError(), which is not
// consulted here, so a failed write does not reach the catch clause below.
try (FileOutputStream outStream = new FileOutputStream(outFile);
PrintWriter out = new PrintWriter(outStream)) {
this.out = out;
out.println("digraph DataFlow {");
for (PcodeOp op : passage.getCode()) {
// May be null, if no node was recorded for the op; visitOp tolerates that
JitOp jitOp = getJitOp(op);
if (jitOp instanceof JitDefOp defOp) {
// Because of direction of visit
visitVal(defOp.out());
}
else {
visitOp(jitOp);
}
}
out.println("}");
}
catch (IOException e) {
throw new RuntimeException(e);
}
}
/**
* Compute the label for an op node
*
* @param op the op, possibly {@code null}
* @return "null" for a null op; otherwise, the op's simple class name and, on a second
*         line, its identity hash code in hexadecimal
*/
String opLabel(JitOp op) {
return switch (op) {
case null -> "null";
//case JitSyntheticOp synth -> synth.getClass().getSimpleName();
//default -> op.op().toString();
default -> "%s\n%x".formatted(op.getClass().getSimpleName(),
System.identityHashCode(op));
};
}
/**
* Write the node for an op along with the edges to its inputs and output, then continue
* with the visitor's default behavior
*
* <p>
* Nodes are named by the op's identity hash code. An op that was already written is
* skipped. A null op gets a node, but nothing more.
*/
@Override
public void visitOp(JitOp op) {
if (!ops.add(op)) {
return;
}
out.println("""
"op%x" [
label = "%s"
shape = "ellipse"
];
""".formatted(
System.identityHashCode(op),
opLabel(op)));
if (op == null) {
return;
}
// Input edges are labeled by 1-based position
int i = 0;
for (JitVal input : op.inputs()) {
i++;
if (input instanceof JitVar iv) {
// Variables are named by their ID
out.println("""
"var%d" -> "op%x" [
headlabel = "[%d]"
];
""".formatted(
iv.id(),
System.identityHashCode(op),
i));
}
else {
// Other values are named by their identity hash code
out.println("""
"val%x" -> "op%x" [
headlabel = "[%d]"
];
""".formatted(
System.identityHashCode(input),
System.identityHashCode(op),
i));
}
}
if (op instanceof JitDefOp defOp) {
out.println("""
"op%x" -> "var%d" [
taillabel = "out"
];
""".formatted(
System.identityHashCode(op),
defOp.out().id()));
}
JitOpUpwardVisitor.super.visitOp(op);
}
/**
* Compute the label for a variable node
*
* @param v the variable
* @return the variable's varnode, rendered for the model's language, and, on a second line,
*         the variable's ID
* @throws AssertionError if the variable is not a {@link JitVarnodeVar}
*/
String varLabel(JitVar v) {
return switch (v) {
case JitVarnodeVar vv -> "%s\n%d".formatted(vv.varnode().toString(language),
v.id());
default -> throw new AssertionError();
};
}
/**
* Write the node for a value along with edges for its uses and, for an output variable,
* its definition, then continue with the visitor's default behavior
*
* <p>
* A variable that was already written is skipped. No such check is made for constants.
*
* @throws AssertionError if the value is neither a variable nor a constant
*/
@Override
public void visitVal(JitVal v) {
final String name;
final String label;
if (v instanceof JitVar vv) {
if (!vars.add(vv)) {
return;
}
name = "var%d".formatted(vv.id());
label = varLabel(vv);
}
else if (v instanceof JitConstVal cv) {
name = "val%x".formatted(System.identityHashCode(cv));
label = cv.value().toString();
}
else {
throw new AssertionError();
}
out.println("""
"%s" [
label = "%s"
shape = "box"
];
""".formatted(name, label));
for (ValUse use : v.uses()) {
out.println("""
"%s" -> "op%x" [
dir = "back"
arrowhead = "none"
arrowtail = "crow"
taillabel = "use"
];
""".formatted(name, System.identityHashCode(use.op())));
}
if (v instanceof JitOutVar ov) {
out.println("""
"op%x" -> "%s" [
dir = "back"
arrowhead = "none"
arrowtail = "crow"
taillabel = "def"
];
""".formatted(System.identityHashCode(ov.definition()), name));
}
JitOpUpwardVisitor.super.visitVal(v);
}
}
/**
* Generate a graphviz .dot file to visualize the use-def graph.
*
* <p>
* <b>WARNING:</b> This is an internal diagnostic that is only as complete as it needed to be.
*
* <p>
* The file is written entirely by the {@link GraphvizExporter} constructor, which is why the
* constructed object is simply discarded.
*
* @param file the output file
*/
@Internal // for diagnostics
public void exportGraphviz(File file) {
new GraphvizExporter(file);
}
}

View file

@ -0,0 +1,572 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.util.*;
import java.util.Map.Entry;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorState;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.*;
import ghidra.pcode.exec.PcodeArithmetic.Purpose;
import ghidra.pcode.exec.PcodeExecutorState;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Language;
import ghidra.program.model.lang.Register;
import ghidra.program.model.mem.MemBuffer;
import ghidra.program.model.pcode.Varnode;
import ghidra.util.Msg;
/**
* An implementation of {@link PcodeExecutorState} for per-block data flow interpretation
*
* <p>
* In p-code interpretation, this interface's purpose is to store the current value of varnodes in
* the emulation/interpretation state. Here we implement it using {@code T:=}{@link JitVal}, and
* track the latest variable definition of varnodes in the data flow interpretation. The adaptation
* is fairly straightforward, except when varnode accesses do not match their latest definitions
* exactly, e.g., an access of {@code EAX} when the latest definition is for {@code RAX}. Thus, this
* state object may synthesize {@link JitSynthSubPieceOp subpiece} and {@link JitCatenateOp
* catenate} ops to model the "off-cut" use of one or more such definitions. Additionally, in
* preparation for inter-block data flow analysis, when no definition is present for a varnode (or
* part of a varnode) access, this state will synthesize {@link JitPhiOp phi} ops. See
* {@link #setVar(AddressSpace, JitVal, int, boolean, JitVal) setVar} and
* {@link #getVar(AddressSpace, JitVal, int, boolean, Reason) getVar} for details.
*
* <p>
* This state only serves to analyze data flow through register and unique variables. Because we
* know these are only accessible to the thread, we stand to save much execution time by bypassing
* the {@link JitBytesPcodeExecutorState} at run time. We can accomplish this by mapping these
* variables to suitable JVM local variables. Thus, we have one map of entries for register space
* and another for unique space. Accesses to other spaces do not mutate or read from either of those
* maps, but this class may generate a suitable {@link JitVal} for the use-def graph.
*/
public class JitDataFlowState implements PcodeExecutorState<JitVal> {
/**
* A minimal data flow machine state that can be captured by a {@link JitCallOtherOpIf}.
*/
public class MiniDFState {
// Definitions in the unique space, keyed by the offset at which each begins
private final NavigableMap<Long, JitVal> uniqMap;
// Definitions in the register space, keyed by the offset at which each begins
private final NavigableMap<Long, JitVal> regMap;
/**
 * Construct a state having no definitions, backed by two new, empty tree maps
 */
MiniDFState() {
    this(new TreeMap<Long, JitVal>(), new TreeMap<Long, JitVal>());
}
/**
 * Construct a state backed by the given maps, which are used as is, i.e., not copied
 *
 * @param uniqMap the definitions in the unique space
 * @param regMap the definitions in the register space
 */
MiniDFState(NavigableMap<Long, JitVal> uniqMap, NavigableMap<Long, JitVal> regMap) {
    this.regMap = regMap;
    this.uniqMap = uniqMap;
}
/**
 * Select the map of definitions for the given address space
 *
 * @param space the address space
 * @return the unique map or the register map, as appropriate, or {@code null} if the space
 *         is neither unique nor register
 */
NavigableMap<Long, JitVal> mapFor(AddressSpace space) {
    return space.isUniqueSpace() ? uniqMap
            : space.isRegisterSpace() ? regMap
            : null;
}
/**
 * Compute where a given definition entry ends, i.e., its starting offset plus the size of
 * its value
 *
 * @param entry the entry
 * @return the upper offset, exclusive
 */
protected static long endOf(Entry<Long, JitVal> entry) {
    long start = entry.getKey();
    int length = entry.getValue().size();
    return start + length;
}
/**
 * Clear all definition entries in the given per-space map for the given varnode
 *
 * <p>
 * Any entries completely covered by the given varnode (including an exact match) are removed
 * from the map. Those partially covered are replaced by truncated versions of their former
 * selves such that no part within the cleared varnode remains defined. An entry straddling
 * the start of the varnode keeps its key and loses its tail. An entry straddling the end of
 * the varnode loses its head and is re-keyed at the end of the varnode.
 *
 * <p>
 * The amount passed to the truncation methods is the number of bytes <em>removed</em>, the
 * same convention followed by
 * {@link #doGetDefinitions(NavigableMap, AddressSpace, long, int)}.
 *
 * @param map the map to modify
 * @param varnode the varnode whose definitions to remove
 */
protected void doClear(NavigableMap<Long, JitVal> map, Varnode varnode) {
    AddressSpace space = varnode.getAddress().getAddressSpace();
    long offset = varnode.getOffset();
    long end = offset + varnode.getSize();

    // The entry, if any, starting before the cleared range and reaching into it
    Entry<Long, JitVal> truncLeftEntry = map.lowerEntry(offset);
    if (truncLeftEntry != null && endOf(truncLeftEntry) <= offset) {
        truncLeftEntry = null;
    }

    /*
     * Collect the entry at both ends before any removal, in case the clear is cutting a hole
     * in the middle of one entry. I.e., it could be the same entry at both ends.
     */
    // The entry, if any, starting before the end of the cleared range and reaching past it
    Entry<Long, JitVal> truncRightEntry = map.lowerEntry(end);
    if (truncRightEntry != null && endOf(truncRightEntry) <= end) {
        truncRightEntry = null;
    }

    /*
     * Replace the right entry first. If it's the same entry as the left, and we remove by
     * key, then we might remove the replacement on the left, if it were done first.
     */
    if (truncRightEntry != null) {
        long entStart = truncRightEntry.getKey();
        map.remove(entStart);
        // Drop the bytes in [entStart, end) so that only [end, endOf(entry)) survives
        int shave = (int) (end - entStart);
        JitVal entVal = truncRightEntry.getValue();
        Varnode truncVn = new Varnode(space.getAddress(entStart), entVal.size());
        JitVal truncVal = arithmetic.truncFromLeft(truncVn, shave, entVal);
        map.put(end, truncVal);
    }
    if (truncLeftEntry != null) {
        long entStart = truncLeftEntry.getKey();
        map.remove(entStart);
        // Drop the bytes in [offset, endOf(entry)) so that only [entStart, offset) survives
        int shave = (int) (endOf(truncLeftEntry) - offset);
        JitVal entVal = truncLeftEntry.getValue();
        Varnode truncVn = new Varnode(space.getAddress(entStart), entVal.size());
        JitVal truncVal = arithmetic.truncFromRight(truncVn, shave, entVal);
        map.put(entStart, truncVal);
    }

    /*
     * At this point, no part of the ends should be in the key range [offset,end), so clear
     * that submap
     */
    map.subMap(offset, end).clear();
}
/**
 * The implementation of {@link #set(Varnode, JitVal)} for a given address space
 *
 * <p>
 * Existing definitions for the varnode are cleared first. If the value is the output of a
 * {@link JitCatenateOp catenation}, each part is entered separately at consecutive offsets
 * starting at the varnode's offset; otherwise, the value is entered as is.
 *
 * @param map the map to modify for the given space
 * @param varnode the varnode whose value to define
 * @param val the varnode's new definition
 */
protected void doSet(NavigableMap<Long, JitVal> map, Varnode varnode, JitVal val) {
    doClear(map, varnode);
    if (!(val instanceof JitOutVar out && out.definition() instanceof JitCatenateOp cat)) {
        map.put(varnode.getOffset(), val);
        return;
    }
    /*
     * NOTE: Do not filter phi nodes here. Perhaps if we're certain it's for the same varnode
     * we could, but not sure there's any benefit to doing so. TODO: Determine whether there's
     * any benefit. NOTE: While the phi nodes are linked after the fact, they are generated
     * (but empty) during p-code interpretation.
     */
    int consumed = 0;
    for (JitVal piece : cat.iterParts(language.isBigEndian())) {
        map.put(varnode.getOffset() + consumed, piece);
        consumed += piece.size();
    }
    /*
     * Can't necessarily unlink cat here. Something else may use it. May need to prune
     * afterward.
     */
}
/**
 * Set one or more definition entries for the given varnode to the given value
 *
 * <p>
 * Ordinarily, this just sets the one varnode to the given value; however, if the given value
 * is the output of a {@link JitCatenateOp catenation}, then each input part is entered into
 * the map separately, and the synthetic catenation dropped. This behavior avoids nested
 * catenations.
 *
 * <p>
 * Nothing happens if the varnode is in neither the unique nor the register space.
 *
 * @param varnode the varnode
 * @param val the value
 */
public void set(Varnode varnode, JitVal val) {
    NavigableMap<Long, JitVal> map = mapFor(varnode.getAddress().getAddressSpace());
    if (map != null) {
        doSet(map, varnode, val);
    }
}
/**
* The implementation of {@link #getDefinitions(AddressSpace, long, int)} for a given
* address space
*
* <p>
* The requested range is walked from its start to its end. Every stretch not covered by an
* entry yields a {@link JitMissingVar} for exactly that stretch. An entry that starts before
* the range has its leading bytes truncated, and an entry that starts within the range but
* extends past its end has its trailing bytes truncated.
*
* @param map the map of values for the given space
* @param space the address space
* @param offset the offset within the space
* @param size the size of the varnode
* @return the list of values, never empty
*/
protected List<JitVal> doGetDefinitions(NavigableMap<Long, JitVal> map, AddressSpace space,
long offset, int size) {
List<JitVal> result = new ArrayList<>();
// The nearest entry starting strictly before the requested range
Entry<Long, JitVal> preEntry = map.lowerEntry(offset);
// The offset up to which the result accounts for the requested range
long cursor = offset;
if (preEntry != null) {
if (endOf(preEntry) > offset) {
// It reaches into the range: Drop the bytes before the range and take the remainder
JitVal preVal = preEntry.getValue();
Varnode preVn = new Varnode(space.getAddress(preEntry.getKey()), preVal.size());
int shave = (int) (offset - preEntry.getKey());
JitVal truncVal = arithmetic.truncFromLeft(preVn, shave, preVal);
// NOTE(review): If this entry also extends past the end of the requested range, nothing
// here trims its tail, so the value appears to be wider than requested. TODO confirm
cursor = endOf(preEntry);
result.add(truncVal);
}
}
long end = offset + size;
// The entries starting within the requested range, in order of offset
for (Entry<Long, JitVal> entry : map.subMap(offset, end).entrySet()) {
if (entry.getKey() > cursor) {
// Nothing defines the gap between the cursor and this entry
result.add(new JitMissingVar(
new Varnode(space.getAddress(cursor), (int) (entry.getKey() - cursor))));
}
if (endOf(entry) > end) {
// It reaches past the range: Drop the bytes after the range, and we are done
JitVal postVal = entry.getValue();
Varnode postVn = new Varnode(space.getAddress(entry.getKey()), postVal.size());
int shave = (int) (endOf(entry) - end);
JitVal truncVal = arithmetic.truncFromRight(postVn, shave, postVal);
cursor = end;
result.add(truncVal);
break;
}
// It lies entirely within the range: Take it whole
result.add(entry.getValue());
cursor = endOf(entry);
}
if (end > cursor) {
// Nothing defines the tail of the range
result.add(
new JitMissingVar(new Varnode(space.getAddress(cursor), (int) (end - cursor))));
}
assert !result.isEmpty();
return result;
}
/**
 * Get an ordered list of all values involved in the latest definition of the given varnode.
 *
 * <p>
 * In the simplest case, the list consists of exactly one SSA variable whose varnode exactly
 * matches that requested. In other cases, e.g., when only a subregister is defined, the list
 * may have several entries, some of which may be {@link JitMissingVar missing}.
 *
 * <p>
 * The list is ordered according to machine endianness. That is, for little endian, the
 * values are ordered from least to most significant parts of the varnode defined. This is
 * congruent with how {@link JitDataFlowArithmetic#catenate(Varnode, List)} expects parts to
 * be listed.
 *
 * @param space the address space of the varnode, which must be unique or register
 * @param offset the offset of the varnode
 * @param size the size in bytes of the varnode
 * @return the list of values
 * @throws AssertionError if the space is neither unique nor register
 */
public List<JitVal> getDefinitions(AddressSpace space, long offset, int size) {
    NavigableMap<Long, JitVal> map = mapFor(space);
    if (map != null) {
        return doGetDefinitions(map, space, offset, size);
    }
    throw new AssertionError("What is this space?: " + space);
}
/**
 * Look up the values that make up the latest definition of the given varnode.
 *
 * @see #getDefinitions(AddressSpace, long, int)
 * @param varnode the varnode whose definitions to retrieve
 * @return the list of values
 */
public List<JitVal> getDefinitions(Varnode varnode) {
	return getDefinitions(varnode.getAddress().getAddressSpace(), varnode.getOffset(),
		varnode.getSize());
}
/**
 * Look up the values that make up the latest definition of the given register.
 *
 * @see #getDefinitions(AddressSpace, long, int)
 * @param register the register whose definitions to retrieve
 * @return the list of values
 */
public List<JitVal> getDefinitions(Register register) {
	AddressSpace regSpace = register.getAddressSpace();
	long regOffset = register.getOffset();
	int regSize = register.getNumBytes();
	return getDefinitions(regSpace, regOffset, regSize);
}
/**
 * Replace every missing variable in the list with the output of a newly generated phi node.
 *
 * <p>
 * The list is mutated in place. Each generated phi output is also recorded in this state as
 * the definition of the missing variable's varnode.
 *
 * @param defs the definitions, possibly containing {@link JitMissingVar} entries
 * @param phiQueue if non-null, receives each phi op generated here
 * @return the same list, modified
 */
protected List<JitVal> generatePhis(List<JitVal> defs, Collection<JitPhiOp> phiQueue) {
	for (int idx = 0, count = defs.size(); idx < count; idx++) {
		if (!(defs.get(idx) instanceof JitMissingVar gap)) {
			continue;
		}
		JitPhiOp phiOp = gap.generatePhi(dfm, block);
		if (phiQueue != null) {
			phiQueue.add(phiOp);
		}
		defs.set(idx, phiOp.out());
		set(gap.varnode(), phiOp.out());
	}
	return defs;
}
/**
 * Get the value of the given varnode
 *
 * <p>
 * This implements
 * {@link JitDataFlowState#getVar(AddressSpace, JitVal, int, boolean, Reason)} for uniques and
 * registers only. Missing parts are first replaced by phi outputs. A single resulting part is
 * returned as is; several parts are joined by the arithmetic's catenate operation.
 *
 * @param varnode the varnode
 * @return the value
 */
public JitVal getVar(Varnode varnode) {
	List<JitVal> parts = generatePhis(getDefinitions(varnode), null);
	return parts.size() == 1 ? parts.get(0) : arithmetic.catenate(varnode, parts);
}
/**
 * Copy this mini state
 *
 * <p>
 * The unique and register maps are each copied into a new {@link TreeMap}, so later changes
 * to this state's maps do not show up in the copy.
 *
 * @return the copy
 */
public MiniDFState copy() {
	var uniqCopy = new TreeMap<>(uniqMap);
	var regCopy = new TreeMap<>(regMap);
	return new MiniDFState(uniqCopy, regCopy);
}
}
/** The data flow model whose use-def graph this state populates */
private final JitDataFlowModel dfm;
/** The block being analyzed, to which generated phi ops belong */
private final JitBlock block;
/** The language, taken from the analysis context */
private final Language language;
/** The arithmetic, taken from the data flow model */
private final JitDataFlowArithmetic arithmetic;
/** The maps of latest definitions for this block */
private final MiniDFState mini = new MiniDFState();
/** Every varnode passed to the mini state by a read in getVar */
private final Set<Varnode> varnodesRead = new HashSet<>();
/** Every varnode recorded by a direct (constant-offset) write in setVar */
private final Set<Varnode> varnodesWritten = new HashSet<>();
/**
 * Construct a state
 *
 * <p>
 * The language comes from the context; the arithmetic comes from the data flow model.
 *
 * @param context the analysis context
 * @param dfm the data flow model whose use-def graph to populate
 * @param block the block being analyzed (to which generated phi ops belong)
 */
JitDataFlowState(JitAnalysisContext context, JitDataFlowModel dfm, JitBlock block) {
	this.language = context.getLanguage();
	this.arithmetic = dfm.getArithmetic();
	this.dfm = dfm;
	this.block = block;
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This is the language obtained from the analysis context at construction.
 */
@Override
public Language getLanguage() {
return language;
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This is the arithmetic obtained from the data flow model at construction.
 */
@Override
public JitDataFlowArithmetic getArithmetic() {
return arithmetic;
}
/**
 * {@inheritDoc}
 *
 * <p>
 * Together with {@link #getVar(AddressSpace, JitVal, int, boolean, Reason)}, this connects the
 * interpretation to the maps of definitions kept in this state. The varnode's type is examined
 * first. A write to constant space is not allowed, so it is logged and dropped. Indirect writes
 * are ignored here, because those are denoted by {@link JitStoreOp store} ops. Little is done
 * with direct writes either: writes to such variables are handled by {@link JitMemoryOutVar}.
 * Such output variables are passed in as {@code val} here, but need only be stored in a map if
 * they are register or unique variables.
 */
@Override
public void setVar(AddressSpace space, JitVal offset, int size, boolean quantize,
		JitVal val) {
	/*
	 * This is used only to log possible storage bypasses. All uniques will be bypassed.
	 * Registers must be written, but it is safe to bypass subsequent loads. Perhaps with a
	 * pre-load of register values and a try-finally to write them, register access could be
	 * optimized, too. It might also make sense to do that for uniques, just for debugging.
	 *
	 * Memory must be written. Unless it can be determined for sure that the memory is
	 * non-volatile, it must be presumed volatile, so no bypassing is allowed. TODO: Consider
	 * assuming stack-based accesses are non-volatile, though that may not be appropriate
	 * either: one thread may launch another, providing a ref to a stack variable it knows will
	 * live for the entire thread's life.
	 */
	if (space.isConstantSpace()) {
		Msg.warn(this, "Witnessed write to constant space! Ignoring.");
		return;
	}
	// Only a constant offset is a direct access; never attempt bypass for indirect access
	if (offset instanceof JitConstVal constOffset) {
		// NB. There should never be need to quantize in regs or uniqs.
		Varnode target = new Varnode(space.getAddress(constOffset.value().longValue()), size);
		varnodesWritten.add(target);
		mini.set(target, val);
	}
}
/**
 * Look up the values that make up the latest definition of the given varnode.
 *
 * <p>
 * This delegates to the block's {@link MiniDFState}.
 *
 * @see MiniDFState#getDefinitions(AddressSpace, long, int)
 * @param varnode the varnode whose definitions to retrieve
 * @return the list of values
 */
public List<JitVal> getDefinitions(Varnode varnode) {
	List<JitVal> definitions = mini.getDefinitions(varnode);
	return definitions;
}
/**
 * Look up the values that make up the latest definition of the given register.
 *
 * <p>
 * This delegates to the block's {@link MiniDFState}.
 *
 * @see MiniDFState#getDefinitions(AddressSpace, long, int)
 * @param register the register whose definitions to retrieve
 * @return the list of values
 */
public List<JitVal> getDefinitions(Register register) {
	List<JitVal> definitions = mini.getDefinitions(register);
	return definitions;
}
/**
 * Replace missing variables with phi nodes, mutating the given list in place
 *
 * <p>
 * This delegates to the block's {@link MiniDFState}.
 *
 * @param defs the definitions
 * @param phiQueue the queue handed to the mini state along with the definitions
 * @return the same list, modified
 */
List<JitVal> generatePhis(List<JitVal> defs, SequencedSet<JitPhiOp> phiQueue) {
	List<JitVal> completed = mini.generatePhis(defs, phiQueue);
	return completed;
}
/**
 * {@inheritDoc}
 *
 * <p>
 * Together with {@link #setVar(AddressSpace, JitVal, int, boolean, JitVal)}, this connects the
 * interpretation to the maps of definitions kept in this state. The varnode's type is examined
 * first. For a constant or memory variable, this just returns the appropriate
 * {@link JitConstVal}, {@link JitDirectMemoryVar}, or {@link JitIndirectMemoryVar}. For a
 * register or unique, the latest definition(s) are retrieved as in
 * {@link MiniDFState#getDefinitions(AddressSpace, long, int)}. In the simple case of an exact
 * definition, it is returned. Otherwise, this synthesizes the appropriate op(s), enters them
 * into the use-def graph, and returns the final output.
 */
@Override
public JitVal getVar(AddressSpace space, JitVal offset, int size, boolean quantize,
		Reason reason) {
	if (space.isConstantSpace()) {
		if (offset instanceof JitConstVal constant) {
			// Re-wrap only when the requested size differs from the constant's own size
			return constant.size() == size ? offset : new JitConstVal(size, constant.value());
		}
		throw new AssertionError("Non-constant constant?");
	}
	if (space.isMemorySpace()) {
		if (!(offset instanceof JitConstVal direct)) {
			return dfm.generateIndirectMemoryVar(space, offset, size, quantize);
		}
		Varnode memVn = new Varnode(space.getAddress(direct.value().longValue()), size);
		return dfm.generateDirectMemoryVar(memVn);
	}
	if (offset instanceof JitConstVal fixed) {
		Varnode target = new Varnode(space.getAddress(fixed.value().longValue()), size);
		varnodesRead.add(target);
		return mini.getVar(target);
	}
	throw new AssertionError("Indirect non-memory access?");
}
/**
 * Not supported by this analysis state
 *
 * @throws UnsupportedOperationException always
 */
@Override
public Map<Register, JitVal> getRegisterValues() {
throw new UnsupportedOperationException();
}
/**
 * Not supported by this analysis state
 *
 * @throws UnsupportedOperationException always
 */
@Override
public MemBuffer getConcreteBuffer(Address address, Purpose purpose) {
throw new UnsupportedOperationException();
}
/**
 * Not supported by this analysis state
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void clear() {
throw new UnsupportedOperationException();
}
/**
 * Not supported by this analysis state
 *
 * @throws UnsupportedOperationException always
 */
@Override
public PcodeExecutorState<JitVal> fork() {
throw new UnsupportedOperationException();
}
/**
 * Get a complete catalog of all varnodes read, including overlapping, subregs, etc.
 *
 * <p>
 * This returns the state's own set, not a copy.
 *
 * @return the set of varnodes
 */
public Set<Varnode> getVarnodesRead() {
return varnodesRead;
}
/**
 * Get a complete catalog of all varnodes written, including overlapping, subregs, etc.
 *
 * <p>
 * This returns the state's own set, not a copy.
 *
 * @return the set of varnodes
 */
public Set<Varnode> getVarnodesWritten() {
return varnodesWritten;
}
/**
 * Capture the current state of intra-block analysis.
 *
 * <p>
 * A {@link JitCallOtherOpIf} invoked using the standard strategy may need this for follow-up
 * op-use analysis, since all varnodes live <em>at the time of the call</em> must be considered
 * used. The result is a copy of the block's mini state.
 *
 * @return the captured state
 */
public MiniDFState captureState() {
	MiniDFState snapshot = mini.copy();
	return snapshot;
}
}

View file

@ -0,0 +1,275 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.lang.reflect.Method;
import java.lang.reflect.Parameter;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorState;
import ghidra.pcode.emu.jit.decode.DecoderUseropLibrary;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.JitOutVar;
import ghidra.pcode.emu.jit.var.JitVal;
import ghidra.pcode.exec.*;
import ghidra.pcode.exec.AnnotatedPcodeUseropLibrary.PcodeUserop;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.Varnode;
/**
* A wrapper around a userop library that places {@link PcodeOp#CALLOTHER callother} ops into the
* use-def graph
*
* <p>
* This is the library provided to
* {@link JitDataFlowExecutor#execute(PcodeProgram, PcodeUseropLibrary)} to cooperate with in the
* population of the use-def graph. The Sleigh compiler is very permissive when it comes to userop
* invocations. Notably, there's no way to declare the "prototype" or "signature" of the userop.
* Invocations can have any number of input operands and an optional output operand. Because the
* use-def graph takes careful notice of variables and their defining ops, there are two possible
* op nodes: {@link JitCallOtherOp} when no output operand is given and {@link JitCallOtherDefOp}
* when an output operand is given.
*
* <p>
* We employ several different strategies to handle a p-code userop:
*
* <ul>
* <li><b>Standard</b>: Invocation of the userop in the same fashion as the interpreted p-code
* emulator. Any live variables have to be written into the {@link JitBytesPcodeExecutorState state}
* before the invocation and then read back out afterward. If the userop accesses the state directly,
* we must use this strategy. Most userops whose implementations precede the introduction of JIT
* acceleration can be supported with this strategy, so long as they don't manipulate the
* emulator/executor directly in some unsupported way.</li>
* <li><b>Inlining</b>: The inclusion of the userop's p-code directly at its call site, replacing
* the {@link PcodeOp#CALLOTHER} op. This is implemented in the decoder by
* {@link DecoderUseropLibrary}. This strategy is only applicable to userops defined using Sleigh
* and/or p-code.</li>
* <li><b>Direct</b>: The direct invocation of the userop's defining Java method in the generated
* JVM bytecode. This is applicable when the method's parameters and return type are primitives that
* each map to a {@link JitTypeBehavior}. The input values can be passed directly in, which works
* well when the inputs are registers or uniques allocated in JVM locals. The return value can be
* handled similarly.</li>
* </ul>
*
* <p>
* The default strategy for all userops is Standard. Implementors should set the attributes of
* {@link PcodeUserop} and adjust the parameters of the userop's method accordingly. To allow
* inlining, set {@link PcodeUserop#canInline() canInline}. To allow direct invocation, set
* {@link PcodeUserop#functional()} and ensure all the parameter types and return type are
* supported. Supported types include primitives other than {@code char}. The return type may be
* {@code void}. No matter the strategy, userops may be subject to removal by the
* {@link JitOpUseModel}. To permit removal, clear {@link PcodeUserop#hasSideEffects()}. The default
* prevents removal. For the inline strategy, each op from the inlined userop is analyzed
* separately, so the userop could be partially culled. An inlined userop cannot have side effects,
* and so the attribute is ignored.
*/
public class JitDataFlowUseropLibrary implements PcodeUseropLibrary<JitVal> {
/**
 * The wrapper of a specific userop definition
 *
 * <p>
 * Attribute queries are delegated to the wrapped, decoder-provided definition. Execution is
 * replaced by an analytic interpretation that enters the invocation into the use-def graph.
 */
protected class WrappedUseropDefinition implements PcodeUseropDefinition<JitVal> {
	private final PcodeUseropDefinition<Object> decOp;

	/**
	 * Wrap the given userop definition
	 *
	 * @param decOp the definition to wrap
	 */
	public WrappedUseropDefinition(PcodeUseropDefinition<Object> decOp) {
		this.decOp = decOp;
	}

	@Override
	public String getName() {
		return decOp.getName();
	}

	@Override
	public int getInputCount() {
		return decOp.getInputCount();
	}

	/**
	 * Not used by the analysis; only the {@link PcodeOp}-based overload is expected.
	 *
	 * @throws AssertionError always
	 */
	@Override
	public void execute(PcodeExecutor<JitVal> executor, PcodeUseropLibrary<JitVal> library,
			Varnode outVar, List<Varnode> inVars) {
		throw new AssertionError();
	}

	/**
	 * If the number of arguments matches the userop's Java method, map each argument value to
	 * the type behavior for its corresponding parameter.
	 *
	 * <p>
	 * This is used by the {@link JitTypeModel} to assign types to JVM locals in order to reduce
	 * the number of type casts. In the case of direct invocation, this enters type information
	 * from the userop's Java definition into the analysis.
	 *
	 * <p>
	 * If the parameter count doesn't match, we just map the arguments to
	 * {@link JitTypeBehavior#ANY} and let the error surface at run time. We need not throw the
	 * exception until/unless the invocation is actually executed. Similarly, if any parameter's
	 * type is not supported, or the userop is not backed by a Java method, we just map all
	 * arguments to {@link JitTypeBehavior#ANY}, because the generator will apply standard
	 * invocation, which does not benefit from type analysis.
	 *
	 * @param inVals the input arguments
	 * @return the list of type behaviors, one per argument, in argument order
	 */
	private List<JitTypeBehavior> getInputTypes(List<JitVal> inVals) {
		int inputCount = getInputCount();
		if (inputCount != inVals.size()) { // includes inputCount == -1 (variadic)
			return JitDataFlowModel.allAny(inVals);
		}
		Method method = decOp.getJavaMethod();
		if (method == null) {
			return JitDataFlowModel.allAny(inVals);
		}
		List<JitTypeBehavior> result = new ArrayList<>();
		Parameter[] parameters = method.getParameters();
		for (int i = 0; i < inVals.size(); i++) {
			Parameter p = parameters[i];
			JitTypeBehavior type = JitTypeBehavior.forJavaType(p.getType());
			if (type == null) {
				// One unsupported parameter disqualifies the whole signature
				return JitDataFlowModel.allAny(inVals);
			}
			result.add(type);
		}
		return Collections.unmodifiableList(result);
	}

	/**
	 * Get the type behavior from the userop's Java method
	 *
	 * <p>
	 * If the userop is not backed by a Java method, or its return type is not supported, this
	 * returns {@link JitTypeBehavior#ANY}.
	 *
	 * @return the type behavior, never {@code null}
	 */
	private JitTypeBehavior getReturnType() {
		Method method = decOp.getJavaMethod();
		if (method == null) {
			return JitTypeBehavior.ANY;
		}
		JitTypeBehavior type = JitTypeBehavior.forJavaType(method.getReturnType());
		// forJavaType yields null for unsupported types; honor the documented ANY fallback
		return type == null ? JitTypeBehavior.ANY : type;
	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * This "execution" is part of the intra-block analysis. This is the analytic interpretation
	 * of the invocation, not the actual run time invocation. This derives type information
	 * about the userop from the Java method and selects the appropriate {@link JitCallOtherOpIf
	 * callother} op to enter into the use-def graph. If an output operand is given, then this
	 * generates an output node defined by a {@link JitCallOtherDefOp}. Otherwise, it generates
	 * a (sink) {@link JitCallOtherOp}.
	 *
	 * @implNote When inlining a userop, the decoder leaves the original callother op in place.
	 *           This is for branch bookkeeping. Thus, we ask the decoder-wrapped version of the
	 *           userop if it was inlined. If so, we enter a {@link JitNopOp nop} node into the
	 *           use-def graph. The node will still contain the original callother op, but the
	 *           generator will not emit any code.
	 * @implNote <b>TODO</b>: Maybe float types shouldn't be size cast as ints and then bitcast
	 *           to the requested type. Either that, or we need to develop an overloading system
	 *           for userops, or to require the user to be very careful about which to invoke
	 *           for what (float) operand sizes. <b>TODO</b>: I don't know what the actual
	 *           behavior is here. We should add test cases for this.
	 * @implNote <b>TODO</b>: I think userop libraries may need to be able to hook this point.
	 *           Not sure to what extent we should allow them control of code generation. But
	 *           consider a syscall library. It might like to try to concretize, e.g., RAX, and
	 *           just hard code the invoked userop in the generated code.
	 */
	@Override
	public void execute(PcodeExecutor<JitVal> executor, PcodeUseropLibrary<JitVal> library,
			PcodeOp op) {
		if (decOp.canInlinePcode()) {
			dfm.notifyOp(new JitNopOp(op));
			return;
		}
		JitDataFlowState state = (JitDataFlowState) executor.getState();
		// Input 0 is skipped; the remaining inputs are the userop's arguments
		List<JitVal> inVals = Stream.of(op.getInputs())
				.skip(1)
				.map(inVn -> state.getVar(inVn, executor.getReason()))
				.toList();
		List<JitTypeBehavior> inTypes = getInputTypes(inVals);
		Varnode outVn = op.getOutput();
		if (outVn == null) {
			dfm.notifyOp(new JitCallOtherOp(op, decOp, inVals, inTypes, state.captureState()));
		}
		else {
			JitOutVar out = dfm.generateOutVar(outVn);
			// The state is captured before the output is recorded as the new definition
			dfm.notifyOp(new JitCallOtherDefOp(op, out, getReturnType(), decOp, inVals, inTypes,
				state.captureState()));
			state.setVar(outVn, out);
		}
	}

	@Override
	public boolean isFunctional() {
		return decOp.isFunctional();
	}

	@Override
	public boolean hasSideEffects() {
		return decOp.hasSideEffects();
	}

	@Override
	public boolean canInlinePcode() {
		return decOp.canInlinePcode();
	}

	@Override
	public Method getJavaMethod() {
		return decOp.getJavaMethod();
	}

	@Override
	public PcodeUseropLibrary<?> getDefiningLibrary() {
		return decOp.getDefiningLibrary();
	}
}
/** The data flow model whose use-def graph the wrapped userops populate */
private final JitDataFlowModel dfm;
/** The wrapped userops, keyed by name; built as an unmodifiable map by the constructor */
private final Map<String, PcodeUseropDefinition<JitVal>> userops;
/**
* Construct a wrapper library
*
* @param context the context from which the decoder's userop wrapper library is retrieved
* @param dfm the data flow model whose use-def graph to populate.
* @implNote Each time this is constructed, it has to traverse the wrapped userop library and
* create a wrapper for each individual userop. For a large library, this could get
* expensive, and it currently must happen for every passage compiled. Part of the
* cause for this requirement is the reference to the data flow mode used by each
* userop wrapper.
*/
public JitDataFlowUseropLibrary(JitAnalysisContext context, JitDataFlowModel dfm) {
this.dfm = dfm;
this.userops = context.getPassage()
.getDecodeLibrary()
.getUserops()
.values()
.stream()
.map(WrappedUseropDefinition::new)
.collect(Collectors.toUnmodifiableMap(d -> d.getName(), d -> d));
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This returns the map of wrapped userops built by the constructor.
 */
@Override
public Map<String, PcodeUseropDefinition<JitVal>> getUserops() {
return userops;
}
}

View file

@ -0,0 +1,95 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.JitOutVar;
import ghidra.pcode.emu.jit.var.JitVal;
/**
* A visitor that traverses the use-def graph upward, that is from uses toward definitions
*/
public interface JitOpUpwardVisitor extends JitOpVisitor {
@Override
default void visitUnOp(JitUnOp op) {
visitVal(op.u());
}
@Override
default void visitBinOp(JitBinOp op) {
visitVal(op.l());
visitVal(op.r());
}
@Override
default void visitStoreOp(JitStoreOp op) {
visitVal(op.offset());
visitVal(op.value());
}
@Override
default void visitLoadOp(JitLoadOp op) {
visitVal(op.offset());
}
@Override
default void visitCallOtherOp(JitCallOtherOp otherOp) {
for (JitVal v : otherOp.args()) {
visitVal(v);
}
}
@Override
default void visitCallOtherDefOp(JitCallOtherDefOp otherOp) {
for (JitVal v : otherOp.args()) {
visitVal(v);
}
}
@Override
default void visitCatenateOp(JitCatenateOp op) {
for (JitVal p : op.parts()) {
visitVal(p);
}
}
@Override
default void visitPhiOp(JitPhiOp op) {
for (JitVal opt : op.options().values()) {
visitVal(opt);
}
}
@Override
default void visitSubPieceOp(JitSynthSubPieceOp op) {
visitVal(op.v());
}
@Override
default void visitCBranchOp(JitCBranchOp op) {
visitVal(op.cond());
}
@Override
default void visitBranchIndOp(JitBranchIndOp op) {
visitVal(op.target());
}
@Override
default void visitOutVar(JitOutVar v) {
visitOp(v.definition());
}
}

View file

@ -0,0 +1,333 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.util.*;
import ghidra.pcode.emu.jit.JitCompiler;
import ghidra.pcode.emu.jit.JitCompiler.Diag;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.BlockFlow;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitDataFlowState.MiniDFState;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.JitMissingVar;
import ghidra.pcode.emu.jit.var.JitVal;
import ghidra.pcode.exec.AnnotatedPcodeUseropLibrary.PcodeUserop;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.Varnode;
/**
* The operator output use analysis for JIT-accelerated emulation.
*
* <p>
* This implements the Operation Elimination phase of the {@link JitCompiler} using a simple graph
* traversal. The result is the set of {@link JitOp ops} whose outputs are (or could be) used by a
* downstream op. This includes all "sink" ops and all ops on which they depend.
*
* <p>
* Some of the sink ops are easy to identify. These are ops that have direct effects on memory,
* control flow, or other aspects of the emulated machine:
*
* <ul>
* <li><b>Memory outputs</b> - any p-code op whose output operand is a memory varnode.</li>
* <li><b>Store ops</b> - a {@link JitStoreOp store} op.</li>
* <li><b>Branch ops</b> - one of {@link JitBranchOp branch}, {@link JitCBranchOp cbranch}, or
* {@link JitBranchIndOp branchind}.</li>
* <li><b>User ops with side effects</b> - a {@link JitCallOtherOpIf callother} to a method where
* {@link PcodeUserop#hasSideEffects() hasSideEffects}{@code =true}.</li>
* <li><b>Errors</b> - e.g., {@link JitUnimplementedOp unimplemented}, {@link JitCallOtherMissingOp
* missing userop}.</li>
* </ul>
*
* <p>
* We identify these ops by invoking {@link JitOp#canBeRemoved()}. Ops that return {@code false} are
* "sink" ops.
*
* <p>
* There is another class of ops to consider as "sinks," though: The definitions of SSA variables
* that could be retired. This could be from exiting the passage, flowing to a block with fewer live
* variables, or invoking a userop with the Standard strategy (see
* {@link JitDataFlowUseropLibrary}). Luckily, we have already performed {@link JitVarScopeModel
* scope} analysis, so we already know what varnodes are retired. However, to determine what SSA
* variables are retired, we have to consider where the retirement happens. For block transitions,
* it is always at the end of the block. Thus, we can use
* {@link JitDataFlowBlockAnalyzer#getVar(Varnode)}. For userops, we capture the intra-block
* analysis state into {@link JitCallOtherOpIf#dfState()} <em>at the time of invocation</em>. We can
* then use {@link MiniDFState#getVar(Varnode)}. The defining op for each retired SSA variable is
* considered used.
*
* <p>
* Retirement due to block flow requires a little more attention. Consider an op that defines a
* variable, where that op exists in a block that ends with a conditional branch. The analyzer does
* not know which flow the code will take, so we have to consider that it could take either. If for
* either branch, the variable goes out of scope and is retired, we have to consider the defining op
* as used.
*
* <p>
* The remainder of the algorithm is simply an upward traversal of the use-def graph to collect all
* of the sink ops' dependencies. All the dependencies are considered used.
*
* @implNote The {@link JitOpUpwardVisitor} permits seeding of values (constants and variables) and
* ops. Thus, we seed using the non-{@link JitOp#canBeRemoved() removable} ops, and the
* retireable SSA variables. We do not have to get the variables' defining ops, since the
* visitor will do that for us.
*/
public class JitOpUseModel {
/** The analysis context, consulted for the remove-unused-operations setting */
private final JitAnalysisContext context;
/** The control flow model, providing the blocks to examine */
private final JitControlFlowModel cfm;
/** The data flow model, providing the op nodes, phi nodes, and per-block analyzers */
private final JitDataFlowModel dfm;
/** The variable scope model, providing the live varnodes of each block */
private final JitVarScopeModel vsm;
/** The ops collected as used by the analysis */
private final Set<JitOp> used = new HashSet<>();
/**
* Construct the operator use model
*
* @param context the analysis context
* @param cfm the control flow model
* @param dfm the data flow model
* @param vsm the variable scope model
*/
public JitOpUseModel(JitAnalysisContext context, JitControlFlowModel cfm,
JitDataFlowModel dfm, JitVarScopeModel vsm) {
this.context = context;
this.cfm = cfm;
this.dfm = dfm;
this.vsm = vsm;
if (context.getConfiguration().removeUnusedOperations()) {
analyze();
}
}
/**
 * The implementation of the graph traversal
 *
 * <p>
 * This is the use-def upward visitor that gathers the dependencies of ops and variables
 * identified elsewhere in the code. Seeding it via {@link #visitOp(JitOp)},
 * {@link #visitVal(JitVal)}, etc., collects all used ops into {@link JitOpUseModel#used}.
 */
class OpUseCollector implements JitOpUpwardVisitor {
	final JitBlock block;
	final JitDataFlowBlockAnalyzer analyzer;

	/**
	 * Construct a collector for the given block
	 *
	 * @param block the block whose ops are being examined
	 */
	public OpUseCollector(JitBlock block) {
		this.block = block;
		this.analyzer = dfm.getAnalyzer(block);
	}

	/**
	 * Mark the op as used and, only if it was not already marked, continue the traversal.
	 */
	@Override
	public void visitOp(JitOp op) {
		boolean firstVisit = used.add(op);
		if (firstVisit) {
			JitOpUpwardVisitor.super.visitOp(op);
		}
	}

	/**
	 * Missing variables are not expected during this traversal.
	 *
	 * @throws AssertionError always
	 */
	@Override
	public void visitMissingVar(JitMissingVar missingVar) {
		throw new AssertionError("missing: " + missingVar);
	}

	/**
	 * Visit a varnode that could be retired upon exiting a block
	 *
	 * <p>
	 * This applies whether exiting the passage altogether or just flowing to another block. It
	 * will find all definitions (including just-generated phi nodes) and visit them.
	 *
	 * @param vn the retireable varnode
	 */
	void visitRetireable(Varnode vn) {
		for (JitVal definition : analyzer.getOutput(vn)) {
			visitVal(definition);
		}
	}

	/**
	 * Visit a varnode that will be retired before calling a userop
	 *
	 * <p>
	 * This applies only when the userop is invoked using the Standard strategy. The
	 * definitions come from the state captured with the callother op.
	 *
	 * @see JitDataFlowUseropLibrary
	 * @param vn the retired varnode
	 * @param callother the callother op
	 */
	void visitCallOtherRetireable(Varnode vn, JitCallOtherOpIf callother) {
		for (JitVal definition : callother.dfState().getDefinitions(vn)) {
			visitVal(definition);
		}
	}
}
/**
 * Get the varnodes that will be retired before the given callother
 *
 * <p>
 * A userop reporting itself as functional retires nothing; any other userop retires all of the
 * block's live varnodes.
 *
 * @param block the block containing the callother
 * @param op the callother op
 * @return the block's live varnodes, or empty, depending on the callother invocation strategy.
 */
private Set<Varnode> getCallOtherRetireVarnodes(JitBlock block, JitCallOtherOpIf op) {
	// Should not see inline-replaced ops here
	if (!op.userop().isFunctional()) {
		return vsm.getLiveVars(block);
	}
	return Set.of();
}
/**
 * Get the varnodes that could be retired upon leaving this block
 *
 * <p>
 * If the block has an {@link JitBlock#branchesOut() exit} branch, then all live varnodes could
 * be retired. Otherwise, the result is the union of retired varnodes among each flow
 * {@link JitBlock#flowsFrom() from} the block. Note that every block must have a means of
 * leaving, i.e., {@link JitBlock#branchesOut()} and {@link JitBlock#flowsFrom()} cannot both be
 * empty.
 *
 * @implNote Because retired varnodes are the difference in live varnodes, the set computation
 *           takes the intersection of live varnodes among all flow destinations and subtracts
 *           it from the live varnodes of this block.
 *
 * @param block the block to examine
 * @return the set of varnodes that could be retired
 */
private Set<Varnode> getCouldRetireVarnodes(JitBlock block) {
	if (!block.branchesOut().isEmpty()) {
		return vsm.getLiveVars(block);
	}
	var flows = block.flowsFrom().values().iterator();
	if (!flows.hasNext()) {
		throw new AssertionError();
		// or just return Set.of()?
	}
	// Seed with the first destination, then narrow by each remaining destination
	Set<Varnode> aliveAfterEveryFlow = new HashSet<>(vsm.getLiveVars(flows.next().to()));
	while (flows.hasNext()) {
		aliveAfterEveryFlow.retainAll(vsm.getLiveVars(flows.next().to()));
	}
	Set<Varnode> couldRetire = new HashSet<>(vsm.getLiveVars(block));
	couldRetire.removeAll(aliveAfterEveryFlow);
	return couldRetire;
}
/**
 * Perform the analysis
 *
 * <p>
 * The first pass backfills any phi nodes that might not have been considered during data flow
 * analysis, and the newly added ones are submitted for inter-block analysis. The second pass
 * collects all the sinks and runs the traversal on them. Traversal ends whenever it reaches an
 * op already marked as used, because its dependencies are already marked, too. The visit order
 * does not matter, so blocks and ops are simply marked as they are encountered.
 */
private void analyze() {
	/*
	 * Every value that could get written back out to the state is wanted, either because it is
	 * retired, or because the output operand is memory. Inputs to branches and to callothers
	 * are needed as well, since those may have side effects depending on those inputs.
	 */
	Set<JitPhiOp> priorPhis = Set.copyOf(dfm.phiNodes());
	for (JitBlock blk : cfm.getBlocks()) {
		for (PcodeOp pcodeOp : blk.getCode()) {
			if (!(dfm.getJitOp(pcodeOp) instanceof JitCallOtherOpIf call)) {
				continue;
			}
			for (Varnode live : getCallOtherRetireVarnodes(blk, call)) {
				// Only the side effect is wanted: adds any needed phi. Visit is later.
				call.dfState().getVar(live);
			}
		}
		for (Varnode retire : getCouldRetireVarnodes(blk)) {
			// Again only for the side effect. Visit is later.
			dfm.getAnalyzer(blk).getVar(retire);
		}
	}

	// Whatever phi nodes appeared during the first pass still need inter-block analysis
	Set<JitPhiOp> extraPhis = new LinkedHashSet<>(dfm.phiNodes());
	extraPhis.removeAll(priorPhis);
	dfm.analyzeInterblock(extraPhis);

	for (JitBlock blk : cfm.getBlocks()) {
		OpUseCollector collector = new OpUseCollector(blk);
		// Locate memory outputs, stores, branches, callothers
		for (PcodeOp pcodeOp : blk.getCode()) {
			JitOp node = dfm.getJitOp(pcodeOp);
			if (node instanceof JitCallOtherOpIf call) {
				for (Varnode live : getCallOtherRetireVarnodes(blk, call)) {
					collector.visitCallOtherRetireable(live, call);
				}
			}
			if (!node.canBeRemoved()) {
				collector.visitOp(node);
			}
		}
		// Compute retire-able variables
		for (Varnode retire : getCouldRetireVarnodes(blk)) {
			collector.visitRetireable(retire);
		}
	}
}
/**
 * Check whether the given op node is used.
 *
 * <p>
 * If the op is used, then it cannot be eliminated. When removal of unused operations is
 * disabled in the configuration, every op is reported as used.
 *
 * @param op the op to check
 * @return true if used, i.e., non-removable
 */
public boolean isUsed(JitOp op) {
	return !context.getConfiguration().removeUnusedOperations() || used.contains(op);
}
/**
 * For diagnostics: Print the result of this analysis to stderr
 *
 * <p>
 * For each block, this lists the output values of every varnode that could be retired, followed
 * by each op of the block that {@link #isUsed(JitOp)} reports as unused.
 *
 * @see Diag#PRINT_OUM
 */
public void dumpResult() {
    System.err.println("STAGE: OpUse");
    for (JitBlock blk : cfm.getBlocks()) {
        JitDataFlowBlockAnalyzer blockAnalyzer = dfm.getAnalyzer(blk);
        System.err.println(" Block: " + blk);
        for (Varnode retireable : getCouldRetireVarnodes(blk)) {
            for (JitVal out : blockAnalyzer.getOutput(retireable)) {
                System.err.println(" Could retire: " + out);
            }
        }
        for (PcodeOp pcodeOp : blk.getCode()) {
            JitOp node = dfm.getJitOp(pcodeOp);
            if (isUsed(node)) {
                continue; // Only the removed ops are reported
            }
            System.err.println(" Removed: %s: %s".formatted(pcodeOp.getSeqnum(), node));
        }
    }
}
}

View file

@ -0,0 +1,270 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.*;
/**
 * A visitor for traversing the use-def graph
 *
 * <p>
 * The default implementations here do nothing other than discern the type of an op or value and
 * dispatch the invocation to the matching type-specific method. To traverse the graph upward,
 * consider {@link JitOpUpwardVisitor}. Note no "downward" visitor is currently provided, because
 * it was not needed.
 */
public interface JitOpVisitor {

    /**
     * Visit an op node
     *
     * <p>
     * The default implementation dispatches this to the type-specific {@code visit} method.
     *
     * @param op the op visited
     * @throws NullPointerException if {@code op} is null
     * @throws AssertionError if the op is of a type not known to this visitor
     */
    default void visitOp(JitOp op) {
        switch (op) {
            case null -> throw new NullPointerException("null op");
            case JitUnOp unOp -> visitUnOp(unOp);
            case JitBinOp binOp -> visitBinOp(binOp);
            case JitStoreOp storeOp -> visitStoreOp(storeOp);
            case JitLoadOp loadOp -> visitLoadOp(loadOp);
            case JitCallOtherOp otherOp -> visitCallOtherOp(otherOp);
            case JitCallOtherDefOp otherOp -> visitCallOtherDefOp(otherOp);
            case JitCallOtherMissingOp otherOp -> visitCallOtherMissingOp(otherOp);
            case JitCatenateOp catOp -> visitCatenateOp(catOp);
            case JitPhiOp phiOp -> visitPhiOp(phiOp);
            case JitSynthSubPieceOp pieceOp -> visitSubPieceOp(pieceOp);
            case JitBranchOp branchOp -> visitBranchOp(branchOp);
            case JitCBranchOp cBranchOp -> visitCBranchOp(cBranchOp);
            case JitBranchIndOp branchIndOp -> visitBranchIndOp(branchIndOp);
            case JitUnimplementedOp unimplOp -> visitUnimplementedOp(unimplOp);
            case JitNopOp nopOp -> visitNopOp(nopOp);
            default -> throw new AssertionError("Unrecognized op: " + op);
        }
    }

    /**
     * Visit a {@link JitUnOp}
     *
     * @param unOp the op visited
     */
    default void visitUnOp(JitUnOp unOp) {
    }

    /**
     * Visit a {@link JitBinOp}
     *
     * @param binOp the op visited
     */
    default void visitBinOp(JitBinOp binOp) {
    }

    /**
     * Visit a {@link JitStoreOp}
     *
     * @param storeOp the op visited
     */
    default void visitStoreOp(JitStoreOp storeOp) {
    }

    /**
     * Visit a {@link JitLoadOp}
     *
     * @param loadOp the op visited
     */
    default void visitLoadOp(JitLoadOp loadOp) {
    }

    /**
     * Visit a {@link JitCallOtherOp}
     *
     * @param otherOp the op visited
     */
    default void visitCallOtherOp(JitCallOtherOp otherOp) {
    }

    /**
     * Visit a {@link JitCallOtherDefOp}
     *
     * @param otherOp the op visited
     */
    default void visitCallOtherDefOp(JitCallOtherDefOp otherOp) {
    }

    /**
     * Visit a {@link JitCallOtherMissingOp}
     *
     * @param otherOp the op visited
     */
    default void visitCallOtherMissingOp(JitCallOtherMissingOp otherOp) {
    }

    /**
     * Visit a {@link JitCatenateOp}
     *
     * @param catOp the op visited
     */
    default void visitCatenateOp(JitCatenateOp catOp) {
    }

    /**
     * Visit a {@link JitPhiOp}
     *
     * @param phiOp the op visited
     */
    default void visitPhiOp(JitPhiOp phiOp) {
    }

    /**
     * Visit a {@link JitSynthSubPieceOp}
     *
     * @param pieceOp the op visited
     */
    default void visitSubPieceOp(JitSynthSubPieceOp pieceOp) {
    }

    /**
     * Visit a {@link JitBranchOp}
     *
     * @param branchOp the op visited
     */
    default void visitBranchOp(JitBranchOp branchOp) {
    }

    /**
     * Visit a {@link JitCBranchOp}
     *
     * @param cBranchOp the op visited
     */
    default void visitCBranchOp(JitCBranchOp cBranchOp) {
    }

    /**
     * Visit a {@link JitBranchIndOp}
     *
     * @param branchIndOp the op visited
     */
    default void visitBranchIndOp(JitBranchIndOp branchIndOp) {
    }

    /**
     * Visit a {@link JitUnimplementedOp}
     *
     * @param unimplOp the op visited
     */
    default void visitUnimplementedOp(JitUnimplementedOp unimplOp) {
    }

    /**
     * Visit a {@link JitNopOp}
     *
     * @param nopOp the op visited
     */
    default void visitNopOp(JitNopOp nopOp) {
    }

    /**
     * Visit a {@link JitVal}
     *
     * <p>
     * The default implementation dispatches this to the type-specific {@code visit} method.
     *
     * @param v the value visited
     * @throws NullPointerException if {@code v} is null
     * @throws AssertionError if the value is neither a {@link JitConstVal} nor a {@link JitVar}
     */
    default void visitVal(JitVal v) {
        switch (v) {
            // A pattern switch rejects null anyway; this just gives the error a message
            case null -> throw new NullPointerException("null value");
            case JitConstVal constVal -> visitConstVal(constVal);
            case JitVar jVar -> visitVar(jVar);
            default -> throw new AssertionError("Unrecognized value: " + v);
        }
    }

    /**
     * Visit a {@link JitVar}
     *
     * <p>
     * The default implementation dispatches this to the type-specific {@code visit} method.
     *
     * @param v the variable visited
     * @throws NullPointerException if {@code v} is null
     * @throws AssertionError if the variable is of a type not known to this visitor
     */
    default void visitVar(JitVar v) {
        switch (v) {
            // A pattern switch rejects null anyway; this just gives the error a message
            case null -> throw new NullPointerException("null variable");
            case JitInputVar inputVar -> visitInputVar(inputVar);
            case JitMissingVar missingVar -> visitMissingVar(missingVar);
            case JitOutVar outVar -> visitOutVar(outVar);
            case JitDirectMemoryVar dirMemVar -> visitDirectMemoryVar(dirMemVar);
            case JitIndirectMemoryVar indMemVar -> visitIndirectMemoryVar(indMemVar);
            default -> throw new AssertionError("Unrecognized variable: " + v);
        }
    }

    /**
     * Visit a {@link JitConstVal}
     *
     * @param constVal the constant value visited
     */
    default void visitConstVal(JitConstVal constVal) {
    }

    /**
     * Visit a {@link JitDirectMemoryVar}
     *
     * @param dirMemVar the variable visited
     */
    default void visitDirectMemoryVar(JitDirectMemoryVar dirMemVar) {
    }

    /**
     * Visit a {@link JitIndirectMemoryVar}
     *
     * <p>
     * NOTE: These should not ordinarily appear in the use-def graph. There is only the one
     * {@link JitIndirectMemoryVar#INSTANCE}, and it's used as a temporary dummy. Indirect memory
     * access is instead modeled by the {@link JitLoadOp}. Accordingly, the default implementation
     * always fails.
     *
     * @param indMemVar the variable visited
     * @throws AssertionError always, unless overridden
     */
    default void visitIndirectMemoryVar(JitIndirectMemoryVar indMemVar) {
        throw new AssertionError("Unexpected indirect memory variable in use-def graph");
    }

    /**
     * Visit a {@link JitInputVar}
     *
     * @param inputVar the variable visited
     */
    default void visitInputVar(JitInputVar inputVar) {
    }

    /**
     * Visit a {@link JitMissingVar}
     *
     * @param missingVar the variable visited
     */
    default void visitMissingVar(JitMissingVar missingVar) {
    }

    /**
     * Visit a {@link JitOutVar}
     *
     * @param outVar the variable visited
     */
    default void visitOutVar(JitOutVar outVar) {
    }
}

View file

@ -0,0 +1,529 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import static org.objectweb.asm.Opcodes.*;
import java.util.*;
import org.objectweb.asm.Opcodes;
/**
 * The p-code type of an operand.
 *
 * <p>
 * A type is an integer or floating-point value of a specific size in bytes. All values and
 * variables in p-code are just bit vectors. The operators interpret those vectors according to a
 * {@link JitTypeBehavior}. While types only technically belong to the operands, we also talk about
 * values, variables, and varnodes being assigned types, so that we can allocate suitable JVM
 * locals.
 */
public interface JitType {

    /**
     * Compare two types by preference. The type with the more preferred behavior then smaller size
     * is preferred.
     *
     * @param t1 the first type
     * @param t2 the second type
     * @return as in {@link Comparator#compare(Object, Object)}
     */
    static int compare(JitType t1, JitType t2) {
        int c = Integer.compare(t1.pref(), t2.pref());
        if (c != 0) {
            return c;
        }
        return Integer.compare(t1.size(), t2.size());
    }

    /**
     * Identify the p-code type that is exactly represented by the given JVM type.
     *
     * <p>
     * This is used during Direct userop invocation to convert the arguments and return value. Note
     * that {@code boolean} maps to {@link IntJitType#I4}, the same as {@code int}.
     *
     * @param cls the primitive class (not boxed)
     * @return the p-code type
     * @throws IllegalArgumentException if the class is not one of {@code boolean}, {@code byte},
     *             {@code short}, {@code int}, {@code long}, {@code float}, or {@code double}
     * @see JitDataFlowUseropLibrary
     */
    public static JitType forJavaType(Class<?> cls) {
        if (cls == boolean.class) {
            return IntJitType.I4;
        }
        if (cls == byte.class) {
            return IntJitType.I1;
        }
        if (cls == short.class) {
            return IntJitType.I2;
        }
        if (cls == int.class) {
            return IntJitType.I4;
        }
        if (cls == long.class) {
            return LongJitType.I8;
        }
        if (cls == float.class) {
            return FloatJitType.F4;
        }
        if (cls == double.class) {
            return DoubleJitType.F8;
        }
        throw new IllegalArgumentException("No p-code type for Java type: " + cls);
    }

    /**
     * A p-code type that can be represented in a single JVM variable.
     */
    public interface SimpleJitType extends JitType {
        /**
         * The JVM type of the variable that can represent a p-code variable of this type
         *
         * @return the primitive class (not boxed)
         */
        Class<?> javaType();

        /**
         * The JVM opcode to load a local variable of this type onto the stack
         *
         * @return the opcode
         */
        int opcodeLoad();

        /**
         * The JVM opcode to store a local variable of this type from the stack
         *
         * @return the opcode
         */
        int opcodeStore();

        /**
         * Re-apply the {@link JitTypeBehavior#INTEGER integer} behavior to this type
         *
         * <p>
         * This may be slightly faster than {@code JitTypeBehavior.INTEGER.resolve(this)}, because
         * each type can pick its int type directly, and integer types can just return {@code this}.
         *
         * @return this type as an int
         */
        SimpleJitType asInt();
    }

    /**
     * The p-code types for integers of size 1 through 4, i.e., that fit in a JVM int.
     *
     * @param size the size in bytes
     */
    public record IntJitType(int size) implements SimpleJitType {
        /** {@code int1}: a 1-byte integer */
        public static final IntJitType I1 = new IntJitType(1);
        /** {@code int2}: a 2-byte integer */
        public static final IntJitType I2 = new IntJitType(2);
        /** {@code int3}: a 3-byte integer */
        public static final IntJitType I3 = new IntJitType(3);
        /** {@code int4}: a 4-byte integer */
        public static final IntJitType I4 = new IntJitType(4);

        /**
         * Get the type for an integer of the given size 1 through 4
         *
         * @param size the size in bytes
         * @return the type
         * @throws IllegalArgumentException for any size <em>not</em> 1 through 4
         */
        public static IntJitType forSize(int size) {
            return switch (size) {
                case 1 -> I1;
                case 2 -> I2;
                case 3 -> I3;
                case 4 -> I4;
                default -> throw new IllegalArgumentException("size:" + size);
            };
        }

        /**
         * Compact constructor to check the size
         *
         * @param size the size in bytes
         */
        public IntJitType {
            assert 0 < size && size <= Integer.BYTES;
        }

        @Override
        public int pref() {
            return 0;
        }

        @Override
        public String nm() {
            return "i";
        }

        @Override
        public Class<?> javaType() {
            return int.class;
        }

        @Override
        public int opcodeLoad() {
            return ILOAD;
        }

        @Override
        public int opcodeStore() {
            return ISTORE;
        }

        @Override
        public IntJitType ext() {
            return I4;
        }

        @Override
        public IntJitType asInt() {
            return this;
        }
    }

    /**
     * The p-code types for integers of size 5 through 8, i.e., that fit in a JVM long.
     *
     * @param size the size in bytes
     */
    public record LongJitType(int size) implements SimpleJitType {
        /** {@code int5}: a 5-byte integer */
        public static final LongJitType I5 = new LongJitType(5);
        /** {@code int6}: a 6-byte integer */
        public static final LongJitType I6 = new LongJitType(6);
        /** {@code int7}: a 7-byte integer */
        public static final LongJitType I7 = new LongJitType(7);
        /** {@code int8}: a 8-byte integer */
        public static final LongJitType I8 = new LongJitType(8);

        /**
         * Get the type for an integer of the given size 5 through 8
         *
         * @param size the size in bytes
         * @return the type
         * @throws IllegalArgumentException for any size <em>not</em> 5 through 8
         */
        public static LongJitType forSize(int size) {
            return switch (size) {
                case 5 -> I5;
                case 6 -> I6;
                case 7 -> I7;
                case 8 -> I8;
                default -> throw new IllegalArgumentException("size:" + size);
            };
        }

        /**
         * Compact constructor to check the size
         *
         * @param size the size in bytes
         */
        public LongJitType {
            assert 0 < size && size <= Long.BYTES;
        }

        @Override
        public int pref() {
            return 1;
        }

        @Override
        public String nm() {
            return "l";
        }

        @Override
        public Class<?> javaType() {
            return long.class;
        }

        @Override
        public int opcodeLoad() {
            return LLOAD;
        }

        @Override
        public int opcodeStore() {
            return LSTORE;
        }

        @Override
        public LongJitType ext() {
            return I8;
        }

        @Override
        public LongJitType asInt() {
            return this;
        }
    }

    /**
     * The p-code type for floating-point of size 4, i.e., that fits in a JVM float.
     */
    public enum FloatJitType implements SimpleJitType {
        /** {@code float4}: a 4-byte float */
        F4;

        @Override
        public int pref() {
            return 2;
        }

        @Override
        public String nm() {
            return "f";
        }

        @Override
        public int size() {
            return Float.BYTES;
        }

        @Override
        public Class<?> javaType() {
            return float.class;
        }

        @Override
        public int opcodeLoad() {
            return FLOAD;
        }

        @Override
        public int opcodeStore() {
            return FSTORE;
        }

        @Override
        public FloatJitType ext() {
            return this;
        }

        @Override
        public IntJitType asInt() {
            return IntJitType.I4;
        }
    }

    /**
     * The p-code type for floating-point of size 8, i.e., that fits in a JVM double.
     */
    public enum DoubleJitType implements SimpleJitType {
        /** {@code float8}: a 8-byte float */
        F8;

        @Override
        public int pref() {
            return 3;
        }

        @Override
        public String nm() {
            return "d";
        }

        @Override
        public int size() {
            return Double.BYTES;
        }

        @Override
        public Class<?> javaType() {
            return double.class;
        }

        @Override
        public int opcodeLoad() {
            return DLOAD;
        }

        @Override
        public int opcodeStore() {
            return DSTORE;
        }

        @Override
        public DoubleJitType ext() {
            return this;
        }

        @Override
        public LongJitType asInt() {
            return LongJitType.I8;
        }
    }

    /**
     * <b>WIP</b>: The p-code types for integers of size 9 and greater.
     *
     * @param size the size in bytes
     */
    public record MpIntJitType(int size) implements JitType {
        // NOTE(review): plain HashMap with no synchronization -- confirm that forSize is only
        // ever called from one thread at a time.
        private static final Map<Integer, MpIntJitType> FOR_SIZES = new HashMap<>();

        /**
         * Get the type for an integer of the given size 9 or greater
         *
         * <p>
         * Instances are cached per size, so repeated calls with the same size return the same
         * object.
         *
         * @implNote The size is not validated here. Callers are expected to pass a size of 9 or
         *           greater; smaller sizes belong to {@link IntJitType} and {@link LongJitType}.
         * @param size the size in bytes
         * @return the type
         */
        public static MpIntJitType forSize(int size) {
            return FOR_SIZES.computeIfAbsent(size, MpIntJitType::new);
        }

        @Override
        public int pref() {
            return 4;
        }

        @Override
        public String nm() {
            return "I";
        }

        /**
         * The total number of JVM int variables ("legs") required to store the int
         *
         * @return the total number of legs, i.e., the size divided by 4, rounded up
         */
        public int legsAlloc() {
            return (size + Integer.BYTES - 1) / Integer.BYTES;
        }

        /**
         * The number of legs that are filled
         *
         * @return the number of whole legs, i.e., the size divided by 4, rounded down
         */
        public int legsWhole() {
            return size / Integer.BYTES;
        }

        /**
         * The number of bytes filled in the last leg, if partial
         *
         * @return the number of bytes in the partial leg, or 0 if all legs are whole
         */
        public int partialSize() {
            return size % Integer.BYTES;
        }

        /**
         * Get the p-code type that describes the part of the variable in each leg
         *
         * <p>
         * Each whole leg will have the type {@link IntJitType#I4}, and the partial leg, if
         * applicable, will have its appropriate smaller integer type. The partial leg, when
         * present, is the first element of the list. The list always has exactly
         * {@link #legsAlloc()} non-null elements.
         *
         * @return the list of types, each fitting in a JVM int.
         */
        public List<SimpleJitType> legTypes() {
            IntJitType[] types = new IntJitType[legsAlloc()];
            int i = 0;
            if (partialSize() != 0) {
                types[i++] = IntJitType.forSize(partialSize());
            }
            // Fill every remaining slot. Bounding this by legsWhole() would leave the final slot
            // null whenever the partial leg occupies slot 0.
            for (; i < types.length; i++) {
                types[i] = IntJitType.I4;
            }
            return Arrays.asList(types);
        }

        @Override
        public MpIntJitType ext() {
            return MpIntJitType.forSize(legsAlloc() * Integer.BYTES);
        }
    }

    /**
     * <b>WIP</b>: The p-code types for floats of size other than 4 and 8
     *
     * @param size the size in bytes
     */
    public record MpFloatJitType(int size) implements JitType {
        // NOTE(review): plain HashMap with no synchronization -- confirm that forSize is only
        // ever called from one thread at a time.
        private static final Map<Integer, MpFloatJitType> FOR_SIZES = new HashMap<>();

        /**
         * Get the type for a float of the given size other than 4 and 8
         *
         * <p>
         * Instances are cached per size, so repeated calls with the same size return the same
         * object.
         *
         * @implNote The size is not validated here. Callers are expected not to pass 4 or 8; those
         *           sizes belong to {@link FloatJitType} and {@link DoubleJitType}.
         * @param size the size in bytes
         * @return the type
         */
        public static MpFloatJitType forSize(int size) {
            return FOR_SIZES.computeIfAbsent(size, MpFloatJitType::new);
        }

        @Override
        public int pref() {
            return 5;
        }

        @Override
        public String nm() {
            return "F";
        }

        @Override
        public MpFloatJitType ext() {
            return this;
        }
    }

    /**
     * The preference for this type. Smaller is more preferred.
     *
     * @return the preference
     */
    public int pref();

    /**
     * Part of the name of a JVM local variable allocated for this type
     *
     * @return the "type" part of a JVM local's name
     */
    public String nm();

    /**
     * The size of this type
     *
     * @return the size in bytes
     */
    public int size();

    /**
     * Extend this p-code type to the p-code type that fills its entire host JVM type.
     *
     * <p>
     * This is useful, e.g., when multiplying two {@link IntJitType#I3 int3} values using
     * {@link Opcodes#IMUL imul}: the result might be an {@link IntJitType#I4 int4} and so may need
     * additional conversion.
     *
     * @return the extended type
     */
    JitType ext();
}

View file

@ -0,0 +1,182 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.util.Comparator;
import java.util.Objects;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.op.JitCopyOp;
import ghidra.pcode.emu.jit.op.JitPhiOp;
/**
 * The behavior/requirement for an operand's type.
 *
 * <p>
 * Each behavior can be applied to a size to produce a {@link JitType}, or re-applied to an existing
 * type.
 *
 * @see JitTypeModel
 */
public enum JitTypeBehavior {
    /**
     * No type requirement or interpretation.
     */
    ANY {
        /**
         * {@inheritDoc}
         *
         * <p>
         * With no type specified, the integer interpretation is the default.
         */
        @Override
        public JitType type(int size) {
            return INTEGER.type(size);
        }

        @Override
        public JitType resolve(JitType varType) {
            return varType;
        }
    },
    /**
     * The bits are interpreted as an integer.
     */
    INTEGER {
        @Override
        public JitType type(int size) {
            assert size > 0;
            if (1 <= size && size <= Integer.BYTES) {
                return IntJitType.forSize(size);
            }
            if (Integer.BYTES < size && size <= Long.BYTES) {
                return LongJitType.forSize(size);
            }
            // Everything else, including anything that slipped past the assertion
            return MpIntJitType.forSize(size);
        }

        @Override
        public JitType resolve(JitType varType) {
            return type(varType.size());
        }
    },
    /**
     * The bits are interpreted as a floating-point value.
     */
    FLOAT {
        @Override
        public JitType type(int size) {
            if (size == Float.BYTES) {
                return FloatJitType.F4;
            }
            if (size == Double.BYTES) {
                return DoubleJitType.F8;
            }
            return MpFloatJitType.forSize(size);
        }

        @Override
        public JitType resolve(JitType varType) {
            return type(varType.size());
        }
    },
    /**
     * For {@link JitCopyOp} and {@link JitPhiOp}: No type requirement or interpretation, but there
     * is an implication that the output has the same interpretation as the inputs.
     */
    COPY {
        @Override
        public JitType type(int size) {
            throw new AssertionError();
        }

        @Override
        public JitType resolve(JitType varType) {
            return ANY.resolve(varType);
        }
    },
    ;

    /**
     * Compare two behaviors by preference. The behavior with the smaller ordinal is preferred.
     *
     * <p>
     * Two identical references, including two nulls, compare as equal.
     *
     * @param b1 the first behavior
     * @param b2 the second behavior
     * @return as in {@link Comparator#compare(Object, Object)}
     */
    public static int compare(JitTypeBehavior b1, JitTypeBehavior b2) {
        return b1 == b2 ? 0 : b1.compareTo(b2);
    }

    /**
     * Apply this behavior to a value of the given size to determine its type
     *
     * @param size the size of the value in bytes
     * @return the resulting type
     * @throws AssertionError if the type is not applicable, and such an invocation was not expected
     */
    public abstract JitType type(int size);

    /**
     * Re-apply this behavior to an existing type
     *
     * <p>
     * For {@link #ANY} and {@link #COPY} the result is the given type. For {@link #INTEGER} and
     * {@link #FLOAT} the result is this behavior applied to the size of the given type.
     *
     * @param varType the type
     * @return the resulting type
     */
    public abstract JitType resolve(JitType varType);

    /**
     * Derive the type behavior from a Java language type.
     *
     * <p>
     * This is used on userops declared with Java primitives for parameters. To work with the
     * {@link JitTypeModel}, we need to specify the type behavior of each operand. We aim to select
     * behaviors such that the model allocates JVM locals whose JVM types match the userop method's
     * parameters. This optimizes type conversions during Direct invocation.
     *
     * @param cls the primitive class (not boxed)
     * @return the p-code type behavior, or null for {@code char}, {@code void}, and any
     *         non-primitive class
     * @see JitDataFlowUseropLibrary
     */
    public static JitTypeBehavior forJavaType(Class<?> cls) {
        if (cls == byte.class || cls == short.class || cls == int.class || cls == long.class) {
            return INTEGER;
        }
        if (cls == float.class || cls == double.class) {
            return FLOAT;
        }
        if (cls == boolean.class) {
            return INTEGER;
        }
        if (cls == char.class || cls == void.class) {
            return null;
        }
        if (cls.isPrimitive()) {
            // Every primitive class is handled above, so this is not expected to happen
            throw new AssertionError();
        }
        return null;
    }
}

View file

@ -0,0 +1,401 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.util.*;
import java.util.Map.Entry;
import org.objectweb.asm.Opcodes;
import ghidra.pcode.emu.jit.JitCompiler;
import ghidra.pcode.emu.jit.analysis.JitType.FloatJitType;
import ghidra.pcode.emu.jit.analysis.JitType.IntJitType;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.JitOutVar;
import ghidra.pcode.emu.jit.var.JitVal;
import ghidra.pcode.emu.jit.var.JitVal.ValUse;
import ghidra.program.model.pcode.PcodeOp;
/**
* The type analysis for JIT-accelerated emulation.
*
* <p>
* This implements the Type Assignment phase of the {@link JitCompiler} using a very basic "voting"
* algorithm. The result is an assignment of type to each {@link JitVal}. To be clear, at this
* phase, we're assigning types to variables (and constants) in the use-def graph, not varnodes.
* Later we do another bit of "voting" to determine the type of each JVM local allocated to a
* varnode. Perhaps we could be more direct, but in anticipation of future optimizations, we keep
* this analysis at the per-variable level. This is partly an artifact of exploration before
* deciding to allocate by varnode instead of by variable.
*
* <h2>Types in P-code and the JVM</h2>
* <p>
* P-code (and Sleigh) is a relatively type free language. Aside from size, variables have no type;
* they are just bit vectors. The operators are typed and cast the bits as required. This aligns
* well with most machine architectures. Registers are just bit vectors, and the instructions
* interpret them according to some type. In contrast, JVM variables have a type: {@code int},
* {@code long}, {@code float}, {@code double}, or a reference. Conversions between JVM types must
* be explicit, so we must attend to certain aspects of p-code types when consuming operands
* allocated in JVM locals. There are three aspects to consider when translating p-code types to the
* JVM: behavior, size, and signedness.
*
* <h3>Behavior: Integer vs. Float</h3>
* <p>
* The JVM has two integral types {@code int} and {@code long} of 4 and 8 bytes respectively. P-code
* has one integral type of no specified size. Or rather, it has many integral types: 1-byte int,
* 2-byte int, 3-byte int, and so on. We thus describe p-code operands as having a type
* {@link JitTypeBehavior behavior}: <em>integral</em> or <em>floating-point</em>. Note there are
* two ancillary behaviors <em>any</em> and <em>copy</em> to describe the operands of truly typeless
* operators, like {@link JitCopyOp}.
*
* <h3>Size</h3>
* <p>
* When paired with a varnode's size, we have enough information to start mapping p-code types to
* JVM types. For float types, p-code only supports specific sizes defined by IEEE 754: 2-byte
* half-precision, 4-byte single-precision, 8-byte double-precision, 10-byte extended-precision,
* 16-byte quadruple-precision, and 32-byte octuple-precision. Some p-code types map precisely to
* JVM counterparts: The 4- and 8-byte integer types map precisely to the JVM's {@code int} and
* {@code long} types. Similarly, the 4- and 8-byte float types map precisely to {@code float} and
* {@code double}. <b>TODO</b>: The JIT translator does not currently support integral types greater
* than 8 bytes (64 bits) in size nor floating-point types other than 4 and 8 bytes (single and
* double precision) in size.
*
* <h3>Signedness</h3>
* <p>
* All floating-point types are signed, whether in p-code or in the JVM, so there's little to
* consider in terms of mapping. Some p-code operators have signed operands, some have unsigned
* operands, and others have no signedness at all. In contrast, no JVM bytecodes are strictly
* unsigned. They are either signed or have no signedness. It was a choice of the Java language
* designers that all variables would be signed, and this is a consequence of that choice. In time,
* "unsigned" operations were introduced in the form of static methods, e.g.,
* {@link Integer#compareUnsigned(int, int)} and {@link Long#divideUnsigned(long, long)}. Note that
* at the bit level, unsigned multiplication is the same as signed, and so no "unsigned multiply"
* method was provided. This actually aligns well with p-code in that, for this aspect of
* signedness, the variables are all the same. Instead the operations apply the type interpretation.
* Thus, we need not consider signedness when allocating JVM locals.
*
* <h2>Conversions and Casts</h2>
* <p>
* Conversions between JVM primitive types must be explicit in the emitted bytecode, even if the
* intent is just to re-cast the bits. This is not the case for p-code. Conversions in p-code need
* only be explicit when they mutate the actual bits. Consider the following p-code:
*
* <pre>
* $U00:4 = FLOAT_ADD r0, r1
* r2 = INT_2COMP $U00:4
* </pre>
*
* <p>
* The naive translation to bytecode:
*
* <pre>
* FLOAD 1 # r0
* FLOAD 2 # r1
* FADD
* FSTORE 3 # $U00:4
* LDC 0
* ILOAD 3 # $U00:4
* ISUB
* ISTORE 4 # r2
* </pre>
*
* <p>
* Will cause an error when loading the class. This is because the local variable 3 must be one of
* {@code int} or {@code float}, and the bytecode must declare which, so either the {@code FSTORE 3}
* or the {@code ILOAD 3} will fail the JVM's type checker. To resolve this, we could assign the
* type {@code float} to local variable 3, and change the erroneous {@code ILOAD 3} to:
*
* <pre>
* FLOAD 3
* INVOKESTATIC {@link Float#floatToRawIntBits(float)}
* </pre>
*
* <p>
* At this point, the bit-vector contents of {@code $U00:4} are on the stack, but for all the JVM
* cares, they are now an {@code int}. We must assign a JVM type to each local we allocate and
* place bitwise type casts wherever the generated bytecodes would cause type disagreement. We would
* like to assign JVM types in a way that reduces the number of {@code INVOKESTATIC} bytecodes
* emitted. One could argue that we should instead seek to reduce the number of {@code INVOKESTATIC}
* bytecodes actually executed, but I pray the JVM's JIT compiler can recognize calls to
* {@link Float#floatToRawIntBits(float)} and similar and emit no native code for them, i.e., they
* ought to have zero run-time cost.
*
* <p>
* Size conversions cause a similar need for explicit conversions, for two reasons: 1) Any
* conversion between JVM {@code int} and {@code long} still requires specific bytecodes. Neither
* platform supports implicit conversion between {@code float} and {@code double}. 2) We allocate
* the smaller JVM integral type to accommodate each p-code integral type, so we must apply masks in
* some cases to ensure values do not exceed their p-code varnode size. Luckily, p-code also
* requires explicit conversions between sizes, e.g., using {@link PcodeOp#INT_ZEXT zext}. However,
* we often have to perform temporary conversions in order to meet the type/size requirements of JVM
* bytecodes.
*
* <p>
* Consider {@code r2 = INT_MULT r0, r1} where the registers are all 5 bytes. Thus, the registers
* are allocated as JVM locals of type {@code long}. We load {@code r0} and {@code r1} onto the
* stack, and then we emit an {@link Opcodes#LMUL}. Technically, the result is another JVM
* {@code long}, which maps to an 8-byte p-code integer. Thus, we must apply a mask to "convert" the
* result to a 5-byte p-code integer before storing the result in {@code r2}'s JVM local.
*
* <h2>Type Assignment</h2>
* <p>
* Given that only behavior and size require any explicit conversions, we omit signedness from the
* formal definition of p-code {@link JitType type}. It is just the behavior applied to a size,
* e.g., {@link IntJitType#I3 int3}.
*
* <p>
* We use a fairly straightforward voting algorithm that examines how each variable definition is
* used. The type of an operand is trivially determined by examining the behavior of each operand,
* as specified by the p-code opcode; and the size of the input varnode, specified by the specific
* p-code op instance. For example, the p-code op {@code $U00:4 = FLOAT_ADD r0, r1} has an output
* operand of {@link FloatJitType#F4 float4}. Thus, it casts a vote that {@code $U00:4} should be
* that type. However, the subsequent op {@code r2 = INT_2COMP $U00} casts a vote for
* {@link IntJitType#I4 int4}. We prefer an {@code int} when tied, so we assign {@code $U00:4} the
* type {@code int4}.
*
* <p>
* This becomes complicated in the face of typeless ops, namely {@link JitCopyOp copy} and
* {@link JitPhiOp phi}. Again, we'd like to reduce the number of casts we have to emit in the
* bytecode. Consider the op {@code r1 = COPY r0}. This should emit a load followed immediately by a
* store, but the JVM will require both the source and destination locals to have the same type.
* Otherwise, a cast is necessary. The votes regarding {@code r0} will thus need to incorporate the
* votes regarding {@code r1} and vice versa.
*
* <p>
* Our algorithm is a straightforward queued traversal of the use-def graph until convergence.
* First, we initialize a queue with all values (variables and constants) in the graph and
* initialize all type assignments to {@link JitTypeBehavior#ANY any}. We then process each value in
* the queue until it is empty. A value receives votes from its uses as required by each operand.
* {@link JitTypeBehavior#INTEGER integer} and {@link JitTypeBehavior#FLOAT float} behaviors count as 1
* vote for that behavior. The {@link JitTypeBehavior#ANY any} behavior contributes 0 votes. If the
* behavior is {@link JitTypeBehavior#COPY copy}, then we know the use is either a {@link JitCopyOp
* copy} or {@link JitPhiOp phi} op, so we fetch its output value. The op casts its vote for the
* tentative type of that output value. Similar is done for the value's defining op, if applicable.
* If it's a copy or phi, we start a sub contest where each input/option casts a vote for its
* tentative type. The defining op's vote is cast according to the winner of the sub contest. Ties
* favor {@link JitTypeBehavior#INTEGER integer}. The final winner is computed and the tentative
* type assignment is updated. If there are no votes, the tentative assignment is
* {@link JitTypeBehavior#ANY}.
*
* <p>
* When an update changes the tentative type assignment of a value, then all its neighbors are added
* back to the queue. Neighbors are those values connected to this one via a copy or phi. When the
* queue is empty, the tentative type assignments are made final. Any assignment that remains
* {@link JitTypeBehavior#ANY any} is treated as if {@link JitTypeBehavior#INTEGER int}.
* <b>TODO</b>: Prove that this algorithm always terminates.
*
* @implNote We do all the bookkeeping in terms of {@link JitTypeBehavior} and wait to resolve the
* actual type until the final assignment.
*/
public class JitTypeModel {
/**
 * A tally of votes used to decide one type assignment
 *
 * @param counts the running vote count for each candidate (normally starts out empty)
 */
protected record Contest(Map<JitTypeBehavior, Integer> counts) {
    /**
     * Start a new contest in which no votes have been cast
     */
    public Contest() {
        this(new HashMap<>());
    }

    /**
     * Cast some number of votes for the given candidate
     *
     * <p>
     * Votes for {@link JitTypeBehavior#ANY} and {@link JitTypeBehavior#COPY} are not counted.
     *
     * @param candidate the candidate type
     * @param c the number of votes cast
     */
    private void vote(JitTypeBehavior candidate, int c) {
        boolean counted =
            candidate != JitTypeBehavior.ANY && candidate != JitTypeBehavior.COPY;
        if (counted) {
            counts.merge(candidate, c, Integer::sum);
        }
    }

    /**
     * Cast a single vote for the given candidate
     *
     * @param candidate the candidate type
     */
    public void vote(JitTypeBehavior candidate) {
        vote(candidate, 1);
    }

    /**
     * Order two candidate-vote entries so that the "greater" one is the winner
     *
     * <p>
     * {@link #winner()} takes the maximum under this ordering. Vote counts are compared first,
     * in the natural way. Only when they are equal are the candidates themselves compared, and
     * that comparison is negated: {@link JitTypeBehavior#INTEGER} has a lower ordinal than
     * {@link JitTypeBehavior#FLOAT}, yet int must win a tie.
     *
     * @param ent1 the first candidate-vote entry
     * @param ent2 the second candidate-vote entry
     * @return negative if the <em>second</em> wins, positive if the <em>first</em> wins. 0
     *         should never result, unless a candidate is compared with itself.
     */
    public static int compareCandidateEntries(Entry<JitTypeBehavior, Integer> ent1,
            Entry<JitTypeBehavior, Integer> ent2) {
        int byVotes = Integer.compare(ent1.getValue(), ent2.getValue());
        if (byVotes != 0) {
            return byVotes;
        }
        // Negated, because INT is preferred to FLOAT
        return -JitTypeBehavior.compare(ent1.getKey(), ent2.getKey());
    }

    /**
     * Determine which candidate won
     *
     * @return the winner, or {@link JitTypeBehavior#ANY} if no votes were counted
     */
    public JitTypeBehavior winner() {
        var best = counts.entrySet().stream().max(Contest::compareCandidateEntries);
        return best.map(Entry::getKey).orElse(JitTypeBehavior.ANY);
    }
}
// The data flow model whose values (and their uses and definitions) are analyzed
private final JitDataFlowModel dfm;
// Values awaiting (re)processing; being a set, a value already queued is not queued twice
private final SequencedSet<JitVal> queue = new LinkedHashSet<>();
// The current type behavior assigned to each value; tentative while analyze() is running
private final Map<JitVal, JitTypeBehavior> assignments = new HashMap<>();
/**
 * Construct the type model and run the analysis immediately
 *
 * @param dfm the data flow model providing the use-def graph to process
 */
public JitTypeModel(JitDataFlowModel dfm) {
    this.dfm = dfm;
    this.analyze();
}
/**
 * Run one voting contest to compute the new tentative assignment for a value
 *
 * <p>
 * As discussed in the "voting" section of {@link JitTypeModel}, every use of the value casts a
 * vote, and so does the value's defining op, if it has one. A use or definition whose behavior
 * is {@link JitTypeBehavior#COPY} votes according to the current tentative assignments of the
 * values on its other side.
 *
 * @param v the value
 * @return the new assignment
 */
protected JitTypeBehavior computeNewAssignment(JitVal v) {
    Contest tally = new Contest();

    // Downstream votes: one per use
    for (ValUse use : v.uses()) {
        JitTypeBehavior required = use.type();
        if (required == JitTypeBehavior.COPY && use.op() instanceof JitDefOp copyOp) {
            // A copying op votes for the tentative type of its own output
            tally.vote(assignments.get(copyOp.out()));
            continue;
        }
        tally.vote(required);
    }

    // Upstream vote: only a value that is an op's output has one
    if (!(v instanceof JitOutVar out)) {
        return tally.winner();
    }
    JitDefOp definingOp = out.definition();
    JitTypeBehavior definitionVote = definingOp.type();
    if (definitionVote == JitTypeBehavior.COPY) {
        // The inputs hold a sub contest; the defining op votes for its winner
        Contest inner = new Contest();
        for (JitVal input : definingOp.inputs()) {
            inner.vote(assignments.get(input));
        }
        definitionVote = inner.winner();
    }
    tally.vote(definitionVote);
    return tally.winner();
}
/**
 * Put the neighbors of the given value back into the processing queue
 *
 * <p>
 * A neighbor is any value connected to the given one through an operand or definition whose
 * behavior is {@link JitTypeBehavior#COPY}. Today, that means a {@link JitCopyOp} or
 * {@link JitPhiOp}. Such ops may vote differently now that this value's tentative type has
 * changed. Because the queue is a {@link SequencedSet}, adding a value that is already queued
 * has no effect.
 *
 * @param v the value whose neighbors to re-process
 */
protected void queueNeighbors(JitVal v) {
    // Downstream: outputs of the copying ops that use this value
    for (ValUse use : v.uses()) {
        if (use.type() != JitTypeBehavior.COPY) {
            continue;
        }
        if (use.op() instanceof JitDefOp copyOp) {
            queue.add(copyOp.out());
        }
    }

    // Upstream: inputs of this value's defining op, when that op is copying
    if (!(v instanceof JitOutVar out)) {
        return;
    }
    JitDefOp definingOp = out.definition();
    if (definingOp.type() == JitTypeBehavior.COPY) {
        queue.addAll(definingOp.inputs());
    }
}
/**
 * Run the analysis to completion
 *
 * <p>
 * Every value starts out assigned {@link JitTypeBehavior#ANY} and is queued, so that each is
 * processed at least once. Values are then taken from the front of the queue one at a time. For
 * each, a voting contest produces a new assignment. Whenever that differs from the previous
 * assignment, the value's neighbors (if any) are queued again. The analysis is finished once
 * the queue is empty.
 */
protected void analyze() {
    Set<JitVal> all = dfm.allValues();
    for (JitVal val : all) {
        assignments.put(val, JitTypeBehavior.ANY);
    }
    queue.addAll(all);

    while (!queue.isEmpty()) {
        JitVal current = queue.removeFirst();
        JitTypeBehavior updated = computeNewAssignment(current);
        JitTypeBehavior previous = assignments.put(current, updated);
        if (previous == updated) {
            continue;
        }
        queueNeighbors(current);
    }
}
/**
 * Get the final type assigned to the given value
 *
 * <p>
 * The assigned behavior is resolved to an actual type using the value's size.
 *
 * @param v the value
 * @return the value's assigned type
 */
public JitType typeOf(JitVal v) {
    JitTypeBehavior behavior = assignments.get(v);
    return behavior.type(v.size());
}
}

View file

@ -0,0 +1,531 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.util.*;
import java.util.Map.Entry;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorState;
import ghidra.pcode.emu.jit.JitCompiler;
import ghidra.pcode.emu.jit.JitCompiler.Diag;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.BlockFlow;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.program.model.address.Address;
import ghidra.program.model.lang.Register;
import ghidra.program.model.pcode.Varnode;
import ghidra.util.MathUtilities;
/**
* The variable scope analysis of JIT-accelerated emulation.
*
* <p>
* This implements the Variable Scope Analysis phase of the {@link JitCompiler}. The result provides
* the set of in-scope (alive) varnodes for each basic block. The design of this analysis, and the
* shortcuts we take, are informed by the design of downstream phases. In particular, we do not
* intend to allocate each SSA variable. There are often many, many such variables, and attempting
* to allocate them to as few target resources, e.g., JVM locals, as possible is <em>probably</em> a
* complicated and expensive algorithm. I don't think we'd gain much from it either. Instead, we'll
* just allocate by varnode. To do that, though, we still have to consider that some varnodes
* overlap and otherwise alias others. If we are able to handle all that aliasing in place, then we
* need not generate code for the synthetic ops. One might ask, well then why do any of the Data
* Flow Analysis in the first place? 1) We still need data flow to inform the selection of JVM local
* types. We have not measured the run-time cost of the bitwise casts, but we do know the bytecode
* for each cast occupies space, counted against the 65,535-byte max. 2) We also need data flow to
* inform operation elimination, which removes many wasted flag computations.
*
* <p>
* To handle the aliasing, we coalesce overlapping varnodes. For example, {@code EAX} will get
* coalesced with {@code RAX}, but {@code BH} <em>will not</em> get coalesced with {@code BL},
* assuming no other part of {@code RBX} is accessed. The {@link JitDataFlowModel} records all
* varnodes accessed in the course of its intra-block analysis. Only those actually accessed are
* considered. We then compute scope in terms of these coalesced varnodes. For example, if both
* {@code RAX} and {@code EAX} are used by a passage, then an access of {@code EAX} causes
* {@code RAX} to remain in scope.
*
* <p>
* The decision to compute scope on a block-by-block basis instead of op-by-op is for simplicity. We
* intend to birth and retire variables along block transitions by considering what variables are
* coming into or leaving scope on the flow edge. <em>Birthing</em> is just reading a variable's
* value from the run-time {@link JitBytesPcodeExecutorState state} into its allocated JVM local.
* Conversely, <em>retiring</em> is writing the value back out to the state. There's little to be
* gained by retiring a variable midway through a block as opposed to the end of the block. Perhaps
* if one giant block handles a series of variables in sequence, we could have used a single JVM
* local to allocate each, but we're already committed to allocating a JVM local per (coalesced)
* varnode. So, while that may ensure only one variable is alive at a time, the number of JVM locals
* required remains the same. Furthermore, the amount of bytecode emitted remains the same, but at
* different locations in the block. The case where this might be worth considering is a userop
* invocation, because all live variables must be forcefully retired.
*
* <p>
* We then consider what common cases we want to ensure are optimized, when we're limited to a
* block-by-block analysis. One that comes to mind is a function with an early bail. Consider the
* following C source:
*
* <pre>
* int func(my_struct* ptr) {
* if (ptr == NULL) {
* return ERR;
* }
* // Do some serious work
* return ptr->v;
* }
* </pre>
*
* <p>
* Often, the C compiler will group all the returns into one final basic block, so we might get the
* following p-code:
*
* <pre>
* 1 RSP = INT_SUB RSP, 0x20:8
* 2 $U00:1 = INT_EQUAL RDI, 0:8 # RDI is ptr
* 3 CBRANCH &lt;err&gt;, $U00:1
*
* 4 # Do some serious work
* 5 $U10:8 = INT_ADD RDI, 0xc:8 # Offset to field v
* 6 EAX = LOAD [ram] $U10:8
* 7 BRANCH &lt;exit&gt;
* &lt;err&gt;
* 8 EAX = COPY 0xffffffff:4
* &lt;exit&gt;
* 9 RSP = INT_ADD RSP, 0x20:8
* 10 RIP = LOAD [ram] RSP
* 11 RSP = INT_ADD RSP, 8:8
* 12 RETURN RIP
* </pre>
*
* <p>
* Note that I've elided the actual x86 machine code and all of the noise generated by C compilation
* and p-code lifting, and I've presumed the decoded passage contains exactly the example function.
* The result is your typical if-else diamond. We'll place the error case on the left:
*
* <pre>
* +---------+
* | 1--3 |
* | CBRANCH |
* +-T-----F-+
* / \
* / \
* +--------+ +--------+
* | 8 | | 4--7 |
* | (fall) | | BRANCH |
* +--------+ +--------+
* \ /
* \ /
* +---------+
* | 9--12 |
* | RETURN |
* +---------+
* </pre>
*
* <p>
* Suppose the "serious work" on line 4 accesses several varnodes: RBX, RCX, RDX, and RSI. If
* execution follows the error path, we'd rather not birth any of those variables. Thus, we might
* like the result of the scope analysis to be:
*
* <p>
* <table border="1">
* <tr>
* <th>Block</th>
* <th>Live Vars</th>
* </tr>
* <tr>
* <td>1&ndash;3</td>
* <td>RDI, RSP, $U00:1</td>
* </tr>
* <tr>
* <td>4&ndash;7</td>
* <td>EAX, RBX, RCX, RDI, RDX, RSI, RSP, $U10:8</td>
* </tr>
* <tr>
* <td>8</td>
* <td>EAX, RSP</td>
* </tr>
* <tr>
* <td>9&ndash;12</td>
* <td>RIP, RSP</td>
* </tr>
* </table>
*
* <p>
* This can be achieved rather simply: Define two sets for each block, the upward view and the
* downward view. The first corresponds to all varnodes that could be accessed before entering this
* block or while in it. The second corresponds to all varnodes that could be accessed while in this
* block or after leaving it. The upward view is computed by initializing each set to the varnodes
* accessed by its block. Then we "push" each set upward by adding its elements into the set for
* each block with flows into this one, until the sets converge. The downward sets are similarly
* computed, independently of the upward sets. The result is the intersection of these sets, per
* block. The algorithm is somewhat intuitive in that we accrue live variables as we move toward the
* "body" of the control flow graph, and they begin to drop off as we approach an exit. The accrual
* is captured by the downward set, and the drop off is captured by intersection with the upward
* set. This will also prevent retirement and rebirth of variables. Essentially, if we are between
* two accesses of a varnode, then that varnode is alive. Consider {@code RSP} from the example
* above. The algorithm considers it alive in blocks 4&ndash;7 and 8, despite the fact neither
* actually accesses it. Nevertheless, we'd rather generate one birth upon entering block 1&ndash;3,
* keep it alive in the body, and then generate one retirement upon leaving block 9&ndash;12.
*
* <p>
* One notable effect of this algorithm is that all blocks in a loop will have the same variables in
* scope.... I think this is okay. We'll birth the relevant variables upon entering the loop, keep
* them all alive during loop execution, and then retire them (unless they're accessed downstream)
* upon leaving.
*
* @implNote <b>TODO</b>: There's some nonsense to figure out with types. It would be nice if we
* could allow variables of different types to occupy the same location at different
* times. This can be the case, e.g., if a register is used as a temporary location for
* copying values around. If there are times when it's treated as an int and other times
* when it's treated as a float, we could avoid unnecessary Java type conversions.
* However, this would require us to track liveness with types, and at that granularity,
* it could get unwieldy. My inclination is to just consider location liveness and then
* have the allocator decide what type to assign the local variable for that location
* based on some voting system. This is not the best, because some access sites are
* executed more often than others, but it'll suffice.
*/
public class JitVarScopeModel {
/**
 * The direction of set movement, used when computing the upward and downward views.
 */
enum Which {
    /**
     * The upward view: a block's set is pushed into the blocks that flow into it
     */
    UP {
        @Override
        Collection<JitBlock> getFlows(ScopeInfo info) {
            return info.block.flowsTo()
                    .values()
                    .stream()
                    .map(BlockFlow::from)
                    .toList();
        }

        @Override
        Set<Varnode> getLive(ScopeInfo info) {
            return info.liveUp;
        }

        @Override
        Set<Varnode> getQueued(ScopeInfo info) {
            return info.queuedUp;
        }
    },
    /**
     * The downward view: a block's set is pushed into the blocks it flows to
     */
    DOWN {
        @Override
        Collection<JitBlock> getFlows(ScopeInfo info) {
            return info.block.flowsFrom()
                    .values()
                    .stream()
                    .map(BlockFlow::to)
                    .toList();
        }

        @Override
        Set<Varnode> getLive(ScopeInfo info) {
            return info.liveDn;
        }

        @Override
        Set<Varnode> getQueued(ScopeInfo info) {
            return info.queuedDn;
        }
    };

    /**
     * Get the blocks toward which the given block's set is pushed
     *
     * @param info the intermediate analytic result for the block whose set to push
     * @return the blocks into whose sets ours will be unioned
     */
    abstract Collection<JitBlock> getFlows(ScopeInfo info);

    /**
     * Get the given block's current set for this view
     *
     * @param info the intermediate analytic result for the block whose set to get
     * @return the set of live varnodes
     */
    abstract Set<Varnode> getLive(ScopeInfo info);

    /**
     * Get the varnodes waiting to be added to the given block's set for this view
     *
     * @param info the intermediate analytic result for the given block
     * @return the set of queued live varnodes
     */
    abstract Set<Varnode> getQueued(ScopeInfo info);
}
/**
 * The (intermediate) analytic result for one block
 */
private class ScopeInfo {
    private final JitBlock block;
    private final Set<Varnode> liveUp = new HashSet<>();
    private final Set<Varnode> liveDn = new HashSet<>();
    private final Set<Varnode> queuedUp = new HashSet<>();
    private final Set<Varnode> queuedDn = new HashSet<>();
    private final Set<Varnode> liveVars = new LinkedHashSet<>();
    private final Set<Varnode> liveVarsImm = Collections.unmodifiableSet(liveVars);

    /**
     * Construct the result for the given block
     *
     * <p>
     * Both views are seeded with the (coalesced) varnodes the block reads and writes.
     *
     * @param block the block
     */
    public ScopeInfo(JitBlock block) {
        this.block = block;
        JitDataFlowBlockAnalyzer dfa = dfm.getAnalyzer(block);
        seed(dfa.getVarnodesRead());
        seed(dfa.getVarnodesWritten());
    }

    /**
     * Queue the coalesced form of each accessed varnode for both views
     *
     * <p>
     * Varnodes for which {@link Varnode#isAddress()} is true are left out.
     *
     * @param accessed the varnodes accessed by this block
     */
    private void seed(Collection<? extends Varnode> accessed) {
        for (Varnode vn : accessed) {
            if (vn.isAddress()) {
                continue;
            }
            Varnode whole = getCoalesced(vn);
            queuedUp.add(whole);
            queuedDn.add(whole);
        }
    }

    /**
     * Push this block's queued varnodes along the given view
     *
     * <p>
     * Each queued varnode not already live in a neighboring block is added to that neighbor's
     * queue. A neighbor whose queue grew is put back into the queue of blocks to process.
     * Finally, the queued varnodes become live in this block and its queue is emptied.
     *
     * @param which which view (direction)
     */
    private void push(Which which) {
        Set<Varnode> pending = which.getQueued(this);
        if (pending.isEmpty()) {
            return;
        }
        for (JitBlock neighbor : which.getFlows(this)) {
            ScopeInfo that = infos.get(neighbor);
            Set<Varnode> fresh = new HashSet<>(pending);
            fresh.removeAll(which.getLive(that));
            boolean grew = which.getQueued(that).addAll(fresh);
            if (grew) {
                blockQueue.add(that);
            }
        }
        which.getLive(this).addAll(pending);
        pending.clear();
    }

    /**
     * Finish the analytic computation for this block
     *
     * <p>
     * The live variables are those present in both the upward and the downward view, ordered by
     * address. So a variable is alive in a block that accesses it, and also in a block lying
     * between (in terms of possible control-flow paths) two others that access it.
     */
    private void finish() {
        List<Varnode> ordered = new ArrayList<>(liveUp);
        ordered.sort(Comparator.comparing(Varnode::getAddress));
        for (Varnode vn : ordered) {
            if (liveDn.contains(vn)) {
                liveVars.add(vn);
            }
        }
    }
}
// The control flow model, which supplies the blocks
private final JitControlFlowModel cfm;
// The data flow model, which supplies each block's read and written varnodes
private final JitDataFlowModel dfm;
// The coalesced varnodes, keyed by their minimum address
private final NavigableMap<Address, Varnode> coalesced = new TreeMap<>();
// The (intermediate) analytic result for each block
private final Map<JitBlock, ScopeInfo> infos = new HashMap<>();
// The blocks awaiting a push; being a set, a block already queued is not queued twice
private final SequencedSet<ScopeInfo> blockQueue = new LinkedHashSet<>();
/**
 * Construct the model and run the analysis immediately
 *
 * @param cfm the control flow model
 * @param dfm the data flow model
 */
public JitVarScopeModel(JitControlFlowModel cfm, JitDataFlowModel dfm) {
    this.cfm = cfm;
    this.dfm = dfm;
    this.analyze();
}
/**
 * Compute the address of the last byte (inclusive) of the given varnode
 *
 * @param varnode the node
 * @return the max address
 */
static Address maxAddr(Varnode varnode) {
    Address first = varnode.getAddress();
    int lastIndex = varnode.getSize() - 1;
    return first.add(lastIndex);
}
/**
 * Check whether two varnodes overlap, given that one is known to start to the left of the other
 *
 * @param left the left varnode (having lower address)
 * @param right the right varnode (having higher address)
 * @return true if they overlap (merely abutting does not count), false otherwise.
 */
static boolean overlapsLeft(Varnode left, Varnode right) {
    Address leftMax = maxAddr(left);
    Address rightMin = right.getAddress();
    // leftMax is inclusive, so equality is already an overlap
    return leftMax.compareTo(rightMin) >= 0;
}
/**
 * Merge the given varnode into the map of coalesced varnodes
 *
 * <p>
 * The varnode's range is widened to include the existing entry found at or before its minimum
 * address (if that entry overlaps it) and the existing entry found at or before its maximum
 * address. Entries keyed within the varnode's range are then removed, and a single varnode
 * covering the widened range is stored. If one existing entry already covers exactly the
 * widened range, the map is left unchanged.
 *
 * @param varnode the varnode to merge
 */
private void coalesceVarnode(Varnode varnode) {
Address min = varnode.getAddress();
Address max = maxAddr(varnode);
// The existing entry starting at or before our start; if it reaches into us, extend left
Entry<Address, Varnode> leftEntry = coalesced.floorEntry(min);
if (leftEntry != null && overlapsLeft(leftEntry.getValue(), varnode)) {
min = leftEntry.getKey();
}
// The existing entry starting at or before our end; take whichever end is greater
Entry<Address, Varnode> rightEntry = coalesced.floorEntry(max);
if (rightEntry != null) {
max = MathUtilities.cmax(max, maxAddr(rightEntry.getValue()));
}
Varnode exists = leftEntry == null ? null : leftEntry.getValue();
Varnode existsRight = rightEntry == null ? null : rightEntry.getValue();
// Both lookups found the same entry, and it already spans [min, max]
if (exists == existsRight && exists != null && exists.getAddress().equals(min) &&
maxAddr(exists).equals(max)) {
return; // no change
}
// NOTE(review): this clears up to the given varnode's max, not the widened max. Presumably
// that suffices because existing entries never overlap one another -- confirm.
coalesced.subMap(min, true, maxAddr(varnode), true).clear();
coalesced.put(min, new Varnode(min, (int) max.subtract(min) + 1));
}
/**
 * Coalesce every varnode accessed by any block
 *
 * <p>
 * This collects the varnodes read and written by each block and merges each one into the map
 * of coalesced varnodes. Varnodes for which {@link Varnode#isAddress()} is true are left out.
 */
private void coalesceVarnodes() {
    Set<Varnode> accessed = new HashSet<>();
    for (JitBlock block : cfm.getBlocks()) {
        JitDataFlowBlockAnalyzer dfa = dfm.getAnalyzer(block);
        accessed.addAll(dfa.getVarnodesRead());
        accessed.addAll(dfa.getVarnodesWritten());
    }
    accessed.stream()
            .filter(vn -> !vn.isAddress())
            .forEach(this::coalesceVarnode);
}
/**
 * Look up the coalesced varnode that contains the given varnode
 *
 * <p>
 * In many cases, the result is the same varnode. A varnode for which
 * {@link Varnode#isAddress()} is true is returned as is.
 *
 * @param part the varnode
 * @return the coalesced varnode
 */
public Varnode getCoalesced(Varnode part) {
    if (!part.isAddress()) {
        Varnode whole = coalesced.floorEntry(part.getAddress()).getValue();
        assert overlapsLeft(whole, part);
        return whole;
    }
    return part;
}
/**
 * Take the next block from the queue and push its varnodes in the given direction
 *
 * <p>
 * Any block whose varnode queue is affected by the push is added back into the block queue.
 *
 * @param which which view is being computed (direction)
 * @return true if there remains at least one block in the queue
 */
private boolean pushNext(Which which) {
    if (!blockQueue.isEmpty()) {
        blockQueue.removeFirst().push(which);
        return !blockQueue.isEmpty();
    }
    return false;
}
/**
 * Perform the analysis.
 *
 * <p>
 * After coalescing the varnodes, this computes the upward sets by pushing each queued block's
 * varnodes upward until the block queue is empty. All blocks are queued initially, and a block
 * is queued again whenever its set is affected, so an empty queue means the sets have
 * converged. The downward sets are then computed in the same fashion. Last, each block's result
 * is finished.
 */
private void analyze() {
    coalesceVarnodes();

    for (JitBlock block : cfm.getBlocks()) {
        ScopeInfo info = new ScopeInfo(block);
        blockQueue.add(info);
        infos.put(block, info);
    }

    boolean more;
    do {
        more = pushNext(Which.UP);
    }
    while (more);

    blockQueue.addAll(infos.values());
    do {
        more = pushNext(Which.DOWN);
    }
    while (more);

    infos.values().forEach(ScopeInfo::finish);
}
/**
 * Get all of the coalesced varnodes
 *
 * <p>
 * The result is a view of the map's values, which are kept in order of minimum address.
 *
 * @return the varnodes
 */
public Iterable<Varnode> coalescedVarnodes() {
    return this.coalesced.values();
}
/**
 * Get the varnodes that are alive in the given block
 *
 * @param block the block
 * @return the live varnodes, as an unmodifiable set
 */
public Set<Varnode> getLiveVars(JitBlock block) {
    ScopeInfo info = infos.get(block);
    return info.liveVarsImm;
}
/**
 * For diagnostics: Dump the analysis result to stderr
 *
 * <p>
 * For each block, this prints the sorted names of its live varnodes. A varnode matching a
 * register is printed by the register's name. A unique is printed as {@code $U<offset>:<size>}.
 * Anything else is printed as {@code <space>:<offset>:<size>}. Offsets are in hexadecimal and
 * sizes in decimal.
 *
 * @see Diag#PRINT_VSM
 */
public void dumpResult() {
    System.err.println("STAGE: VarLiveness");
    for (JitBlock block : cfm.getBlocks()) {
        System.err.println(" Block: " + block);
        // A TreeSet, so the names print in a stable, sorted order
        Set<String> liveNames = new TreeSet<>();
        for (Varnode vn : infos.get(block).liveVarsImm) {
            Register register = block.getLanguage().getRegister(vn.getAddress(), vn.getSize());
            if (register != null) {
                liveNames.add(register.getName());
            }
            else if (vn.isUnique()) {
                liveNames.add("$U%x:%d".formatted(vn.getOffset(), vn.getSize()));
            }
            else {
                // Print the actual size. It was previously hard-coded as 4 in the format,
                // which left the size argument unused.
                liveNames.add("%s:%x:%d".formatted(vn.getAddress().getAddressSpace().getName(),
                    vn.getOffset(), vn.getSize()));
            }
        }
        System.err.println(" Live: " + liveNames);
    }
}
}

View file

@ -0,0 +1,32 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.decode;
import java.util.List;
import ghidra.pcode.emu.jit.JitPassage.AddrCtx;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.pcode.PcodeOp;
/**
 * One decoded stride: a run of contiguous instructions connected by fall through, together with
 * the p-code ops they emitted
 *
 * @param start the address and contextreg value that seeded this stride
 * @param instructions the instructions, in the order they were decoded
 * @param ops the ops, in the order they were decoded and emitted
 * @see JitPassageDecoder
 */
record DecodedStride(
        AddrCtx start,
        List<Instruction> instructions,
        List<PcodeOp> ops) {
}

View file

@ -0,0 +1,563 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.decode;
import java.math.BigInteger;
import java.util.*;
import ghidra.app.plugin.processors.sleigh.SleighParserContext;
import ghidra.app.util.PseudoInstruction;
import ghidra.pcode.emu.PcodeMachine;
import ghidra.pcode.emu.jit.JitPassage.*;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.*;
import ghidra.pcode.emu.jit.analysis.JitDataFlowState;
import ghidra.pcode.emu.jit.op.JitNopOp;
import ghidra.pcode.exec.*;
import ghidra.program.disassemble.Disassembler;
import ghidra.program.model.address.Address;
import ghidra.program.model.lang.*;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.listing.ProgramContext;
import ghidra.program.model.mem.MemoryAccessException;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.util.Msg;
/**
* The p-code interpreter used during passage decode
*
* <p>
* Aside from branches, this interpreter simply logs each op, so that they get collected into the
* greater stride and passage. It does "rewrite" the ops, so that we can easily recover the input
* context, especially when the op is emitted from a user inject. For branches, this interpreter
* creates the appropriate {@link Branch} records and notifies the passage decoder of new seeds.
*
* <p>
* This executor also implements the {@link DisassemblerContext} to track context changes, namely
* uses of {@code globalset}. This is kept in {@link #futCtx}. <b>TODO</b>: Should {@link #futCtx}
* be moved into the passage decoder to ensure it persists for more than a single instruction? I'm
* not sure whether or not that is already taken care of by the {@link Disassembler}.
*
* @implNote I had considered using a {@link JitDataFlowState} here, but that's Not a Good Idea,
* because a stride is not generally a <em>basic block</em>. A "stride" is just a
* contiguous run of instructions with fall-through. If there is a jump into the middle of
* it, any value analysis (e.g., constant folding) would be meaningless. Were we to put
* this in there, the temptation may be to have userop libraries attempt constant
* resolution, esp., for syscall numbers. While that may work, if only because syscall
* numbers are conventionally set in the same basic block as the invocation, there's no
* guarantee that's the case. And there may be other use cases where this is totally
* wrong. Instead, we should use as barren an executor here as possible. We do incorporate
* injects here, because they may affect control flow, which the decoder must consider.
*
* @implNote <b>WARNING</b>: This executor has no {@link PcodeExecutorState state} object. Care must
* be taken to ensure we override any method that assumes we have one, and that we don't
* invoke any method from the superclass that assumes we have one.
*
*/
class DecoderExecutor extends PcodeExecutor<Object>
implements DisassemblerContextAdapter {
// The decoder for the stride this instruction step belongs to
private final DecoderForOneStride stride;
// The address and contextreg value of the instruction being interpreted
final AddrCtx at;
// The current instruction; may be null until setInstruction is given one
private PseudoInstruction instruction;
// The "terminal nop"; when non-null, finish() appends it to the ops for this step
private NopPcodeOp termNop;
// The "flow" context computed by setInstruction
private RegisterValue flow;
// Context changes, namely uses of globalset, tracked per address
private final Map<Address, RegisterValue> futCtx = new HashMap<>();
// The ops logged by stepOp for this instruction step, in order
final List<PcodeOp> opsForThisStep = new ArrayList<>();
// Presumably the Branch records created while interpreting this step -- confirm against the
// branch-handling methods
private final List<Branch> branchesForThisStep = new ArrayList<>();
// Each original op mapped to its re-written op, so a repeated re-write yields the same object
private final Map<PcodeOp, DecodedPcodeOp> rewrites = new HashMap<>();
/**
 * Construct the interpreter for one instruction step
 *
 * <p>
 * No state, arithmetic, or reason is given to the superclass, only the language of the
 * decoder's thread.
 *
 * @param stride the stride being decoded
 * @param at the address and contextreg value of the instruction
 * @param instruction the instruction, or {@code null}
 */
DecoderExecutor(DecoderForOneStride stride, AddrCtx at, PseudoInstruction instruction) {
    super(stride.decoder.thread.getLanguage(), null, null, null);
    this.stride = stride;
    this.at = at;
    this.setInstruction(instruction);
}
/**
 * Construct the interpreter before its instruction is known
 *
 * <p>
 * The interpreter starts out with no instruction. The decoder must supply one via
 * {@link #setInstruction(PseudoInstruction)} as soon as it becomes available, either 1) because
 * the step resulted in a simple instruction, or 2) because a user inject caused the
 * instruction to be decoded.
 *
 * @param stride the stride being decoded
 * @param at the address and contextreg value of the instruction
 */
DecoderExecutor(DecoderForOneStride stride, AddrCtx at) {
    this(stride, at, (PseudoInstruction) null);
}
/**
 * Wrap the given op as a {@link DecodedPcodeOp} carrying the given address/contextreg value
 *
 * <p>
 * An op that is already a {@link DecodedPcodeOp}, i.e., a {@link DecodeErrorPcodeOp} or
 * {@link NopPcodeOp}, is returned as is. It is asserted to carry the same address and context.
 *
 * @param at the address and decode context
 * @param op the original p-code op
 * @return the equivalent op, re-written
 */
static DecodedPcodeOp rewriteOp(AddrCtx at, PcodeOp op) {
    if (!(op instanceof DecodedPcodeOp dec)) {
        return new DecodedPcodeOp(at, op);
    }
    assert dec.getAt().equals(at);
    return dec;
}
/**
 * Re-write the given op for this interpreter's target
 *
 * <p>
 * An interpreter is created for each instruction step, so the target address and decode context
 * are already known. The re-written op captures that target. Re-writes are remembered: asking
 * again for the same op returns the same re-written op, so that identity is retained in the
 * re-written realm.
 *
 * @param op the op to re-write
 * @return the equivalent op, re-written
 */
DecodedPcodeOp rewrite(PcodeOp op) {
    return rewrites.computeIfAbsent(op, original -> rewriteOp(at, original));
}
/**
 * Set the current instruction and pre-compute its resulting "flow" context
 *
 * <p>
 * The flow context is the input context for the next decoded instruction, not accounting for
 * {@code globalset}. It is derived from this instruction's input context by resetting the
 * non-flowing bits to the language's defaults. {@code globalset} is accounted for later, when a
 * branch is encountered or fall through is considered, to derive the context for the target
 * address.
 *
 * <p>
 * If there is no input context, no instruction, or the instruction is a
 * {@link DecodeErrorInstruction}, the input context is used unchanged.
 *
 * @param instruction the instruction
 */
void setInstruction(PseudoInstruction instruction) {
    this.instruction = instruction;

    boolean keepInputContext = at.rvCtx == null || instruction == null ||
        instruction instanceof DecodeErrorInstruction;
    if (keepInputContext) {
        this.flow = at.rvCtx;
        return;
    }

    Register contextreg = stride.decoder.contextreg;
    ProgramContext defaults = stride.decoder.defaultContext;
    RegisterValue zeroed = new RegisterValue(contextreg, BigInteger.ZERO);
    RegisterValue withDefaults =
        zeroed.combineValues(defaults.getDefaultValue(contextreg, at.address));
    this.flow = withDefaults.combineValues(defaults.getFlowValue(at.rvCtx));
    processContextChanges();
}
/**
 * Decode the instruction at this interpreter's target and make it the current instruction
 *
 * <p>
 * Decoding can be delayed if there is a user inject at the target address. In that case, this
 * may be invoked by {@link DecoderUseropLibrary#emu_exec_decoded(PcodeExecutor)} or
 * {@link DecoderUseropLibrary#emu_skip_decoded(PcodeExecutor)}.
 *
 * @return the decoded instruction, which may be a {@link DecodeErrorInstruction}
 */
PseudoInstruction decodeInstruction() {
    PseudoInstruction decoded = stride.decoder.decodeInstruction(at.address, at.rvCtx);
    setInstruction(decoded);
    return decoded;
}
/**
 * Apply the current instruction's context commits to this executor
 *
 * <p>
 * The commits are taken from the instruction's Sleigh parser context and applied with this
 * executor as the target. A {@link MemoryAccessException} is not expected here; should one
 * occur, it is re-thrown as an {@link AssertionError}.
 */
private void processContextChanges() {
    try {
        SleighParserContext sleighCtx = (SleighParserContext) instruction.getParserContext();
        sleighCtx.applyCommits(this);
    }
    catch (MemoryAccessException e) {
        throw new AssertionError(e);
    }
}
/**
 * Interpret the given program using the userop library of the passage being decoded
 *
 * @param program the p-code to interpret
 */
public void execute(PcodeProgram program) {
    this.execute(program, stride.passage.library());
}
/**
 * {@inheritDoc}
 *
 * @implNote This is where the "terminal nop" gets added, if one was necessary. Any jump to
 *           (should never be past) the end of the program requires one. Rather than work out
 *           which op follows this instruction so that such jumps can target it, a special nop
 *           is created and the jumps target that. It has to be appended once the end of the
 *           p-code program proper is reached.
 */
@Override
public void finish(PcodeFrame frame, PcodeUseropLibrary<Object> library) {
    super.finish(frame, library);
    if (termNop == null) {
        return;
    }
    opsForThisStep.add(termNop);
}
/**
 * {@inheritDoc}
 *
 * <p>
 * Every op is recorded for this step, but only some are actually interpreted: the branching
 * ops, {@link PcodeOp#CALLOTHER callother}, and {@link PcodeOp#UNIMPLEMENTED unimplemented}.
 *
 * <p>
 * We interpret {@code callother} in case we're able to inline a p-code userop. Even when the
 * userop is inlined, the {@code callother} op itself is retained, because internal jumps may
 * target it. It is easier to keep it in the books and {@link JitNopOp nop} it out later than to
 * substitute the first inlined op; worse, if the inlined userop emits no p-code, substitution
 * would get especially difficult.
 *
 * <p>
 * We interpret {@code unimplemented} because it requires an {@link ErrBranch} record. All other
 * ops are added to the decoded passage, but not (yet) interpreted.
 */
@Override
public void stepOp(PcodeOp op, PcodeFrame frame, PcodeUseropLibrary<Object> library) {
    PcodeOp rewritten = rewrite(op);
    boolean interpret = switch (rewritten.getOpcode()) {
        case PcodeOp.BRANCH, //
            PcodeOp.CBRANCH, //
            PcodeOp.CALL, //
            PcodeOp.BRANCHIND, //
            PcodeOp.CALLIND, //
            PcodeOp.RETURN, //
            PcodeOp.CALLOTHER, //
            PcodeOp.UNIMPLEMENTED -> true;
        default -> false;
    };
    // NOTE: Every op must be logged, including an inlined CALLOTHER, because an internal jump
    // may refer to it. Snuffing the op later is easier than substituting the refs.
    opsForThisStep.add(rewritten);
    if (interpret) {
        super.stepOp(rewritten, frame, library);
    }
}
/**
 * {@inheritDoc}
 *
 * <p>
 * A conditional branch is handled exactly like an unconditional one. At this stage, the only
 * goal is to collect branch targets for seeding additional strides, so the condition itself is
 * of no interest.
 */
@Override
public void executeConditionalBranch(PcodeOp op, PcodeFrame frame) {
    this.doExecuteBranch(op, frame);
}
/**
 * {@inheritDoc}
 *
 * <p>
 * Deliberately a no-op. The inherited behavior would attempt to write the PC to the
 * {@link #getState() state}, which is {@code null} for this executor.
 */
@Override
protected void branchToOffset(PcodeOp op, long offset, PcodeFrame frame) {
    // Intentionally empty: there is no state to receive the program counter
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This variant always fails with an {@link AssertionError}. NOTE(review): presumably it is
 * unreachable because {@link #doExecuteIndirectBranch(PcodeOp, PcodeFrame)} is overridden here
 * without delegating to the superclass -- confirm against the base executor.
 */
@Override
protected void branchToOffset(PcodeOp op, Object offset, PcodeFrame frame) {
    throw new AssertionError();
}
/**
 * {@inheritDoc}
 *
 * <p>
 * Rather than branching, this records an {@link ExtBranch} for the current instruction step,
 * pairing the target address with the context derived by
 * {@link #takeTargetContext(Address)}. The record is first used in the fall-through check.
 * The passage decoder is notified afterward, and it either queues the record as a seed or
 * converts it to an {@link IntBranch} record.
 *
 * @see #checkFallthroughAndAccumulate(PcodeProgram)
 */
@Override
protected void branchToAddress(PcodeOp op, Address target) {
    AddrCtx targetCtx = takeTargetContext(target);
    branchesForThisStep.add(new ExtBranch(op, targetCtx));
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This creates an {@link IntBranch} record and collects it for this instruction step. The
 * record is first used to check for fall through. The passage decoder is notified afterward
 * and collects the records for later passage-wide control flow analysis.
 *
 * <p>
 * A jump whose target index equals the length of the p-code program has no real op to land
 * on. In that case the branch targets a lazily created "terminal nop," which is shared by all
 * such jumps in this step.
 *
 * @see #checkFallthroughAndAccumulate(PcodeProgram)
 */
@Override
protected void branchInternal(PcodeOp op, PcodeFrame frame, int relative) {
    int targetIndex = op.getSeqnum().getTime() + relative;
    PcodeOp target;
    if (targetIndex != frame.getCode().size()) {
        // Ordinary case: the jump lands on an op within this program
        target = rewrite(frame.getCode().get(targetIndex));
    }
    else {
        // The jump lands just past the last op
        if (termNop == null) {
            termNop = new NopPcodeOp(at, targetIndex);
        }
        target = termNop;
    }
    branchesForThisStep.add(new IntBranch(op, target, false));
}
/**
 * {@inheritDoc}
 *
 * <p>
 * Instead of evaluating the target, this creates an {@link IndBranch} record carrying the
 * current flow context and collects it for this instruction step. The record is first used to
 * check for fall through. The passage decoder is notified afterward and collects the records
 * for later passage-wide control flow analysis.
 *
 * @see #checkFallthroughAndAccumulate(PcodeProgram)
 */
@Override
protected void doExecuteIndirectBranch(PcodeOp op, PcodeFrame frame) {
    IndBranch indirect = new IndBranch(op, flow);
    branchesForThisStep.add(indirect);
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This creates an {@link ErrBranch} record and collects it for this instruction step. For a
 * {@link DecodeErrorInstruction}, the record's message is the decode error's own message;
 * otherwise, it reports an unimplemented instruction at the current location. The record is
 * first used to check for fall through. The passage decoder is notified afterward and collects
 * the records for later passage-wide control flow analysis. In most (all?) cases, this is the
 * only op emitted by the instruction (decode error, unimplemented instruction), and so there is
 * certainly no fall through.
 *
 * @see #checkFallthroughAndAccumulate(PcodeProgram)
 */
@Override
protected void badOp(PcodeOp op) {
    String message = instruction instanceof DecodeErrorInstruction err
            ? err.getMessage()
            : "Encountered an unimplemented instruction at " + at + " (" + instruction + ")";
    branchesForThisStep.add(new ErrBranch(op, message));
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This creates an {@link ErrBranch} record naming the missing userop and collects it for this
 * instruction step. The record is first used to check for fall through. The passage decoder is
 * notified afterward and collects the records for later passage-wide control flow analysis.
 * Unlike {@link #badOp(PcodeOp)}, an instruction that calls a missing userop may still have
 * fall through.
 */
@Override
protected void onMissingUseropDef(PcodeOp op, PcodeFrame frame, String opName,
        PcodeUseropLibrary<Object> library) {
    String message = "Sleigh userop '%s' is not in the library".formatted(opName);
    branchesForThisStep.add(new ErrBranch(op, message));
}
/**
 * Record a context change that takes effect at a given (future) address
 *
 * <p>
 * Values for registers that are not processor context are ignored. Multiple values recorded
 * for the same address are combined, in the order received, using
 * {@link RegisterValue#combineValues(RegisterValue)}.
 *
 * @param address the address where the value applies
 * @param value the register value to record
 */
@Override
public void setFutureRegisterValue(Address address, RegisterValue value) {
    if (value.getRegister().isProcessorContext()) {
        futCtx.merge(address, value, (existing, added) -> existing.combineValues(added));
    }
}
/**
 * Derive the contextreg value for the given target address (branch or fall through)
 *
 * <p>
 * The constructors of an instruction may use {@code globalset} to place context changes at
 * specific addresses. Those changes reach {@link #setFutureRegisterValue(Address, RegisterValue)}
 * through some chain of invocations started by {@link #setInstruction(PseudoInstruction)}. A
 * branch op specifies only its target address, so the context for the branch must be derived
 * here: it is the pre-computed "flow" context, combined with any {@code globalset} change
 * recorded at the target address.
 *
 * @param target the target address
 * @return the target address and contextreg value
 */
public AddrCtx takeTargetContext(Address target) {
    // The recorded change is only read, never removed, in case several branches share the
    // same target address.
    RegisterValue targetCtx = futCtx.containsKey(target)
            ? flow.combineValues(futCtx.get(target))
            : flow;
    return new AddrCtx(targetCtx, target);
}
/**
 * Finish an instruction step: determine whether it falls through, hand its ops to the stride
 * decoder, and hand its branches to the passage decoder.
 *
 * <p>
 * Fall through is decided by a miniature control flow analysis over just this step's p-code
 * ops. {@link Instruction#hasFallthrough()} is not sufficient, for at least two reasons: 1) A
 * user inject can be very complex and need not obey the usual control flow checks imposed by
 * the Sleigh semantic compiler, 2) We do not consider a {@link PcodeOp#CALL call} or
 * {@link PcodeOp#CALLIND callind} as having fall through.
 *
 * <p>
 * The analysis works by appending a special "probe" {@link ExitPcodeOp} along with an
 * {@link ExtBranch} record to {@link AddrCtx#NOWHERE nowhere}. The probe also keeps the analyzer
 * from complaining about unterminated control flow. The ops are then split into blocks using
 * {@link BlockSplitter}, borrowed from {@link JitControlFlowModel}. In practice, this seems
 * fast enough. The splitter keeps the blocks in their original order, so the first op is
 * certainly in the first block and the probe in the last block. The step has fall through if
 * and only if the last block is reachable from the first.
 *
 * <p>
 * Two cases skip the analysis: a decode error never falls through (only its {@link ErrBranch}
 * records are expected), and a step with no ops at all always falls through.
 *
 * @param from the instruction's or inject's p-code
 * @return true if the step falls through.
 */
public boolean checkFallthroughAndAccumulate(PcodeProgram from) {
    if (instruction instanceof DecodeErrorInstruction) {
        stride.opsForStride.addAll(opsForThisStep);
        for (Branch recorded : branchesForThisStep) {
            switch (recorded) {
                case ErrBranch err -> stride.passage.otherBranches.put(err.from(), err);
                default -> throw new AssertionError();
            }
        }
        return false;
    }
    if (opsForThisStep.isEmpty()) {
        return true;
    }

    // Append the probe op and its branch to nowhere
    ExitPcodeOp probeOp = new ExitPcodeOp(AddrCtx.NOWHERE);
    opsForThisStep.add(probeOp);
    ExtBranch probeBranch = new ExtBranch(probeOp, AddrCtx.NOWHERE);
    branchesForThisStep.add(probeBranch);

    // Split just this step's ops into blocks
    BlockSplitter splitter = new BlockSplitter(new PcodeProgram(from, opsForThisStep));
    splitter.addBranches(branchesForThisStep);
    SequencedMap<PcodeOp, JitBlock> blocks = splitter.splitBlocks();

    // Reachability from the first block to the probe's block decides fall through
    JitBlock first = blocks.firstEntry().getValue();
    JitBlock last = blocks.lastEntry().getValue();
    Set<JitBlock> reachable = new HashSet<>();
    collectReachable(reachable, first);

    // Accumulate everything except the probe artifacts
    for (JitBlock block : blocks.values()) {
        for (PcodeOp op : block.getCode()) {
            if (op == probeOp) {
                continue;
            }
            stride.opsForStride.add(op);
        }
        for (IntBranch internal : block.branchesFrom()) {
            if (internal.isFall()) {
                continue;
            }
            stride.passage.internalBranches.put(internal.from(), internal);
        }
        for (Branch outgoing : block.branchesOut()) {
            if (outgoing == probeBranch) {
                continue;
            }
            switch (outgoing) {
                case ExtBranch ext -> stride.passage.flowTo(ext);
                default -> stride.passage.otherBranches.put(outgoing.from(), outgoing);
            }
        }
    }
    return reachable.contains(last);
}
/**
 * The reachability test used by {@link #checkFallthroughAndAccumulate(PcodeProgram)}
 *
 * <p>
 * Recursively adds {@code cur} and every block reachable from it to the given mutable set. A
 * block already present in the set is not visited again, so cycles terminate.
 *
 * @param into a mutable set for collecting reachable blocks
 * @param cur the source block, or an intermediate during recursion
 */
private void collectReachable(Set<JitBlock> into, JitBlock cur) {
    if (into.add(cur)) {
        for (BlockFlow edge : cur.flowsFrom().values()) {
            collectReachable(into, edge.to());
        }
    }
}
/**
 * Compute the fall-through address
 *
 * <p>
 * The "next" address is computed whether or not the instruction actually has fall through, so
 * the caller should check for fall through first.
 *
 * @return the next address
 * @implNote If no instruction was decoded during this step and the decoder still asks about
 *           fall through, then the user very likely made an error in specifying an inject's
 *           control flow, in which case the counter will not advance. To get the same effect,
 *           a warning is logged and the current address is returned. The decoder and/or
 *           translator ought to recognize this and ensure the resulting infinite loop can be
 *           interrupted.
 * @see PcodeMachine#inject(Address, String)
 */
Address getAdvancedAddress() {
    if (instruction == null) {
        Msg.warn(this, "An inject may have forgotten control flow.");
        return at.address;
    }
    return instruction.getMaxAddress().next();
}
/**
 * Add a decoded instruction to the stride's list of instructions
 *
 * <p>
 * At an address without an inject, every decoded instruction ought to be included in the
 * stride. At an address with an inject, a decoded instruction should be included only if it is
 * actually interpreted, i.e., its ops are included.
 *
 * @param instruction the decoded instruction
 */
void addInstruction(PseudoInstruction instruction) {
    this.stride.instructions.add(instruction);
}
}

View file

@ -0,0 +1,169 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.decode;
import java.util.*;
import java.util.Map.Entry;
import org.apache.commons.collections4.MapUtils;
import ghidra.pcode.emu.jit.JitConfiguration;
import ghidra.pcode.emu.jit.JitPassage;
import ghidra.pcode.emu.jit.JitPassage.*;
import ghidra.pcode.exec.PcodeUseropLibrary;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.pcode.PcodeOp;
/**
 * The decoder for a single passage
 *
 * <p>
 * This is a sort of "mutable" passage or passage "builder" that is used while the passage is being
 * decoded. Once complete, this provides an immutable (or at least it's supposed to be) decoded
 * {@link Passage}.
 */
class DecoderForOnePassage {
    private final JitPassageDecoder decoder;
    private final AddrCtx seed;
    private final int maxOps;
    private final int maxInstrs;
    private final int maxStrides;

    /** Branches whose target op is already decoded within this passage, keyed by source op */
    final Map<PcodeOp, IntBranch> internalBranches = new HashMap<>();
    /** The queue of direct branches to targets not yet decoded, in order of discovery */
    final SequencedMap<PcodeOp, ExtBranch> externalBranches = new LinkedHashMap<>();
    /** All other branch records, e.g., errors, indirect branches, and exits to known entries */
    final Map<PcodeOp, Branch> otherBranches = new HashMap<>();
    /** The first op recorded for each decoded address and contextreg value */
    final Map<AddrCtx, PcodeOp> firstOps = new HashMap<>();
    /** The strides decoded so far */
    final List<DecodedStride> strides = new ArrayList<>();

    private int opCount = 0;
    private int instructionCount = 0;

    /**
     * Construct the decoder
     *
     * <p>
     * The instruction and stride quotas are taken from the machine's configuration. The queue
     * of external branches is primed with a synthetic entry op that branches to the seed.
     *
     * @param decoder the thread's passage decoder
     * @param seed the seed for this passage
     * @param maxOps the maximum-ish number of p-code ops to emit
     */
    DecoderForOnePassage(JitPassageDecoder decoder, AddrCtx seed, int maxOps) {
        this.decoder = decoder;
        this.seed = seed;
        this.maxOps = maxOps;

        JitConfiguration config = decoder.thread.getMachine().getConfiguration();
        this.maxInstrs = config.maxPassageInstructions();
        this.maxStrides = config.maxPassageStrides();

        EntryPcodeOp entryOp = new EntryPcodeOp(seed);
        externalBranches.put(entryOp, new ExtBranch(entryOp, seed));
    }

    /**
     * Implements the actual decode loop
     *
     * <p>
     * While all quotas (ops, instructions, strides) have room and the queue is not empty, this
     * takes the oldest queued external branch. If the thread already has an entry at its
     * target, the branch is kept as an exit in {@link #otherBranches}. Otherwise, a stride is
     * decoded at the target and the branch becomes an {@link IntBranch} to that stride's first
     * op. The quotas are only checked between strides, so they may be overshot.
     */
    void decodePassage() {
        while (opCount < maxOps && instructionCount < maxInstrs &&
            strides.size() < maxStrides) {
            Entry<PcodeOp, ExtBranch> nextEnt = externalBranches.pollFirstEntry();
            if (nextEnt == null) {
                break;
            }
            ExtBranch next = nextEnt.getValue();
            AddrCtx start = next.to();
            if (decoder.thread.hasEntry(start)) {
                otherBranches.put(next.from(), next);
            }
            else {
                decodeStride(start);
                PcodeOp to = Objects.requireNonNull(firstOps.get(start));
                internalBranches.put(next.from(), new IntBranch(next.from(), to, false));
            }
        }
    }

    /**
     * Record that a direct branch was encountered.
     *
     * <p>
     * If we've already decoded the target, we create an {@link IntBranch} record, and we're done.
     * Otherwise, we queue up the {@link ExtBranch} record. If multiple direct branches target the
     * same address, we still create separate entries, because their {@link Branch#from() from}
     * fields will be different. Records still in the queue once decoding has terminated
     * (probably because of a quota) may target addresses that have since been decoded;
     * {@link #finish()} examines them and converts those to {@link IntBranch} records.
     *
     * @param eb the branch record, giving the op causing the control flow and its target
     */
    void flowTo(ExtBranch eb) {
        if (firstOps.containsKey(eb.to())) {
            IntBranch ib = new IntBranch(eb.from(), firstOps.get(eb.to()), false);
            internalBranches.put(ib.from(), ib);
            return;
        }
        externalBranches.put(eb.from(), eb);
    }

    /**
     * Decode a stride starting at the given address, and add its op and instruction counts to
     * the running totals.
     *
     * @param start the starting address and context
     */
    private void decodeStride(AddrCtx start) {
        DecodedStride stride = new DecoderForOneStride(decoder, this, start).decode();
        opCount += stride.ops().size();
        instructionCount += stride.instructions().size();
        strides.add(stride);
    }

    /**
     * Sort out the result and create the decoded passage
     *
     * <p>
     * The strides are sorted by their seeds (contextreg value then address), and their code
     * concatenated together. The various types of branches are also all combined. (They can still
     * be distinguished by type.) {@link ExtBranch} records are converted to {@link IntBranch}
     * records where possible.
     *
     * <p>
     * The combined branches are collected into a fresh map, so that {@link #otherBranches}
     * continues to hold only the records that were put there during decoding.
     *
     * @return the passage
     */
    JitPassage finish() {
        strides.sort(Comparator.comparing(DecodedStride::start));
        List<PcodeOp> code = strides.stream().flatMap(b -> b.ops().stream()).toList();
        List<Instruction> instructions =
            strides.stream().flatMap(b -> b.instructions().stream()).toList();

        // Copy rather than alias the field, so merging does not pollute the decoder's books
        Map<PcodeOp, Branch> branches = new HashMap<>(otherBranches);
        branches.putAll(internalBranches);
        for (ExtBranch eb : externalBranches.values()) {
            if (firstOps.containsKey(eb.to())) {
                // The target was decoded after this branch was queued
                branches.put(eb.from(), new IntBranch(eb.from(), firstOps.get(eb.to()), false));
            }
            else {
                branches.put(eb.from(), eb);
            }
        }
        return new JitPassage(decoder.thread.getLanguage(), seed, code, decoder.library,
            instructions, branches, MapUtils.invertMap(firstOps));
    }

    /**
     * Get the decoder-wrapped userop library
     *
     * @return the library
     */
    PcodeUseropLibrary<Object> library() {
        return decoder.library;
    }
}

View file

@ -0,0 +1,186 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.decode;
import java.util.ArrayList;
import java.util.List;
import ghidra.app.util.PseudoInstruction;
import ghidra.pcode.emu.jit.JitPassage.*;
import ghidra.pcode.exec.PcodeProgram;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.pcode.PcodeOp;
/**
 * The decoder for a single stride.
 *
 * <p>
 * Decoding starts at a given seed and proceeds linearly until it reaches an instruction without
 * fall through. It may also stop early upon encountering an existing entry point or an erroneous
 * user inject.
 *
 * @see JitPassageDecoder
 */
public class DecoderForOneStride {

    /**
     * The result of decoding an instruction
     *
     * <p>
     * This may also represent an error encountered while trying to decode an instruction.
     *
     * @param executor the p-code interpreter, which retains some state
     * @param program the resulting p-code
     */
    record StepResult(DecoderExecutor executor, PcodeProgram program) {
        /**
         * Check whether the result falls through, accumulate its instructions and ops, and
         * apply any control-flow effects.
         *
         * @return true if the result falls through.
         * @see DecoderExecutor#checkFallthroughAndAccumulate(PcodeProgram)
         */
        boolean checkFallthroughAndAccumulate() {
            return executor.checkFallthroughAndAccumulate(program);
        }

        /**
         * Compute the fall-through target
         *
         * <p>
         * <b>NOTE</b>: Call this only after confirming the result actually has fall through;
         * otherwise, it blindly computes the address and context immediately after the
         * instruction.
         *
         * @return the next address to decode
         */
        AddrCtx next() {
            return executor.takeTargetContext(executor.getAdvancedAddress());
        }
    }

    final JitPassageDecoder decoder;
    final DecoderForOnePassage passage;
    private final AddrCtx start;

    final List<Instruction> instructions = new ArrayList<>();
    final List<PcodeOp> opsForStride = new ArrayList<>();

    /**
     * Construct a stride decoder
     *
     * @param decoder the thread's passage decoder
     * @param passage the decoder for this specific passage
     * @param start the seed to start this stride
     */
    public DecoderForOneStride(JitPassageDecoder decoder, DecoderForOnePassage passage,
            AddrCtx start) {
        this.decoder = decoder;
        this.passage = passage;
        this.start = start;
    }

    /**
     * Finish decoding and create the stride
     *
     * @return the stride
     */
    DecodedStride toStride() {
        return new DecodedStride(start, instructions, opsForStride);
    }

    /**
     * Terminate the stride with an exit to the given address and context
     *
     * <p>
     * An {@link ExitPcodeOp} is appended to the stride's ops, and a matching {@link ExtBranch}
     * is recorded among the passage's other branches.
     *
     * @param at the address and context where execution leaves the passage
     */
    private void emitExit(AddrCtx at) {
        ExitPcodeOp exitOp = new ExitPcodeOp(at);
        opsForStride.add(exitOp);
        passage.otherBranches.put(exitOp, new ExtBranch(exitOp, at));
    }

    /**
     * "Step" the decoder an instruction
     *
     * <p>
     * This attempts to decode the instruction at the given address (and contextreg value). If
     * that address is already a known entry point (for the entire emulator), an exit branch is
     * encoded, this returns {@code null}, and the stride should be terminated. Otherwise, the
     * p-code comes from the user inject at that address, or if there is none, from the decoded
     * instruction (which may represent a decode error). That p-code is interpreted, and its
     * first op is saved, in case it is targeted by a direct branch. As a special case, if the
     * inject and/or instruction emits no p-code, a {@link NopPcodeOp nop} is synthesized, so
     * that there is something to enter into the books.
     *
     * @param at the address of the instruction to decode
     * @return the result, or {@code null} if the address is an existing entry point
     */
    private StepResult stepAddrCtx(AddrCtx at) {
        // Avoid duplicate translation of an existing entry point; just exit to it
        if (decoder.thread.hasEntry(at)) {
            emitExit(at);
            return null;
        }

        DecoderExecutor executor = new DecoderExecutor(this, at);
        PcodeProgram program = decoder.thread.getInject(at.address);
        if (program == null) {
            // No inject here, so decode and interpret the actual instruction
            PseudoInstruction instruction = executor.decodeInstruction();
            instructions.add(instruction);
            program = PcodeProgram.fromInstruction(instruction, false);
        }
        executor.execute(program);

        PcodeOp firstOp;
        if (!executor.opsForThisStep.isEmpty()) {
            firstOp = executor.opsForThisStep.getFirst();
        }
        else {
            NopPcodeOp nop = new NopPcodeOp(at, 0);
            opsForStride.add(nop);
            firstOp = nop;
        }
        passage.firstOps.put(at, firstOp);
        return new StepResult(executor, program);
    }

    /**
     * Decode the stride.
     *
     * <p>
     * Stepping continues until the next address has already been decoded in this passage, a
     * step reports an existing entry point, a step has no fall through, or the fall-through
     * target fails to advance.
     *
     * @return the decoded stride
     */
    public DecodedStride decode() {
        /*
         * NOTE: If we impose a max instruction count within the stride, be sure to add the
         * "external branch" that falls-through to the next instruction outside the passage.
         */
        AddrCtx at = start;
        while (!passage.firstOps.containsKey(at)) {
            StepResult result = stepAddrCtx(at);
            if (result == null || !result.checkFallthroughAndAccumulate()) {
                break;
            }
            AddrCtx next = result.next();
            if (at.equals(next)) {
                // Would happen because of inject without control flow
                emitExit(at);
                break;
            }
            at = next;
        }
        return toStride();
    }
}

View file

@ -0,0 +1,202 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.decode;
import java.lang.reflect.Method;
import java.util.List;
import ghidra.app.util.PseudoInstruction;
import ghidra.pcode.emu.DefaultPcodeThread.PcodeEmulationLibrary;
import ghidra.pcode.emu.jit.op.JitNopOp;
import ghidra.pcode.exec.*;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.Varnode;
/**
 * The decoder's wrapper around the emulator's userop library
 *
 * <p>
 * This library serves two purposes: 1) to override {@link PcodeEmulationLibrary#emu_exec_decoded()}
 * and {@link PcodeEmulationLibrary#emu_skip_decoded()}, and 2) to check and inline p-code userops
 * that {@link ghidra.pcode.exec.PcodeUseropLibrary.PcodeUseropDefinition#canInlinePcode() allow}
 * it.
 *
 * <p>
 * The first purpose is met by defining the two userops here with the usual annotations. Both
 * are easily inlinable and are marked as such. They are, however, separate from the wrappers used
 * for the second purpose, so their inlining is implemented in the userops themselves. The marking
 * is still applied for informational purposes and because the translator needs to know.
 *
 * <p>
 * The second purpose is met by accepting the emulator's userop library and wrapping each of its
 * userops individually, excluding the two overridden here. Each userop's attributes pass through
 * unchanged, but upon execution, the wrapper checks whether the userop allows inlining. If so,
 * the userop's p-code is fed into the decoder's interpreter, which effectively inlines the op,
 * control flow ops and all, into the passage. For bookkeeping purposes, the
 * {@link PcodeOp#CALLOTHER callother} op is not actually replaced. Instead, it is mapped to a
 * {@link JitNopOp nop} during translation.
 */
public class DecoderUseropLibrary extends AnnotatedPcodeUseropLibrary<Object> {

    /**
     * The wrapper around one of the emulator's userops
     */
    protected class WrappedUseropDefinition implements PcodeUseropDefinition<Object> {
        private final PcodeUseropDefinition<byte[]> rtOp;

        /**
         * Wrap the given userop
         *
         * @param rtOp the actual userop, as defined by the user or emulator
         */
        public WrappedUseropDefinition(PcodeUseropDefinition<byte[]> rtOp) {
            this.rtOp = rtOp;
        }

        @Override
        public String getName() {
            return rtOp.getName();
        }

        @Override
        public int getInputCount() {
            return rtOp.getInputCount();
        }

        /**
         * {@inheritDoc}
         *
         * <p>
         * This form always fails with an {@link AssertionError}. NOTE(review): presumably the
         * decoder only ever dispatches through the {@link PcodeOp}-based overload -- confirm.
         */
        @Override
        public void execute(PcodeExecutor<Object> executor, PcodeUseropLibrary<Object> library,
                Varnode outVar, List<Varnode> inVars) {
            throw new AssertionError();
        }

        /**
         * {@inheritDoc}
         *
         * @implNote If the userop can be inlined, we assume the delegate's {@code execute}
         *           method simply produces p-code and feeds it to the executor. If that is
         *           true, then the target type {@code <T>} does not matter, so we cast
         *           everything to raw types. Thus, the user is responsible to apply the
         *           {@link #canInlinePcode()} attribute correctly.
         */
        @Override
        @SuppressWarnings("unchecked")
        public void execute(PcodeExecutor<Object> executor, PcodeUseropLibrary<Object> library,
                PcodeOp op) {
            if (!rtOp.canInlinePcode()) {
                // Nothing to do. CALLOTHER is logged and will be compiled later.
                return;
            }
            @SuppressWarnings("rawtypes")
            PcodeExecutor rawExec = executor;
            @SuppressWarnings("rawtypes")
            PcodeUseropLibrary rawLib = library;
            rtOp.execute(rawExec, rawLib, op);
        }

        @Override
        public boolean isFunctional() {
            return rtOp.isFunctional();
        }

        @Override
        public boolean hasSideEffects() {
            return rtOp.hasSideEffects();
        }

        @Override
        public boolean canInlinePcode() {
            return rtOp.canInlinePcode();
        }

        @Override
        public Method getJavaMethod() {
            return rtOp.getJavaMethod();
        }

        @Override
        public PcodeUseropLibrary<?> getDefiningLibrary() {
            return rtOp.getDefiningLibrary();
        }
    }

    /**
     * Wrap the given userop library
     *
     * <p>
     * Each userop of the given library is wrapped and added under its own name, unless a userop
     * of that name is already present here, in which case the existing one wins.
     *
     * @param rtLib the actual library provided by the user or emulator
     */
    public DecoderUseropLibrary(PcodeUseropLibrary<byte[]> rtLib) {
        for (PcodeUseropDefinition<byte[]> opDef : rtLib.getUserops().values()) {
            String name = opDef.getName();
            // Allow our annotations to override stuff in rtLib
            if (!ops.containsKey(name)) {
                ops.put(name, new WrappedUseropDefinition(opDef));
            }
        }
    }

    /**
     * The replacement for {@link PcodeEmulationLibrary#emu_exec_decoded()}.
     *
     * <p>
     * The one built into the emulator would have the thread interpret the decoded instruction
     * directly. While this might "work," it totally misses the purpose of JIT translation.
     * Instead, the instruction is decoded, added to the passage, and its p-code is interpreted
     * by the decoder, which inlines that p-code into the rest of the passage.
     *
     * <p>
     * Note that the {@link PcodeOp#CALLOTHER callother} op will be mapped to a {@link JitNopOp nop}
     * during translation because we have set {@code canInline}.
     *
     * @param executor the decoder's executor
     */
    @PcodeUserop(canInline = true)
    public void emu_exec_decoded(@OpExecutor PcodeExecutor<Object> executor) {
        DecoderExecutor de = (DecoderExecutor) executor;
        PseudoInstruction decoded = de.decodeInstruction();
        de.addInstruction(decoded);
        de.execute(PcodeProgram.fromInstruction(decoded, false));
    }

    /**
     * The replacement for {@link PcodeEmulationLibrary#emu_skip_decoded()}.
     *
     * <p>
     * The one built into the emulator would have the thread drop and skip the decoded instruction
     * directly. This would not have the intended effect, because the decoder is the thing that
     * needs to skip and advance to the next address. We instead "inline" nothing, but we must still
     * decode the instruction. Because the executor provides the decode routine, it can internally
     * work out fall through. We will <em>not</em> add the instruction to the passage, though,
     * because we will not have the executor interpret any of the instruction's p-code. As for fall
     * through, the {@link DecoderExecutor#checkFallthroughAndAccumulate(PcodeProgram)} routine just
     * does its usual. If the inject falls through, {@link DecoderExecutor#getAdvancedAddress()}
     * considers the decoded instruction, even though it was never interpreted.
     *
     * <p>
     * Note that the {@link PcodeOp#CALLOTHER callother} op will still be mapped to a
     * {@link JitNopOp nop} during translation because we have set {@code canInline}.
     *
     * @param executor the decoder's executor
     */
    @PcodeUserop(canInline = true)
    public void emu_skip_decoded(@OpExecutor PcodeExecutor<Object> executor) {
        // Decode only; the instruction is neither added to the passage nor interpreted
        ((DecoderExecutor) executor).decodeInstruction();
    }
}

View file

@ -0,0 +1,167 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.decode;
import ghidra.app.util.PseudoInstruction;
import ghidra.pcode.emu.InstructionDecoder;
import ghidra.pcode.emu.jit.*;
import ghidra.pcode.emu.jit.JitPassage.*;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.BlockSplitter;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.exec.DecodePcodeExecutionException;
import ghidra.program.model.address.Address;
import ghidra.program.model.lang.Register;
import ghidra.program.model.lang.RegisterValue;
import ghidra.program.model.listing.ProgramContext;
import ghidra.program.model.pcode.PcodeOp;
/**
* The decoder of a {@link JitPassage} to support JIT-accelerated p-code emulation.
*
* <p>
* When the emulator encounters an address (and contextreg value) that it has not previously
* translated, it must decode a passage seeded at that required entry point. It must then translate
* the passage and collect all the resulting entry points, and finally invoke the passage's
* {@link JitCompiledPassage#run(int) run} method for the required entry point.
*
* <h2>Decoding a Passage</h2>
* <p>
* Decode starts with a single seed, which is the entry point required by the emulator. As such,
* that seed <em>must</em> be among the entry points exported by the translator. Decode occurs one
* stride at a time. Starting with the seed, we decode a stride by disassembling linearly until: 1)
* We encounter an instruction without fall through, 2) there's already an entry point to a
* translated passage at an encountered address, or 3) a user injection fails to specify control
* flow. Case 1 is the normal expected case. For example, when the decoder encounters an
* unconditional branch, the stride is terminated. Case 2 is meant to reduce duplicative
* translations, but it does come at some cost during decode time. Suppose execution branches into
* the middle of a previously translated basic block. (Note that basic blocks are only broken apart
* using branches <em>in the same passage</em>, so it is possible some branch encountered later
* would jump into another passage's basic block.) That previously translated passage will not have
* exposed an entry point at that branch target, so the emulator will begin decoding using the
* branch target as the seed. Ideally, the resulting passage will consist of a single stride that
* terminates at an existing entry point. The emulator will translate and execute the passage, which
* should exit at that entry point, where the emulator can then continue execution. Case 3 is just
* to ensure execution does not get caught in a translated infinite loop. There will still be an
* infinite loop, but it can be interrupted while execution is in the emulator's logic rather than
* the translated logic.
*
* <p>
* As the stride decoder processes each instruction, it interprets its p-code, along with any
* generated by user injects, to collect branch targets. For direct branches ({@link PcodeOp#BRANCH
* branch}, {@link PcodeOp#CBRANCH cbranch}, and {@link PcodeOp#CALL call}), the target address (and
* appropriate contextreg value) is added to the queue of seeds, unless that target is already
* decoded in this passage. A bit of control flow analysis is required to determine whether each
* instruction (with user injects) has fall through. We borrow the {@link BlockSplitter} from the
* {@link JitControlFlowModel} to accomplish this. We append a "probe" p-code op at the very end,
* and then once we have the (miniature) control flow graph, we check if there's a path from
* instruction start to the probe op. If there is, then we can fall through, so decode proceeds to
* the next instruction. If not, the stride is terminated, so the decoder starts a new stride at the
* next seed, unless we've met the p-code op, instruction, or stride {@link JitConfiguration quota}.
*
* <p>
* The seed queue is a list of {@link ExtBranch} records. Each stride is decoded by removing a seed
* from that queue, decoding instructions, emitting ops, and then creating an {@link IntBranch}
* record targeting the first op of the newly-decoded instruction. The {@link Branch#from() from}
* field is taken from the seed {@link ExtBranch} record. Decode will likely terminate before this
* queue is emptied, in which case, those remaining external branches will become part of the
* passage's {@link JitPassage#getBranches() branches}. Direct branches to instructions already
* included in the passage, p-code relative branches, and queued external branches to instructions
* which have since been decoded all become {@link IntBranch} records, too. For indirect branches
* ({@link PcodeOp#BRANCHIND branchind}, {@link PcodeOp#CALLIND callind}, and {@link PcodeOp#RETURN
* return}), we create {@link IndBranch} records. For error cases (e.g.,
* {@link PcodeOp#UNIMPLEMENTED unimplemented}), we create {@link ErrBranch} records.
*
* @implNote The process described above is actually implemented in {@link DecoderForOnePassage}.
* This class just keeps the configuration and some other trappings, and instantiates an
* actual decoder upon requesting a seed.
*/
public class JitPassageDecoder {
    /** The thread this decoder was constructed for */
    final JitPcodeThread thread;
    /** The thread's instruction decoder, used to disassemble each instruction */
    final InstructionDecoder decoder;
    /** The thread's default program context, possibly {@code null} */
    final ProgramContext defaultContext;
    /** The base context register, or {@link Register#NO_CONTEXT} when there is no context */
    final Register contextreg;
    /** The decoder's wrapper around the thread's userop library */
    final DecoderUseropLibrary library;

    /**
     * Create a passage decoder bound to a thread
     *
     * <p>
     * The instruction decoder, default context, and userop library are all taken from the given
     * thread. When the thread has no default context, the context register is
     * {@link Register#NO_CONTEXT}.
     *
     * @param thread the thread whose instruction decoder, context, and userop library to use.
     */
    public JitPassageDecoder(JitPcodeThread thread) {
        this.thread = thread;
        this.decoder = thread.getDecoder();
        this.defaultContext = thread.getDefaultContext();
        if (this.defaultContext == null) {
            this.contextreg = Register.NO_CONTEXT;
        }
        else {
            this.contextreg = this.defaultContext.getBaseContextRegister();
        }
        this.library = new DecoderUseropLibrary(thread.getUseropLibrary());
    }

    /**
     * Decode a passage whose seed is given as a separate address and contextreg value
     *
     * <p>
     * This is a convenience that pairs the two into an {@link AddrCtx} and delegates.
     *
     * @param seed the seed address
     * @param ctxIn the seed contextreg value
     * @param maxOps the maximum-ish number of p-code ops to emit
     * @see #decodePassage(AddrCtx, int)
     * @return the decoded passage
     */
    public JitPassage decodePassage(Address seed, RegisterValue ctxIn, int maxOps) {
        AddrCtx entry = new AddrCtx(ctxIn, seed);
        return decodePassage(entry, maxOps);
    }

    /**
     * Decode a passage starting at the given seed
     *
     * <p>
     * The {@code maxOps} parameter allows the configured
     * {@link JitConfiguration#maxPassageOps() option} to be overridden. In particular, the
     * bytecode emitter may exceed the maximum size of a Java method, in which case we must abort,
     * re-decode with fewer ops, and retry. Whether this back off should persist in the
     * configuration is yet to be determined. Output size can vary wildly depending on the number
     * of basic blocks, scope transitions, nature of the ops, etc. We ought to be able to provide
     * a reasonable default value that mostly avoids retries, because each retry essentially
     * wastes an entire JIT translation. On the other hand, if we choose too small a value, we
     * lose some of the benefits of translating the control flow and keeping variables in JVM
     * locals.
     *
     * @param seed the required entry point, where decode will start
     * @param maxOps the maximum-ish number of p-code ops to emit
     * @return the decoded passage
     */
    public JitPassage decodePassage(AddrCtx seed, int maxOps) {
        // A fresh single-use decoder carries all the state for this one passage
        DecoderForOnePassage passageDecoder = new DecoderForOnePassage(this, seed, maxOps);
        passageDecoder.decodePassage();
        return passageDecoder.finish();
    }

    /**
     * Decode a single instruction
     *
     * <p>
     * A decode failure is not propagated. Instead, it is converted into a placeholder
     * instruction carrying the failure's message.
     *
     * @param address the address of the instruction
     * @param ctx the input decode context
     * @return the decoded instruction, or a {@link DecodeErrorInstruction}.
     */
    PseudoInstruction decodeInstruction(Address address, RegisterValue ctx) {
        try {
            return decoder.decodeInstruction(address, ctx);
        }
        catch (DecodePcodeExecutionException decodeFailure) {
            return JitPassage.decodeError(decoder.getLanguage(), address, ctx,
                decodeFailure.getMessage());
        }
    }
}

View file

@ -0,0 +1,74 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import static org.objectweb.asm.Opcodes.ATHROW;
import org.objectweb.asm.Label;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.JitPassage.DecodedPcodeOp;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.program.model.pcode.PcodeOp;
/**
* A requested exception handler
*
* <p>
* When an exception occurs, we must retire all of the variables before we pop the
* {@link JitCompiledPassage#run(int) run} method's frame. We also write out the program counter and
* disassembly context so that the emulator can resume appropriately. After that, we re-throw the
* exception.
*
* <p>
* When the code generator knows the code it's emitting can cause a user exception, e.g., the Direct
* invocation of a userop, and there are live variables in scope, then it should request a handler
* (via {@link JitCodeGenerator#requestExceptionHandler(DecodedPcodeOp, JitBlock)}) and surround the
* code in a {@code try-catch} on {@link Throwable} directing it to this handler.
*
* @param op the op which may cause an exception
* @param block the block containing the op
* @param label the label at the start of the handler
*/
public record ExceptionHandler(PcodeOp op, JitBlock block, Label label) {
    /**
     * Create a handler for the given op and block, allocating a fresh label for it
     *
     * @param op the op which may cause an exception
     * @param block the block containing the op
     */
    public ExceptionHandler(PcodeOp op, JitBlock block) {
        this(op, block, new Label());
    }

    /**
     * Emit this handler into the {@link JitCompiledPassage#run(int) run} method.
     *
     * <p>
     * The handler is entered with the caught exception on the stack. It places its label, emits
     * the passage exit code for the block (supplying the offset of the op's address and the op's
     * exit context), and then re-throws the exception.
     *
     * @param gen the code generator
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     */
    public void generateRunCode(JitCodeGenerator gen, MethodVisitor rv) {
        // Handler entry point
        rv.visitLabel(label);
        // [exc]
        gen.generatePassageExit(block,
            () -> rv.visitLdcInsn(gen.getAddressForOp(op).getOffset()),
            gen.getExitContext(op), rv);
        // [exc]
        rv.visitInsn(ATHROW);
        // []
    }
}

View file

@ -0,0 +1,87 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import static ghidra.pcode.emu.jit.gen.GenConsts.*;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorStatePiece.JitBytesPcodeExecutorStateSpace;
import ghidra.program.model.address.Address;
/**
* A field request for a pre-fetched page from the {@link JitBytesPcodeExecutorStateSpace}.
*
* <p>
* The field is used for direct memory accesses. For those, the address space and fixed address is
* given in the p-code, so we are able to pre-fetch the page and access it directly at run time.
*
* @param address the address contained by the page to pre-fetch
*/
public record FieldForArrDirect(Address address) implements InstanceFieldReq {
    /**
     * {@inheritDoc}
     *
     * <p>
     * The name combines the address space's name and the offset in hex, e.g.,
     * {@code arrDir_ram_600000}.
     */
    @Override
    public String name() {
        String spaceName = address.getAddressSpace().getName();
        return String.format("arrDir_%s_%x", spaceName, address.getOffset());
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * Consider the address {@code ram:00600000}. The declaration is equivalent to:
     *
     * <pre>
     * private final byte[] arrDir_ram_600000;
     * </pre>
     *
     * <p>
     * And the initialization is equivalent to:
     *
     * <pre>
     * arrDir_ram_600000 =
     *     state.getForSpace(ADDRESS_FACTORY.getAddressSpace(ramId)).getDirect(0x600000);
     * </pre>
     */
    @Override
    public void generateInitCode(JitCodeGenerator gen, ClassVisitor cv, MethodVisitor iv) {
        final String fieldName = name();
        final long offset = address.getOffset();

        cv.visitField(ACC_PRIVATE | ACC_FINAL, fieldName, TDESC_BYTE_ARR, null, null);
        // [...]
        iv.visitVarInsn(ALOAD, 0);
        // [...,this]
        gen.generateLoadJitStateSpace(address.getAddressSpace(), iv);
        // [...,this,jitspace]
        iv.visitLdcInsn(offset);
        // [...,this,jitspace,offset:LONG]
        iv.visitMethodInsn(INVOKEVIRTUAL, NAME_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE,
            "getDirect", MDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE__GET_DIRECT, false);
        // [...,this,arr]
        iv.visitFieldInsn(PUTFIELD, gen.nameThis, fieldName, TDESC_BYTE_ARR);
        // [...]
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * This reads the instance field, leaving the pre-fetched byte array on the stack.
     */
    @Override
    public void generateLoadCode(JitCodeGenerator gen, MethodVisitor rv) {
        final String fieldName = name();
        // [...]
        rv.visitVarInsn(ALOAD, 0);
        // [...,this]
        rv.visitFieldInsn(GETFIELD, gen.nameThis, fieldName, TDESC_BYTE_ARR);
        // [...,arr]
    }
}

View file

@ -0,0 +1,77 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import static ghidra.pcode.emu.jit.gen.GenConsts.*;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.program.model.lang.Language;
import ghidra.program.model.lang.RegisterValue;
/**
* A field request for pre-constructed contextreg value
*/
record FieldForContext(RegisterValue ctx) implements StaticFieldReq {
    /**
     * Render the context's unsigned value in hex
     *
     * <p>
     * This dereferences {@code ctx}, so it must not be called when {@code ctx} is null.
     *
     * @return the hex digits, without any prefix
     */
    private String hexValue() {
        return ctx.getUnsignedValue().toString(16);
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * The name is {@code CTX_} followed by the context's unsigned value in hex. Unlike the
     * generate methods, this does not tolerate a null {@code ctx}.
     */
    @Override
    public String name() {
        return "CTX_" + hexValue();
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * Consider the context value 0x80000000. The code is equivalent to:
     *
     * <pre>
     * private static final {@link RegisterValue} CTX_80000000 = {@link JitCompiledPassage#createContext(Language, String) createContext}(LANGUAGE, "80000000");
     * </pre>
     *
     * <p>
     * When {@code ctx} is null, no field is declared and no code is emitted.
     */
    @Override
    public void generateClinitCode(JitCodeGenerator gen, ClassVisitor cv, MethodVisitor sv) {
        if (ctx == null) {
            return;
        }
        final String fieldName = name();
        final String hex = hexValue();

        cv.visitField(ACC_PRIVATE | ACC_STATIC | ACC_FINAL, fieldName, TDESC_REGISTER_VALUE,
            null, null);
        // []
        sv.visitFieldInsn(GETSTATIC, gen.nameThis, "LANGUAGE", TDESC_LANGUAGE);
        // [language]
        sv.visitLdcInsn(hex);
        // [language,ctx:STR]
        sv.visitMethodInsn(INVOKESTATIC, NAME_JIT_COMPILED_PASSAGE, "createContext",
            MDESC_JIT_COMPILED_PASSAGE__CREATE_CONTEXT, true);
        // [ctx:RV]
        sv.visitFieldInsn(PUTSTATIC, gen.nameThis, fieldName, TDESC_REGISTER_VALUE);
        // []
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * When {@code ctx} is null, there is no field to read, so a null constant is pushed instead.
     */
    @Override
    public void generateLoadCode(JitCodeGenerator gen, MethodVisitor rv) {
        // [...]
        if (ctx == null) {
            rv.visitInsn(ACONST_NULL);
            // [...,ctx]
            return;
        }
        rv.visitFieldInsn(GETSTATIC, gen.nameThis, name(), TDESC_REGISTER_VALUE);
        // [...,ctx]
    }
}

View file

@ -0,0 +1,101 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import static ghidra.pcode.emu.jit.gen.GenConsts.*;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.JitPassage.AddrCtx;
import ghidra.pcode.emu.jit.JitPassage.ExtBranch;
import ghidra.pcode.emu.jit.JitPcodeThread;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage.EntryPoint;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage.ExitSlot;
import ghidra.program.model.lang.RegisterValue;
/**
* A field request for an {@link ExitSlot}.
*
* <p>
* One of these is allocated per {@link ExtBranch#to()}. At run time, the first time a branch is
* encountered from this passage to the given target, the slot calls
* {@link JitPcodeThread#getEntry(AddrCtx) getEntry}{@code (target)} and keeps the reference. Each
* subsequent encounter uses the kept reference. This reference is what gets returned by
* {@link JitCompiledPassage#run(int)}, so now the thread already has in hand the next
* {@link EntryPoint} to execute.
*
* @param target the target address-contextreg pair of the branch exiting via this slot
*/
public record FieldForExitSlot(AddrCtx target) implements InstanceFieldReq {
    /**
     * {@inheritDoc}
     *
     * <p>
     * The name combines the target address's offset and the target context value, both in hex,
     * e.g., {@code exit_401234_80000000}.
     */
    @Override
    public String name() {
        long offset = target.address.getOffset();
        String ctxHex = target.biCtx.toString(16);
        return String.format("exit_%x_%s", offset, ctxHex);
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * Consider the target {@code (ram:00401234,ctx=80000000)}. The declaration is equivalent to:
     *
     * <pre>
     * private final {@link ExitSlot} exit_401234_80000000;
     * </pre>
     *
     * <p>
     * And the initialization is equivalent to:
     *
     * <pre>
     * exit_401234_80000000 = {@link JitCompiledPassage#createExitSlot(long, RegisterValue) createExitSlot}(0x401234, CTX_80000000);
     * </pre>
     *
     * <p>
     * The {@code CTX_...} field is requested from the generator first, and its own load code is
     * used to push the context value.
     */
    @Override
    public void generateInitCode(JitCodeGenerator gen, ClassVisitor cv, MethodVisitor iv) {
        // Request the context field before anything else is emitted
        FieldForContext contextField = gen.requestStaticFieldForContext(target.rvCtx);
        final String fieldName = name();

        cv.visitField(ACC_PRIVATE | ACC_FINAL, fieldName, TDESC_EXIT_SLOT, null, null);
        // []
        iv.visitVarInsn(ALOAD, 0);
        // [this]
        iv.visitInsn(DUP);
        // [this,this]
        iv.visitLdcInsn(target.address.getOffset());
        // [this,this,target:LONG]
        contextField.generateLoadCode(gen, iv);
        // [this,this,target:LONG,ctx:RV]
        iv.visitMethodInsn(INVOKEINTERFACE, NAME_JIT_COMPILED_PASSAGE, "createExitSlot",
            MDESC_JIT_COMPILED_PASSAGE__CREATE_EXIT_SLOT, true);
        // [this,slot]
        iv.visitFieldInsn(PUTFIELD, gen.nameThis, fieldName, TDESC_EXIT_SLOT);
        // []
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * This reads the instance field, leaving the {@link ExitSlot} reference on the stack.
     */
    @Override
    public void generateLoadCode(JitCodeGenerator gen, MethodVisitor rv) {
        final String fieldName = name();
        // []
        rv.visitVarInsn(ALOAD, 0);
        // [this]
        rv.visitFieldInsn(GETFIELD, gen.nameThis, fieldName, TDESC_EXIT_SLOT);
        // [slot]
    }
}

View file

@ -0,0 +1,84 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import static ghidra.pcode.emu.jit.gen.GenConsts.TDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorStatePiece.JitBytesPcodeExecutorStateSpace;
import ghidra.program.model.address.AddressSpace;
/**
* A field request for a pre-fetched {@link JitBytesPcodeExecutorStateSpace}
*
* <p>
* The field is used for indirect memory accesses. For those, the address space is given in the
* p-code, but the offset must be computed at run time. Thus, we can pre-fetch the state space, but
* not any particular page.
*
* @param space the address space of the state space to pre-fetch
*/
public record FieldForSpaceIndirect(AddressSpace space) implements InstanceFieldReq {
    /**
     * {@inheritDoc}
     *
     * <p>
     * The name is {@code spaceInd_} followed by the address space's name, e.g.,
     * {@code spaceInd_ram}.
     */
    @Override
    public String name() {
        return "spaceInd_%s".formatted(space.getName());
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * Consider the "ram" space. The declaration is equivalent to:
     *
     * <pre>
     * private final {@link JitBytesPcodeExecutorStateSpace} spaceInd_ram;
     * </pre>
     *
     * <p>
     * And the initialization is equivalent to:
     *
     * <pre>
     * spaceInd_ram = state.getForSpace(ADDRESS_FACTORY.getAddressSpace(ramId));
     * </pre>
     */
    @Override
    public void generateInitCode(JitCodeGenerator gen, ClassVisitor cv, MethodVisitor iv) {
        final String fieldName = name();

        cv.visitField(ACC_PRIVATE | ACC_FINAL, fieldName,
            TDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE, null, null);
        // [...]
        iv.visitVarInsn(ALOAD, 0);
        // [...,this]
        gen.generateLoadJitStateSpace(space, iv);
        // [...,this,jitspace]
        iv.visitFieldInsn(PUTFIELD, gen.nameThis, fieldName,
            TDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE);
        // [...]
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * This reads the instance field, leaving the pre-fetched state space on the stack.
     */
    @Override
    public void generateLoadCode(JitCodeGenerator gen, MethodVisitor rv) {
        final String fieldName = name();
        // [...]
        rv.visitVarInsn(ALOAD, 0);
        // [...,this]
        rv.visitFieldInsn(GETFIELD, gen.nameThis, fieldName,
            TDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE);
        // [...,jitspace]
    }
}

View file

@ -0,0 +1,87 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import static ghidra.pcode.emu.jit.gen.GenConsts.*;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitDataFlowUseropLibrary;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.exec.PcodeUseropLibrary.PcodeUseropDefinition;
/**
* A field request for a pre-fetched userop definition
*
* <p>
* These are used to invoke userops using the Standard or Direct strategies.
*
* @param userop the definition to pre-fetch
* @see JitDataFlowUseropLibrary
*/
public record FieldForUserop(PcodeUseropDefinition<?> userop) implements InstanceFieldReq {
    /**
     * {@inheritDoc}
     *
     * <p>
     * The name is {@code userop_} followed by the userop's name, e.g., {@code userop_syscall}.
     */
    @Override
    public String name() {
        return "userop_%s".formatted(userop.getName());
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * Consider the userop {@code syscall()}. The declaration is equivalent to:
     *
     * <pre>
     * private final {@link PcodeUseropDefinition} userop_syscall;
     * </pre>
     *
     * <p>
     * And the initialization is equivalent to:
     *
     * <pre>
     * userop_syscall = {@link JitCompiledPassage#getUseropDefinition(String) getUseropDefinition}("syscall");
     * </pre>
     */
    @Override
    public void generateInitCode(JitCodeGenerator gen, ClassVisitor cv, MethodVisitor iv) {
        final String fieldName = name();

        cv.visitField(ACC_PRIVATE | ACC_FINAL, fieldName, TDESC_PCODE_USEROP_DEFINITION, null,
            null);
        // []
        iv.visitVarInsn(ALOAD, 0);
        // [this]
        iv.visitInsn(DUP);
        // [this,this]
        iv.visitLdcInsn(userop.getName());
        // [this,this,name]
        iv.visitMethodInsn(INVOKEINTERFACE, NAME_JIT_COMPILED_PASSAGE, "getUseropDefinition",
            MDESC_JIT_COMPILED_PASSAGE__GET_USEROP_DEFINITION, true);
        // [this,userop]
        iv.visitFieldInsn(PUTFIELD, gen.nameThis, fieldName, TDESC_PCODE_USEROP_DEFINITION);
        // []
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * This reads the instance field, leaving the userop definition reference on the stack.
     */
    @Override
    public void generateLoadCode(JitCodeGenerator gen, MethodVisitor rv) {
        final String fieldName = name();
        // []
        rv.visitVarInsn(ALOAD, 0);
        // [this]
        rv.visitFieldInsn(GETFIELD, gen.nameThis, fieldName, TDESC_PCODE_USEROP_DEFINITION);
        // [userop]
    }
}

View file

@ -0,0 +1,85 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import static ghidra.pcode.emu.jit.gen.GenConsts.*;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitDataFlowUseropLibrary;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.var.VarGen;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressFactory;
import ghidra.program.model.pcode.Varnode;
/**
* A field request for a pre-constructed varnode
*
* <p>
* These are used to invoke userops using the Standard strategy.
*
* @param vn the varnode to pre-construct
* @see JitDataFlowUseropLibrary
*/
public record FieldForVarnode(Varnode vn) implements StaticFieldReq {
    /**
     * {@inheritDoc}
     *
     * <p>
     * The name is composed of the upper-cased address space name, the offset in unsigned hex,
     * and the size, e.g., {@code VARNODE_RAM_400000_4}. The case conversion uses
     * {@link java.util.Locale#ROOT} so that the generated name does not vary with the JVM's
     * default locale.
     */
    @Override
    public String name() {
        Address addr = vn.getAddress();
        // Locale-independent upper-casing keeps generated field names deterministic
        String spaceName = addr.getAddressSpace().getName().toUpperCase(java.util.Locale.ROOT);
        return "VARNODE_%s_%s_%s".formatted(spaceName,
            Long.toUnsignedString(addr.getOffset(), 16), vn.getSize());
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * Consider the varnode (ram:00400000,4). The code is equivalent to:
     *
     * <pre>
     * private static final {@link Varnode} VARNODE_RAM_400000_4 = {@link JitCompiledPassage#createVarnode(AddressFactory, String, long, int) createVarnode}(ADDRESS_FACTORY, "ram", 0x400000, 4);
     * </pre>
     *
     * <p>
     * Note the space name passed to {@code createVarnode} is the original name, not the
     * upper-cased form used in the field's name.
     */
    @Override
    public void generateClinitCode(JitCodeGenerator gen, ClassVisitor cv, MethodVisitor sv) {
        cv.visitField(ACC_PRIVATE | ACC_STATIC | ACC_FINAL, name(), TDESC_VARNODE, null, null);
        // []
        sv.visitFieldInsn(GETSTATIC, gen.nameThis, "ADDRESS_FACTORY", TDESC_ADDRESS_FACTORY);
        // [factory]
        sv.visitLdcInsn(vn.getAddress().getAddressSpace().getName());
        // [factory,space:STR]
        sv.visitLdcInsn(vn.getAddress().getOffset());
        // [factory,space:STR,offset:LONG]
        sv.visitLdcInsn(vn.getSize());
        // [factory,space:STR,offset:LONG,size:INT]
        sv.visitMethodInsn(INVOKESTATIC, NAME_JIT_COMPILED_PASSAGE, "createVarnode",
            MDESC_JIT_COMPILED_PASSAGE__CREATE_VARNODE, true);
        // [vn]
        sv.visitFieldInsn(PUTSTATIC, gen.nameThis, name(), TDESC_VARNODE);
        // []
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * To clarify, this <em>does not</em> load a varnode's current value onto the JVM stack. That
     * is done by {@link VarGen}. This loads a ref to the {@link Varnode} instance. Also, it's
     * not precisely the same instance as given, but a re-construction of it as a plain
     * {@link Varnode}, i.e., just the (space,offset,size) triple.
     */
    @Override
    public void generateLoadCode(JitCodeGenerator gen, MethodVisitor rv) {
        // [...]
        rv.visitFieldInsn(GETSTATIC, gen.nameThis, name(), TDESC_VARNODE);
        // [...,vn]
    }
}

View file

@ -0,0 +1,41 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
/**
* A field request for a pre-fetched or pre-constructed element
*/
interface FieldReq {
/**
* Derive a suitable name for the field
*
* <p>
* The implementations in this package build the name from the requested element itself, e.g.,
* {@code spaceInd_ram} for the "ram" address space. The same name is used both to declare the
* field and to access it, so it should be stable for a given request.
*
* @return the name
*/
String name();
/**
* Emit code to load the field onto the JVM stack
*
* <p>
* The emitted code leaves exactly one reference on the stack: the field's value. Requests for
* instance fields load {@code this} and read the field from it, whereas requests for static
* fields read it directly from the generated class.
*
* @param gen the code generator
* @param rv the visitor often for the {@link JitCompiledPassage#run(int) run} method, but could
* be the static initializer or constructor
*/
void generateLoadCode(JitCodeGenerator gen, MethodVisitor rv);
}

View file

@ -0,0 +1,223 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.reflect.TypeLiteral;
import org.objectweb.asm.Type;
import ghidra.generic.util.datastruct.SemisparseByteArray;
import ghidra.pcode.emu.jit.*;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorStatePiece.JitBytesPcodeExecutorStateSpace;
import ghidra.pcode.emu.jit.JitPassage.AddrCtx;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage.EntryPoint;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage.ExitSlot;
import ghidra.pcode.error.LowlevelError;
import ghidra.pcode.exec.*;
import ghidra.pcode.exec.PcodeUseropLibrary.PcodeUseropDefinition;
import ghidra.program.model.address.*;
import ghidra.program.model.lang.Language;
import ghidra.program.model.lang.RegisterValue;
import ghidra.program.model.pcode.Varnode;
/**
* Various constants (namely class names, type descriptions, method descriptions, etc.) used during
* bytecode generation.
*/
@SuppressWarnings("javadoc")
public interface GenConsts {
public static final int BLOCK_SIZE = SemisparseByteArray.BLOCK_SIZE;
public static final String TDESC_ADDRESS = Type.getDescriptor(Address.class);
public static final String TDESC_ADDRESS_FACTORY = Type.getDescriptor(AddressFactory.class);
public static final String TDESC_ADDRESS_SPACE = Type.getDescriptor(AddressSpace.class);
public static final String TDESC_BYTE_ARR = Type.getDescriptor(byte[].class);
public static final String TDESC_EXIT_SLOT = Type.getDescriptor(ExitSlot.class);
public static final String TDESC_JIT_BYTES_PCODE_EXECUTOR_STATE =
Type.getDescriptor(JitBytesPcodeExecutorState.class);
public static final String TDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE =
Type.getDescriptor(JitBytesPcodeExecutorStateSpace.class);
public static final String TDESC_JIT_PCODE_THREAD = Type.getDescriptor(JitPcodeThread.class);
public static final String TDESC_LANGUAGE = Type.getDescriptor(Language.class);
public static final String TDESC_LIST = Type.getDescriptor(List.class);
public static final String TDESC_PCODE_USEROP_DEFINITION =
Type.getDescriptor(PcodeUseropDefinition.class);
public static final String TDESC_REGISTER_VALUE = Type.getDescriptor(RegisterValue.class);
public static final String TDESC_STRING = Type.getDescriptor(String.class);
public static final String TDESC_VARNODE = Type.getDescriptor(Varnode.class);
public static final String TSIG_LIST_ADDRCTX =
JitJvmTypeUtils.typeToSignature(new TypeLiteral<List<AddrCtx>>() {}.value);
public static final String MDESC_ADDR_CTX__$INIT = Type.getMethodDescriptor(Type.VOID_TYPE,
Type.getType(RegisterValue.class), Type.getType(Address.class));
public static final String MDESC_ADDRESS_FACTORY__GET_ADDRESS_SPACE =
Type.getMethodDescriptor(Type.getType(AddressSpace.class), Type.INT_TYPE);
public static final String MDESC_ADDRESS_SPACE__GET_ADDRESS =
Type.getMethodDescriptor(Type.getType(Address.class), Type.LONG_TYPE);
public static final String MDESC_ARRAY_LIST__$INIT = Type.getMethodDescriptor(Type.VOID_TYPE);
// NOTE: The void (String) form is private....
public static final String MDESC_ASSERTION_ERROR__$INIT =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.getType(Object.class));
public static final String MDESC_DOUBLE__DOUBLE_TO_RAW_LONG_BITS =
Type.getMethodDescriptor(Type.LONG_TYPE, Type.DOUBLE_TYPE);
public static final String MDESC_DOUBLE__IS_NAN =
Type.getMethodDescriptor(Type.BOOLEAN_TYPE, Type.DOUBLE_TYPE);
public static final String MDESC_DOUBLE__LONG_BITS_TO_DOUBLE =
Type.getMethodDescriptor(Type.DOUBLE_TYPE, Type.LONG_TYPE);
public static final String MDESC_FLOAT__FLOAT_TO_RAW_INT_BITS =
Type.getMethodDescriptor(Type.INT_TYPE, Type.FLOAT_TYPE);
public static final String MDESC_FLOAT__INT_BITS_TO_FLOAT =
Type.getMethodDescriptor(Type.FLOAT_TYPE, Type.INT_TYPE);
public static final String MDESC_FLOAT__IS_NAN =
Type.getMethodDescriptor(Type.BOOLEAN_TYPE, Type.FLOAT_TYPE);
public static final String MDESC_ILLEGAL_ARGUMENT_EXCEPTION__$INIT =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.getType(String.class));
public static final String MDESC_INTEGER__BIT_COUNT =
Type.getMethodDescriptor(Type.INT_TYPE, Type.INT_TYPE);
public static final String MDESC_INTEGER__COMPARE_UNSIGNED =
Type.getMethodDescriptor(Type.INT_TYPE, Type.INT_TYPE, Type.INT_TYPE);
public static final String MDESC_INTEGER__NUMBER_OF_LEADING_ZEROS =
Type.getMethodDescriptor(Type.INT_TYPE, Type.INT_TYPE);
public static final String MDESC_INTEGER__TO_UNSIGNED_LONG =
Type.getMethodDescriptor(Type.LONG_TYPE, Type.INT_TYPE);
public static final String MDESC_JIT_BYTES_PCODE_EXECUTOR_STATE__GET_LANGUAGE =
Type.getMethodDescriptor(Type.getType(Language.class));
public static final String MDESC_JIT_BYTES_PCODE_EXECUTOR_STATE__GET_SPACE_FOR =
Type.getMethodDescriptor(Type.getType(JitBytesPcodeExecutorStateSpace.class),
Type.getType(AddressSpace.class));
public static final String MDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE__GET_DIRECT =
Type.getMethodDescriptor(Type.getType(byte[].class), Type.LONG_TYPE);
public static final String MDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE__READ =
Type.getMethodDescriptor(Type.getType(byte[].class), Type.LONG_TYPE, Type.INT_TYPE);
public static final String MDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE__WRITE =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.LONG_TYPE, Type.getType(byte[].class),
Type.INT_TYPE, Type.INT_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__CONV_OFFSET2_TO_LONG =
Type.getMethodDescriptor(Type.LONG_TYPE, Type.INT_TYPE, Type.INT_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__COUNT =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.INT_TYPE, Type.INT_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__CREATE_CONTEXT =
Type.getMethodDescriptor(Type.getType(RegisterValue.class), Type.getType(Language.class),
Type.getType(String.class));
public static final String MDESC_JIT_COMPILED_PASSAGE__CREATE_DECODE_ERROR =
Type.getMethodDescriptor(Type.getType(DecodePcodeExecutionException.class),
Type.getType(String.class), Type.LONG_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__CREATE_EXIT_SLOT =
Type.getMethodDescriptor(Type.getType(ExitSlot.class), Type.LONG_TYPE,
Type.getType(RegisterValue.class));
public static final String MDESC_JIT_COMPILED_PASSAGE__CREATE_VARNODE =
Type.getMethodDescriptor(Type.getType(Varnode.class), Type.getType(AddressFactory.class),
Type.getType(String.class), Type.LONG_TYPE, Type.INT_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__GET_CHAINED =
Type.getMethodDescriptor(Type.getType(EntryPoint.class), Type.getType(ExitSlot.class));
public static final String MDESC_JIT_COMPILED_PASSAGE__GET_LANGUAGE =
Type.getMethodDescriptor(Type.getType(Language.class), Type.getType(String.class));
public static final String MDESC_JIT_COMPILED_PASSAGE__GET_USEROP_DEFINITION =
Type.getMethodDescriptor(Type.getType(PcodeUseropDefinition.class),
Type.getType(String.class));
public static final String MDESC_JIT_COMPILED_PASSAGE__INVOKE_USEROP =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.getType(PcodeUseropDefinition.class),
Type.getType(Varnode.class), Type.getType(Varnode[].class));
public static final String MDESC_JIT_COMPILED_PASSAGE__READ_INTX =
Type.getMethodDescriptor(Type.INT_TYPE, Type.getType(byte[].class), Type.INT_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__READ_LONGX =
Type.getMethodDescriptor(Type.LONG_TYPE, Type.getType(byte[].class), Type.INT_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__RETIRE_COUNTER_AND_CONTEXT =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.LONG_TYPE, Type.getType(RegisterValue.class));
public static final String MDESC_JIT_COMPILED_PASSAGE__S_CARRY_INT_RAW =
Type.getMethodDescriptor(Type.INT_TYPE, Type.INT_TYPE, Type.INT_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__S_CARRY_LONG_RAW =
Type.getMethodDescriptor(Type.LONG_TYPE, Type.LONG_TYPE, Type.LONG_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__WRITE_INTX =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.INT_TYPE, Type.getType(byte[].class),
Type.INT_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__WRITE_LONGX =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.LONG_TYPE, Type.getType(byte[].class),
Type.INT_TYPE);
public static final String MDESC_JIT_PCODE_THREAD__GET_STATE =
Type.getMethodDescriptor(Type.getType(JitThreadBytesPcodeExecutorState.class));
public static final String MDESC_LANGUAGE__GET_ADDRESS_FACTORY =
Type.getMethodDescriptor(Type.getType(AddressFactory.class));
public static final String MDESC_LANGUAGE__GET_DEFAULT_SPACE =
Type.getMethodDescriptor(Type.getType(AddressSpace.class));
public static final String MDESC_LIST__ADD =
Type.getMethodDescriptor(Type.BOOLEAN_TYPE, Type.getType(Object.class));
public static final String MDESC_LONG__BIT_COUNT =
Type.getMethodDescriptor(Type.INT_TYPE, Type.LONG_TYPE);
public static final String MDESC_LONG__COMPARE_UNSIGNED =
Type.getMethodDescriptor(Type.INT_TYPE, Type.LONG_TYPE, Type.LONG_TYPE);
public static final String MDESC_LONG__NUMBER_OF_LEADING_ZEROS =
Type.getMethodDescriptor(Type.INT_TYPE, Type.LONG_TYPE);
public static final String MDESC_LOW_LEVEL_ERROR__$INIT =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.getType(String.class));
public static final String MDESC_PCODE_USEROP_DEFINITION__GET_DEFINING_LIBRARY =
Type.getMethodDescriptor(Type.getType(PcodeUseropLibrary.class));
public static final String MDESC_SLEIGH_LINK_EXCEPTION__$INIT =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.getType(String.class));
public static final String MDESC_$DOUBLE_UNOP =
Type.getMethodDescriptor(Type.DOUBLE_TYPE, Type.DOUBLE_TYPE);
public static final String MDESC_$FLOAT_UNOP =
Type.getMethodDescriptor(Type.FLOAT_TYPE, Type.FLOAT_TYPE);
public static final String MDESC_$INT_BINOP =
Type.getMethodDescriptor(Type.INT_TYPE, Type.INT_TYPE, Type.INT_TYPE);
public static final String MDESC_$LONG_BINOP =
Type.getMethodDescriptor(Type.LONG_TYPE, Type.LONG_TYPE, Type.LONG_TYPE);
public static final String MDESC_$SHIFT_JJ =
Type.getMethodDescriptor(Type.LONG_TYPE, Type.LONG_TYPE, Type.LONG_TYPE);
public static final String MDESC_$SHIFT_JI =
Type.getMethodDescriptor(Type.LONG_TYPE, Type.LONG_TYPE, Type.INT_TYPE);
public static final String MDESC_$SHIFT_IJ =
Type.getMethodDescriptor(Type.INT_TYPE, Type.INT_TYPE, Type.LONG_TYPE);
public static final String MDESC_$SHIFT_II =
Type.getMethodDescriptor(Type.INT_TYPE, Type.INT_TYPE, Type.INT_TYPE);
public static final String NAME_ADDR_CTX = Type.getInternalName(AddrCtx.class);
public static final String NAME_ADDRESS = Type.getInternalName(Address.class);
public static final String NAME_ADDRESS_FACTORY = Type.getInternalName(AddressFactory.class);
public static final String NAME_ADDRESS_SPACE = Type.getInternalName(AddressSpace.class);
public static final String NAME_ARRAY_LIST = Type.getInternalName(ArrayList.class);
public static final String NAME_ASSERTION_ERROR = Type.getInternalName(AssertionError.class);
public static final String NAME_DOUBLE = Type.getInternalName(Double.class);
public static final String NAME_EXIT_SLOT = Type.getInternalName(ExitSlot.class);
public static final String NAME_FLOAT = Type.getInternalName(Float.class);
public static final String NAME_ILLEGAL_ARGUMENT_EXCEPTION =
Type.getInternalName(IllegalArgumentException.class);
public static final String NAME_INTEGER = Type.getInternalName(Integer.class);
public static final String NAME_JIT_BYTES_PCODE_EXECUTOR_STATE =
Type.getInternalName(JitBytesPcodeExecutorState.class);
public static final String NAME_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE =
Type.getInternalName(JitBytesPcodeExecutorStateSpace.class);
public static final String NAME_JIT_COMPILED_PASSAGE =
Type.getInternalName(JitCompiledPassage.class);
public static final String NAME_JIT_PCODE_THREAD = Type.getInternalName(JitPcodeThread.class);
public static final String NAME_LANGUAGE = Type.getInternalName(Language.class);
public static final String NAME_LIST = Type.getInternalName(List.class);
public static final String NAME_LONG = Type.getInternalName(Long.class);
public static final String NAME_LOW_LEVEL_ERROR = Type.getInternalName(LowlevelError.class);
public static final String NAME_MATH = Type.getInternalName(Math.class);
public static final String NAME_OBJECT = Type.getInternalName(Object.class);
public static final String NAME_PCODE_USEROP_DEFINITION =
Type.getInternalName(PcodeUseropDefinition.class);
public static final String NAME_SLEIGH_LINK_EXCEPTION =
Type.getInternalName(SleighLinkException.class);
public static final String NAME_THROWABLE = Type.getInternalName(Throwable.class);
public static final String NAME_VARNODE = Type.getInternalName(Varnode.class);
}

View file

@ -0,0 +1,37 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.MethodVisitor;
/**
* An instance field request initialized in the class constructor
*
* <p>
* Implementors describe one per-instance field of the generated class. The single method of this
* interface emits both the field's declaration and the code that initializes it.
*/
interface InstanceFieldReq extends FieldReq {
/**
* Emit the field declaration and its initialization bytecode
*
* <p>
* The declaration is emitted into the class definition, and the initialization code is emitted
* into the class constructor.
*
* @param gen the code generator
* @param cv the visitor for the class definition, which receives the field declaration
* @param iv the visitor for the class constructor, which receives the initialization code
*/
void generateInitCode(JitCodeGenerator gen, ClassVisitor cv, MethodVisitor iv);
}

View file

@ -0,0 +1,37 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.MethodVisitor;
/**
* A static field request initialized in the class initializer
*
* <p>
* Implementors describe one static field of the generated class. The single method of this
* interface emits both the field's declaration and the code that initializes it.
*/
interface StaticFieldReq extends FieldReq {
/**
* Emit the field declaration and its initialization bytecode
*
* <p>
* The declaration is emitted into the class definition, and the initialization code is
* emitted into the class initializer.
*
* @param gen the code generator
* @param cv the visitor for the class definition, which receives the field declaration
* @param sv the visitor for the class (static) initializer, which receives the
* initialization code
*/
void generateClinitCode(JitCodeGenerator gen, ClassVisitor cv, MethodVisitor sv);
}

View file

@ -0,0 +1,90 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitBinOp;
/**
 * Common scaffolding for generators of binary p-code operators
 *
 * <p>
 * An implementor normally supplies only
 * {@link #generateBinOpRunCode(JitCodeGenerator, JitBinOp, JitBlock, JitType, JitType, MethodVisitor)
 * the operator itself}. Reading the two inputs and writing the output are handled by the default
 * {@link #generateRunCode(JitCodeGenerator, JitBinOp, JitBlock, MethodVisitor)}.
 *
 * @param <T> the class of p-code op node in the use-def graph
 */
public interface BinOpGen<T extends JitBinOp> extends OpGen<T> {

	/**
	 * Hook invoked between the reads of the left and right operands
	 *
	 * <p>
	 * When this is called, code pushing the left operand has just been emitted, and code pushing
	 * the right operand has not. An implementation may emit code that adjusts the left operand
	 * while it is still on top of the stack. The default emits nothing.
	 *
	 * @param gen the code generator
	 * @param op the operator
	 * @param lType the actual type of the left operand
	 * @param rType the actual type of the right operand
	 * @param rv the method visitor
	 * @return the new actual type of the left operand; by default, {@code lType} unchanged
	 */
	default JitType afterLeft(JitCodeGenerator gen, T op, JitType lType, JitType rType,
			MethodVisitor rv) {
		return lType;
	}

	/**
	 * Emit the code that performs the operator proper
	 *
	 * <p>
	 * Both operands are already on the stack when this is invoked. Once it returns, the caller
	 * emits the code that moves the result from the stack into the destination operand.
	 *
	 * @param gen the code generator
	 * @param op the operator
	 * @param block the block containing the operator
	 * @param lType the actual type of the left operand
	 * @param rType the actual type of the right operand
	 * @param rv the method visitor
	 * @return the actual type of the result
	 */
	JitType generateBinOpRunCode(JitCodeGenerator gen, T op, JitBlock block, JitType lType,
			JitType rType, MethodVisitor rv);

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * The default proceeds in five steps: read the left operand; call
	 * {@link #afterLeft(JitCodeGenerator, JitBinOp, JitType, JitType, MethodVisitor) after-left};
	 * read the right operand; call
	 * {@link #generateBinOpRunCode(JitCodeGenerator, JitBinOp, JitBlock, JitType, JitType, MethodVisitor)
	 * generate-binop}; write the destination operand.
	 */
	@Override
	default void generateRunCode(JitCodeGenerator gen, T op, JitBlock block, MethodVisitor rv) {
		final JitType leftAsRead = gen.generateValReadCode(op.l(), op.lType());
		// The hook wants to know the right operand's type before that operand is read
		final JitType rightExpected = op.rType().resolve(gen.getTypeModel().typeOf(op.r()));
		final JitType leftAdjusted = afterLeft(gen, op, leftAsRead, rightExpected, rv);
		final JitType rightAsRead = gen.generateValReadCode(op.r(), op.rType());
		assert rightAsRead == rightExpected;
		final JitType resultType =
			generateBinOpRunCode(gen, op, block, leftAdjusted, rightExpected, rv);
		gen.generateVarWriteCode(op.out(), resultType);
	}
}

View file

@ -0,0 +1,125 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.lifecycle.Unfinished.TODO;
import static org.objectweb.asm.Opcodes.ILOAD;
import static org.objectweb.asm.Opcodes.ISTORE;
import org.objectweb.asm.*;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.type.TypeConversions;
import ghidra.pcode.emu.jit.op.JitBinOp;
/**
 * An extension for bitwise binary operators
 *
 * <p>
 * Implementors only select the JVM opcodes. Operand size unification, opcode emission, and the
 * (work-in-progress) multi-precision case are provided here.
 *
 * @param <T> the class of p-code op node in the use-def graph
 */
public interface BitwiseBinOpGen<T extends JitBinOp> extends BinOpGen<T> {

	/**
	 * The JVM opcode to implement this operator with int operands on the stack.
	 *
	 * @return the opcode
	 */
	int intOpcode();

	/**
	 * The JVM opcode to implement this operator with long operands on the stack.
	 *
	 * @return the opcode
	 */
	int longOpcode();

	/**
	 * <b>WIP</b>: The implementation for multi-precision ints.
	 *
	 * <p>
	 * On entry, the legs of both operands are on the stack, the left operand beneath the right.
	 * On exit, they have been replaced by the legs of the result, in the same leg order.
	 *
	 * @param gen the code generator
	 * @param type the type of each operand, including the result
	 * @param mv the visitor for the {@link JitCompiledPassage#run(int) run} method
	 */
	default void generateMpIntBinOp(JitCodeGenerator gen, MpIntJitType type,
			MethodVisitor mv) {
		/*
		 * We need temp locals to get things in order. Read in right operand, do the op as we pop
		 * each left op. Then push it all back.
		 *
		 * No masking of the result is required, since both operands should already be masked, and
		 * the bitwise op cannot generate bits of more significance.
		 */
		// [lleg1,...,llegN,rleg1,...,rlegN] (N is least-significant leg)
		int legCount = type.legsAlloc();
		int firstIndex = gen.getAllocationModel().nextFreeLocal();
		Label start = new Label();
		Label end = new Label();
		mv.visitLabel(start);
		// Pop the right operand into the temporaries
		for (int i = 0; i < legCount; i++) {
			mv.visitVarInsn(ISTORE, firstIndex + i);
			// NOTE: More significant legs have higher indices (reverse of stack)
		}
		// Combine each left leg, as it surfaces, with the matching right leg
		for (int i = 0; i < legCount; i++) {
			// [lleg1,...,llegN:INT]
			mv.visitVarInsn(ILOAD, firstIndex + i);
			// [lleg1,...,llegN:INT,rlegN:INT]
			mv.visitInsn(intOpcode());
			// [lleg1,...,olegN:INT]
			mv.visitVarInsn(ISTORE, firstIndex + i);
			// [lleg1,...]
		}
		// Push it all back, in reverse order
		for (int i = 0; i < legCount; i++) {
			mv.visitVarInsn(ILOAD, firstIndex + legCount - i - 1);
		}
		mv.visitLabel(end);
		/*
		 * Declare the temporaries only now: visitLocalVariable requires that both labels have
		 * already been visited. Declaring them before 'end' is placed would record the range
		 * using an unresolved label.
		 */
		for (int i = 0; i < legCount; i++) {
			mv.visitLocalVariable("result" + i, Type.getDescriptor(int.class), null, start, end,
				firstIndex + i);
		}
	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * This implementation zero-extends the left operand, as needed, so that it is uniform with the
	 * right operand.
	 */
	@Override
	default JitType afterLeft(JitCodeGenerator gen, T op, JitType lType, JitType rType,
			MethodVisitor rv) {
		return TypeConversions.forceUniformZExt(lType, rType, rv);
	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * This implementation reduces the need to just the JVM opcode. We simply ensure both operands
	 * have the same size and JVM type, select and emit the correct opcode, and return the type of
	 * the result. Multi-precision operands of equal size are delegated to
	 * {@link #generateMpIntBinOp(JitCodeGenerator, MpIntJitType, MethodVisitor)}; those of
	 * differing sizes are not yet implemented.
	 */
	@Override
	default JitType generateBinOpRunCode(JitCodeGenerator gen, T op, JitBlock block, JitType lType,
			JitType rType, MethodVisitor rv) {
		// The left operand was already made uniform in afterLeft; now do the same for the right
		rType = TypeConversions.forceUniformZExt(rType, lType, rv);
		switch (rType) {
			case IntJitType t -> rv.visitInsn(intOpcode());
			case LongJitType t -> rv.visitInsn(longOpcode());
			case MpIntJitType t when t.size() == lType.size() -> generateMpIntBinOp(gen, t, rv);
			case MpIntJitType t -> TODO("MpInt of differing sizes");
			default -> throw new AssertionError();
		}
		return lType;
	}
}

View file

@ -0,0 +1,44 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.IAND;
import static org.objectweb.asm.Opcodes.LAND;
import ghidra.pcode.emu.jit.op.JitBoolAndOp;
import ghidra.pcode.opbehavior.OpBehaviorBoolAnd;
/**
* The generator for a {@link JitBoolAndOp bool_and}.
*
* <p>
* All code emission is inherited from {@link BitwiseBinOpGen}. This enum only selects the JVM
* opcodes.
*
* @implNote It is the responsibility of the slaspec author to ensure boolean values are 0 or 1.
* This allows us to use bitwise logic instead of having to check for any non-zero value,
* just like {@link OpBehaviorBoolAnd}. Thus, this is identical to {@link IntAndOpGen}.
*/
public enum BoolAndOpGen implements BitwiseBinOpGen<JitBoolAndOp> {
/** The generator singleton */
GEN;
/**
* {@inheritDoc}
*
* @return {@code iand}
*/
@Override
public int intOpcode() {
return IAND;
}
/**
* {@inheritDoc}
*
* @return {@code land}
*/
@Override
public int longOpcode() {
return LAND;
}
}

View file

@ -0,0 +1,59 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.IXOR;
import static org.objectweb.asm.Opcodes.LXOR;
import org.objectweb.asm.MethodVisitor;
import ghidra.lifecycle.Unfinished;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitBoolNegateOp;
import ghidra.pcode.opbehavior.OpBehaviorBoolNegate;
/**
 * The generator for a {@link JitBoolNegateOp bool_negate}.
 *
 * <p>
 * The negation is emitted as an exclusive-or of the operand with the constant 1. The
 * multi-precision case is not yet implemented.
 *
 * @implNote It is the responsibility of the slaspec author to ensure boolean values are 0 or 1.
 *           This allows us to use bitwise logic instead of having to check for any non-zero value,
 *           just like {@link OpBehaviorBoolNegate}.
 */
public enum BoolNegateOpGen implements UnOpGen<JitBoolNegateOp> {
	/** The generator singleton */
	GEN;

	/**
	 * Emit code to push the constant one and exclusive-or it with the value on the stack
	 *
	 * @param rv the method visitor
	 * @param one the constant 1, boxed in the JVM type of the operand
	 * @param xorOpcode the exclusive-or opcode for that JVM type
	 */
	private static void emitFlipLowBit(MethodVisitor rv, Object one, int xorOpcode) {
		rv.visitLdcInsn(one);
		rv.visitInsn(xorOpcode);
	}

	@Override
	public JitType generateUnOpRunCode(JitCodeGenerator gen, JitBoolNegateOp op, JitBlock block,
			JitType uType, MethodVisitor rv) {
		switch (uType) {
			case IntJitType intType -> emitFlipLowBit(rv, 1, IXOR);
			case LongJitType longType -> emitFlipLowBit(rv, 1L, LXOR);
			case MpIntJitType mpType -> Unfinished.TODO("MpInt");
			default -> throw new AssertionError();
		}
		// The result has the same type as the operand
		return uType;
	}
}

View file

@ -0,0 +1,44 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.IOR;
import static org.objectweb.asm.Opcodes.LOR;
import ghidra.pcode.emu.jit.op.JitBoolOrOp;
import ghidra.pcode.opbehavior.OpBehaviorBoolOr;
/**
* The generator for a {@link JitBoolOrOp bool_or}.
*
* <p>
* All code emission is inherited from {@link BitwiseBinOpGen}. This enum only selects the JVM
* opcodes.
*
* @implNote It is the responsibility of the slaspec author to ensure boolean values are 0 or 1.
* This allows us to use bitwise logic instead of having to check for any non-zero value,
* just like {@link OpBehaviorBoolOr}. Thus, this is identical to {@link IntOrOpGen}.
*/
public enum BoolOrOpGen implements BitwiseBinOpGen<JitBoolOrOp> {
/** The generator singleton */
GEN;
/**
* {@inheritDoc}
*
* @return {@code ior}
*/
@Override
public int intOpcode() {
return IOR;
}
/**
* {@inheritDoc}
*
* @return {@code lor}
*/
@Override
public int longOpcode() {
return LOR;
}
}

View file

@ -0,0 +1,44 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.IXOR;
import static org.objectweb.asm.Opcodes.LXOR;
import ghidra.pcode.emu.jit.op.JitBoolXorOp;
import ghidra.pcode.opbehavior.OpBehaviorBoolXor;
/**
* The generator for a {@link JitBoolXorOp bool_xor}.
*
* <p>
* All code emission is inherited from {@link BitwiseBinOpGen}. This enum only selects the JVM
* opcodes.
*
* @implNote It is the responsibility of the slaspec author to ensure boolean values are 0 or 1.
* This allows us to use bitwise logic instead of having to check for any non-zero value,
* just like {@link OpBehaviorBoolXor}. Thus, this is identical to {@link IntXorOpGen}.
*/
public enum BoolXorOpGen implements BitwiseBinOpGen<JitBoolXorOp> {
/** The generator singleton */
GEN;
/**
* {@inheritDoc}
*
* @return {@code ixor}
*/
@Override
public int intOpcode() {
return IXOR;
}
/**
* {@inheritDoc}
*
* @return {@code lxor}
*/
@Override
public int longOpcode() {
return LXOR;
}
}

View file

@ -0,0 +1,57 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.ACONST_NULL;
import static org.objectweb.asm.Opcodes.ARETURN;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.JitPcodeThread;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.LongJitType;
import ghidra.pcode.emu.jit.gen.*;
import ghidra.pcode.emu.jit.gen.type.TypeConversions;
import ghidra.pcode.emu.jit.op.JitBranchIndOp;
/**
 * The generator for a {@link JitBranchIndOp branchind}.
 *
 * <p>
 * The emitted code reads the target operand, converts it to a long, and retires it to the program
 * counter together with the current flow context and live variables. It then returns null, so
 * that the {@link JitPcodeThread thread} knows to loop to the <b>Fetch</b> step for the new
 * counter.
 */
public enum BranchIndOpGen implements OpGen<JitBranchIndOp> {
	/** The generator singleton */
	GEN;

	/**
	 * Emit code to push the branch target onto the stack as a long
	 *
	 * @param gen the code generator
	 * @param op the indirect branch whose target is read
	 * @param rv the method visitor
	 */
	private static void generateTargetAsLong(JitCodeGenerator gen, JitBranchIndOp op,
			MethodVisitor rv) {
		// [...]
		JitType typeAsRead = gen.generateValReadCode(op.target(), op.targetType());
		// [...,target:?]
		TypeConversions.generateToLong(typeAsRead, LongJitType.I8, rv);
		// [...,target:LONG]
	}

	@Override
	public void generateRunCode(JitCodeGenerator gen, JitBranchIndOp op, JitBlock block,
			MethodVisitor rv) {
		gen.generatePassageExit(block, () -> generateTargetAsLong(gen, op, rv),
			op.branch().flowCtx(), rv);
		// A null entry point tells the caller there is no chained passage to run
		rv.visitInsn(ACONST_NULL);
		rv.visitInsn(ARETURN);
	}
}

View file

@ -0,0 +1,97 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.pcode.emu.jit.gen.GenConsts.MDESC_JIT_COMPILED_PASSAGE__GET_CHAINED;
import static ghidra.pcode.emu.jit.gen.GenConsts.NAME_JIT_COMPILED_PASSAGE;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.Label;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.JitPassage.*;
import ghidra.pcode.emu.jit.JitPcodeThread;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.gen.*;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.var.VarGen;
import ghidra.pcode.emu.jit.op.JitBranchOp;
/**
 * The generator for a {@link JitBranchOp branch}.
 *
 * <p>
 * For an {@link IntBranch}, the target block is within the passage: the emitted code performs the
 * block transition and then jumps to the target block's label with a {@code goto}.
 *
 * <p>
 * For an {@link ExtBranch}, the target is outside the passage: the emitted code retires the target
 * to the program counter, along with the target context and live variables, then requests the
 * chained entry point from the target's exit slot and returns it. The {@link JitPcodeThread
 * thread} can then immediately execute the chained passage entry.
 */
public enum BranchOpGen implements OpGen<JitBranchOp> {
	/** The generator singleton */
	GEN;

	/**
	 * Emit code that exits via a direct branch
	 *
	 * <p>
	 * This emits the {@link ExtBranch} record case. It is also used by {@link CBranchOpGen} for
	 * the taken case of a conditional branch leaving the passage.
	 *
	 * @param gen the code generator
	 * @param exit the target causing us to exit
	 * @param block the block containing the op
	 * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
	 */
	static void generateExtBranchCode(JitCodeGenerator gen, AddrCtx exit, JitBlock block,
			MethodVisitor rv) {
		// Request the slot before emitting anything, as the original ordering does
		FieldForExitSlot exitSlotField = gen.requestFieldForExitSlot(exit);
		// The exit routine is handed a callback that pushes [...,target:LONG]
		gen.generatePassageExit(block, () -> rv.visitLdcInsn(exit.address.getOffset()),
			exit.rvCtx, rv);
		// []
		exitSlotField.generateLoadCode(gen, rv);
		// [slot]
		rv.visitMethodInsn(INVOKESTATIC, NAME_JIT_COMPILED_PASSAGE, "getChained",
			MDESC_JIT_COMPILED_PASSAGE__GET_CHAINED, true);
		// [chained:ENTRY]
		rv.visitInsn(ARETURN);
	}

	/**
	 * Emit code that branches to another block of the same passage
	 *
	 * @param gen the code generator
	 * @param branch the internal branch record
	 * @param block the block containing the op
	 * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
	 */
	private static void generateIntBranchCode(JitCodeGenerator gen, IntBranch branch,
			JitBlock block, MethodVisitor rv) {
		JitBlock destination = block.getTargetBlock(branch);
		Label destinationLabel = gen.labelForBlock(destination);
		VarGen.computeBlockTransition(gen, block, destination).generate(rv);
		rv.visitJumpInsn(GOTO, destinationLabel);
	}

	@Override
	public void generateRunCode(JitCodeGenerator gen, JitBranchOp op, JitBlock block,
			MethodVisitor rv) {
		switch (op.branch()) {
			case IntBranch internal -> generateIntBranchCode(gen, internal, block, rv);
			case ExtBranch external -> generateExtBranchCode(gen, external.to(), block, rv);
			default -> throw new AssertionError("Branch type confusion");
		}
	}
}

View file

@ -0,0 +1,85 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.Label;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.JitPassage.ExtBranch;
import ghidra.pcode.emu.jit.JitPassage.IntBranch;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.gen.*;
import ghidra.pcode.emu.jit.gen.type.TypeConversions;
import ghidra.pcode.emu.jit.gen.var.VarGen;
import ghidra.pcode.emu.jit.gen.var.VarGen.BlockTransition;
import ghidra.pcode.emu.jit.op.JitCBranchOp;
/**
 * The generator for a {@link JitCBranchOp cbranch}.
 *
 * <p>
 * In every case, code is first emitted to load the condition onto the JVM stack and reduce it to
 * a boolean.
 *
 * <p>
 * For an {@link IntBranch}, the target block's label is looked up and the block transition is
 * computed. If a transition is needed, an {@code ifeq} is emitted that skips over the transition
 * and the {@code goto} it guards, landing on the fall-through case. If no transition is needed, a
 * single {@code ifne} to the target label suffices.
 *
 * <p>
 * For an {@link ExtBranch}, the same exit code as {@link BranchOpGen} is emitted, guarded by an
 * {@code ifeq} that skips to the fall-through case.
 */
public enum CBranchOpGen implements OpGen<JitCBranchOp> {
	/** The generator singleton */
	GEN;

	/**
	 * Emit the conditional jump to a block within the same passage
	 *
	 * <p>
	 * The boolean condition is expected on top of the stack.
	 *
	 * @param gen the code generator
	 * @param branch the internal branch record
	 * @param block the block containing the op
	 * @param rv the method visitor
	 */
	private static void generateIntCase(JitCodeGenerator gen, IntBranch branch, JitBlock block,
			MethodVisitor rv) {
		JitBlock destination = block.getTargetBlock(branch);
		Label destinationLabel = gen.labelForBlock(destination);
		BlockTransition transition = VarGen.computeBlockTransition(gen, block, destination);
		if (!transition.needed()) {
			// Nothing to do on the way: jump straight to the target when the condition holds
			rv.visitJumpInsn(IFNE, destinationLabel);
			return;
		}
		// The transition must only run when the branch is taken, so guard it
		Label fallThrough = new Label();
		rv.visitJumpInsn(IFEQ, fallThrough);
		transition.generate(rv);
		rv.visitJumpInsn(GOTO, destinationLabel);
		rv.visitLabel(fallThrough);
	}

	/**
	 * Emit the conditional exit from the passage
	 *
	 * <p>
	 * The boolean condition is expected on top of the stack.
	 *
	 * @param gen the code generator
	 * @param branch the external branch record
	 * @param block the block containing the op
	 * @param rv the method visitor
	 */
	private static void generateExtCase(JitCodeGenerator gen, ExtBranch branch, JitBlock block,
			MethodVisitor rv) {
		Label fallThrough = new Label();
		rv.visitJumpInsn(IFEQ, fallThrough);
		BranchOpGen.generateExtBranchCode(gen, branch.to(), block, rv);
		rv.visitLabel(fallThrough);
	}

	@Override
	public void generateRunCode(JitCodeGenerator gen, JitCBranchOp op, JitBlock block,
			MethodVisitor rv) {
		JitType conditionType = gen.generateValReadCode(op.cond(), op.condType());
		TypeConversions.generateIntToBool(conditionType, rv);
		switch (op.branch()) {
			case IntBranch internal -> generateIntCase(gen, internal, block, rv);
			case ExtBranch external -> generateExtCase(gen, external, block, rv);
			default -> throw new AssertionError("Branch type confusion");
		}
	}
}

View file

@ -0,0 +1,60 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.pcode.emu.jit.gen.GenConsts.MDESC_SLEIGH_LINK_EXCEPTION__$INIT;
import static ghidra.pcode.emu.jit.gen.GenConsts.NAME_SLEIGH_LINK_EXCEPTION;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitCallOtherMissingOp;
import ghidra.pcode.exec.SleighLinkException;
/**
 * The generator for a {@link JitCallOtherMissingOp callother-missing}.
 *
 * <p>
 * The emitted code first exits the passage, which retires the program counter, context, and live
 * variables, and then constructs and throws a {@link SleighLinkException} carrying the error
 * message the generator provides for the op.
 */
public enum CallOtherMissingOpGen implements OpGen<JitCallOtherMissingOp> {
	/** The generator singleton */
	GEN;

	/**
	 * Emit code to construct a {@link SleighLinkException} with the given message and throw it
	 * 
	 * @param message the message, embedded as a constant in the generated code
	 * @param rv the visitor for the run method
	 */
	private static void generateThrowLinkException(String message, MethodVisitor rv) {
		// [...]
		rv.visitTypeInsn(NEW, NAME_SLEIGH_LINK_EXCEPTION);
		// [...,error:NEW]
		rv.visitInsn(DUP);
		// [...,error:NEW,error:NEW]
		rv.visitLdcInsn(message);
		// [...,error:NEW,error:NEW,message]
		rv.visitMethodInsn(INVOKESPECIAL, NAME_SLEIGH_LINK_EXCEPTION, "<init>",
			MDESC_SLEIGH_LINK_EXCEPTION__$INIT, false);
		// [...,error]
		rv.visitInsn(ATHROW);
	}

	@Override
	public void generateRunCode(JitCodeGenerator gen, JitCallOtherMissingOp op, JitBlock block,
			MethodVisitor rv) {
		// The program counter to retire is the offset of the address of the offending op
		gen.generatePassageExit(block,
			() -> rv.visitLdcInsn(gen.getAddressForOp(op.op()).getOffset()),
			gen.getExitContext(op.op()), rv);

		generateThrowLinkException(gen.getErrorMessage(op.op()), rv);
	}
}

View file

@ -0,0 +1,249 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.pcode.emu.jit.gen.GenConsts.*;
import static org.objectweb.asm.Opcodes.*;
import java.lang.reflect.Method;
import java.lang.reflect.Parameter;
import org.objectweb.asm.*;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorState;
import ghidra.pcode.emu.jit.JitPassage.DecodedPcodeOp;
import ghidra.pcode.emu.jit.analysis.*;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.gen.*;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.type.TypeConversions;
import ghidra.pcode.emu.jit.gen.var.VarGen;
import ghidra.pcode.emu.jit.gen.var.VarGen.BlockTransition;
import ghidra.pcode.emu.jit.op.JitCallOtherDefOp;
import ghidra.pcode.emu.jit.op.JitCallOtherOpIf;
import ghidra.pcode.emu.jit.var.JitVal;
import ghidra.pcode.exec.PcodeUseropLibrary.PcodeUseropDefinition;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.Varnode;
/**
 * The generator for a {@link JitCallOtherOpIf callother}.
 *
 * <p>
 * This checks whether Direct invocation is possible. If so, it emits code using
 * {@link #generateRunCodeUsingDirectStrategy(JitCodeGenerator, JitCallOtherOpIf, JitBlock, MethodVisitor)}.
 * If not, it emits code using
 * {@link #generateRunCodeUsingRetirementStrategy(JitCodeGenerator, PcodeOp, JitBlock, PcodeUseropDefinition, MethodVisitor)}.
 * Direct invocation is possible when the userop is {@link PcodeUseropDefinition#isFunctional()
 * functional} and all of its parameters and return type have a supported primitive type.
 * ({@code char} is not supported.) Regarding the invocation strategies, see
 * {@link JitDataFlowUseropLibrary} and note that the Inline strategy is already handled by this
 * point.
 *
 * <p>
 * For the Standard strategy (implemented by the "retirement strategy" method), we emit code to
 * retire the program counter, decode context, and all live variables. We then request a field to
 * hold the userop and emit code to load it. We then emit code to prepare its arguments and place
 * them on the stack, namely the output varnode and an array for the input varnodes. We request a
 * field for each varnode and emit code to load them as needed. For the array, we emit code to
 * construct and fill it. We then emit code to invoke
 * {@link JitCompiledPassage#invokeUserop(PcodeUseropDefinition, Varnode, Varnode[])}. The userop
 * definition handles retrieving all of its inputs and writing the output, directly to the
 * {@link JitBytesPcodeExecutorState state}. Thus, we now need only to emit code to re-birth all the
 * live variables. If any errors occur, execution is interrupted as usual, and our state is
 * consistent.
 *
 * <p>
 * For the Direct strategy, we wish to avoid retirement and re-birth, so we request an
 * {@link ExceptionHandler}. We request a field for the userop, just as in the Standard strategy,
 * but we emit code to invoke {@link PcodeUseropDefinition#getDefiningLibrary()} instead. We can use
 * {@link PcodeUseropDefinition#getJavaMethod()} <em>at generation time</em> to reflect its Java
 * definition. We then emit code to cast the library and load each of the operands onto the JVM
 * stack. We then emit the invocation of the Java method, guarded by the exception handler. We then
 * have to consider whether the userop has an output operand and whether its definition returns a
 * value. If both are true, we emit code to write the result. If neither is true, we're done. If a
 * result is returned, but no output operand is provided, we <em>must</em> still emit a {@link #POP
 * pop}.
 */
public enum CallOtherOpGen implements OpGen<JitCallOtherOpIf> {
/** The generator singleton */
GEN;
/**
 * Emit code to implement the Standard strategy (see the class documentation)
 *
 * <p>
 * The emitted sequence is: the block transition (retiring variables), retirement of the program
 * counter and context, the call to {@code invokeUserop} on the compiled passage, and finally the
 * inverse of the block transition.
 *
 * @param gen the code generator
 * @param op the p-code op
 * @param block the block containing the op
 * @param userop the userop definition, wrapped by the {@link JitDataFlowUseropLibrary}
 * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
 */
public static void generateRunCodeUsingRetirementStrategy(JitCodeGenerator gen, PcodeOp op,
JitBlock block, PcodeUseropDefinition<?> userop, MethodVisitor rv) {
/*
 * This is about the simplest (laziest) approach we could take for the moment, but it should
 * suffice, depending on the frequency of CALLOTHER executions. We immediately retire all
 * variables, then invoke the userop as it would be by the p-code interpreter. It can access
 * its variables in the usual fashion. Although not ideal, it can also feed the executor
 * (interpreter) ops to execute --- they won't be jitted here. Then, we liven the variables
 * back.
 *
 * NOTE: The output variable should be "alive", so we need not store it into a local. It'll
 * be made alive in the return block transition.
 *
 * NOTE: Block retirement and re-birth cannot be avoided in this strategy, because the
 * parameters are read from the state instead of from the local variables. Avoiding them
 * requires direct invocation; see generateRunCodeUsingDirectStrategy.
 */
BlockTransition transition = VarGen.computeBlockTransition(gen, block, null);
transition.generate(rv);
// The program counter to retire is the offset of the address of this op
gen.generateRetirePcCtx(() -> {
rv.visitLdcInsn(gen.getAddressForOp(op).getOffset());
}, gen.getExitContext(op), rv);
// []
rv.visitVarInsn(ALOAD, 0);
// [this]
gen.requestFieldForUserop(userop).generateLoadCode(gen, rv);
// [this,userop]
// A missing output varnode is passed as null
if (op.getOutput() == null) {
rv.visitInsn(ACONST_NULL);
}
else {
gen.requestStaticFieldForVarnode(op.getOutput()).generateLoadCode(gen, rv);
}
// [this,userop,outVn]
// Input 0 is not passed along (presumably it is the userop index -- confirm), hence the -1
rv.visitLdcInsn(op.getNumInputs() - 1);
rv.visitTypeInsn(ANEWARRAY, NAME_VARNODE);
// [this,userop,outVn,inVns:ARR]
for (int i = 1; i < op.getNumInputs(); i++) {
// [this,userop,outVn,inVns:ARR]
rv.visitInsn(DUP);
// [this,userop,outVn,inVns:ARR,inVns:ARR]
rv.visitLdcInsn(i - 1);
// [this,userop,outVn,inVns:ARR,inVns:ARR,index]
// Yes, including constants :/
Varnode input = op.getInput(i);
gen.requestStaticFieldForVarnode(input).generateLoadCode(gen, rv);
// [this,userop,outVn,inVns:ARR,inVns:ARR,index,inVn]
rv.visitInsn(AASTORE);
// [this,userop,outVn,inVns:ARR]
}
// [this,userop,outVn,inVns:ARR]
rv.visitMethodInsn(INVOKEINTERFACE, NAME_JIT_COMPILED_PASSAGE, "invokeUserop",
MDESC_JIT_COMPILED_PASSAGE__INVOKE_USEROP, true);
// Re-birth the variables retired by the transition above
transition.generateInv(rv);
}
/**
 * Emit code to implement the Direct strategy (see the class documentation)
 *
 * <p>
 * The Java method backing the userop is reflected at generation time. The emitted code loads the
 * defining library, casts it to the method's declaring class, loads and converts each argument,
 * and invokes the method. Only the invocation itself lies within the range guarded by the
 * exception handler.
 *
 * @param gen the code generator
 * @param op the p-code op use-def node
 * @param block the block containing the op
 * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
 */
public static void generateRunCodeUsingDirectStrategy(JitCodeGenerator gen,
JitCallOtherOpIf op, JitBlock block, MethodVisitor rv) {
FieldForUserop useropField = gen.requestFieldForUserop(op.userop());
/*
 * NOTE: It doesn't matter if there are live variables. We still have to "retire" the
 * program counter and contextreg if the userop throws an exception.
 */
final Label tryStart = new Label();
final Label tryEnd = new Label();
// Register the guarded range up front; the labels themselves are visited further below
rv.visitTryCatchBlock(tryStart, tryEnd,
gen.requestExceptionHandler((DecodedPcodeOp) op.op(), block).label(), NAME_THROWABLE);
// []
useropField.generateLoadCode(gen, rv);
// [userop]
rv.visitMethodInsn(INVOKEINTERFACE, NAME_PCODE_USEROP_DEFINITION, "getDefiningLibrary",
MDESC_PCODE_USEROP_DEFINITION__GET_DEFINING_LIBRARY, true);
// [library:PcodeUseropLibrary]
Method method = op.userop().getJavaMethod();
String owningLibName = Type.getInternalName(method.getDeclaringClass());
rv.visitTypeInsn(CHECKCAST, owningLibName);
// [library:OWNING_TYPE]
Parameter[] parameters = method.getParameters();
// Arguments are matched to the Java parameters by position
for (int i = 0; i < op.args().size(); i++) {
JitVal arg = op.args().get(i);
Parameter p = parameters[i];
JitType type = gen.generateValReadCode(arg, JitTypeBehavior.ANY);
// boolean parameters go through generateIntToBool rather than the general conversion
if (p.getType() == boolean.class) {
TypeConversions.generateIntToBool(type, rv);
}
else {
TypeConversions.generate(gen, type, JitType.forJavaType(p.getType()), rv);
}
}
// [library,params...]
rv.visitLabel(tryStart);
rv.visitMethodInsn(INVOKEVIRTUAL, owningLibName, method.getName(),
Type.getMethodDescriptor(method), false);
// [return?]
rv.visitLabel(tryEnd);
if (op instanceof JitCallOtherDefOp defOp) {
// There is an output operand: write the returned value to it
gen.generateVarWriteCode(defOp.out(), JitType.forJavaType(method.getReturnType()));
}
else if (method.getReturnType() != void.class) {
// No output operand, but a value was returned: it must still be removed from the stack
TypeConversions.generatePop(JitType.forJavaType(method.getReturnType()), rv);
}
}
/**
 * Check if the Direct invocation strategy is applicable (see class documentation)
 *
 * <p>
 * As implemented, this requires that the userop is functional, that none of the op's input types
 * is {@link JitTypeBehavior#ANY}, and, if the op has an output, that its type is not
 * {@link JitTypeBehavior#ANY} either. NOTE(review): presumably {@code ANY} marks an operand
 * whose Java type is not one of the supported primitives; confirm against
 * {@link JitDataFlowUseropLibrary}.
 *
 * @param op the p-code op use-def node
 * @return true if applicable
 */
public static boolean canDoDirectInvocation(JitCallOtherOpIf op) {
if (!op.userop().isFunctional()) {
return false;
}
for (JitTypeBehavior type : op.inputTypes()) {
if (type == JitTypeBehavior.ANY) {
return false;
}
}
if (op instanceof JitCallOtherDefOp defOp) {
if (defOp.type() == JitTypeBehavior.ANY) {
return false;
}
}
return true;
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This selects the Direct strategy when {@link #canDoDirectInvocation(JitCallOtherOpIf)} permits
 * it, and the Standard (retirement) strategy otherwise.
 */
@Override
public void generateRunCode(JitCodeGenerator gen, JitCallOtherOpIf op, JitBlock block,
MethodVisitor rv) {
if (canDoDirectInvocation(op)) {
generateRunCodeUsingDirectStrategy(gen, op, block, rv);
}
else {
generateRunCodeUsingRetirementStrategy(gen, op.op(), block, op.userop(), rv);
}
}
}

View file

@ -0,0 +1,43 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitVarScopeModel;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitCatenateOp;
/**
 * The generator for a {@link JitCatenateOp catenate}.
 *
 * <p>
 * No code is ever emitted for this op. It is a synthetic op, so this generator ought never to be
 * invoked, though things may change. Should it be invoked anyway, it fails immediately with an
 * {@link AssertionError} instead of silently emitting nothing. The argument here is similar to
 * that of {@link PhiOpGen}.
 *
 * @see JitVarScopeModel
 */
public enum CatenateOpGen implements OpGen<JitCatenateOp> {
	/** The generator singleton */
	GEN;

	/**
	 * Always fails, because a synthetic op has no run code to generate
	 * 
	 * @param gen the code generator (unused)
	 * @param op the op (unused)
	 * @param block the block containing the op (unused)
	 * @param rv the visitor for the run method (unused)
	 * @throws AssertionError always
	 */
	@Override
	public void generateRunCode(JitCodeGenerator gen, JitCatenateOp op, JitBlock block,
			MethodVisitor rv) {
		throw new AssertionError("Cannot generate synthetic op");
	}
}

View file

@ -0,0 +1,104 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.GOTO;
import org.objectweb.asm.Label;
import org.objectweb.asm.MethodVisitor;
import ghidra.lifecycle.Unfinished;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.gen.type.TypeConversions;
import ghidra.pcode.emu.jit.op.JitFloatTestOp;
/**
 * An extension for float comparison operators
 *
 * <p>
 * An implementor supplies only three JVM opcodes; the bytecode template itself is provided by
 * {@link #generateBinOpRunCode(JitCodeGenerator, JitFloatTestOp, JitBlock, JitType, JitType, MethodVisitor)}.
 *
 * @param <T> the class of p-code op node in the use-def graph
 */
public interface CompareFloatOpGen<T extends JitFloatTestOp> extends BinOpGen<T> {
	/**
	 * The JVM opcode that compares two {@code float} operands on the stack.
	 *
	 * @return the opcode
	 */
	int fcmpOpcode();

	/**
	 * The JVM opcode that compares two {@code double} operands on the stack.
	 *
	 * @return the opcode
	 */
	int dcmpOpcode();

	/**
	 * The JVM opcode of the conditional jump applied to the comparison result.
	 *
	 * <p>
	 * The jump should be taken exactly when the p-code operator evaluates to true.
	 *
	 * @return the opcode
	 */
	int condOpcode();

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * The implementation needs just three opcodes from the implementor: 1) the one comparing JVM
	 * {@code float}s, 2) the one comparing JVM {@code double}s, and 3) the conditional jump on the
	 * result of that comparison. The comparison opcode comes first. It should result in an int
	 * &lt;0, ==0, or &gt;0 on the stack, depending on whether L&lt;R, L==R, or L&gt;R,
	 * respectively. The conditional jump follows. Labels are laid out in an if-else pattern so that
	 * either a 1 (true) or a 0 (false) value of the appropriate p-code type is left on the stack.
	 *
	 * @implNote This template is consistently generated by the Java compiler (Adoptium OpenJDK 21),
	 *           despite there being possible branchless implementations. That could indicate one of
	 *           a few things: 1) the HotSpot JIT knows how to optimize this pattern, perhaps using
	 *           branchless native instructions, 2) branchless optimizations don't yield the speedup
	 *           here we might expect, or 3) they didn't care to optimize. <b>TODO</b>: Investigate
	 *           in case it's thing 3. We might like to see if branchless JVM bytecodes can improve
	 *           performance.
	 */
	@Override
	default JitType generateBinOpRunCode(JitCodeGenerator gen, T op, JitBlock block, JitType lType,
			JitType rType, MethodVisitor rv) {
		assert rType == lType;
		JitType outType = op.type().resolve(gen.getTypeModel().typeOf(op.out()));
		Label whenTrue = new Label();
		Label end = new Label();

		// [...,left,right]
		switch (rType) {
			case FloatJitType f -> {
				rv.visitInsn(fcmpOpcode());
			}
			case DoubleJitType d -> {
				rv.visitInsn(dcmpOpcode());
			}
			case MpFloatJitType mp -> {
				Unfinished.TODO("MpFloat");
			}
			default -> throw new AssertionError();
		}
		// [...,cmp:INT]
		rv.visitJumpInsn(condOpcode(), whenTrue);

		// false case
		TypeConversions.generateLdcFalse(outType, rv);
		rv.visitJumpInsn(GOTO, end);

		// true case
		rv.visitLabel(whenTrue);
		TypeConversions.generateLdcTrue(outType, rv);

		rv.visitLabel(end);
		return outType;
	}
}

View file

@ -0,0 +1,143 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.pcode.emu.jit.gen.GenConsts.*;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.Label;
import org.objectweb.asm.MethodVisitor;
import ghidra.lifecycle.Unfinished;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.type.TypeConversions;
import ghidra.pcode.emu.jit.op.JitIntTestOp;
/**
* An extension for integer comparison operators
*
* @param <T> the class of p-code op node in the use-def graph
*/
public interface CompareIntBinOpGen<T extends JitIntTestOp> extends BinOpGen<T> {
/**
* Whether the comparison of p-code integers is signed
*
* <p>
* If the comparison is unsigned, we will emit invocations of
* {@link Integer#compareUnsigned(int, int)} or {@link Long#compareUnsigned(long, long)},
* followed by a conditional jump corresponding to this p-code comparison op. If the comparison
* is signed, and the type fits in a JVM int, we emit the conditional jump of ints directly
* implementing this p-code comparison op. If the type requires a JVM long, we first emit an
* {@link #LCMP lcmp}, followed by the same opcode that would be used in the unsigned case.
*
* @return true if signed, false if not
*/
boolean isSigned();
/**
* The JVM opcode to perform the conditional jump for signed integers.
*
* @return the opcode
*/
int icmpOpcode();
/**
* Emits bytecode for the JVM int case
*
* @param lblTrue the target bytecode label for the true case
* @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
*/
default void generateIntJump(Label lblTrue, MethodVisitor rv) {
if (isSigned()) {
rv.visitJumpInsn(icmpOpcode(), lblTrue);
}
else {
rv.visitMethodInsn(INVOKESTATIC, NAME_INTEGER, "compareUnsigned",
MDESC_INTEGER__COMPARE_UNSIGNED, false);
rv.visitJumpInsn(ifOpcode(), lblTrue);
}
}
/**
* Emits bytecode for the JVM long case
*
* @param lblTrue the target bytecode label for the true case
* @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
*/
default void generateLongJump(Label lblTrue, MethodVisitor rv) {
if (isSigned()) {
rv.visitInsn(LCMP);
}
else {
rv.visitMethodInsn(INVOKESTATIC, NAME_LONG, "compareUnsigned",
MDESC_LONG__COMPARE_UNSIGNED, false);
}
rv.visitJumpInsn(ifOpcode(), lblTrue);
}
/**
* The JVM opcode to perform the conditional jump for unsigned or long integers.
*
* This is emitted <em>after</em> the application of {@link #LCMP} or the comparator method.
*
* @return the opcode
*/
int ifOpcode();
@Override
default JitType afterLeft(JitCodeGenerator gen, T op, JitType lType, JitType rType,
MethodVisitor rv) {
return TypeConversions.forceUniformZExt(lType, rType, rv);
}
/**
* {@inheritDoc}
*
* <p>
* This reduces the implementation to a flag for signedness, the opcode for the conditional jump
* on integer operands, and the opcode for a conditional jump after the comparison of longs. The
* JVM, does not provide conditional jumps on long operands, so we must first compare the longs,
* pushing an int onto the stack, and then conditionally jumping on that. This pattern is
* similar for unsigned comparison of integers.
*/
@Override
default JitType generateBinOpRunCode(JitCodeGenerator gen, T op, JitBlock block, JitType lType,
JitType rType, MethodVisitor rv) {
Label lblTrue = new Label();
Label lblDone = new Label();
rType = TypeConversions.forceUniformZExt(rType, lType, rv);
switch (rType) {
case IntJitType t -> generateIntJump(lblTrue, rv);
case LongJitType t -> generateLongJump(lblTrue, rv);
case MpIntJitType t -> Unfinished.TODO("MpInt");
default -> throw new AssertionError();
}
JitType outType = op.type().resolve(gen.getTypeModel().typeOf(op.out()));
TypeConversions.generateLdcFalse(outType, rv);
rv.visitJumpInsn(GOTO, lblDone);
rv.visitLabel(lblTrue);
TypeConversions.generateLdcTrue(outType, rv);
rv.visitLabel(lblDone);
return outType;
}
}

View file

@ -0,0 +1,42 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.op.JitCopyOp;
/**
 * The generator for a {@link JitCopyOp copy}.
 *
 * <p>
 * This uses the unary operator generator and emits nothing extra. The unary generator template will
 * emit code to load the input operand, this emits nothing, and then the template emits code to
 * write the output operand, effecting a simple copy.
 */
public enum CopyOpGen implements UnOpGen<JitCopyOp> {
/** The generator singleton */
GEN;
/**
 * {@inheritDoc}
 *
 * <p>
 * This implementation emits no bytecode at all: the visitor is never touched, and the type of
 * the input operand is returned unchanged as the type of the result.
 */
@Override
public JitType generateUnOpRunCode(JitCodeGenerator gen, JitCopyOp op, JitBlock block,
JitType uType, MethodVisitor rv) {
// Nothing to emit; the value stays on the stack as is
return uType;
}
}

View file

@ -0,0 +1,54 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.lifecycle.Unfinished.TODO;
import static ghidra.pcode.emu.jit.gen.GenConsts.*;
import static org.objectweb.asm.Opcodes.INVOKESTATIC;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitFloatAbsOp;
/**
 * The generator for a {@link JitFloatAbsOp float_abs}.
 *
 * <p>
 * Built on the unary operator generator, this emits a static invocation of either
 * {@link Math#abs(float)} or {@link Math#abs(double)}, selected by the type of the operand. The
 * type of the result is the type of the operand.
 */
public enum FloatAbsOpGen implements UnOpGen<JitFloatAbsOp> {
	/** The generator singleton */
	GEN;

	/**
	 * Emit the invocation of the {@code Math.abs} overload having the given descriptor
	 * 
	 * @param descriptor the method descriptor selecting the overload
	 * @param rv the visitor for the run method
	 */
	private static void generateInvokeAbs(String descriptor, MethodVisitor rv) {
		rv.visitMethodInsn(INVOKESTATIC, NAME_MATH, "abs", descriptor, false);
	}

	@Override
	public JitType generateUnOpRunCode(JitCodeGenerator gen, JitFloatAbsOp op, JitBlock block,
			JitType uType, MethodVisitor rv) {
		switch (uType) {
			case FloatJitType f -> generateInvokeAbs(MDESC_$FLOAT_UNOP, rv);
			case DoubleJitType d -> generateInvokeAbs(MDESC_$DOUBLE_UNOP, rv);
			case MpFloatJitType mp -> TODO("MpFloat");
			default -> throw new AssertionError();
		}
		return uType;
	}
}

View file

@ -0,0 +1,53 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.lifecycle.Unfinished.TODO;
import static org.objectweb.asm.Opcodes.DADD;
import static org.objectweb.asm.Opcodes.FADD;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitFloatAddOp;
/**
 * The generator for a {@link JitFloatAddOp float_add}.
 *
 * <p>
 * Built on the binary operator generator, this emits a single instruction: {@link #FADD} for JVM
 * floats or {@link #DADD} for JVM doubles. Both operands are expected to have the same type, which
 * is also the type of the result.
 */
public enum FloatAddOpGen implements BinOpGen<JitFloatAddOp> {
	/** The generator singleton */
	GEN;

	@Override
	public JitType generateBinOpRunCode(JitCodeGenerator gen, JitFloatAddOp op, JitBlock block,
			JitType lType, JitType rType, MethodVisitor rv) {
		assert rType == lType;
		// [...,left,right]
		switch (lType) {
			case FloatJitType f -> {
				rv.visitInsn(FADD);
			}
			case DoubleJitType d -> {
				rv.visitInsn(DADD);
			}
			case MpFloatJitType mp -> {
				TODO("MpFloat");
			}
			default -> throw new AssertionError();
		}
		// [...,sum]
		return lType;
	}
}

View file

@ -0,0 +1,59 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.lifecycle.Unfinished.TODO;
import static ghidra.pcode.emu.jit.gen.GenConsts.MDESC_$DOUBLE_UNOP;
import static ghidra.pcode.emu.jit.gen.GenConsts.NAME_MATH;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitFloatCeilOp;
/**
 * The generator for a {@link JitFloatCeilOp float_ceil}.
 *
 * <p>
 * Built on the unary operator generator, this emits an invocation of {@link Math#ceil(double)}. For
 * a JVM float operand, the invocation is wrapped in conversions to double and back to float. The
 * type of the result is the type of the operand.
 */
public enum FloatCeilOpGen implements UnOpGen<JitFloatCeilOp> {
	/** The generator singleton */
	GEN;

	/**
	 * Emit the invocation of {@link Math#ceil(double)}
	 * 
	 * @param rv the visitor for the run method
	 */
	private static void generateInvokeCeil(MethodVisitor rv) {
		rv.visitMethodInsn(INVOKESTATIC, NAME_MATH, "ceil", MDESC_$DOUBLE_UNOP, false);
	}

	@Override
	public JitType generateUnOpRunCode(JitCodeGenerator gen, JitFloatCeilOp op, JitBlock block,
			JitType uType, MethodVisitor rv) {
		switch (uType) {
			case FloatJitType f -> {
				// Math offers no ceil(float), so widen, round up, and narrow again
				rv.visitInsn(F2D);
				generateInvokeCeil(rv);
				rv.visitInsn(D2F);
			}
			case DoubleJitType d -> generateInvokeCeil(rv);
			case MpFloatJitType mp -> TODO("MpFloat");
			default -> throw new AssertionError();
		}
		return uType;
	}
}

View file

@ -0,0 +1,53 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.lifecycle.Unfinished.TODO;
import static org.objectweb.asm.Opcodes.DDIV;
import static org.objectweb.asm.Opcodes.FDIV;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitFloatDivOp;
/**
 * The generator for a {@link JitFloatDivOp float_div}.
 *
 * <p>
 * Built on the binary operator generator, this emits a single instruction: {@link #FDIV} for JVM
 * floats or {@link #DDIV} for JVM doubles. Both operands are expected to have the same type, which
 * is also the type of the result.
 */
public enum FloatDivOpGen implements BinOpGen<JitFloatDivOp> {
	/** The generator singleton */
	GEN;

	@Override
	public JitType generateBinOpRunCode(JitCodeGenerator gen, JitFloatDivOp op, JitBlock block,
			JitType lType, JitType rType, MethodVisitor rv) {
		assert rType == lType;
		// [...,dividend,divisor]
		switch (lType) {
			case FloatJitType f -> {
				rv.visitInsn(FDIV);
			}
			case DoubleJitType d -> {
				rv.visitInsn(DDIV);
			}
			case MpFloatJitType mp -> {
				TODO("MpFloat");
			}
			default -> throw new AssertionError();
		}
		// [...,quotient]
		return lType;
	}
}

View file

@ -0,0 +1,47 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.*;
import ghidra.pcode.emu.jit.op.JitFloatEqualOp;
/**
 * The generator for a {@link JitFloatEqualOp float_equal}.
 *
 * <p>
 * This uses the float comparison operator generator and simply emits {@link #FCMPL} or
 * {@link #DCMPL} depending on the type and then {@link #IFEQ}.
 *
 * <p>
 * Per the JVM specification, {@code fcmpl} and {@code dcmpl} push -1 when either operand is NaN, so
 * the {@code ifeq} is not taken and a comparison involving NaN yields false.
 */
public enum FloatEqualOpGen implements CompareFloatOpGen<JitFloatEqualOp> {
/** The generator singleton */
GEN;
/**
 * {@inheritDoc}
 *
 * @return {@link #FCMPL}
 */
@Override
public int fcmpOpcode() {
return FCMPL;
}
/**
 * {@inheritDoc}
 *
 * @return {@link #DCMPL}
 */
@Override
public int dcmpOpcode() {
return DCMPL;
}
/**
 * {@inheritDoc}
 *
 * <p>
 * The operands are equal exactly when the comparison result is zero.
 *
 * @return {@link #IFEQ}
 */
@Override
public int condOpcode() {
return IFEQ;
}
}

View file

@ -0,0 +1,66 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.lifecycle.Unfinished.TODO;
import static org.objectweb.asm.Opcodes.D2F;
import static org.objectweb.asm.Opcodes.F2D;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitFloatFloat2FloatOp;
/**
 * The generator for a {@link JitFloatFloat2FloatOp float_float2float}.
 *
 * <p>
 * Built on the unary operator generator. When the input and output types differ, it emits
 * {@link org.objectweb.asm.Opcodes#F2D} (float to double) or
 * {@link org.objectweb.asm.Opcodes#D2F} (double to float). When they are the same, nothing is
 * emitted.
 */
public enum FloatFloat2FloatOpGen implements UnOpGen<JitFloatFloat2FloatOp> {
    /** The generator singleton */
    GEN;

    /**
     * Emit the conversion, if any, for a float input.
     *
     * @param resultType the resolved type of the output
     * @param rv the visitor that receives the instruction
     * @return the output type
     */
    private static JitType convertFromFloat(JitType resultType, MethodVisitor rv) {
        return switch (resultType) {
            // Same type in and out: nothing to emit
            case FloatJitType same -> same;
            case DoubleJitType wider -> {
                rv.visitInsn(F2D);
                yield wider;
            }
            case MpFloatJitType mp -> TODO("MpFloat");
            default -> throw new AssertionError();
        };
    }

    /**
     * Emit the conversion, if any, for a double input.
     *
     * @param resultType the resolved type of the output
     * @param rv the visitor that receives the instruction
     * @return the output type
     */
    private static JitType convertFromDouble(JitType resultType, MethodVisitor rv) {
        return switch (resultType) {
            case FloatJitType narrower -> {
                rv.visitInsn(D2F);
                yield narrower;
            }
            // Same type in and out: nothing to emit
            case DoubleJitType same -> same;
            case MpFloatJitType mp -> TODO("MpFloat");
            default -> throw new AssertionError();
        };
    }

    /**
     * Emit the float-to-float conversion for the given op.
     *
     * @param gen the code generator, whose type model supplies the type of the op's output
     * @param op the conversion op
     * @param block the block being generated (not consulted here)
     * @param uType the type of the input operand
     * @param rv the visitor that receives the emitted instruction
     * @return the type of the output, as resolved from the op and the type model
     */
    @Override
    public JitType generateUnOpRunCode(JitCodeGenerator gen, JitFloatFloat2FloatOp op,
            JitBlock block, JitType uType, MethodVisitor rv) {
        // Resolve the output type first; the instruction depends on both ends of the conversion
        JitType resultType = op.type().resolve(gen.getTypeModel().typeOf(op.out()));
        return switch (uType) {
            case FloatJitType src -> convertFromFloat(resultType, rv);
            case DoubleJitType src -> convertFromDouble(resultType, rv);
            case MpFloatJitType src -> TODO("MpFloat");
            default -> throw new AssertionError();
        };
    }
}

View file

@ -0,0 +1,59 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.lifecycle.Unfinished.TODO;
import static ghidra.pcode.emu.jit.gen.GenConsts.MDESC_$DOUBLE_UNOP;
import static ghidra.pcode.emu.jit.gen.GenConsts.NAME_MATH;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitFloatFloorOp;
/**
 * The generator for a {@link JitFloatFloorOp float_floor}.
 *
 * <p>
 * Built on the unary operator generator. It emits a static invocation of
 * {@link Math#floor(double)}. For a float operand, the call is wrapped in a widening conversion
 * before and a narrowing conversion after.
 */
public enum FloatFloorOpGen implements UnOpGen<JitFloatFloorOp> {
    /** The generator singleton */
    GEN;

    /**
     * Emit the static invocation of {@code floor} on the math class.
     *
     * @param rv the visitor that receives the instruction
     */
    private static void emitFloorCall(MethodVisitor rv) {
        rv.visitMethodInsn(INVOKESTATIC, NAME_MATH, "floor", MDESC_$DOUBLE_UNOP, false);
    }

    /**
     * Emit the floor computation for the given op.
     *
     * @param gen the code generator (not consulted here)
     * @param op the floor op (not consulted here)
     * @param block the block being generated (not consulted here)
     * @param uType the type of the operand
     * @param rv the visitor that receives the emitted instructions
     * @return {@code uType}, i.e., the result has the same type as the operand
     */
    @Override
    public JitType generateUnOpRunCode(JitCodeGenerator gen, JitFloatFloorOp op, JitBlock block,
            JitType uType, MethodVisitor rv) {
        switch (uType) {
            case FloatJitType f -> {
                // Math.floor is only defined for double: widen, call, then narrow back
                rv.visitInsn(F2D);
                emitFloorCall(rv);
                rv.visitInsn(D2F);
            }
            case DoubleJitType d -> {
                emitFloorCall(rv);
            }
            case MpFloatJitType mp -> {
                // Multi-precision floats are not generated yet
                TODO("MpFloat");
            }
            default -> throw new AssertionError();
        }
        return uType;
    }
}

View file

@ -0,0 +1,66 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.lifecycle.Unfinished.TODO;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitFloatInt2FloatOp;
/**
 * The generator for a {@link JitFloatInt2FloatOp float_int2float}.
 *
 * <p>
 * Built on the unary operator generator. Depending on the input and output types, it emits one of
 * {@link org.objectweb.asm.Opcodes#I2F}, {@link org.objectweb.asm.Opcodes#I2D},
 * {@link org.objectweb.asm.Opcodes#L2F}, or {@link org.objectweb.asm.Opcodes#L2D}.
 */
public enum FloatInt2FloatOpGen implements UnOpGen<JitFloatInt2FloatOp> {
    /** The generator singleton */
    GEN;

    /**
     * Emit the conversion from one integer input type to the given output type.
     *
     * @param rv the visitor that receives the instruction
     * @param resultType the resolved type of the output
     * @param toFloatOpcode the opcode to emit when the output is a float
     * @param toDoubleOpcode the opcode to emit when the output is a double
     * @return the output type
     */
    private static JitType emitConversion(MethodVisitor rv, JitType resultType,
            int toFloatOpcode, int toDoubleOpcode) {
        return switch (resultType) {
            case FloatJitType f -> {
                rv.visitInsn(toFloatOpcode);
                yield f;
            }
            case DoubleJitType d -> {
                rv.visitInsn(toDoubleOpcode);
                yield d;
            }
            case MpFloatJitType mp -> TODO("MpInt/Float");
            default -> throw new AssertionError();
        };
    }

    /**
     * Emit the integer-to-float conversion for the given op.
     *
     * @param gen the code generator, whose type model supplies the type of the op's output
     * @param op the conversion op
     * @param block the block being generated (not consulted here)
     * @param uType the type of the integer input operand
     * @param rv the visitor that receives the emitted instruction
     * @return the type of the output, as resolved from the op and the type model
     */
    @Override
    public JitType generateUnOpRunCode(JitCodeGenerator gen, JitFloatInt2FloatOp op, JitBlock block,
            JitType uType, MethodVisitor rv) {
        // Resolve the output type first; the instruction depends on both ends of the conversion
        JitType resultType = op.type().resolve(gen.getTypeModel().typeOf(op.out()));
        return switch (uType) {
            case IntJitType src -> emitConversion(rv, resultType, I2F, I2D);
            case LongJitType src -> emitConversion(rv, resultType, L2F, L2D);
            case MpIntJitType src -> TODO("MpInt/Float");
            default -> throw new AssertionError();
        };
    }
}

View file

@ -0,0 +1,47 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.*;
import ghidra.pcode.emu.jit.op.JitFloatLessEqualOp;
/**
 * The generator for a {@link JitFloatLessEqualOp float_lessequal}.
 *
 * <p>
 * Built on the float comparison operator generator. This class merely selects the opcodes:
 * {@link org.objectweb.asm.Opcodes#FCMPG} or {@link org.objectweb.asm.Opcodes#DCMPG}, depending on
 * the operand type, followed by {@link org.objectweb.asm.Opcodes#IFLE}.
 *
 * <p>
 * Per the JVM specification, the {@code G} comparison variants push 1 when either operand is NaN,
 * so the {@code IFLE} test (value &lt;= 0) does not hold for NaN operands.
 */
public enum FloatLessEqualOpGen implements CompareFloatOpGen<JitFloatLessEqualOp> {
    /** The generator singleton */
    GEN;

    /**
     * {@inheritDoc}
     *
     * @return {@code IFLE}
     */
    @Override
    public int condOpcode() {
        return IFLE;
    }

    /**
     * {@inheritDoc}
     *
     * @return {@code DCMPG}
     */
    @Override
    public int dcmpOpcode() {
        return DCMPG;
    }

    /**
     * {@inheritDoc}
     *
     * @return {@code FCMPG}
     */
    @Override
    public int fcmpOpcode() {
        return FCMPG;
    }
}

Some files were not shown because too many files have changed in this diff Show more