GP-4643: Add a JIT-accelerated p-code emulator (API/scripting only)

This commit is contained in:
Dan 2025-01-03 10:27:38 -05:00
parent 20285e267d
commit a8fae1fe5b
320 changed files with 32638 additions and 630 deletions

View file

@ -34,7 +34,7 @@ import ghidra.program.util.ProgramLocation;
import ghidra.trace.model.*; import ghidra.trace.model.*;
import ghidra.trace.model.modules.*; import ghidra.trace.model.modules.*;
import ghidra.trace.model.program.TraceProgramView; import ghidra.trace.model.program.TraceProgramView;
import ghidra.util.ComparatorMath; import ghidra.util.MathUtilities;
import ghidra.util.Msg; import ghidra.util.Msg;
public enum DebuggerStaticMappingUtils { public enum DebuggerStaticMappingUtils {
@ -163,8 +163,8 @@ public enum DebuggerStaticMappingUtils {
private Address max = null; private Address max = null;
public void consider(Address min, Address max) { public void consider(Address min, Address max) {
this.min = this.min == null ? min : ComparatorMath.cmin(this.min, min); this.min = this.min == null ? min : MathUtilities.cmin(this.min, min);
this.max = this.max == null ? max : ComparatorMath.cmax(this.max, max); this.max = this.max == null ? max : MathUtilities.cmax(this.max, max);
} }
public void consider(AddressRange range) { public void consider(AddressRange range) {

View file

@ -19,6 +19,7 @@ import ghidra.app.plugin.core.debug.stack.Sym.ConstSym;
import ghidra.pcode.exec.ConcretionError; import ghidra.pcode.exec.ConcretionError;
import ghidra.pcode.exec.PcodeArithmetic; import ghidra.pcode.exec.PcodeArithmetic;
import ghidra.pcode.utils.Utils; import ghidra.pcode.utils.Utils;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.*; import ghidra.program.model.lang.*;
import ghidra.program.model.pcode.PcodeOp; import ghidra.program.model.pcode.PcodeOp;
@ -70,14 +71,14 @@ class SymPcodeArithmetic implements PcodeArithmetic<Sym> {
} }
@Override @Override
public Sym modBeforeStore(int sizeout, int sizeinAddress, Sym inAddress, public Sym modBeforeStore(int sizeinOffset, AddressSpace space, Sym inOffset, int sizeinValue,
int sizeinValue, Sym inValue) { Sym inValue) {
return inValue; return inValue;
} }
@Override @Override
public Sym modAfterLoad(int sizeout, int sizeinAddress, Sym inAddress, public Sym modAfterLoad(int sizeinOffset, AddressSpace space, Sym inOffset, int sizeinValue,
int sizeinValue, Sym inValue) { Sym inValue) {
return inValue; return inValue;
} }

View file

@ -505,33 +505,33 @@ public enum DebuggerPcodeUtils {
} }
@Override @Override
public WatchValue modBeforeStore(int sizeout, int sizeinAddress, WatchValue inAddress, public WatchValue modBeforeStore(int sizeinOffset, AddressSpace space, WatchValue inOffset,
int sizeinValue, WatchValue inValue) { int sizeinValue, WatchValue inValue) {
return new WatchValue( return new WatchValue(
new PrettyBytes(inValue.bytes.bigEndian, new PrettyBytes(inValue.bytes.bigEndian,
bytes.modBeforeStore(sizeout, sizeinAddress, inAddress.bytes.bytes, bytes.modBeforeStore(sizeinOffset, space, inOffset.bytes.bytes, sizeinValue,
sizeinValue, inValue.bytes.bytes)), inValue.bytes.bytes)),
STATE.modBeforeStore(sizeout, sizeinAddress, inAddress.state, STATE.modBeforeStore(sizeinOffset, space, inOffset.state, sizeinValue,
sizeinValue, inValue.state), inValue.state),
location.modBeforeStore(sizeout, sizeinAddress, inAddress.location, location.modBeforeStore(sizeinOffset, space, inOffset.location, sizeinValue,
sizeinValue, inValue.location), inValue.location),
READS.modBeforeStore(sizeout, sizeinAddress, inAddress.reads, READS.modBeforeStore(sizeinOffset, space, inOffset.reads, sizeinValue,
sizeinValue, inValue.reads)); inValue.reads));
} }
@Override @Override
public WatchValue modAfterLoad(int sizeout, int sizeinAddress, WatchValue inAddress, public WatchValue modAfterLoad(int sizeinOffset, AddressSpace space, WatchValue inOffset,
int sizeinValue, WatchValue inValue) { int sizeinValue, WatchValue inValue) {
return new WatchValue( return new WatchValue(
new PrettyBytes(getEndian().isBigEndian(), new PrettyBytes(getEndian().isBigEndian(),
bytes.modAfterLoad(sizeout, sizeinAddress, inAddress.bytes.bytes, bytes.modAfterLoad(sizeinOffset, space, inOffset.bytes.bytes, sizeinValue,
sizeinValue, inValue.bytes.bytes)), inValue.bytes.bytes)),
STATE.modAfterLoad(sizeout, sizeinAddress, inAddress.state, STATE.modAfterLoad(sizeinOffset, space, inOffset.state, sizeinValue,
sizeinValue, inValue.state), inValue.state),
location.modAfterLoad(sizeout, sizeinAddress, inAddress.location, location.modAfterLoad(sizeinOffset, space, inOffset.location, sizeinValue,
sizeinValue, inValue.location), inValue.location),
READS.modAfterLoad(sizeout, sizeinAddress, inAddress.reads, READS.modAfterLoad(sizeinOffset, space, inOffset.reads, sizeinValue,
sizeinValue, inValue.reads)); inValue.reads));
} }
@Override @Override

View file

@ -19,6 +19,7 @@ import java.math.BigInteger;
import ghidra.pcode.exec.ConcretionError; import ghidra.pcode.exec.ConcretionError;
import ghidra.pcode.exec.PcodeArithmetic; import ghidra.pcode.exec.PcodeArithmetic;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Endian; import ghidra.program.model.lang.Endian;
import ghidra.trace.model.memory.TraceMemoryState; import ghidra.trace.model.memory.TraceMemoryState;
@ -58,15 +59,15 @@ public enum TraceMemoryStatePcodeArithmetic implements PcodeArithmetic<TraceMemo
} }
@Override @Override
public TraceMemoryState modBeforeStore(int sizeout, int sizeinAddress, public TraceMemoryState modBeforeStore(int sizeinOffset, AddressSpace space,
TraceMemoryState inAddress, int sizeinValue, TraceMemoryState inValue) { TraceMemoryState inOffset, int sizeinValue, TraceMemoryState inValue) {
return inValue; // Shouldn't see STORE during Sleigh eval, anyway return inValue; // Shouldn't see STORE during Sleigh eval, anyway
} }
@Override @Override
public TraceMemoryState modAfterLoad(int sizeout, int sizeinAddress, TraceMemoryState inAddress, public TraceMemoryState modAfterLoad(int sizeinOffset, AddressSpace space,
int sizeinValue, TraceMemoryState inValue) { TraceMemoryState inOffset, int sizeinValue, TraceMemoryState inValue) {
if (inAddress == TraceMemoryState.KNOWN && inValue == TraceMemoryState.KNOWN) { if (inOffset == TraceMemoryState.KNOWN && inValue == TraceMemoryState.KNOWN) {
return TraceMemoryState.KNOWN; return TraceMemoryState.KNOWN;
} }
return TraceMemoryState.UNKNOWN; return TraceMemoryState.UNKNOWN;

View file

@ -24,8 +24,8 @@ import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSetView; import ghidra.program.model.address.AddressSetView;
import ghidra.program.model.listing.*; import ghidra.program.model.listing.*;
import ghidra.trace.model.memory.TraceMemoryRegion; import ghidra.trace.model.memory.TraceMemoryRegion;
import ghidra.util.ComparatorMath;
import ghidra.util.LockHold; import ghidra.util.LockHold;
import ghidra.util.MathUtilities;
import ghidra.util.exception.*; import ghidra.util.exception.*;
public class DBTraceProgramViewRootModule implements ProgramModule { public class DBTraceProgramViewRootModule implements ProgramModule {
@ -195,7 +195,7 @@ public class DBTraceProgramViewRootModule implements ProgramModule {
.getMinAddress(); .getMinAddress();
} }
// TODO: There has got to be a better way // TODO: There has got to be a better way
return reduceRegions(TraceMemoryRegion::getMinAddress, ComparatorMath::cmin); return reduceRegions(TraceMemoryRegion::getMinAddress, MathUtilities::cmin);
} }
@Override @Override
@ -206,7 +206,7 @@ public class DBTraceProgramViewRootModule implements ProgramModule {
.getMaxAddress(); .getMaxAddress();
} }
// TODO: There has got to be a better way // TODO: There has got to be a better way
return reduceRegions(TraceMemoryRegion::getMaxAddress, ComparatorMath::cmax); return reduceRegions(TraceMemoryRegion::getMaxAddress, MathUtilities::cmax);
} }
@Override @Override

View file

@ -983,6 +983,7 @@ public class BytesTracePcodeEmulatorTest extends AbstractTracePcodeEmulatorTest
TraceSleighUtils.evaluate("r1", tb.trace, 1, thread, 0)); TraceSleighUtils.evaluate("r1", tb.trace, 1, thread, 0));
} }
} }
@Test @Test
public void testITE_ContextFlow() throws Throwable { public void testITE_ContextFlow() throws Throwable {
try (ToyDBTraceBuilder tb = new ToyDBTraceBuilder("Test", "ARM:LE:32:v8T")) { try (ToyDBTraceBuilder tb = new ToyDBTraceBuilder("Test", "ARM:LE:32:v8T")) {

View file

@ -0,0 +1,232 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.util.opinion;
import java.io.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import generic.ULongSpan;
import ghidra.app.util.Option;
import ghidra.app.util.bin.ByteProvider;
import ghidra.app.util.importer.MessageLog;
import ghidra.framework.model.Project;
import ghidra.framework.store.LockException;
import ghidra.generic.util.datastruct.SemisparseByteArray;
import ghidra.program.database.function.OverlappingFunctionException;
import ghidra.program.model.address.*;
import ghidra.program.model.lang.*;
import ghidra.program.model.listing.CodeUnit;
import ghidra.program.model.listing.Program;
import ghidra.program.model.mem.MemoryAccessException;
import ghidra.program.model.mem.MemoryConflictException;
import ghidra.program.model.symbol.SourceType;
import ghidra.program.model.symbol.SymbolUtilities;
import ghidra.util.NumericUtilities;
import ghidra.util.exception.CancelledException;
import ghidra.util.exception.InvalidInputException;
import ghidra.util.task.TaskMonitor;
/**
 * A loader that imports the machine code printed in an OpenJDK 17 JIT compilation log.
 *
 * <p>
 * The log is read line by line. A line matching {@link #PAT_METHOD} starts a new method; until the
 * next such line, lines matching {@link #PAT_COMMENT} are collected as comments and lines matching
 * {@link #PAT_BYTES} are collected as machine code bytes, both keyed by the address printed at the
 * start of the line. Lines appearing before the first method header are ignored. After reading,
 * the loader creates one initialized memory block per contiguous range of collected bytes, writes
 * the bytes, applies the comments as pre-comments, and creates one function per method.
 */
public class JitLogLoader extends AbstractProgramLoader {
	public final static String JIT_LOG_NAME = "OpenJDK 17 JIT compilation log";

	/**
	 * Offer a load spec for every language/compiler pair known to the language service.
	 *
	 * <p>
	 * The content of the provider is not examined, so this loader never rejects a file.
	 */
	@Override
	public Collection<LoadSpec> findSupportedLoadSpecs(ByteProvider provider) throws IOException {
		return getLanguageService().getLanguageCompilerSpecPairs(
			new LanguageCompilerSpecQuery(null, null, null, null, null))
				.stream()
				.map(lcs -> new LoadSpec(this, 0, lcs, false))
				.toList();
	}

	@Override
	public String getName() {
		return JIT_LOG_NAME;
	}

	@Override
	public LoaderTier getTier() {
		return LoaderTier.UNTARGETED_LOADER;
	}

	@Override
	public int getTierPriority() {
		return 100;
	}

	/**
	 * Create a new program for the selected language and populate it from the log.
	 *
	 * <p>
	 * The program is released, and an empty list returned, unless both the import and the creation
	 * of the default memory blocks complete.
	 */
	@Override
	protected List<Loaded<Program>> loadProgram(ByteProvider provider, String loadedName,
			Project project, String projectFolderPath, LoadSpec loadSpec, List<Option> options,
			MessageLog log, Object consumer, TaskMonitor monitor)
			throws IOException, LoadException, CancelledException {
		LanguageCompilerSpecPair pair = loadSpec.getLanguageCompilerSpec();
		CompilerSpec cSpec = pair.getCompilerSpec();
		Language language = cSpec.getLanguage();

		Program program =
			createProgram(provider, loadedName, null, getName(), language, cSpec, consumer);
		boolean success = false;
		try {
			loadInto(provider, loadSpec, options, log, program, monitor);
			createDefaultMemoryBlocks(program, language, log);
			// Only declare success once every step that can throw has finished; otherwise a
			// failure while creating the default blocks would leak the unreleased program.
			success = true;
		}
		finally {
			if (!success) {
				program.release(consumer);
				program = null;
			}
		}
		List<Loaded<Program>> results = new ArrayList<>();
		if (program != null) {
			results.add(new Loaded<>(program, loadedName, projectFolderPath));
		}
		return results;
	}

	/**
	 * The bytes and comments collected for one method in the log
	 */
	static class JitMethod {
		final String name;
		SemisparseByteArray bytes = new SemisparseByteArray();
		Map<Address, String> comments = new HashMap<>();

		public JitMethod(String name) {
			this.name = name;
		}

		/**
		 * Add a comment line at the given address, joining it to any existing comment there with
		 * a newline.
		 */
		void appendComment(Address address, String line) {
			comments.compute(address, (a, c) -> c == null ? line : c + "\n" + line);
		}
	}

	/** The methods collected by the current import, in the order their headers appeared */
	List<JitMethod> methods = new ArrayList<>();
	/** The union of all address ranges for which the collected methods have bytes */
	AddressSet fullSet = new AddressSet();

	static final Pattern PAT_METHOD =
		Pattern.compile("\\s*#\\s*\\{method\\}\\s*\\{0x[0-9A-Fa-f]+\\}(?<name>.*)");
	static final Pattern PAT_COMMENT =
		Pattern.compile("\\s*0x(?<addrHex>[0-9A-Fa-f]+):\\s*;(?<comment>.*)");
	static final Pattern PAT_BYTES =
		Pattern.compile("\\s*0x(?<addrHex>[0-9A-Fa-f]+):\\s*(?<bytes>[\\s\\|0-9A-Fa-f]+)");

	/**
	 * Parse the log from the provider and apply its contents to the given program.
	 *
	 * <p>
	 * Problems with individual lines, blocks, byte ranges, or functions are reported to the
	 * message log and skipped, so that the remainder of the log is still imported.
	 */
	@Override
	protected void loadProgramInto(ByteProvider provider, LoadSpec loadSpec,
			List<Option> options, MessageLog log, Program program, TaskMonitor monitor)
			throws IOException, CancelledException {
		// The collections are instance state. Reset them so that a loader instance used for more
		// than one import does not re-apply methods and ranges left over from an earlier log.
		methods.clear();
		fullSet.clear();

		monitor.setMessage("Reading lines");
		JitMethod curMethod = null;
		String line;
		try (BufferedReader in =
			new BufferedReader(new InputStreamReader(provider.getInputStream(0)))) {
			while (null != (line = in.readLine())) {
				Matcher matcher;
				monitor.checkCanceled();

				matcher = PAT_METHOD.matcher(line);
				if (matcher.matches()) {
					// A new header finishes the previous method, if any
					putMethod(curMethod, program);
					curMethod = new JitMethod(matcher.group("name")
							.replace("&apos;", "'")
							.replace("&lt;", "<")
							.replace("&gt;", ">"));
					continue;
				}
				if (curMethod == null) {
					continue;
				}

				matcher = PAT_COMMENT.matcher(line);
				if (matcher.matches()) {
					Address address = parseAddress(program, matcher.group("addrHex"), log);
					if (address != null) {
						curMethod.appendComment(address, matcher.group("comment"));
					}
				}
				matcher = PAT_BYTES.matcher(line);
				if (matcher.matches()) {
					Address address = parseAddress(program, matcher.group("addrHex"), log);
					if (address != null) {
						curMethod.bytes.putData(address.getOffset(),
							NumericUtilities.convertStringToBytes(
								matcher.group("bytes").replace(" ", "").replace("|", "")));
					}
				}
			}
		}
		// The last method has no following header to finish it
		putMethod(curMethod, program);

		monitor.setMaximum(fullSet.getNumAddresses() + methods.size());

		monitor.setMessage("Creating blocks");
		for (AddressRange range : fullSet) {
			monitor.checkCanceled();
			try {
				program.getMemory()
						.createInitializedBlock("block" + range.getMinAddress(),
							range.getMinAddress(), range.getLength(), (byte) 0, monitor, false);
			}
			catch (AddressOverflowException | LockException | IllegalArgumentException
					| MemoryConflictException e) {
				log.appendMsg("Could not create block " + range + ": " + e);
			}
			monitor.incrementProgress(1);
		}

		monitor.setMessage("Creating methods");
		AddressSpace space = program.getAddressFactory().getDefaultAddressSpace();
		for (JitMethod method : methods) {
			monitor.checkCanceled();
			AddressSet body = new AddressSet();
			for (ULongSpan span : method.bytes.getInitialized(0, -1).spans()) {
				body.add(space.getAddress(span.min()), space.getAddress(span.max()));
				if (span.length() > Integer.MAX_VALUE) {
					// Cannot be copied through a single byte array
					log.appendMsg("Method too large: " + method.name);
					continue;
				}
				byte[] data = new byte[(int) span.length()];
				method.bytes.getData(span.min(), data);
				try {
					program.getMemory().setBytes(space.getAddress(span.min()), data);
				}
				catch (MemoryAccessException | AddressOutOfBoundsException e) {
					log.appendMsg("Could not write bytes " + span + ": " + e);
				}
			}
			for (Map.Entry<Address, String> ent : method.comments.entrySet()) {
				program.getListing().setComment(ent.getKey(), CodeUnit.PRE_COMMENT, ent.getValue());
			}
			if (body.isEmpty()) {
				// A header with no byte lines has no entry point (getMinAddress() would be
				// null). Report it rather than let function creation abort the whole import.
				log.appendMsg("Couldn't create function: " + method.name + ": no bytes");
			}
			else {
				try {
					program.getFunctionManager()
							.createFunction(SymbolUtilities.replaceInvalidChars(method.name, true),
								body.getMinAddress(), body,
								SourceType.IMPORTED);
				}
				catch (InvalidInputException | OverlappingFunctionException e) {
					log.appendMsg("Couldn't create function: " + method.name + ": " + e);
				}
			}
			monitor.incrementProgress(1);
		}
	}

	/**
	 * Parse a hexadecimal address taken from the log using the program's address factory.
	 *
	 * @param program the program whose address factory does the parsing
	 * @param addrHex the hexadecimal digits, without the {@code 0x} prefix
	 * @param log the log to receive a message if the address cannot be parsed
	 * @return the address, or {@code null} if the factory could not produce one
	 */
	private static Address parseAddress(Program program, String addrHex, MessageLog log) {
		Address address = program.getAddressFactory().getAddress(addrHex);
		if (address == null) {
			log.appendMsg("Could not parse address: " + addrHex);
		}
		return address;
	}

	/**
	 * Record a finished method and add the ranges of its bytes to the full set.
	 *
	 * @param method the method to record, or {@code null} to do nothing
	 * @param program the program whose default address space positions the bytes
	 */
	void putMethod(JitMethod method, Program program) {
		if (method == null) {
			return;
		}
		AddressSpace space = program.getAddressFactory().getDefaultAddressSpace();
		methods.add(method);
		for (ULongSpan span : method.bytes.getInitialized(0, -1).spans()) {
			fullSet.add(space.getAddress(span.min()), space.getAddress(span.max()));
		}
	}
}

View file

@ -43,7 +43,7 @@ public abstract class ArithmeticVarnodeEvaluator<T> extends AbstractVarnodeEvalu
* SLEIGH: {@code shift} the left piece then {@code or} it with the right piece. * SLEIGH: {@code shift} the left piece then {@code or} it with the right piece.
* *
* @param <T> the type of values * @param <T> the type of values
* @param arithmetic the p-code arithmetic for values of type {@link T} * @param arithmetic the p-code arithmetic for values of type {@code T}
* @param sizeTotal the expected output size in bytes * @param sizeTotal the expected output size in bytes
* @param upper the value of the left (more significant) piece * @param upper the value of the left (more significant) piece
* @param lower the value of the right (less significant) piece * @param lower the value of the right (less significant) piece
@ -143,8 +143,6 @@ public abstract class ArithmeticVarnodeEvaluator<T> extends AbstractVarnodeEvalu
T offset = evaluateVarnode(program, inOffset, already); T offset = evaluateVarnode(program, inOffset, already);
Varnode outVar = op.getOutput(); // Only for measuring size Varnode outVar = op.getOutput(); // Only for measuring size
T out = evaluateAbstract(program, space, offset, outVar.getSize(), already); T out = evaluateAbstract(program, space, offset, outVar.getSize(), already);
return arithmetic.modAfterLoad(outVar.getSize(), return arithmetic.modAfterLoad(op, space, offset, out);
inOffset.getSize(), offset,
outVar.getSize(), out);
} }
} }

View file

@ -15,8 +15,8 @@
*/ */
package ghidra.program.model.address; package ghidra.program.model.address;
import static ghidra.util.ComparatorMath.cmax; import static ghidra.util.MathUtilities.cmax;
import static ghidra.util.ComparatorMath.cmin; import static ghidra.util.MathUtilities.cmin;
import java.util.Iterator; import java.util.Iterator;

View file

@ -15,8 +15,8 @@
*/ */
package ghidra.util; package ghidra.util;
import static ghidra.util.ComparatorMath.cmax; import static ghidra.util.MathUtilities.cmax;
import static ghidra.util.ComparatorMath.cmin; import static ghidra.util.MathUtilities.cmin;
import ghidra.program.model.address.*; import ghidra.program.model.address.*;

View file

@ -15,8 +15,8 @@
*/ */
package ghidra.util; package ghidra.util;
import static ghidra.util.ComparatorMath.cmax; import static ghidra.util.MathUtilities.cmax;
import static ghidra.util.ComparatorMath.cmin; import static ghidra.util.MathUtilities.cmin;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map.Entry; import java.util.Map.Entry;

View file

@ -15,8 +15,8 @@
*/ */
package ghidra.util; package ghidra.util;
import static ghidra.util.ComparatorMath.cmax; import static ghidra.util.MathUtilities.cmax;
import static ghidra.util.ComparatorMath.cmin; import static ghidra.util.MathUtilities.cmin;
import java.util.Collection; import java.util.Collection;
import java.util.Iterator; import java.util.Iterator;

View file

@ -15,8 +15,8 @@
*/ */
package ghidra.util; package ghidra.util;
import static ghidra.util.ComparatorMath.cmax; import static ghidra.util.MathUtilities.cmax;
import static ghidra.util.ComparatorMath.cmin; import static ghidra.util.MathUtilities.cmin;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;

View file

@ -19,6 +19,7 @@ import java.util.Objects;
import ghidra.pcode.exec.ConcretionError; import ghidra.pcode.exec.ConcretionError;
import ghidra.pcode.exec.PcodeArithmetic; import ghidra.pcode.exec.PcodeArithmetic;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Endian; import ghidra.program.model.lang.Endian;
import ghidra.program.model.lang.Language; import ghidra.program.model.lang.Language;
import ghidra.program.model.pcode.PcodeOp; import ghidra.program.model.pcode.PcodeOp;
@ -83,7 +84,7 @@ public enum TaintPcodeArithmetic implements PcodeArithmetic<TaintVec> {
* <p> * <p>
* We can't just naively return {@code in1}, because each unary op may mix the bytes of the * We can't just naively return {@code in1}, because each unary op may mix the bytes of the
* operand a little differently. For {@link PcodeOp#COPY}, we can, since no mixing happens at * operand a little differently. For {@link PcodeOp#COPY}, we can, since no mixing happens at
* all. This is also the case of both {@link NEGATE} operations ("negate" is a bit of a * all. This is also the case of both {@code NEGATE} operations ("negate" is a bit of a
* misnomer, as they merely invert the bits.) For {@link PcodeOp#INT_ZEXT}, we append empties * misnomer, as they merely invert the bits.) For {@link PcodeOp#INT_ZEXT}, we append empties
* to the correct end of the vector. Similarly, we replicate the most-significant element and * to the correct end of the vector. Similarly, we replicate the most-significant element and
* append for {@link PcodeOp#INT_SEXT}. For {@link PcodeOp#INT_2COMP} (which negates an integer * append for {@link PcodeOp#INT_SEXT}. For {@link PcodeOp#INT_2COMP} (which negates an integer
@ -183,9 +184,9 @@ public enum TaintPcodeArithmetic implements PcodeArithmetic<TaintVec> {
* Here we handle indirect taint for indirect writes * Here we handle indirect taint for indirect writes
*/ */
@Override @Override
public TaintVec modBeforeStore(int sizeout, int sizeinAddress, TaintVec inAddress, public TaintVec modBeforeStore(int sizeinOffset, AddressSpace space, TaintVec inOffset,
int sizeinValue, TaintVec inValue) { int sizeinValue, TaintVec inValue) {
return inValue.tagIndirectWrite(inAddress); return inValue.tagIndirectWrite(inOffset);
} }
/** /**
@ -195,9 +196,9 @@ public enum TaintPcodeArithmetic implements PcodeArithmetic<TaintVec> {
* Here we handle indirect taint for indirect reads * Here we handle indirect taint for indirect reads
*/ */
@Override @Override
public TaintVec modAfterLoad(int sizeout, int sizeinAddress, TaintVec inAddress, public TaintVec modAfterLoad(int sizeinOffset, AddressSpace space, TaintVec inOffset,
int sizeinValue, TaintVec inValue) { int sizeinValue, TaintVec inValue) {
return inValue.tagIndirectRead(inAddress); return inValue.tagIndirectRead(inOffset);
} }
/** /**

View file

@ -39,7 +39,7 @@ public class PCodeDfgGraphTask extends Task {
private GraphDisplayBroker graphService; private GraphDisplayBroker graphService;
protected HighFunction hfunction; protected HighFunction hfunction;
private AttributedGraph graph; protected AttributedGraph graph;
private PluginTool tool; private PluginTool tool;
public PCodeDfgGraphTask(PluginTool tool, GraphDisplayBroker graphService, public PCodeDfgGraphTask(PluginTool tool, GraphDisplayBroker graphService,

View file

@ -2,11 +2,6 @@ MODULE FILE LICENSE: lib/dex-ir-2.4.24.jar Apache License 2.0
MODULE FILE LICENSE: lib/dex-reader-2.4.24.jar Apache License 2.0 MODULE FILE LICENSE: lib/dex-reader-2.4.24.jar Apache License 2.0
MODULE FILE LICENSE: lib/dex-reader-api-2.4.24.jar Apache License 2.0 MODULE FILE LICENSE: lib/dex-reader-api-2.4.24.jar Apache License 2.0
MODULE FILE LICENSE: lib/dex-translator-2.4.24.jar Apache License 2.0 MODULE FILE LICENSE: lib/dex-translator-2.4.24.jar Apache License 2.0
MODULE FILE LICENSE: lib/asm-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/asm-analysis-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/asm-commons-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/asm-tree-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/asm-util-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/baksmali-2.5.2.jar BSD-3-GRUVER MODULE FILE LICENSE: lib/baksmali-2.5.2.jar BSD-3-GRUVER
MODULE FILE LICENSE: lib/dexlib2-2.5.2.jar BSD-3-GRUVER MODULE FILE LICENSE: lib/dexlib2-2.5.2.jar BSD-3-GRUVER
MODULE FILE LICENSE: lib/util-2.5.2.jar BSD-3-GRUVER MODULE FILE LICENSE: lib/util-2.5.2.jar BSD-3-GRUVER

View file

@ -31,15 +31,11 @@ dependencies {
api project(':PDB') api project(':PDB')
// Used by "Android DEX to JAR" file system // Used by "Android DEX to JAR" file system
// dex2jar depends on asm-9.7.1, which is declared in Framework/Emulation
api 'de.femtopedia.dex2jar:dex-ir:2.4.24' api 'de.femtopedia.dex2jar:dex-ir:2.4.24'
api 'de.femtopedia.dex2jar:dex-reader:2.4.24' api 'de.femtopedia.dex2jar:dex-reader:2.4.24'
api 'de.femtopedia.dex2jar:dex-reader-api:2.4.24' api 'de.femtopedia.dex2jar:dex-reader-api:2.4.24'
api 'de.femtopedia.dex2jar:dex-translator:2.4.24' api 'de.femtopedia.dex2jar:dex-translator:2.4.24'
api 'org.ow2.asm:asm:9.7.1'
api 'org.ow2.asm:asm-analysis:9.7.1'
api 'org.ow2.asm:asm-commons:9.7.1'
api 'org.ow2.asm:asm-tree:9.7.1'
api 'org.ow2.asm:asm-util:9.7.1'
// Used by "Android DEX to SMALI" file system // Used by "Android DEX to SMALI" file system
api 'org.smali:baksmali:2.5.2' // requires guava-27.1-android or later api 'org.smali:baksmali:2.5.2' // requires guava-27.1-android or later

View file

@ -5,7 +5,6 @@
##MODULE IP: BSD-3-GRUVER ##MODULE IP: BSD-3-GRUVER
##MODULE IP: Copyright Distribution Permitted ##MODULE IP: Copyright Distribution Permitted
##MODULE IP: Creative Commons Attribution 2.5 ##MODULE IP: Creative Commons Attribution 2.5
##MODULE IP: INRIA License
##MODULE IP: Jython License ##MODULE IP: Jython License
##MODULE IP: LGPL 2.1 ##MODULE IP: LGPL 2.1
##MODULE IP: Public Domain ##MODULE IP: Public Domain

View file

@ -16,6 +16,7 @@
package ghidra.pcode.emu.sys; package ghidra.pcode.emu.sys;
import java.io.*; import java.io.*;
import java.lang.reflect.Method;
import java.util.*; import java.util.*;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@ -132,8 +133,8 @@ public interface EmuSyscallLibrary<T> extends PcodeUseropLibrary<T> {
* Derive a syscall number to calling convention map by scraping functions in the program's * Derive a syscall number to calling convention map by scraping functions in the program's
* "syscall" space. * "syscall" space.
* *
* @param program * @param program the program whose "syscall" space to scrape
* @return * @return the map of syscall number to calling convention
*/ */
public static Map<Long, PrototypeModel> loadSyscallConventionMap(Program program) { public static Map<Long, PrototypeModel> loadSyscallConventionMap(Program program) {
return loadSyscallFunctionMap(program).entrySet() return loadSyscallFunctionMap(program).entrySet()
@ -169,6 +170,37 @@ public interface EmuSyscallLibrary<T> extends PcodeUseropLibrary<T> {
Varnode outVar, List<Varnode> inVars) { Varnode outVar, List<Varnode> inVars) {
syslib.syscall(executor, library); syslib.syscall(executor, library);
} }
/**
 * Report that the syscall userop is not functional: this always returns {@code false}.
 * <p>
 * NOTE(review): presumably because the result of a system call depends on machine state
 * rather than only on its operands -- confirm against the contract of the overridden method.
 */
@Override
public boolean isFunctional() {
return false;
}
/**
 * Report that the syscall userop has side effects: this always returns {@code true}.
 */
@Override
public boolean hasSideEffects() {
return true;
}
/**
 * Report that the syscall userop cannot be inlined as p-code: this always returns
 * {@code false}.
 */
@Override
public boolean canInlinePcode() {
return false;
}
/**
 * Get the library that defines this userop, i.e., the syscall library this definition wraps.
 */
@Override
public PcodeUseropLibrary<?> getDefiningLibrary() {
return syslib;
}
/**
 * Get the Java method that backs this userop.
 * <p>
 * This reflects the public {@code syscall(PcodeExecutor, PcodeUseropLibrary)} method from the
 * runtime class of the wrapped syscall library.
 *
 * @return the reflected method
 * @throws AssertionError if the method cannot be found or reflective access is denied
 */
@Override
public Method getJavaMethod() {
	final Class<?> libraryClass = syslib.getClass();
	try {
		return libraryClass.getMethod("syscall", PcodeExecutor.class,
			PcodeUseropLibrary.class);
	}
	catch (NoSuchMethodException | SecurityException e) {
		throw new AssertionError(e);
	}
}
} }
/** /**
@ -199,7 +231,7 @@ public interface EmuSyscallLibrary<T> extends PcodeUseropLibrary<T> {
*/ */
default PcodeUseropDefinition<T> getSyscallUserop() { default PcodeUseropDefinition<T> getSyscallUserop() {
return new SyscallPcodeUseropDefinition<>(this); return new SyscallPcodeUseropDefinition<>(this);
}; }
/** /**
* Retrieve the desired system call number according to the emulated system's conventions * Retrieve the desired system call number according to the emulated system's conventions

View file

@ -0,0 +1,5 @@
MODULE FILE LICENSE: lib/asm-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/asm-analysis-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/asm-commons-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/asm-tree-9.7.1.jar INRIA License
MODULE FILE LICENSE: lib/asm-util-9.7.1.jar INRIA License

View file

@ -26,4 +26,10 @@ dependencies {
api project(':SoftwareModeling') api project(':SoftwareModeling')
api project(':Generic') api project(':Generic')
api project(':Utility') api project(':Utility')
api 'org.ow2.asm:asm:9.7.1'
api 'org.ow2.asm:asm-analysis:9.7.1'
api 'org.ow2.asm:asm-commons:9.7.1'
api 'org.ow2.asm:asm-tree:9.7.1'
api 'org.ow2.asm:asm-util:9.7.1'
} }

View file

@ -1,4 +1,5 @@
##VERSION: 2.0 ##VERSION: 2.0
##MODULE IP: INRIA License
Module.manifest||GHIDRA||||END| Module.manifest||GHIDRA||||END|
README.md||GHIDRA||||END| README.md||GHIDRA||||END|
src/test/resources/mock.cspec||GHIDRA||||END| src/test/resources/mock.cspec||GHIDRA||||END|

View file

@ -20,6 +20,7 @@ import generic.ULongSpan.ULongSpanSet;
import ghidra.app.emulator.memory.MemoryLoadImage; import ghidra.app.emulator.memory.MemoryLoadImage;
import ghidra.app.emulator.state.RegisterState; import ghidra.app.emulator.state.RegisterState;
import ghidra.app.plugin.processors.sleigh.SleighLanguage; import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.util.PseudoInstruction;
import ghidra.lifecycle.Transitional; import ghidra.lifecycle.Transitional;
import ghidra.pcode.emu.*; import ghidra.pcode.emu.*;
import ghidra.pcode.emu.PcodeMachine.SwiMode; import ghidra.pcode.emu.PcodeMachine.SwiMode;
@ -35,7 +36,6 @@ import ghidra.pcode.utils.Utils;
import ghidra.program.model.address.Address; import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSpace; import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.*; import ghidra.program.model.lang.*;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.pcode.PcodeOp; import ghidra.program.model.pcode.PcodeOp;
import ghidra.util.Msg; import ghidra.util.Msg;
import ghidra.util.exception.CancelledException; import ghidra.util.exception.CancelledException;
@ -134,7 +134,7 @@ public class AdaptedEmulator implements Emulator {
PcodeExecutorState<byte[]> sharedState) { PcodeExecutorState<byte[]> sharedState) {
return new SleighInstructionDecoder(language, sharedState) { return new SleighInstructionDecoder(language, sharedState) {
@Override @Override
public Instruction decodeInstruction(Address address, RegisterValue context) { public PseudoInstruction decodeInstruction(Address address, RegisterValue context) {
try { try {
isDecoding = true; isDecoding = true;
return super.decodeInstruction(address, context); return super.decodeInstruction(address, context);
@ -147,8 +147,7 @@ public class AdaptedEmulator implements Emulator {
} }
} }
record StateBacking(MemoryFaultHandler faultHandler, MemoryLoadImage loadImage) { record StateBacking(MemoryFaultHandler faultHandler, MemoryLoadImage loadImage) {}
}
class AdaptedBytesPcodeExecutorState extends BytesPcodeExecutorState { class AdaptedBytesPcodeExecutorState extends BytesPcodeExecutorState {
public AdaptedBytesPcodeExecutorState(Language language, StateBacking backing) { public AdaptedBytesPcodeExecutorState(Language language, StateBacking backing) {

View file

@ -114,6 +114,15 @@ public class SemisparseByteArray {
getData(loc, data, 0, data.length); getData(loc, data, 0, data.length);
} }
/**
 * Get the array backing the block that starts at the given location, allocating a zero-filled
 * block if none is present yet.
 * <p>
 * The array returned is the one stored in this object, not a copy, so writes to it modify the
 * contents of this semisparse array directly.
 * <p>
 * NOTE(review): nothing in this method records the block's bytes as initialized -- confirm that
 * callers account for that.
 *
 * @param loc the location, which must be a multiple of the block size (treated as unsigned)
 * @return the backing array for the block
 * @throws IllegalArgumentException if the location is not at a block boundary
 */
public synchronized byte[] getDirect(final long loc) {
	if ((int) Long.remainderUnsigned(loc, BLOCK_SIZE) != 0) {
		throw new IllegalArgumentException("Offset must be at block boundary");
	}
	final long index = Long.divideUnsigned(loc, BLOCK_SIZE);
	return blocks.computeIfAbsent(index, absent -> new byte[BLOCK_SIZE]);
}
/** /**
* Copy a range of data from the semisparse array into a portion of the given byte array * Copy a range of data from the semisparse array into a portion of the given byte array
* *

View file

@ -273,13 +273,8 @@ public abstract class AbstractPcodeMachine<T> implements PcodeMachine<T> {
return suspended; return suspended;
} }
/** @Override
* Check for a p-code injection (override) at the given address public PcodeProgram getInject(Address address) {
*
* @param address the address, usually the program counter
* @return the injected program, most likely {@code null}
*/
protected PcodeProgram getInject(Address address) {
return injects.get(address); return injects.get(address);
} }

View file

@ -47,6 +47,8 @@ import ghidra.util.Msg;
* This class implements the control-flow logic of the target machine, cooperating with the p-code * This class implements the control-flow logic of the target machine, cooperating with the p-code
* program flow implemented by the {@link PcodeExecutor}. This implementation exists primarily in * program flow implemented by the {@link PcodeExecutor}. This implementation exists primarily in
* {@link #beginInstructionOrInject()} and {@link #advanceAfterFinished()}. * {@link #beginInstructionOrInject()} and {@link #advanceAfterFinished()}.
*
* @param <T> the type of variables in the emulator
*/ */
public class DefaultPcodeThread<T> implements PcodeThread<T> { public class DefaultPcodeThread<T> implements PcodeThread<T> {
@ -122,7 +124,7 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
* *
* @see PcodeMachine#addBreakpoint(Address, String) * @see PcodeMachine#addBreakpoint(Address, String)
*/ */
@PcodeUserop @PcodeUserop(functional = true)
public void emu_swi() { public void emu_swi() {
thread.swi(); thread.swi();
} }
@ -136,7 +138,7 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
* calls to this p-code op. Then, only if and when an erroneous inject is encountered will * calls to this p-code op. Then, only if and when an erroneous inject is encountered will
* the client be notified. * the client be notified.
*/ */
@PcodeUserop @PcodeUserop(functional = true)
public void emu_injection_err() { public void emu_injection_err() {
throw new InjectionErrorPcodeExecutionException(null, null); throw new InjectionErrorPcodeExecutionException(null, null);
} }
@ -148,6 +150,8 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
* <p> * <p>
* This executor checks for thread suspension and updates the program counter register upon * This executor checks for thread suspension and updates the program counter register upon
* execution of (external) branches. * execution of (external) branches.
*
* @param <T> the type of variables in the emulator
*/ */
public static class PcodeThreadExecutor<T> extends PcodeExecutor<T> { public static class PcodeThreadExecutor<T> extends PcodeExecutor<T> {
volatile boolean suspended = false; volatile boolean suspended = false;
@ -192,7 +196,7 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
} }
@Override @Override
protected void branchToAddress(Address target) { protected void branchToAddress(PcodeOp op, Address target) {
thread.branchToAddress(target); thread.branchToAddress(target);
} }
@ -249,7 +253,7 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
this.arithmetic = machine.arithmetic; this.arithmetic = machine.arithmetic;
PcodeExecutorState<T> sharedState = machine.getSharedState(); PcodeExecutorState<T> sharedState = machine.getSharedState();
PcodeExecutorState<T> localState = machine.createLocalState(this); PcodeExecutorState<T> localState = machine.createLocalState(this);
this.state = new ThreadPcodeExecutorState<>(sharedState, localState); this.state = createThreadState(sharedState, localState);
this.decoder = createInstructionDecoder(sharedState); this.decoder = createInstructionDecoder(sharedState);
this.library = createUseropLibrary(); this.library = createUseropLibrary();
@ -269,6 +273,18 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
this.reInitialize(); this.reInitialize();
} }
/**
 * Create the (multiplexed) state for this thread
 *
 * <p>
 * By default, this wraps the two given states in a new {@link ThreadPcodeExecutorState}.
 * Extensions may override it to supply a different implementation.
 *
 * <p>
 * NOTE(review): this looks to be invoked while the thread is still being constructed, so an
 * override should probably not depend on fields of the extending class; confirm against the
 * constructor.
 *
 * @param sharedState the shared part of the state
 * @param localState the thread-local part of the state
 * @return the complete state
 */
protected ThreadPcodeExecutorState<T> createThreadState(PcodeExecutorState<T> sharedState,
        PcodeExecutorState<T> localState) {
    final ThreadPcodeExecutorState<T> multiplexed =
        new ThreadPcodeExecutorState<>(sharedState, localState);
    return multiplexed;
}
/** /**
* A factory method for the instruction decoder * A factory method for the instruction decoder
* *
@ -465,8 +481,9 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
} }
} }
protected RegisterValue getContextAfterCommits() { public static RegisterValue getContextAfterCommits(Instruction instruction, long counter) {
PseudoInstruction pins = (PseudoInstruction) instruction; PseudoInstruction pins = (PseudoInstruction) instruction;
Language language = instruction.getPrototype().getLanguage();
try { try {
SleighParserContext parserCtx = (SleighParserContext) pins.getParserContext(); SleighParserContext parserCtx = (SleighParserContext) pins.getParserContext();
var procCtx = new DisassemblerContextAdapter() { var procCtx = new DisassemblerContextAdapter() {
@ -477,7 +494,8 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
if (!value.getRegister().isProcessorContext()) { if (!value.getRegister().isProcessorContext()) {
return; return;
} }
if (!address.equals(counter)) { if (address.getOffset() != counter &&
!Objects.equals(pins.getAddress(), address)) {
Msg.warn(this, "Context applied somewhere other than the counter."); Msg.warn(this, "Context applied somewhere other than the counter.");
return; return;
} }
@ -492,6 +510,10 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
} }
} }
/**
 * Get the context after commits for this thread's current instruction
 *
 * <p>
 * This is a convenience for {@link #getContextAfterCommits(Instruction, long)}, which it calls
 * with this thread's {@code instruction} and the offset of its {@code counter}.
 *
 * @return the value computed by the static overload
 */
protected RegisterValue getContextAfterCommits() {
return getContextAfterCommits(instruction, counter.getOffset());
}
/** /**
* Resolve a finished instruction, advancing the program counter if necessary * Resolve a finished instruction, advancing the program counter if necessary
*/ */

View file

@ -15,7 +15,9 @@
*/ */
package ghidra.pcode.emu; package ghidra.pcode.emu;
import ghidra.app.util.PseudoInstruction;
import ghidra.program.model.address.Address; import ghidra.program.model.address.Address;
import ghidra.program.model.lang.Language;
import ghidra.program.model.lang.RegisterValue; import ghidra.program.model.lang.RegisterValue;
import ghidra.program.model.listing.Instruction; import ghidra.program.model.listing.Instruction;
@ -23,6 +25,13 @@ import ghidra.program.model.listing.Instruction;
* A means of decoding machine instructions from the bytes contained in the machine state * A means of decoding machine instructions from the bytes contained in the machine state
*/ */
public interface InstructionDecoder { public interface InstructionDecoder {
/**
* Get the language for this decoder
*
* @return the language
*/
Language getLanguage();
/** /**
* Decode the instruction starting at the given address using the given context * Decode the instruction starting at the given address using the given context
* *
@ -33,7 +42,7 @@ public interface InstructionDecoder {
* @param context the disassembler/decode context * @param context the disassembler/decode context
* @return the instruction * @return the instruction
*/ */
Instruction decodeInstruction(Address address, RegisterValue context); PseudoInstruction decodeInstruction(Address address, RegisterValue context);
/** /**
* Inform the decoder that the emulator thread just branched * Inform the decoder that the emulator thread just branched

View file

@ -46,6 +46,8 @@ import ghidra.util.Msg;
* TODO: "State modifiers" are a feature of the older {@link Emulator}. They are crudely * TODO: "State modifiers" are a feature of the older {@link Emulator}. They are crudely
* incorporated into threads extended from this abstract class, so that they do not yet need to be * incorporated into threads extended from this abstract class, so that they do not yet need to be
* ported to this emulator. * ported to this emulator.
*
* @param <T> the type of variables in the emulator
*/ */
public class ModifiedPcodeThread<T> extends DefaultPcodeThread<T> { public class ModifiedPcodeThread<T> extends DefaultPcodeThread<T> {

View file

@ -234,7 +234,7 @@ public interface PcodeMachine<T> {
* <p> * <p>
* This will attempt to compile the given source against this machine's userop library and then * This will attempt to compile the given source against this machine's userop library and then
* inject it at the given address. The resulting p-code <em>replaces</em> that which would be * inject it at the given address. The resulting p-code <em>replaces</em> that which would be
* executed by decoding the instruction at the given address. The means the machine will not * executed by decoding the instruction at the given address. That means the machine will not
* decode, nor advance its counter, unless the Sleigh causes it. In most cases, the Sleigh will * decode, nor advance its counter, unless the Sleigh causes it. In most cases, the Sleigh will
* call {@link PcodeEmulationLibrary#emu_exec_decoded()} to cause the machine to decode and * call {@link PcodeEmulationLibrary#emu_exec_decoded()} to cause the machine to decode and
* execute the overridden instruction. * execute the overridden instruction.
@ -254,6 +254,14 @@ public interface PcodeMachine<T> {
*/ */
void inject(Address address, String source); void inject(Address address, String source);
/**
* Check for a p-code injection (override) at the given address
*
* @param address the address, usually the program counter
* @return the injected program, most likely {@code null}
*/
PcodeProgram getInject(Address address);
/** /**
* Remove the inject, if present, at the given address * Remove the inject, if present, at the given address
* *

View file

@ -15,8 +15,6 @@
*/ */
package ghidra.pcode.emu; package ghidra.pcode.emu;
import java.util.Objects;
import ghidra.app.util.PseudoInstruction; import ghidra.app.util.PseudoInstruction;
import ghidra.pcode.emulate.InstructionDecodeException; import ghidra.pcode.emulate.InstructionDecodeException;
import ghidra.pcode.exec.DecodePcodeExecutionException; import ghidra.pcode.exec.DecodePcodeExecutionException;
@ -58,6 +56,7 @@ public class SleighInstructionDecoder implements InstructionDecoder {
/** /**
* Construct a Sleigh instruction decoder * Construct a Sleigh instruction decoder
* *
* @see DefaultPcodeThread#createInstructionDecoder(PcodeExecutorState)
* @param language the language to decode * @param language the language to decode
* @param state the state containing the target program, probably the shared state of the p-code * @param state the state containing the target program, probably the shared state of the p-code
* machine. It must be possible to obtain concrete buffers on this state. * machine. It must be possible to obtain concrete buffers on this state.
@ -75,6 +74,11 @@ public class SleighInstructionDecoder implements InstructionDecoder {
Disassembler.getDisassembler(language, addrFactory, TaskMonitor.DUMMY, listener); Disassembler.getDisassembler(language, addrFactory, TaskMonitor.DUMMY, listener);
} }
/**
 * {@inheritDoc}
 *
 * <p>
 * This returns the decoder's {@code language} field.
 */
@Override
public Language getLanguage() {
return language;
}
protected boolean useCachedInstruction(Address address, RegisterValue context) { protected boolean useCachedInstruction(Address address, RegisterValue context) {
if (block == null) { if (block == null) {
return false; return false;
@ -100,7 +104,7 @@ public class SleighInstructionDecoder implements InstructionDecoder {
} }
@Override @Override
public Instruction decodeInstruction(Address address, RegisterValue context) { public PseudoInstruction decodeInstruction(Address address, RegisterValue context) {
lastMsg = DEFAULT_ERROR; lastMsg = DEFAULT_ERROR;
if (!useCachedInstruction(address, context)) { if (!useCachedInstruction(address, context)) {
parseNewBlock(address, context); parseNewBlock(address, context);
@ -116,7 +120,7 @@ public class SleighInstructionDecoder implements InstructionDecoder {
* However, if the cached instruction's context does not match the desired one, assume we're * However, if the cached instruction's context does not match the desired one, assume we're
* starting a new block. That check will have to wait for the decode call, though. * starting a new block. That check will have to wait for the decode call, though.
*/ */
if (block.getInstructionAt(address) == null) { if (block == null || block.getInstructionAt(address) == null) {
block = null; block = null;
} }
} }

View file

@ -149,8 +149,8 @@ public class ThreadPcodeExecutorState<T> implements PcodeExecutorState<T> {
* *
* <p> * <p>
* This will only clear the thread's local state, lest we invoke clear on the shared state for * This will only clear the thread's local state, lest we invoke clear on the shared state for
* every thread. Instead, if necessary, the machine should clear its local state then clear each * every thread. Instead, if necessary, the machine should clear its shared state then clear
* thread's local state. * each thread's local state.
*/ */
@Override @Override
public void clear() { public void clear() {

View file

@ -0,0 +1,37 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorStatePiece.JitBytesPcodeExecutorStateSpace;
import ghidra.pcode.exec.PcodeExecutorState;
import ghidra.program.model.address.AddressSpace;
/**
 * The run-time executor state for the JIT-accelerated p-code emulator
 *
 * <p>
 * This is a {@link PcodeExecutorState} whose values are byte arrays. In addition to the usual
 * state operations, it exposes the per-space state object, so that generated code can hold onto
 * it rather than look it up by {@link AddressSpace} on every access.
 *
 * @see JitDefaultBytesPcodeExecutorState
 * @see JitBytesPcodeExecutorStatePiece
 * @see JitBytesPcodeExecutorStateSpace
 */
public interface JitBytesPcodeExecutorState extends PcodeExecutorState<byte[]> {
/**
 * Get the state space for the given address space
 *
 * <p>
 * This exists for generated code to side-step the space lookup: the caller obtains the
 * {@link JitBytesPcodeExecutorStateSpace} once and then operates on it directly.
 *
 * @param space the address space
 * @return the state space
 */
JitBytesPcodeExecutorStateSpace getForSpace(AddressSpace space);
}

View file

@ -0,0 +1,131 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorStatePiece.JitBytesPcodeExecutorStateSpace;
import ghidra.pcode.exec.AbstractBytesPcodeExecutorStatePiece;
import ghidra.pcode.exec.BytesPcodeExecutorStateSpace;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Language;
import ghidra.program.model.pcode.PcodeOp;
/**
 * The bytes state piece backing {@link JitDefaultBytesPcodeExecutorState}
 *
 * <p>
 * Unlike an ordinary state piece, this one opens up its internals: a translated passage can fetch
 * the per-space objects and the underlying byte arrays ahead of time, and then access the state
 * through them directly.
 */
public class JitBytesPcodeExecutorStatePiece
        extends AbstractBytesPcodeExecutorStatePiece<JitBytesPcodeExecutorStateSpace> {

    /**
     * Create a state piece for the given language
     *
     * @param language the emulation target language
     */
    public JitBytesPcodeExecutorStatePiece(Language language) {
        super(language);
    }

    /**
     * The portion of the state belonging to one {@link AddressSpace}
     */
    public class JitBytesPcodeExecutorStateSpace extends BytesPcodeExecutorStateSpace<Void> {

        /**
         * Create the state for one address space
         *
         * @param language the emulation target language
         * @param space the address space
         * @param backing any extra read-through backing (not used)
         */
        public JitBytesPcodeExecutorStateSpace(Language language, AddressSpace space,
                Void backing) {
            super(language, space, backing);
        }

        /**
         * Fetch the byte array of the block (page) at the given offset, ahead of its use
         *
         * <p>
         * A translated passage will likely invoke this a number of times from its constructor,
         * once per directly-accessed variable (ram, register, or unique), i.e., one whose offset
         * is fixed. The generated code then reads and writes the variable's value in the
         * returned array itself, rather than going through the state.
         *
         * <p>
         * NOTE(review): this forwards the offset unchanged to {@code bytes.getDirect}. If
         * {@code bytes} is a {@link ghidra.generic.util.datastruct.SemisparseByteArray}, that
         * method rejects any offset that is not on a block boundary, so callers would have to
         * pass the block's starting offset; confirm.
         *
         * @param offset the {@link Address#getOffset() offset} within this address space.
         * @return the byte array for the block
         */
        public byte[] getDirect(long offset) {
            final byte[] block = bytes.getDirect(offset);
            return block;
        }

        /**
         * Read a variable out of this (pre-fetched) state space
         *
         * <p>
         * For indirect memory variables, i.e., those whose offset is only known at run time, a
         * translated passage will likely call
         * {@link JitBytesPcodeExecutorStatePiece#getForSpace(AddressSpace, boolean)} once or
         * twice from its constructor to obtain the backing for each space involved. Such
         * variables usually arise from {@link PcodeOp#LOAD} and {@link PcodeOp#STORE} ops. At
         * run time, the generated code calls this method (and
         * {@link #write(long, byte[], int, int) write}) with the computed offset to access the
         * emulator's state.
         *
         * <p>
         * The read is performed with {@link Reason#EXECUTE_READ}.
         *
         * @param offset the offset (known at runtime)
         * @param size the size of the variable
         * @return the value of the variable as a byte array
         */
        public byte[] read(long offset, int size) {
            final byte[] value = read(offset, size, Reason.EXECUTE_READ);
            return value;
        }
    }

    /**
     * The space map for this piece, which makes a {@link JitBytesPcodeExecutorStateSpace} for
     * every {@link AddressSpace} that is needed
     */
    class JitBytesSpaceMap extends SimpleSpaceMap<JitBytesPcodeExecutorStateSpace> {
        @Override
        protected JitBytesPcodeExecutorStateSpace newSpace(AddressSpace space) {
            // There is no read-through backing for the JIT state, hence null
            return new JitBytesPcodeExecutorStateSpace(language, space, null);
        }
    }

    @Override
    protected AbstractSpaceMap<JitBytesPcodeExecutorStateSpace> newSpaceMap() {
        return new JitBytesSpaceMap();
    }

    /**
     * Not supported by this piece
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void clear() {
        throw new UnsupportedOperationException();
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * This override only widens the access to public, because the constructors of JIT-generated
     * classes must be able to call it.
     */
    @Override
    public JitBytesPcodeExecutorStateSpace getForSpace(AddressSpace space, boolean toWrite) {
        final JitBytesPcodeExecutorStateSpace forSpace = super.getForSpace(space, toWrite);
        return forSpace;
    }
}

View file

@ -0,0 +1,276 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodHandles.Lookup;
import java.util.EnumSet;
import org.objectweb.asm.ClassWriter;
import ghidra.pcode.emu.jit.analysis.*;
import ghidra.pcode.emu.jit.decode.JitPassageDecoder;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassageClass;
import ghidra.pcode.exec.PcodeExecutorState;
/**
* The Just-in-Time (JIT) translation engine that powers the {@link JitPcodeEmulator}.
*
* <p>
* This is the translation engine from "any" machine language into JVM bytecode. The same caveats
* that apply to interpretation-based p-code emulation apply to JIT-accelerated emulation: Ghidra
* must have a Sleigh specification for the emulation target language, there must be userop
* libraries (built-in or user-provided) defining any userops encountered during the course of
* execution, all dependent code must be loaded or stubbed out, etc.
*
* <p>
* A passage is decoded at a desired entry point using the {@link JitPassageDecoder}. This compiler
* then translates the passage into bytecode. It will produce a classfile which is then loaded and
* returned to the emulator (or other client). The provided class will have three principal methods,
* not counting getters: 1) The class initializer, which initializes static fields; 2) The
* constructor, which takes a thread and initializes instance fields, and 3) The
* {@link JitCompiledPassage#run(int) run} method, which comprises the actual translation. A static
* field {@code ENTRIES} describes each entry point generated by the compiler. To execute the
* passage starting at a given entry point, the emulation thread must retrieve the index of the
* appropriate entry (i.e., address and contextreg value), instantiate the class, and then invoke
* the run method, passing it the entry index. The translated passage will read variables from the
* thread's {@link JitBytesPcodeExecutorState state} as needed, perform the equivalent operations as
* expressed in the source p-code, and then write the resulting variables back into the state.
* Memory variables are treated similarly, but without scope-based optimizations. In this manner,
* execution of the translated passage produces exactly the same effect on the emulation state as
* interpretation of the same p-code passage. The run method returns the next entry point to execute
* or {@code null} when the emulator must look up the next entry point.
*
* <p>
* Translation of a passage takes place in distinct phases. See each respective class for details of
* its design and implementation:
*
* <ol>
* <li>Control Flow Analysis: {@link JitControlFlowModel}</li>
* <li>Data Flow Analysis: {@link JitDataFlowModel}</li>
* <li>Variable Scope Analysis: {@link JitVarScopeModel}</li>
* <li>Type Assignment: {@link JitTypeModel}</li>
* <li>Variable Allocation: {@link JitAllocationModel}</li>
* <li>Operation Elimination: {@link JitOpUseModel}</li>
* <li>Code Generation: {@link JitCodeGenerator}</li>
* </ol>
*
* <h2>Control Flow Analysis</h2>
* <p>
* Some rudimentary control flow analysis is performed during decode, but the output of decode is a
* passage, i.e., collection of <em>strides</em>, not basic blocks. The control flow analysis breaks
* each stride down into basic blocks at the p-code level. Note that a single instruction's p-code
* (as well as any user instrumentation on that instruction's address) may have complex control
* flow. Additionally, branches that leave an instruction preclude execution of its remaining
* p-code. Thus, p-code basic blocks do not coincide precisely with instruction-level basic blocks.
* See {@link JitControlFlowModel}.
*
* <h2>Data Flow Analysis</h2>
* <p>
* Most every following step consumes the control flow analysis. Data flow analysis interprets each
* basic block independently using an abstraction that produces a use-def graph. A varnode that is
* read before it is written produces a "missing" variable. Those missing variables are converted to
* <em>phi</em> nodes and later resolved during inter-block analysis. The graph is also able to
* consider aliasing, partial accesses, overlapping accesses, etc., by synthesizing operations to
* model those effects. See {@link JitDataFlowModel}.
*
* <h2>Variable Scope Analysis</h2>
* <p>
* Because accessing {@link PcodeExecutorState} is expensive (relative to accessing a JVM local
* variable), the translation seeks to minimize such accesses. This is generally not recommended for
* memory accesses, as there is no telling in multi-threaded applications whether a given memory
* variable is shared/volatile or not. However, for registers and uniques, we can allocate the
* variables as JVM locals. Then we only "birth" them (read them in) when they come into scope and
* "retire" them (write them out) when they leave scope. This analyzer determines which variables
* are in scope (alive) in which basic blocks. See {@link JitVarScopeModel}.
*
* <h2>Type Assignment</h2>
* <p>
* For those variables we allocate as JVM locals, we have to choose a type, because the JVM requires
* it. We have essentially 4 to choose from. (Though we could also choose a <em>reference</em> type,
* depending on the strategy we eventually choose for multi-precision arithmetic.) Those four are
* the JVM primitives: int, float, long, and double. For those more familiar with Java but not the
* JVM, the smaller integral primitives are all represented by JVM ints. The JVM does not permit
* type confusion, e.g., the application of float addition {@code FADD} to int variables. However,
* the emulation target may permit type confusion. (Those familiar with the constant 0x5f3759df may
* appreciate intentional type confusion.) When this happens, we must explicitly convert by calling,
* e.g., {@link Float#floatToRawIntBits(float)}, which is essentially just a bit cast. Nevertheless,
* we seek to reduce the number of such calls we encode into the translation. See
* {@link JitTypeModel}.
*
* <h2>Variable Allocation</h2>
* <p>
* Once we've decided the type of each use-def variable node, we allocate JVM locals and assign
* their types accordingly. To keep things simple and fast, we just allocate variables by varnode.
* Partial/overlapping accesses are coalesced to the containing varnode and cause the type to be a
* JVM int (to facilitate shifting and masking). Otherwise, types are assigned according to the most
* common use of the varnode, i.e., by taking a vote among the use-def variable nodes sharing that
* varnode. See {@link JitAllocationModel}.
*
* <h2>Operation Elimination</h2>
* <p>
* Each instruction typically produces several p-code ops, the outputs of which may not actually be
* used by any subsequent op. This analysis seeks to identify such p-code ops and remove them. Since
* many ISAs employ "flags," which are set by nearly every arithmetic instruction, such ops are
* incredibly common. Worse yet, their computation is very expensive, because the JVM does not have
* comparable flag registers, nor does it provide opcodes for producing comparable values. We have
* to emit the bit banging operations ourselves. Thus, performing this elimination stands to improve
* execution speed significantly. However, eliminating these operations may lead to confusing
* results if execution is interrupted and the state inspected by a user. The effects of the
* eliminated operations will be missing. Even though they do not (or should not) matter, the user
* may expect to see them. Thus, this step can be toggled by
* {@link JitConfiguration#removeUnusedOperations()}. See {@link JitOpUseModel}.
*
* <h2>Code Generation</h2>
* <p>
* For simplicity, we seek to generate JVM bytecode in the same order as the source p-code ops.
* There are several details given the optimizations informed by all the preceding analysis. For
* example, the transfer of control to the requested entry point, the placement of variable birth
* and retirement on control flow edges (including fall-through).... We take an object-oriented
* approach to the translation of each p-code op, the handling of each variable's allocation and
* access, the conversion of types, etc. This phase outputs the final classfile bytes, which are
* then loaded as a hidden class. See {@link JitCodeGenerator}.
*
* @implNote There are static fields in this class for configuring diagnostics. They are meant to be
* modified only temporarily by developers seeking to debug issues in the translation
* engine.
*/
public class JitCompiler {

    /**
     * The diagnostics a developer can switch on
     */
    public enum Diag {
        /** Print each passage (instructions and p-code ops) before it is translated */
        PRINT_PASSAGE,
        /** Print every basic block's contents (p-code) and the flows/branches among blocks */
        PRINT_CFM,
        /** Print every basic block's ops in SSA (sort of) form */
        PRINT_DFM,
        /** Print, for every basic block, the list of live variables */
        PRINT_VSM,
        /** Print every synthetic operation, e.g., catenation, subpiece, phi */
        PRINT_SYNTH,
        /** Print every op that was eliminated */
        PRINT_OUM,
        /** Turn on ASM's trace for every generated classfile */
        TRACE_CLASS,
        /** Write each generated {@code .class} file to disk for offline examination */
        DUMP_CLASS;
    }

    /**
     * The diagnostics currently switched on
     *
     * <p>
     * This should be empty in production.
     */
    public static final EnumSet<Diag> ENABLE_DIAGNOSTICS = EnumSet.noneOf(Diag.class);

    /**
     * An address offset to exclude from ASM's {@link ClassWriter#COMPUTE_MAXS} and
     * {@link ClassWriter#COMPUTE_FRAMES}.
     *
     * <p>
     * When ASM fails to compute frames and maxes automatically, it gives very little in the way
     * of diagnostics: Usually it just crashes with an NPE or an AIOOBE, and, worse, it does so
     * without outputting any of the classfile trace. As a workaround, a developer may find the
     * address of the passage seed that provokes the failure and set this variable to that
     * address's offset. ASM will then not attempt the computation, so it can at least print the
     * trace and dump the classfile to disk (provided those {@link Diag}nostics are enabled).
     *
     * <p>
     * With the trace/classfile in hand, set this back to -1 and add debug prints to the method
     * that crashes. That method is probably in the ASM library, so the prints will have to be
     * injected using your IDE / debugger. In Eclipse, set a "conditional breakpoint" whose
     * condition prints the value and returns false, so that execution continues. Sadly, this
     * still slows execution considerably, so you will want another conditional breakpoint to
     * catch the moment the troublesome passage is being translated. The most helpful thing to
     * print is probably the bytecode offset of each basic block as ASM processes it while
     * computing frames. After the crash, look up the last couple of those offsets in the dumped
     * classfile.
     */
    public static final long EXCLUDE_MAXS = -1L;

    /**
     * The configuration of the JIT emulator
     */
    private final JitConfiguration config;

    /**
     * Create a translator from p-code to bytecode.
     *
     * <p>
     * Generally, only the JIT emulator and its test suite should need this.
     *
     * @param config the configuration
     */
    public JitCompiler(JitConfiguration config) {
        this.config = config;
    }

    /**
     * Run the given diagnostic action only if its toggle is enabled
     *
     * @param toggle the diagnostic that must be present in {@link #ENABLE_DIAGNOSTICS}
     * @param action the output to produce
     */
    private static void whenEnabled(Diag toggle, Runnable action) {
        if (ENABLE_DIAGNOSTICS.contains(toggle)) {
            action.run();
        }
    }

    /**
     * Translate a passage using the given lookup
     *
     * <p>
     * The analyses are constructed in a fixed order (control flow, data flow, variable scope,
     * type, allocation, op use), each one given the results of those it depends on. Where a
     * diagnostic is enabled for a phase, its output is produced before moving on. Finally, the
     * code generator is constructed with all the results and asked to load the class.
     *
     * @param lookup a lookup that can access everything the passage may need, e.g., userop
     *            libraries. Likely, this should come from the emulator, which may be in a script.
     *            If you are unsure what to use here, use {@link MethodHandles#lookup()}. If you see
     *            errors about accessing stuff during the compilation, ensure everything the
     *            emulator needs is accessible from the method calling
     *            {@link MethodHandles#lookup()}.
     * @param passage the decoded passage to compile
     * @return the compiled class, not instantiated for any particular thread
     */
    public JitCompiledPassageClass compilePassage(Lookup lookup, JitPassage passage) {
        whenEnabled(Diag.PRINT_PASSAGE, () -> System.err.println(passage));

        final JitAnalysisContext context = new JitAnalysisContext(config, passage);

        final JitControlFlowModel cfm = new JitControlFlowModel(context);
        whenEnabled(Diag.PRINT_CFM, cfm::dumpResult);

        final JitDataFlowModel dfm = new JitDataFlowModel(context, cfm);
        whenEnabled(Diag.PRINT_DFM, dfm::dumpResult);

        final JitVarScopeModel vsm = new JitVarScopeModel(cfm, dfm);
        whenEnabled(Diag.PRINT_VSM, vsm::dumpResult);

        final JitTypeModel tm = new JitTypeModel(dfm);
        final JitAllocationModel am = new JitAllocationModel(context, dfm, vsm, tm);
        final JitOpUseModel oum = new JitOpUseModel(context, cfm, dfm, vsm);

        // The synthetic ops are only printed once all of the analyses above have been built
        whenEnabled(Diag.PRINT_SYNTH, dfm::dumpSynth);
        whenEnabled(Diag.PRINT_OUM, oum::dumpResult);

        final JitCodeGenerator generator =
            new JitCodeGenerator(lookup, context, cfm, dfm, vsm, tm, am, oum);
        return generator.load();
    }

    /**
     * Get the configuration this compiler was constructed with
     *
     * @return the configuration
     */
    public JitConfiguration getConfiguration() {
        return config;
    }
}

View file

@ -0,0 +1,52 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
/**
 * The tunable settings of a JIT-accelerated emulator.
 *
 * @param maxPassageInstructions The (soft) maximum number of instructions to decode per
 *            translated passage. A passage can consist of several control-flow connected basic
 *            blocks. The decoder will decode contiguous streams of instructions with
 *            fall-through (called <em>strides</em>), adding seeds where it encounters branches.
 *            It will not stop mid-stride, but checks the instruction count before proceeding to
 *            another seed. If it exceeds the max, it stops.
 * @param maxPassageOps The (soft) maximum number of p-code ops. This is similar to
 *            {@link #maxPassageInstructions}, but limits the number of p-code ops generated.
 *            <b>NOTE:</b> The JVM limits each method to 65,535 total bytes of bytecode. If this
 *            limit is exceeded, the ASM library throws an exception. When this happens, the
 *            compiler will retry the whole process, but with this configuration parameter
 *            halved.
 * @param maxPassageStrides The maximum number of strides to include.
 * @param removeUnusedOperations Some p-code ops produce outputs that are never used later. One
 *            common case is flags computed from arithmetic operations. If this option is
 *            enabled, the JIT compiler will remove those p-code ops.
 * @param emitCounters Causes the translator to emit a call to
 *            {@link JitPcodeThread#count(int, int)} at the start of each basic block.
 */
public record JitConfiguration(
        int maxPassageInstructions,
        int maxPassageOps,
        int maxPassageStrides,
        boolean removeUnusedOperations,
        boolean emitCounters) {

    /** The value of {@link #maxPassageInstructions()} in a default configuration */
    private static final int DEFAULT_MAX_PASSAGE_INSTRUCTIONS = 1000;
    /** The value of {@link #maxPassageOps()} in a default configuration */
    private static final int DEFAULT_MAX_PASSAGE_OPS = 5000;
    /** The value of {@link #maxPassageStrides()} in a default configuration */
    private static final int DEFAULT_MAX_PASSAGE_STRIDES = 10;

    /**
     * Construct a default configuration
     *
     * <p>
     * The defaults are 1000 instructions, 5000 p-code ops, and 10 strides per passage, with both
     * unused-operation removal and counter emission enabled.
     */
    public JitConfiguration() {
        this(DEFAULT_MAX_PASSAGE_INSTRUCTIONS, DEFAULT_MAX_PASSAGE_OPS,
            DEFAULT_MAX_PASSAGE_STRIDES, true, true);
    }
}

View file

@ -0,0 +1,66 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorStatePiece.JitBytesPcodeExecutorStateSpace;
import ghidra.pcode.emu.jit.analysis.JitDataFlowState;
import ghidra.pcode.exec.BytesPcodeArithmetic;
import ghidra.pcode.exec.DefaultPcodeExecutorState;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Language;
/**
 * The stock {@link JitBytesPcodeExecutorState}
 *
 * <p>
 * <b>NOTE</b>: Do not confuse this with {@link JitDataFlowState}. That one exists only while a
 * passage is being interpreted and analyzed for translation. This one is the concrete state of
 * the emulation target, exposed in special ways to the translation output. In particular, the
 * constructor of each translation is permitted direct access to some of this state's internals,
 * so that it can pre-fetch, e.g., backing arrays for direct memory access operations.
 *
 * <p>
 * Implementation-wise, this is a {@link DefaultPcodeExecutorState} whose single piece is a
 * {@link JitBytesPcodeExecutorStatePiece}.
 */
public class JitDefaultBytesPcodeExecutorState extends DefaultPcodeExecutorState<byte[]>
        implements JitBytesPcodeExecutorState {

    /**
     * Create a state for the given language
     *
     * <p>
     * The state wraps a new {@link JitBytesPcodeExecutorStatePiece} and uses the bytes
     * arithmetic obtained for the same language.
     *
     * @param language the emulation target language
     */
    public JitDefaultBytesPcodeExecutorState(Language language) {
        super(new JitBytesPcodeExecutorStatePiece(language),
            BytesPcodeArithmetic.forLanguage(language));
    }

    /**
     * Get the wrapped piece, cast to the type the constructor is known to have supplied
     *
     * @return the piece
     */
    protected JitBytesPcodeExecutorStatePiece getPiece() {
        return (JitBytesPcodeExecutorStatePiece) piece;
    }

    @Override
    public JitBytesPcodeExecutorStateSpace getForSpace(AddressSpace space) {
        JitBytesPcodeExecutorStatePiece bytesPiece = getPiece();
        // NOTE(review): the second argument presumably requests the space be created on
        // demand; confirm against JitBytesPcodeExecutorStatePiece.getForSpace
        return bytesPiece.getForSpace(space, true);
    }
}

View file

@ -0,0 +1,119 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import java.lang.reflect.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.reflect.TypeLiteral;
import org.objectweb.asm.ClassVisitor;
/**
 * Some utilities for generating type signatures, suitable for use with
 * {@link ClassVisitor#visitField(int, String, String, String, Object)}.
 *
 * <p>
 * <b>WARNING:</b> It seems to me, the internal representation of signatures as accepted by the ASM
 * API is not fixed from version to version. In the future, these utilities may need to be updated
 * to work with multiple versions, if the representation changes in a newer classfile format.
 * Hopefully, the upcoming classfile API will obviate the need for any of this.
 */
public enum JitJvmTypeUtils {
    ;

    /**
     * Get the internal name of a class as in {@link org.objectweb.asm.Type#getInternalName(Class)}.
     *
     * @param cls the class
     * @return the internal name
     */
    public static String classToInternalName(Class<?> cls) {
        return org.objectweb.asm.Type.getInternalName(cls);
    }

    /**
     * Presume the given type is a {@link Class} and get its internal name
     *
     * @param type the type
     * @return the internal name
     * @throws ClassCastException if the type is not actually a {@link Class}
     */
    public static String rawToInternalName(Type type) {
        return classToInternalName((Class<?>) type);
    }

    /**
     * Get the signature of the given wildcard type
     *
     * <ul>
     * <li>{@code sig(?) = *}</li>
     * <li>{@code sig(? super MyType) = -sig(MyType)}</li>
     * <li>{@code sig(? extends MyType) = +sig(MyType)}</li>
     * </ul>
     *
     * <p>
     * Only the first lower and first upper bound are considered.
     *
     * @param wt the type
     * @return the signature
     * @throws UnsupportedOperationException if the wildcard has both a lower bound and an upper
     *             bound other than {@link Object}
     */
    public static String wildToSignature(WildcardType wt) {
        Type lower = wt.getLowerBounds().length == 0 ? null : wt.getLowerBounds()[0];
        Type upper = wt.getUpperBounds()[0];
        if (lower == null && upper == Object.class) {
            return "*";
        }
        if (lower == null) {
            return "+" + typeToSignature(upper);
        }
        if (upper == Object.class) {
            return "-" + typeToSignature(lower);
        }
        throw new UnsupportedOperationException();
    }

    /**
     * Get the signature of the given type
     *
     * <p>
     * For the use case this supports, probably the best way to obtain a {@link Type} is via
     * {@link TypeLiteral}.
     *
     * <p>
     * As of the JVM 21, internal type signatures are derived as:
     *
     * <ul>
     * <li>{@code sig(my.MyType) = Lmy/MyType;}</li>
     * <li>{@code sig(my.MyType[]) = [sig(my.MyType)}</li>
     * <li>{@code sig(my.MyType<Yet, Another, ...>) = Lmy/MyType<sig(Yet)sig(Another)...>;}. Note
     * that the type arguments are simply concatenated. The signature grammar has no separator
     * between them.</li>
     * <li>Wildcard types as in {@link #wildToSignature(WildcardType)}</li>
     * <li>Type variables are not supported by these utilities</li>
     * </ul>
     *
     * <p>
     * A plain {@link Class} is rendered using its descriptor, so that primitives and arrays of
     * non-generic types (which reflection reports as a {@link Class}, e.g., {@code byte[].class})
     * come out as {@code I}, {@code [B}, etc., instead of being wrapped in {@code L...;}.
     *
     * @param type the type
     * @return the signature
     * @throws UnsupportedOperationException if the type, or a type nested in it, is of a kind
     *             not supported here, e.g., a type variable
     */
    public static String typeToSignature(Type type) {
        return switch (type) {
            // For an ordinary class, the descriptor is exactly "L" + internal name + ";"
            case Class<?> cls -> org.objectweb.asm.Type.getDescriptor(cls);
            case GenericArrayType arr -> "[" + typeToSignature(arr.getGenericComponentType());
            case ParameterizedType pt -> "L" + rawToInternalName(pt.getRawType()) + "<" +
                Stream.of(pt.getActualTypeArguments())
                        .map(JitJvmTypeUtils::typeToSignature)
                        .collect(Collectors.joining()) +
                ">;";
            case WildcardType wt -> wildToSignature(wt);
            default -> throw new UnsupportedOperationException("Unsupported type: " + type);
        };
    }
}

View file

@ -0,0 +1,849 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import java.math.BigInteger;
import java.util.*;
import java.util.stream.Collectors;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.util.PseudoInstruction;
import ghidra.pcode.emu.PcodeMachine;
import ghidra.pcode.emu.PcodeThread;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.BlockSplitter;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitDataFlowModel;
import ghidra.pcode.emu.jit.decode.JitPassageDecoder;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.gen.op.OpGen;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.exec.*;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressOverflowException;
import ghidra.program.model.lang.*;
import ghidra.program.model.listing.ContextChangeException;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.mem.ByteMemBufferImpl;
import ghidra.program.model.pcode.*;
import ghidra.program.util.ProgramContextImpl;
/**
* A selection of instructions decoded from an emulation target, the generated p-code ops, and
* associated metadata.
*
* <p>
* Note that the generated p-code ops include those injected by the emulator's client using
* {@link PcodeMachine#inject(Address, String)} and {@link PcodeThread#inject(Address, String)},
* which also includes breakpoints, i.e, {@link PcodeMachine#addBreakpoint(Address, String)}.
*
* @see JitPassageDecoder Passage decoding
*/
public class JitPassage extends PcodeProgram {
/**
 * Decide whether execution can continue to the next op after the given one
 *
 * <p>
 * Only conditional branches and non-branching ops can fall through. Note that for JIT purposes,
 * a {@link PcodeOp#CALL CALL} op <em>does not</em> fall through! For decompilation, it hints
 * that it's branching to a subroutine that <em>usually</em> returns back to the caller, but the
 * JIT compiler does not take that hint. 1) There's no guarantee it will actually return. 2) Even
 * if it did, it would be via a {@link PcodeOp#RETURN}, which is an <em>indirect</em> branch. An
 * indirect branch is not sufficient to join two strides in the same passage. Thus, we have
 * little to gain by falling through a call, and the more likely outcome is the JIT and/or ASM
 * library will eliminate the code following the call.
 *
 * @param op the op to consider
 * @return true if the op does or could fall through
 */
public static boolean hasFallthrough(PcodeOp op) {
    // A synthetic nop carries the UNIMPLEMENTED opcode, so it must be recognized by type
    // before the opcode is examined.
    if (op instanceof NopPcodeOp) {
        return true;
    }
    int opcode = op.getOpcode();
    return switch (opcode) {
        case PcodeOp.BRANCH, PcodeOp.BRANCHIND, PcodeOp.CALL, PcodeOp.CALLIND,
                PcodeOp.RETURN, PcodeOp.UNIMPLEMENTED -> false;
        // Everything else, CBRANCH included, may continue to the next op
        default -> true;
    };
}
/**
 * An address paired with a decode context value
 *
 * <p>
 * Because decode is sensitive to the contextreg value, we have to consider that visiting the
 * same address with a different context could produce a completely different stride. Thus, we
 * subsume the context value in a sense as part of the address when seeding the passage decoder,
 * when referring to the "location" of p-code ops, when exiting a translated passage, etc.
 *
 * <p>
 * Equality, hashing, and ordering all use the context value as a big integer together with the
 * address. Ordering gives precedence to the context value.
 */
public static final class AddrCtx implements Comparable<AddrCtx> {
    /**
     * An address-context pair for synthetic p-code ops
     *
     * <p>
     * This is currently used in probing an instruction (possibly instrumented) for fall
     * through, and in testing.
     */
    public static final AddrCtx NOWHERE = new AddrCtx(null, Address.NO_ADDRESS);

    /**
     * Derive the address-context pair from an instruction's context
     *
     * @param insCtx the context
     * @return the address and input decode context of the instruction whose context was given
     */
    public static AddrCtx fromInstructionContext(InstructionContext insCtx) {
        RegisterValue inCtx = getInCtx(insCtx);
        return new AddrCtx(inCtx, insCtx.getAddress());
    }

    /**
     * Derive the address-context pair from an instruction
     *
     * @param instruction the instruction
     * @return the instruction's address and input decode context
     */
    public static AddrCtx fromInstruction(Instruction instruction) {
        InstructionContext insCtx = instruction.getInstructionContext();
        return fromInstructionContext(insCtx);
    }

    /**
     * The contextreg value as a big integer
     *
     * <p>
     * This is 0 when the language does not have a context register
     */
    public final BigInteger biCtx;

    /**
     * The contextreg as a register value
     *
     * <p>
     * This is {@code null} when the language does not have a context register
     */
    public final RegisterValue rvCtx;

    /**
     * The address
     */
    public final Address address;

    /**
     * Construct an address-context pair
     *
     * @param ctx the contextreg value, or {@code null} if there is none
     * @param address the address, which must not be {@code null}
     */
    public AddrCtx(RegisterValue ctx, Address address) {
        if (ctx == null) {
            this.biCtx = BigInteger.ZERO;
        }
        else {
            this.biCtx = ctx.getUnsignedValue();
        }
        this.rvCtx = ctx;
        this.address = Objects.requireNonNull(address);
    }

    @Override
    public String toString() {
        return String.format("AddrCtx[ctx=%s,addr=%s]", rvCtx, address);
    }

    @Override
    public int hashCode() {
        return Objects.hash(biCtx, address);
    }

    @Override
    public boolean equals(Object obj) {
        return this == obj || (obj instanceof AddrCtx that &&
            biCtx.equals(that.biCtx) &&
            address.equals(that.address));
    }

    @Override
    public int compareTo(AddrCtx that) {
        // Context value takes precedence; the address only breaks ties
        int byCtx = biCtx.compareTo(that.biCtx);
        return byCtx != 0 ? byCtx : address.compareTo(that.address);
    }
}
/**
 * Extract the decode context value from an instruction context
 *
 * <p>
 * The value is read from the processor context's base context register.
 *
 * @param insCtx the context
 * @return the input decode context from the instruction whose context was given, or
 *         {@code null} if the base context register is {@link Register#NO_CONTEXT}
 */
protected static RegisterValue getInCtx(InstructionContext insCtx) {
    ProcessorContextView view = insCtx.getProcessorContext();
    Register base = view.getBaseContextRegister();
    return base == Register.NO_CONTEXT ? null : view.getRegisterValue(base);
}
/**
 * Extract the decode context value from an instruction
 *
 * <p>
 * This is a convenience that delegates to the overload taking the instruction's
 * {@link InstructionContext}.
 *
 * @param instruction the instruction
 * @return the input decode context from the instruction
 */
protected static RegisterValue getInCtx(Instruction instruction) {
    InstructionContext insCtx = instruction.getInstructionContext();
    return getInCtx(insCtx);
}
/**
* A branch in the p-code
*
* <p>
* Every branch records the op it comes {@link #from() from}. The implementations differ in how
* they describe where the branch goes.
*/
public interface Branch {
/**
* The op performing the branch
*
* @return the "from" op
*/
PcodeOp from();
/**
* Indicates whether this branch represents a fall-through case.
*
* <p>
* Note that the {@link #from()} may not be an actual branching p-code op when
* {@code isFall} is true. A "fall-through" branch happens in two cases. First, and most
* obvious, is to describe the fall-through case of a {@link PcodeOp#CBRANCH conditional
* branch}. Second is for a p-code op that immediately precedes the target of some other
* branch. That branch causes a split in basic blocks, and so to encode the fall through
* from that op into the basic block immediately after, a fall-through branch is added.
*
* <p>
* The default implementation returns false.
*
* @return true if this branch is the fall-through case.
*/
default boolean isFall() {
return false;
}
/**
* Get a string description of the branch target
*
* <p>
* The default implementation returns {@link #toString()}.
*
* @return the description
*/
default String describeTo() {
return toString();
}
}
/**
* A branch to another p-code op in the same passage
*
* <p>
* The {@link JitCodeGenerator} translates internal branches into JVM bytecodes for the
* equivalent branch to the translation of the target p-code op. Thus, we remain executing
* inside the {@link JitCompiledPassage#run(int) run} method. This branch type incurs the least
* run-time cost.
*
* <p>
* This is the only branch type in this group that declares an {@code isFall} component. Its
* accessor overrides the default {@link Branch#isFall()}.
*
* @param from see {@link #from()}
* @param to the target p-code op
* @param isFall see {@link #isFall()}
*/
public record IntBranch(PcodeOp from, PcodeOp to, boolean isFall) implements Branch {}
/**
* A branch to an address (and context value) not in the same passage
*
* <p>
* When execution encounters this branch, the {@link JitCompiledPassage#run(int) run} method
* sets the emulator's program counter and context to the {@link #to() branch target} and
* returns the appropriate entry point for further execution.
*
* <p>
* Note that this branch type is used by the decoder to track queued decode seeds as well.
* External branches that get decoded are changed into internal branches.
*
* @param from see {@link #from()}
* @param to the target address-context pair
*/
public record ExtBranch(PcodeOp from, AddrCtx to) implements Branch {}
/**
* A branch to a dynamic address
*
* <p>
* When execution encounters this branch, the {@link JitCompiledPassage#run(int) run} method
* will set the emulator's program counter to the computed address and its context to
* {@link #flowCtx()}, then return the appropriate entry point for further execution.
*
* <p>
* TODO: Some analysis may be possible to narrow the possible addresses to a known few and then
* treat this as several {@link IntBranch}es; however, I worry this is too expensive for what it
* gets us. This will be necessary if we are to JIT, e.g., a switch table.
*
* @param from see {@link #from()}
* @param flowCtx the decode context after the branch is taken. NOTE(review): presumably
* {@code null} when the language has no context register, as with {@link AddrCtx#rvCtx}.
* Confirm against the decoder.
*/
public record IndBranch(PcodeOp from, RegisterValue flowCtx) implements Branch {}
/**
* A "branch" representing an error
*
* <p>
* When execution encounters this branch, the {@link JitCompiledPassage#run(int) run} method
* throws an exception. This branch is used to encode error conditions that may not actually be
* encountered at run time. Some cases are:
*
* <ul>
* <li>An instruction decode error &mdash; synthesized as a {@link DecodeErrorPcodeOp}</li>
* <li>An {@link PcodeOp#UNIMPLEMENTED unimplemented} instruction</li>
* <li>A {@link PcodeOp#CALLOTHER call} to an undefined userop</li>
* </ul>
*
* <p>
* The decoder and translator may encounter such an error, but unless execution actually reaches
* the error, the emulator need not crash. Thus, we note the error and generate code in the
* translation that throws it only if execution actually reaches it.
*
* <p>
* Note that the {@link OpGen} for the specific p-code op generating the error will decide what
* exception type to throw.
*
* @param from see {@link #from()}
* @param message the error message for the exception
*/
public record ErrBranch(PcodeOp from, String message) implements Branch {}
/**
 * A {@link PcodeOp} that additionally remembers the address and decode context where it
 * occurred.
 *
 * <p>
 * There is a difference between {@link #at}'s {@link AddrCtx#address address} vs.
 * {@link #getSeqnum() seqnum}'s {@link SequenceNumber#getTarget() target}. The former is
 * determined by the {@link JitPassageDecoder} and applied to all p-code ops generated at that
 * address (and context value), including those from injected Sleigh. The latter is determined
 * by the {@link Instruction} (or injected {@link PcodeProgram}), which have less information
 * about their origins. There are also {@link DecodeErrorPcodeOp} and {@link NopPcodeOp}, which
 * are synthesized by the {@link JitPassageDecoder} without an instruction or inject. This
 * information is required for bookkeeping, esp., when updating the emulator's program counter
 * and decode context when a p-code op produces an unexpected run-time error.
 */
public static class DecodedPcodeOp extends PcodeOp {
    private final AddrCtx at;

    /**
     * Construct a new p-code op, decoded by the {@link JitPassageDecoder}
     *
     * @param at the address and context value where the op was produced
     * @param seqnum the p-code op sequence number
     * @param opcode the p-code opcode
     * @param inputs the input varnodes
     * @param output the output varnode, or {@code null} if none or not applicable
     */
    DecodedPcodeOp(AddrCtx at, SequenceNumber seqnum, int opcode, Varnode[] inputs,
            Varnode output) {
        super(seqnum, opcode, inputs, output);
        this.at = at;
    }

    /**
     * Re-write a p-code op including its address and context value
     *
     * <p>
     * Aside from {@link #at}, everything (sequence number, opcode, inputs, and output) is
     * taken from the given original p-code op.
     *
     * @param at the address and context value where the op was produced
     * @param original the original p-code op
     */
    public DecodedPcodeOp(AddrCtx at, PcodeOp original) {
        this(at, original.getSeqnum(), original.getOpcode(), original.getInputs(),
            original.getOutput());
    }

    /**
     * Get the address and context value where this op was produced
     *
     * @return the address-context pair
     */
    public AddrCtx getAt() {
        return at;
    }

    /**
     * Get the address where this op was produced
     *
     * @return the address
     */
    public Address getCounter() {
        return at.address;
    }

    /**
     * Get the decode context where this op was produced
     *
     * @return the decode context, i.e., {@link AddrCtx#rvCtx}
     */
    public RegisterValue getContext() {
        return at.rvCtx;
    }

    /**
     * Check if this op represents the start of an instruction
     *
     * <p>
     * If this p-code op was produced by an inject, this will return false! It only returns true
     * for an op that is genuinely the first op in the result of {@link Instruction#getPcode()}.
     * <b>WARNING:</b> This should <em>not</em> be used for branching purposes, because branches
     * to a given address are meant to target any injections there, too. Currently, this is used
     * only to count the number of instructions actually executed.
     *
     * @see JitBlock#instructionCount()
     * @see JitCompiledPassage#count(int, int)
     * @see JitPcodeThread#count(int, int)
     * @return true if this op is the first of an instruction
     */
    public boolean isInstructionStart() {
        SequenceNumber seq = getSeqnum();
        if (seq.getTime() != 0) {
            return false;
        }
        // The sequence target must agree with where the decoder says this op was produced
        return seq.getTarget().equals(at.address);
    }
}
/**
* A synthetic p-code op that represents a return from the {@link JitCompiledPassage#run(int)}
* method.
*
* <p>
* When execution encounters this op (and the corresponding {@link ExtBranch}), the emulator's
* program counter and context values are set to the {@link ExtBranch#to() branch target}, and
* the appropriate entry point is returned.
*
* <p>
* This is used in a few ways: The simplest, though perhaps not obvious, way is when the decoder
* encounters an existing entry point. We avoid re-translating the same instructions by forcing
* the stride to end. However, the last instruction in that stride would have fall through,
* causing dangling control flow. To mitigate that, we append a synthetic exit op to return the
* existing entry point. The emulator can then resume execution accordingly.
*
* <p>
* The next is even less obvious. When the emulation client (or user) injects Sleigh, a common
* mistake is to forget control flow. The decoder detects this when "falling through" does not
* actually advance the program counter. In this case, we append this synthetic op to exit the
* translated passage. While it still results in an endless loop (just like the
* interpretation-based emulator), it's easier to interrupt and diagnose when we exit the
* translation between each "iteration."
*
* <p>
* The last is a small hack: The decoder needs to know whether each instruction (possibly
* instrumented by an inject) falls through. To do this, it appends an exit op to the very end
* of the instruction's (and inject's) ops and performs rudimentary control flow analysis (see
* {@link BlockSplitter}). It then seeks a path from start to exit. If one is found, it has fall
* through. This "probe" op is <em>not</em> included in the decoded stride.
*/
public static class ExitPcodeOp extends PcodeOp {
/**
* Construct a synthetic exit op
*
* <p>
* The op is a {@link PcodeOp#BRANCH} with no output.
*
* @param at the address and context value to set on the emulator when exiting the
* {@link JitCompiledPassage#run(int)} method
*/
public ExitPcodeOp(AddrCtx at) {
// Only the address of "at" is encoded in the op: it serves as both the sequence number's
// target (at time 0) and the single, size-0 input varnode. The context value is not stored
// here.
super(new SequenceNumber(at.address, 0), PcodeOp.BRANCH, new Varnode[] {
new Varnode(at.address, 0) }, null);
}
}
/**
* A synthetic op representing the initial seed of a decoded passage.
*
* <p>
* Because we use a queue of {@link ExtBranch}es as the seed queue, and the initial seed has no
* real {@link Branch#from()}, we synthesize a {@link PcodeOp#BRANCH branch op} from the entry
* address to itself. This synthetic op is <em>not</em> included in the decoded stride.
*/
public static class EntryPcodeOp extends PcodeOp {
/**
* Construct the passage entry p-code op.
*
* @param entry the target address and decode context of the passage seed
*/
public EntryPcodeOp(AddrCtx entry) {
// The op itself is placed at NO_ADDRESS, sequence 0. Only its single, size-0 input varnode
// refers to the entry address. The context value of "entry" is not stored here.
super(Address.NO_ADDRESS, 0, PcodeOp.BRANCH, new Varnode[] {
new Varnode(entry.address, 0) });
}
}
/**
* A synthetic p-code op meant to encode "no operation"
*
* <p>
* P-code does not have a NOP opcode, because there's usually no reason to produce such. A NOP
* machine instruction just produces an empty list of p-code ops, denoting "no operation."
* However, for bookkeeping purposes in our JIT translator, we occasionally need some op to hold
* an important place, but that op needs to do nothing. We use this in two situations:
*
* <ul>
* <li>An instruction (possibly because of an inject) that does nothing. Yes, essentially a NOP
* machine instruction. Because another op may target this instruction, and {@link Branch}es
* need to target a p-code op, we synthesize a p-code "nop" to hold that position. The
* alternative is to figure out what op immediately follows the branch target, but such an op
* may not have been decoded, yet. It's easier just to synthesize the nop.</li>
* <li>A p-code branch to the end of an instruction. Most often a slaspec author that means to
* skip the remainder of an instruction will use {@code goto inst_next}; however, because of
* sub-table structuring and/or personal preferences, sometimes we see {@code goto <end>;} where
* {@code <end>} is at the end of the instruction, and thus, no p-code op actually follows it.
* We essentially have the same situation as the NOP machine instruction where we can either
* synthesize a placeholder nop, or else we have to figure out what op does (or will) actually
* follow the label.</li>
* </ul>
*
* <p>
* Since there is no NOP opcode, this op is constructed with the {@link PcodeOp#UNIMPLEMENTED}
* opcode and must be recognized by its type instead, as {@link #hasFallthrough(PcodeOp)} does.
*/
public static class NopPcodeOp extends DecodedPcodeOp {
/**
* Construct a synthetic p-code "nop"
*
* @param at the address-context pair where the op was generated
* @param seq the sequence where the nop is inserted. For machine-code NOP, this should be
* 0. For a branch to the end of an instruction, this should be the next sequence
* number (so that the branch targets this nop)
*/
public NopPcodeOp(AddrCtx at, int seq) {
// No inputs and no output; the opcode is just a stand-in
super(at, new SequenceNumber(at.address, seq), PcodeOp.UNIMPLEMENTED, new Varnode[] {},
null);
}
}
/**
* A synthetic p-code op denoting a decode error
*
* <p>
* The decoder may encounter several decode errors as it selects and decodes the passage. An
* instruction is selected because the JIT believes it <em>may</em> be executed by the emulator.
* (Predicting this and making good selections is a matter of further research.) Encountering a
* decode error along a possible path is not cause to throw an exception. However, if the
* emulator does in fact attempt to execute the bytes which it can't decode, then we do throw
* the exception. This p-code op is synthesized where such decode errors occur, and the
* translator will generate code that actually throws the exception. Note that the error message
* is placed in the corresponding {@link ErrBranch}.
*/
public static class DecodeErrorPcodeOp extends DecodedPcodeOp {
/**
* Construct a p-code op representing an instruction decode error.
*
* <p>
* The op has the {@link PcodeOp#UNIMPLEMENTED} opcode, sequence number 0 at the given address,
* no inputs, and no output.
*
* @param at the address and decode context where the error occurred
*/
public DecodeErrorPcodeOp(AddrCtx at) {
super(at, new SequenceNumber(at.address, 0), PcodeOp.UNIMPLEMENTED, new Varnode[] {},
null);
}
}
/**
* An instruction denoting a decode error
*
* <p>
* The Sleigh disassembler normally denotes this with a {@link PseudoInstruction} having an
* {@link InvalidPrototype}. We essentially do the same here, but with custom types that are
* simpler to identify. Additionally, the types contain additional information, e.g., the error
* message. We also need the prototype to produce a single {@link DecodeErrorPcodeOp}.
*/
public static class DecodeErrorInstruction extends PseudoInstruction {
/**
* The prototype for the decode error instruction
*/
static class DecodeErrorPrototype extends InvalidPrototype {
public DecodeErrorPrototype(Language language) {
super(language);
}
@Override
public PcodeOp[] getPcode(InstructionContext context, PcodeOverride override) {
return new PcodeOp[] {
new DecodeErrorPcodeOp(AddrCtx.fromInstructionContext(context)) };
}
}
/**
* An implementation of {@link ProcessorContext} to satisfy the requirements of the
* {@link PseudoInstruction}.
*
* <p>
* This need do little more than provide the decode context register value.
*/
static class DecodeErrorProcessorContext implements ProcessorContext {
private final Language language;
private final RegisterValue ctx;
public DecodeErrorProcessorContext(Language language, RegisterValue ctx) {
this.language = language;
this.ctx = ctx;
}
@Override
public Register getBaseContextRegister() {
return language.getContextBaseRegister();
}
@Override
public List<Register> getRegisters() {
return language.getRegisters();
}
@Override
public Register getRegister(String name) {
return language.getRegister(name);
}
@Override
public BigInteger getValue(Register register, boolean signed) {
if (register == language.getContextBaseRegister()) {
return signed ? ctx.getSignedValue() : ctx.getUnsignedValue();
}
return null;
}
@Override
public RegisterValue getRegisterValue(Register register) {
if (register == language.getContextBaseRegister()) {
return ctx;
}
return null;
}
@Override
public boolean hasValue(Register register) {
return register == language.getContextBaseRegister();
}
@Override
public void setValue(Register register, BigInteger value)
throws ContextChangeException {
}
@Override
public void setRegisterValue(RegisterValue value)
throws ContextChangeException {
}
@Override
public void clearRegister(Register register) throws ContextChangeException {
}
}
private final String message;
/**
* Construct an instruction to indicate a decode error
*
* @param language the emulation target langauge
* @param address the address where decode was attempted
* @param ctx the input decode context
* @param message a message for the {@link DecodePcodeExecutionException} if the emulator
* attempts to execute this instruction
* @throws AddressOverflowException never
*/
public DecodeErrorInstruction(Language language, Address address, RegisterValue ctx,
String message) throws AddressOverflowException {
super(address, new DecodeErrorPrototype(language),
new ByteMemBufferImpl(address, new byte[] { 0 }, language.isBigEndian()),
new DecodeErrorProcessorContext(language, ctx));
this.message = message;
}
/**
* Get the message for the exception, should this instruction be "executed"
*
* @return the error message
*/
public String getMessage() {
return message;
}
}
/**
 * Build the placeholder instruction for a location that failed to decode
 *
 * <p>
 * The resulting instruction will produce a single {@link DecodeErrorPcodeOp}. The translator
 * will generate code that throws a {@link DecodePcodeExecutionException} should execution reach
 * it.
 *
 * @param language the emulation target language
 * @param address the address where decode was attempted
 * @param ctx the input decode context
 * @param message a message for the {@link DecodePcodeExecutionException}
 * @return the new "instruction"
 */
public static DecodeErrorInstruction decodeError(Language language, Address address,
        RegisterValue ctx, String message) {
    final DecodeErrorInstruction errorInstruction;
    try {
        errorInstruction = new DecodeErrorInstruction(language, address, ctx, message);
    }
    catch (AddressOverflowException e) {
        // The constructor documents this as never thrown, so treat it as a broken invariant
        throw new AssertionError(e);
    }
    return errorInstruction;
}
private final List<Instruction> instructions;
private final AddrCtx entry;
private final PcodeUseropLibrary<Object> decodeLibrary;
private final Map<PcodeOp, Branch> branches;
private final Map<PcodeOp, AddrCtx> entries;
private final Register contextreg;
private final ProgramContextImpl defaultContext;
/**
 * Construct a new passage
 *
 * <p>
 * If the language has a context register, a default program context is also created and the
 * language's context settings are applied to it. Otherwise, no default context is kept.
 *
 * @param language the translation source language, i.e., the emulation target language. See
 *            {@link #getLanguage()}
 * @param entry see {@link #getEntry()}
 * @param code the p-code ops, grouped by stride. Within each stride, they are ordered as
 *            decoded and produced by their instructions. The strides are sorted by seed, with
 *            precedence to the decode context value. See {@link #getInstructions()}. See
 *            {@link #getCode()}.
 * @param decodeLibrary see {@link #getDecodeLibrary()}
 * @param instructions see {@link #getInstructions()}
 * @param branches see {@link #getBranches()}
 * @param entries see {@link #getOpEntry(PcodeOp)}
 */
public JitPassage(SleighLanguage language, AddrCtx entry, List<PcodeOp> code,
		PcodeUseropLibrary<Object> decodeLibrary, List<Instruction> instructions,
		Map<PcodeOp, Branch> branches, Map<PcodeOp, AddrCtx> entries) {
	super(language, code, decodeLibrary.getSymbols(language));
	this.instructions = instructions;
	this.entry = entry;
	this.entries = entries;
	this.branches = branches;
	this.decodeLibrary = decodeLibrary;

	this.contextreg = language.getContextBaseRegister();
	if (contextreg == Register.NO_CONTEXT) {
		// The language has no context register, so there are no defaults to record
		defaultContext = null;
	}
	else {
		ProgramContextImpl context = new ProgramContextImpl(language);
		language.applyContextSettings(context);
		defaultContext = context;
	}
}
/**
* Get all of the instructions in the passage.
*
* <p>
* These are grouped by stride. Within each stride, the instructions are listed in decode order.
* The strides are ordered by seed address-context pair, with context value taking precedence.
*
* <p>
* NOTE: This returns the same list instance given at construction, not a copy.
*
* @return the list of instructions
*/
public List<Instruction> getInstructions() {
return instructions;
}
/**
* {@inheritDoc}
*
* <p>
* Conventionally, the first instruction of the program is the entry. Note this might
* <em>not</em> be the initial seed. If the decoded passage contains a branch to an address
* preceding the seed, and a stride results from it, then that stride's p-code will occur
* earlier in the list. This is not a problem. The code generator will export many entry points,
* and the seed must be among them. "Entering" at that seed is achieved using a switch table at
* the start of the generated bytecode.
*/
@Override
public List<PcodeOp> getCode() {
// Pure delegation: the override adds no behavior of its own, only the documentation above
return super.getCode();
}
/**
* Get the initial seed of this passage.
*
* <p>
* This is informational only. It should be used in naming things and/or in diagnostics. The
* value is the one given at construction.
*
* @return the address-context pair
*/
public AddrCtx getEntry() {
return entry;
}
/**
* Get the userop library that was used during decode of the passage
*
* <p>
* This often wraps the emulator's userop library. Downstream components, namely the
* {@link JitDataFlowModel}, will need this when translating {@link PcodeOp#CALLOTHER calls} to
* userops.
*
* <p>
* This is the library instance given at construction; its symbols for the passage's language
* were also passed to the superclass constructor.
*
* @return the library
*/
public PcodeUseropLibrary<Object> getDecodeLibrary() {
return decodeLibrary;
}
/**
* Get all of the (non-fall-through) branches in the passage
*
* <p>
* NOTE: This returns the same map instance given at construction, not a copy.
*
* @return the branches, keyed by {@link Branch#from()}.
*/
public Map<PcodeOp, Branch> getBranches() {
return branches;
}
/**
 * Render the passage for diagnostics: one line per instruction giving its input context,
 * address, and text, followed by the formatted p-code.
 */
@Override
public String toString() {
	String listing = instructions.stream()
			.map(ins -> "(" + getInCtx(ins) + ") " + ins.getAddressString(false, true) + " " +
				ins.toString())
			.collect(Collectors.joining("\n "));
	return "<" + getClass().getSimpleName() + ":\n " + listing + "\n>\n" + format(true);
}
/**
* Check if a given p-code op is the first of an instruction.
*
* <p>
* <b>NOTE</b>: If an instruction is at an address with an inject, then the first op produced by
* the inject is considered the "entry" to the instruction. This is to ensure that any control
* flow to the injected address executes the injected code, not just the instruction's code.
*
* <p>
* This is a plain lookup in the entries map given at construction.
*
* @param op the op to check.
* @return the address-context pair that generated the op, if it is the first there, or
* {@code null}
*/
public AddrCtx getOpEntry(PcodeOp op) {
return entries.get(op);
}
/**
 * Get the error message for a p-code op that is known to cause an error, e.g., an
 * unimplemented instruction.
 *
 * <p>
 * The op must have a branch record, and that record must be an {@link ErrBranch}.
 *
 * @param op the p-code op causing the error
 * @return the message for the error caused
 * @throws AssertionError if the op has no branch record, or its record is not an
 *             {@link ErrBranch}
 */
public String getErrorMessage(PcodeOp op) {
	Branch branch = branches.get(op);
	if (branch == null) {
		throw new AssertionError("No branch record for op: " + op);
	}
	if (branch instanceof ErrBranch err) {
		return err.message;
	}
	throw new AssertionError("Wrong branch type " + branch + " for op: " + op);
}
}

View file

@ -0,0 +1,413 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import java.lang.invoke.MethodHandles.Lookup;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.objectweb.asm.MethodTooLargeException;
import ghidra.pcode.emu.PcodeEmulator;
import ghidra.pcode.emu.PcodeThread;
import ghidra.pcode.emu.jit.JitPassage.AddrCtx;
import ghidra.pcode.emu.jit.analysis.JitDataFlowModel;
import ghidra.pcode.emu.jit.analysis.JitDataFlowUseropLibrary;
import ghidra.pcode.emu.jit.decode.JitPassageDecoder;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage.EntryPointPrototype;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassageClass;
import ghidra.pcode.emu.jit.var.JitVal;
import ghidra.pcode.exec.*;
import ghidra.pcode.exec.PcodeExecutorStatePiece.Reason;
import ghidra.program.model.address.AddressRange;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Language;
import ghidra.program.model.pcode.Varnode;
import ghidra.util.Msg;
/**
* An extension of {@link PcodeEmulator} that applies Just-in-Time (JIT) translation to accelerate
* execution.
*
* <p>
* This is meant as a near drop-in replacement for the class it extends. Aside from some additional
* configuration, and some annotations you might add to a {@link PcodeUseropLibrary}, if applicable,
* you can simply replace {@code new PcodeEmulator()} with {@code new JitPcodeEmulator(...)}.
*
* <h1>A JIT-Accelerated P-code Emulator for the Java Virtual Machine</h1>
*
* <p>
* There are two major tasks to achieving JIT-accelerated p-code emulation: 1) The translation of
* p-code to a suitable target's machine language, and 2) The selection, decoding, and cache
* management of passages of machine code translations. For our purposes, the target language is JVM
* bytecode, which introduces some restrictions which make the translation process substantially
* different than targeting native machine language.
*
* <h2>Terminology</h2>
*
* <p>
* Because of the potential for confusion of terms with similar meanings from similar disciplines,
* and to distinguish our particular use of the terms, we establish some definitions up front:
*
* <ul>
*
* <li><b>Basic block</b>: A block of <em>p-code</em> ops for which there are no branches into or
* from, except at its top and bottom. Note that this definition pertains only to p-code ops in the
* same passage. Branches into a block from ops generated elsewhere in the translation source need
* not be considered. Note also that p-code basic blocks might not coincide with machine-code basic
* blocks.</li>
*
* <li><b>Bytecode</b>: Shorthand for "JVM bytecode." Others sometimes use this to mean any machine
* code, but for us "bytecode" only refers to the JVM's machine code.</li>
*
* <li><b>Decode context</b>: The input contextreg value for decoding an instruction. This is often
* paired with an address to seed passages, identify an instruction's "location," and identify an
* entry point.</li>
*
* <li><b>Emulation host</b>: The machine or environment on which the emulation target is being
* hosted. This is usually also the <b>translation target</b>. For our purposes, this is the JVM,
* often the same JVM executing Ghidra.</li>
*
* <li><b>Emulation target</b>: The machine being emulated. As opposed to the <b>translation
* target</b> or <b>emulation host</b>. While this can include many aspects of a target platform, we
* often just mean the Instruction Set Architecture (ISA, or <b>language</b>) of the machine.</li>
*
* <li><b>Entry point</b>: An address (and contextreg value) by which execution may enter a passage.
* In addition to the decode seed, the translator may expose many entries into a given passage,
* usually at branch targets or the start of each basic block coinciding with an instruction.</li>
*
* <li><b>Instruction</b>: A single machine-code instruction.</li>
*
* <li><b>Machine code</b>: The sequence of bytes and/or decoded instructions executed by a
* machine.</li>
*
* <li><b>Passage</b>: A collection of strides connected by branches. Often each stride begins at
* the target of some branch in another stride.</li>
*
* <li><b>P-code</b>: An intermediate representation used by Ghidra in much of its analysis and
* execution modeling. For our purposes, we mean "low p-code," which is the common language into
* which the source machine code is translated before final translation to bytecode.</li>
*
* <li><b>P-code op</b>: A single p-code operation. A single instruction usually generates several
* p-code ops.</li>
*
* <li><b>Stride</b>: A contiguous sequence of instructions (and their emitted p-code) connected by
* fall-through. Note that conditional branches may appear in the middle of the stride. So long as
* fall-through is possible, the stride may continue.</li>
*
* <li><b>Translation source</b>: The machine code of the <b>emulation target</b> that is being
* translated and subsequently executed by the <b>emulation host</b>.</li>
*
* <li><b>Translation target</b>: The target of the JIT translation, usually the <b>emulation
* host</b>. For our purposes, this is always JVM bytecode.</li>
*
* <li><b>Varnode</b>: The triple (space,offset,size) giving the address and size of a variable in
* the emulation target's machine state. This is distinct from a variable node (see {@link JitVal})
* in the {@link JitDataFlowModel use-def} graph. The name "{@link Varnode}" is an unfortunate
* inheritance from the Ghidra API, where they <em>can</em> represent genuine variable nodes in the
* "high p-code" returned by the decompiler. However, the emulator consumes the "low p-code" where
* varnodes are mere triples, which is how we use the term.</li>
*
* </ul>
*
* <h2>Just-in-Time Translation</h2>
* <p>
* For details of the translation process, see {@link JitCompiler}.
*
* <h2>Translation Cache</h2>
* <p>
* This class, aside from overriding and replacing the state and thread objects with respective
* extensions, manages a part of the translation cache. For reasons discussed in the translation
* section, there are two levels of caching. Once a passage is translated into a classfile, it must
* be loaded as a class and then instantiated for the thread executing it. Thus, at the machine (or
* emulator) level, each translated passage's class is cached. Then, each thread caches its instance
* of that class. When a thread encounters an address (and contextreg value) that it has not yet
* translated, it requests that the emulator perform that translation. The details of this check are
* described in {@link #getEntryPrototype(AddrCtx, JitPassageDecoder)} and
* {@link JitPcodeThread#getEntry(AddrCtx)}.
*/
public class JitPcodeEmulator extends PcodeEmulator {
/**
* The compiler which translates passages into JVM classes
*/
protected final JitCompiler compiler;
/**
* A lookup to access non-public things
*/
private final Lookup lookup;
/**
* This emulator's cache of passage translations, incl. all entry points.
*
* <p>
* TODO: Invalidation of entries. One possible complication is any thread may still have an
* instance of one, and could possibly be executing it. Perhaps this could be a weak hash map,
* and they'll stay alive by virtue of the instances pointing to their classes? Still, we might
* like to impose a total size max, which would have to be implemented among the threads. Other
* reasons we may need to invalidate include:
*
* <ol>
* <li>Self-modifying code (we'll probably want to provide a configuration toggle given how
* expensive that may become).</li>
* <li>Changes to the memory map. At the moment, however, the p-code emulator does not provide a
* memory management unit (MMU).</li>
* <li>Addition of a new inject by the user or script. This one's actually pretty likely. For
* now, we might just document that injects should not be changed once execution starts.</li>
* </ol>
*/
protected final Map<AddrCtx, CompletableFuture<EntryPointPrototype>> codeCache =
new HashMap<>();
/**
* Create a JIT-accelerated p-code emulator
*
* <p>
* A new {@link JitCompiler} is created from the given configuration.
*
* @param language the emulation target language
* @param config configuration options for this emulator
* @param lookup a lookup in case the emulator (or its target) needs access to non-public
* elements, e.g., to access a nested {@link PcodeUseropLibrary}.
*/
public JitPcodeEmulator(Language language, JitConfiguration config, Lookup lookup) {
super(language);
this.compiler = new JitCompiler(config);
this.lookup = lookup;
}
@Override
protected PcodeExecutorState<byte[]> createSharedState() {
// Use the JIT-specific state class; JitPcodeThread#createThreadState casts to this type
return new JitDefaultBytesPcodeExecutorState(language);
}
@Override
protected PcodeExecutorState<byte[]> createLocalState(PcodeThread<byte[]> thread) {
// Use the JIT-specific state class; the thread argument is not used here
return new JitDefaultBytesPcodeExecutorState(language);
}
@Override
protected JitPcodeThread createThread(String name) {
// Threads of this emulator are always JitPcodeThread instances bound to this machine
return new JitPcodeThread(name, this);
}
@Override
public JitPcodeThread newThread() {
// Narrows the return type. NOTE(review): the cast presumably holds because the superclass
// creates threads via createThread(String) -- confirm
return (JitPcodeThread) super.newThread();
}
@Override
public JitPcodeThread newThread(String name) {
// Narrows the return type. NOTE(review): the cast presumably holds because the superclass
// creates threads via createThread(String) -- confirm
return (JitPcodeThread) super.newThread(name);
}
/**
* {@inheritDoc}
*
* <p>
* Userops can be optimized by the JIT translator under certain circumstances. To read more, see
* {@link JitDataFlowUseropLibrary}. DO NOT extend that library. The internals use it to wrap
* the library you provide here, but its documentation describes when and how the JIT translator
* optimizes invocations to your userops.
*
* <p>
* <b>WARNING</b>: Userops that accept floating point types via direct invocation should be
* careful that the sizes match exactly. That is, if you pass a {@code float} argument to a
* {@code double} parameter, you may have problems. This <em>does not</em> imply a conversion of
* floating point type. Instead, it will simply zero-fill the upper bits (as if zero-extending an
* integer) and reinterpret the resulting bits as a double. This is almost certainly
* <em>not</em> what you want. Until/unless we resolve this, the userop implementor must accept
* the proper types. It's possible multiple versions of the userop must be provided (overloading
* is not supported) to accept types of various sizes.
*/
@Override
protected PcodeUseropLibrary<byte[]> createUseropLibrary() {
// Pure delegation: the override adds no behavior of its own, only the documentation above
return super.createUseropLibrary();
}
/**
 * Check if the emulator already has a finished translation for a given entry point.
 *
 * <p>
 * This is used by the decoder to detect if it should end a stride before reaching its natural
 * end (i.e., a non-fall-through instruction.) This was a design decision to reduce
 * re-translation of the same machine code. Terminating the stride will cause execution to exit
 * the translated passage, but it will then immediately enter the existing translated passage.
 *
 * <p>
 * A placeholder whose translation is still in progress does not count. The check is
 * {@link CompletableFuture#isDone()}, so a translation that completed exceptionally does
 * count.
 *
 * @param pcCtx the program counter and contextreg value to check
 * @return true if the emulator has a translation which can be entered at the given pcCtx.
 */
public boolean hasEntryPrototype(AddrCtx pcCtx) {
	/*
	 * TODO: Investigate ignoring synchronization and instead catching the CME. This would be
	 * to avoid locking on every instruction decode. If we think there's not an entry, and it
	 * turns out we just lost a race, it's little loss.
	 *
	 * I don't think in the grand scheme of things, this is the most expensive operation of the
	 * translation. Nevertheless, it'll be hit a lot, so worth investigating.
	 */
	synchronized (codeCache) {
		CompletableFuture<EntryPointPrototype> cached = codeCache.get(pcCtx);
		if (cached == null) {
			return false;
		}
		return cached.isDone();
	}
}
/**
 * Translate a new passage starting at the given seed.
 *
 * <p>
 * Decoding starts with the configured maximum number of p-code ops per passage. If the
 * compiler reports that the generated method is too large, the limit is halved and the
 * passage is decoded and compiled again. This repeats until compilation succeeds or the limit
 * reaches zero.
 *
 * <p>
 * Note the compiler must provide an entry to the resulting passage at the requested seed. It
 * and any additional entry points are placed into the code cache. Each thread executing the
 * passage must still create (and ought to cache) an instance of the translation.
 *
 * @param pcCtx the seed address and contextreg value for decoding and selecting a passage
 * @param decoder the passage decoder, provided by the thread
 * @return the class that is the translation of the passage, and information about its entry
 *         points.
 * @throws AssertionError if no limit greater than zero yields a method small enough to
 *             compile. The last {@link MethodTooLargeException}, if any, is attached as the
 *             cause.
 */
protected JitCompiledPassageClass compileWithMaxOpsBackoff(AddrCtx pcCtx,
		JitPassageDecoder decoder) {
	final int configuredMaxOps = getConfiguration().maxPassageOps();
	// Kept so that the final failure can report why the last attempt was rejected
	MethodTooLargeException lastFailure = null;
	for (int maxOps = configuredMaxOps; maxOps > 0; maxOps >>= 1) {
		JitPassage decoded = decoder.decodePassage(pcCtx, maxOps);
		try {
			return compiler.compilePassage(lookup, decoded);
		}
		catch (MethodTooLargeException e) {
			lastFailure = e;
			Msg.warn(this, "Method too large for " + pcCtx + " with maxOps=" + maxOps +
				". Retrying with half.");
		}
	}
	/*
	 * This would be caused by an exceptionally large stride, perhaps with a good bit of
	 * instrumentation.
	 *
	 * TODO: If this happens, we'll need to be willing to stop decoding mid-stride. I think it's
	 * easily doable, as we already do this when we hit an address with an existing entry point.
	 *
	 * NOTE: We still need to treat each instruction, along with any instrumentation on it, as
	 * an atomic unit. I can't imagine a single instruction maxing out the Java method size,
	 * though.
	 */
	throw new AssertionError("Could not translate a passage at " + pcCtx +
		" (configured maxPassageOps=" + configuredMaxOps + ")", lastFailure);
}
/**
 * Get the entry prototype for a given address and contextreg value.
 *
 * <p>
 * An <b>entry prototype</b> is a class representing a translated passage and an index
 * identifying the point at which to enter the passage. The compiler numbers each entry point it
 * generates and provides those indices via a static field in the output class. Those entry
 * point indices are entered into the code cache for each translated passage. If no entry point
 * exists for the requested address and contextreg value, the emulator will decode and translate
 * a new passage at the requested seed.
 *
 * <p>
 * It's a bit odd to take the thread's decoder for a machine-level thing; however, all thread
 * decoders ought to have the same behavior. The particular thread's decoder will have better
 * cached instruction block state for decoding in the vicinity of its past execution, though.
 *
 * <p>
 * The first caller to request an absent entry installs a placeholder future and performs the
 * translation on its own thread. Other callers requesting the same entry block on that future.
 * If translation fails, the future is completed exceptionally and left in the cache, so later
 * requests for the same entry rethrow the same failure.
 *
 * @param pcCtx the counter and decoder context
 * @param decoder the thread's decoder needing this entry point prototype
 * @return the entry point prototype
 * @throws AssertionError if the calling thread is interrupted while waiting. The thread's
 *             interrupt status is restored before throwing.
 * @see JitPcodeThread#getEntry(AddrCtx)
 */
public EntryPointPrototype getEntryPrototype(AddrCtx pcCtx, JitPassageDecoder decoder) {
	/*
	 * NOTE: It is possible for a race condition, still, if (very likely) the passage provides
	 * multiple entry points. It's not ideal, but still correct, I think, if this happens.
	 */
	CompletableFuture<EntryPointPrototype> proto;
	boolean wasAbsent;
	synchronized (codeCache) {
		proto = codeCache.get(pcCtx);
		wasAbsent = proto == null;
		if (wasAbsent) {
			proto = new CompletableFuture<>();
			codeCache.put(pcCtx, proto);
			// Won't know to put other entry points, yet
		}
	}
	/*
	 * TODO: I'm not sure it makes sense to do this computation without the lock.
	 *
	 * On the one hand, it allows threads to avoid stalling on every translation, and instead
	 * only on translations for the same entry point. However, if we do keep the lock, then we
	 * can avoid the race condition on alternative entry points.
	 */
	if (wasAbsent) {
		/*
		 * Go ahead and use this thread instead of spawning another, because this one can't
		 * proceed until compilation is completed, anyway.
		 */
		try {
			JitCompiledPassageClass compiled = compileWithMaxOpsBackoff(pcCtx, decoder);
			synchronized (codeCache) {
				for (Entry<AddrCtx, EntryPointPrototype> ent : compiled.getBlockEntries()
						.entrySet()) {
					if (ent.getKey().equals(pcCtx)) {
						proto.complete(ent.getValue());
					}
					else {
						codeCache.put(ent.getKey(),
							CompletableFuture.completedFuture(ent.getValue()));
					}
				}
			}
			if (!proto.isDone()) {
				/*
				 * The compiler is required to export an entry at the requested seed. If it did
				 * not, fail loudly. Otherwise, this thread and every waiter would block forever
				 * on a placeholder that nobody completes.
				 */
				throw new AssertionError(
					"Translated passage provides no entry point for its seed: " + pcCtx);
			}
		}
		catch (Throwable t) {
			// No effect if the placeholder was already completed normally above
			proto.completeExceptionally(t);
		}
	}
	try {
		return proto.get();
	}
	catch (InterruptedException e) {
		// Restore the interrupt status so code further up the stack can still observe it
		Thread.currentThread().interrupt();
		throw new AssertionError(e);
	}
	catch (ExecutionException e) {
		return ExceptionUtils.rethrow(e);
	}
}
/**
* Get the configuration for this emulator.
*
* <p>
* The configuration is held by the compiler; this delegates to it.
*
* @return the configuration
*/
public JitConfiguration getConfiguration() {
return compiler.getConfiguration();
}
/**
* {@inheritDoc}
*
* <p>
* <b>TODO</b>: The JIT-accelerated emulator does not currently implement access breakpoints.
* Furthermore, because JIT generated code is granted direct access to the emulator's state
* internals, it is not sufficient to override
* {@link PcodeExecutorStatePiece#getVar(AddressSpace, Object, int, boolean, Reason) getVar} and
* related.
*
* @throws UnsupportedOperationException always
*/
@Override
public void addAccessBreakpoint(AddressRange range, AccessKind kind) {
throw new UnsupportedOperationException();
}
}

View file

@ -0,0 +1,278 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import java.util.HashMap;
import java.util.Map;
import ghidra.lifecycle.Internal;
import ghidra.pcode.emu.*;
import ghidra.pcode.emu.jit.JitPassage.AddrCtx;
import ghidra.pcode.emu.jit.decode.JitPassageDecoder;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage.EntryPoint;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage.EntryPointPrototype;
import ghidra.pcode.exec.*;
import ghidra.program.model.address.Address;
import ghidra.program.model.listing.ProgramContext;
/**
* A JIT-accelerated thread of p-code emulation
*
* <p>
* This class implements the actual JIT-accelerated execution loop. In contrast to the normal
* per-instruction Fetch-Execute-Store loop inherited from {@link DefaultPcodeThread}, this thread's
* {@link #run()} method implements a per-<em>passage</em> Fetch-Decode-Translate-Execute loop.
*
*
* <h2>Fetch</h2>
* <p>
* The Fetch step involves checking the code cache for an existing translation at the thread's
* current counter and decode context. Cache entries are keyed by <em>passage entry point</em>, that
* is an address (and context reg value, if applicable) within a passage where execution is
* permitted to enter. This typically consists of the passage's seed as well as each branch target
* in the same passage. If one is found, we skip the Decode and Translate steps, and proceed
* directly to Execute.
*
* <h2>Decode</h2>
* <p>
* The Decode step involves decoding and selecting several instructions into a <em>passage</em>. A
* passage may comprise several instructions connected by control flow. Often it is a few long
* strides of instructions connected by a few branches. The decoder will avoid selecting
* instructions that are already included in an existing translated passage. The reason for this
* complexity is that JVM bytecode cannot be rewritten or patched once loaded. For more details, see
* {@link JitPassageDecoder}.
*
* <h2>Translate</h2>
* <p>
* The Translate step involves translating the selected passage of instructions. The result of this
* translation implements {@link JitCompiledPassage}. For details of this translation process, see
* {@link JitCompiler}. The compiled passage provides a list of its entry points. Each is added to
* the emulator's code cache. Among those should be the seed required by this iteration of the
* execution loop, and so that entry point is chosen.
*
* <h2>Execute</h2>
* <p>
* The chosen entry point is then executed. This step is as simple as invoking the
* {@link EntryPoint#run()} method. This, in turn, invokes {@link JitCompiledPassage#run(int)},
* providing the entry point's index as an argument. The index identifies to the translated passage
* the desired address of entry, and so it jumps directly to the corresponding translation. That
* translation performs all the equivalent operations of the selected instructions, adhering to any
* control flow within. When control flow exits the passage, the method returns, and the loop
* repeats.
*/
public class JitPcodeThread extends BytesPcodeThread {
/**
* This thread's passage decoder, which is based on its {@link #getDecoder() instruction
* decoder}.
*/
protected final JitPassageDecoder passageDecoder;
/**
* This thread's cache of translations instantiated for this thread.
*
* <p>
* As an optimization, the translator generates classes which pre-fetch portions of the thread's
* state. Thus, the class must be instantiated for each particular thread needing to execute it.
*
* <p>
* TODO: Invalidation of entries. There are several reasons an entry may need to be invalidated:
* Expiration, eviction, or perhaps because the {@link EntryPointPrototype} (from the emulator)
* was invalidated.
*/
protected final Map<AddrCtx, EntryPoint> codeCache = new HashMap<>();
/**
* Create a thread
*
* <p>
* This should only be called by the emulator and its test suites.
*
* <p>
* NOTE: This invokes the overridable {@link #createPassageDecoder()} during construction, so
* an override runs before the subclass's own fields are initialized.
*
* @param name the name of the thread
* @param machine the machine creating the thread
*/
public JitPcodeThread(String name, JitPcodeEmulator machine) {
super(name, machine);
this.passageDecoder = createPassageDecoder();
}
/**
 * Create this thread's state by combining the emulator's shared state with this thread's
 * local state.
 *
 * <p>
 * Both arguments are cast to {@link JitDefaultBytesPcodeExecutorState}, so this fails with a
 * {@link ClassCastException} if either is of another type.
 */
@Override
protected ThreadPcodeExecutorState<byte[]> createThreadState(
		PcodeExecutorState<byte[]> sharedState, PcodeExecutorState<byte[]> localState) {
	JitDefaultBytesPcodeExecutorState shared = (JitDefaultBytesPcodeExecutorState) sharedState;
	JitDefaultBytesPcodeExecutorState local = (JitDefaultBytesPcodeExecutorState) localState;
	return new JitThreadBytesPcodeExecutorState(shared, local);
}
/**
* Create the passage decoder
*
* <p>
* This is an extension point in case the decoder needs to be replaced with a further extension.
*
* <p>
* NOTE: This is invoked from the constructor, so an override must not rely on fields that the
* subclass initializes itself.
*
* @return the new passage decoder
*/
protected JitPassageDecoder createPassageDecoder() {
return new JitPassageDecoder(this);
}
@Override
public JitPcodeEmulator getMachine() {
// Narrows the return type; the only constructor of this class requires a JitPcodeEmulator
return (JitPcodeEmulator) super.getMachine();
}
@Override
public JitThreadBytesPcodeExecutorState getState() {
// Narrows the return type. NOTE(review): presumably the superclass returns what
// createThreadState produced, which is always a JitThreadBytesPcodeExecutorState -- confirm
return (JitThreadBytesPcodeExecutorState) super.getState();
}
@Internal
@Override
public PcodeProgram getInject(Address address) {
// Pure delegation. NOTE(review): presumably overridden to widen visibility so the passage
// decoder can query injects -- confirm against the superclass declaration
return super.getInject(address);
}
/**
* An accessor so the passage decoder can retrieve its thread's instruction decoder.
*
* <p>
* This exposes the inherited {@code decoder} field.
*
* @return the decoder
*/
@Internal
public InstructionDecoder getDecoder() {
return decoder;
}
/**
* An accessor so the passage decoder can query the language's default program context.
*
* <p>
* This exposes the inherited {@code defaultContext} field.
*
* @return the context
*/
@Internal
public ProgramContext getDefaultContext() {
return defaultContext;
}
/**
* {@inheritDoc}
*
* <p>
* <b>NOTE</b>: This currently only delegates to the superclass. Nothing here flushes or
* invalidates this thread's or the emulator's cached translations; see the TODO in the body.
*/
@Override
public void inject(Address address, String source) {
/**
* TODO: Flush code cache? Alternatively, establish some convention where injects cannot be
* changed in the life cycle? I don't like that solution. It is workable, I think, though,
* but the user would have to add state to a library in order to configure/toggle each
* injection.
*
* Is it enough to identify which passages contain the address and just remove those? I
* think, so. The only nuance I can think of is that the inject may change the block
* structure, i.e., new entries are possible, but I don't think that matters terribly. The
* caching algorithm should work that out.
*/
super.inject(address, source);
}
/**
* Check if the <em>emulator</em> has an entry prototype for the given address and contextreg
* value.
*
* <p>
* This simply passes through to the emulator. It does not matter whether this thread has
* instantiated the prototype. If any thread has caused the emulator to translate the given
* entry, this will return true. This thread's own cache of instances is not consulted.
*
* @see JitPcodeEmulator#hasEntryPrototype(AddrCtx)
* @param pcCtx the address and contextreg to check
* @return true if the emulator has a translation which can be entered at the given pcCtx.
*/
public boolean hasEntry(AddrCtx pcCtx) {
return getMachine().hasEntryPrototype(pcCtx);
}
/**
 * Get the translated and instantiated entry point for the given address and contextreg value.
 *
 * <p>
 * An <b>entry point</b> is an instance of a class representing a translated passage and an
 * index identifying the point at which to enter the passage. In essence, it is an instance of
 * an <b>entry prototype</b> for this thread.
 *
 * <p>
 * This thread's cache is consulted first. On a miss, the prototype is requested from the
 * emulator, which either returns an existing translation or translates a new passage at the
 * given seed. The prototype is then instantiated for this thread and the instance is cached.
 *
 * @see JitPcodeEmulator#getEntryPrototype(AddrCtx, JitPassageDecoder)
 * @param pcCtx the counter and decoder context
 * @return the entry point
 */
public EntryPoint getEntry(AddrCtx pcCtx) {
	// NOTE: Placeholders are not needed at the thread level, but at the machine level.
	return codeCache.computeIfAbsent(pcCtx, seed -> {
		EntryPointPrototype prototype = getMachine().getEntryPrototype(seed, passageDecoder);
		return prototype.createInstance(this);
	});
}
/**
 * {@inheritDoc}
 *
 * <p>
 * We override only this method to accelerate execution using JIT translation. Implementing
 * single stepping via JIT doesn't make much sense from an efficiency standpoint. However, this
 * thread still supports stepping via interpretation (as inherited). Our implementation permits
 * mixing the two execution paradigms; however, using JIT after a few single steps will incur
 * some waste as the JIT translates an otherwise uncommon entry point. Depending on
 * circumstances and the order of operations, the effect of this on overall efficiency may vary
 * because of caching.
 *
 * <p>
 * The loop clears the suspended flag, finishes any instruction left partially executed, and
 * then runs entry points until the thread is suspended. Each entry point returns the next one
 * to run. When it returns {@code null}, the next one is looked up from the thread's current
 * context and counter.
 */
@Override
public void run() {
	setSuspended(false);
	if (frame != null) {
		finishInstruction();
	}
	EntryPoint entry = null;
	while (!isSuspended()) {
		if (entry == null) {
			AddrCtx pcCtx = new AddrCtx(getContext(), getCounter());
			entry = getEntry(pcCtx);
		}
		try {
			entry = entry.run();
		}
		catch (SuspendedPcodeExecutionException ignored) {
			/*
			 * Expected, e.g., when count() notices the thread was suspended. The loop condition
			 * is checked again and normally ends the run.
			 *
			 * NOTE(review): entry keeps its previous value here. If this exception ever arrives
			 * while the thread is not suspended, the same entry point is run again -- confirm
			 * that cannot happen.
			 */
		}
	}
}
/**
 * This is called before each basic block is executed.
 *
 * <p>
 * This gives the thread an opportunity to track and control execution, if desired. It provides
 * the number of instructions and additional p-code ops about to be completed. If the counts
 * exceed a desired schedule, or if the thread is suspended, this method may throw an exception
 * to interrupt execution. This can be toggled in the emulator's configuration.
 *
 * <p>
 * This implementation ignores both counts and only checks whether the thread is suspended.
 *
 * @see JitConfiguration#emitCounters()
 * @param instructions the number of instructions about to be completed
 * @param trailingOps the number of ops of a final partial instruction about to be completed.
 *            If the block does not complete any instruction, this is the number of ops
 *            continuing in the current (partial) instruction.
 * @throws SuspendedPcodeExecutionException if the thread is suspended
 */
public void count(int instructions, int trailingOps) {
	if (!isSuspended()) {
		return;
	}
	throw new SuspendedPcodeExecutionException(null, null);
}
}

View file

@ -0,0 +1,57 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit;
import ghidra.pcode.emu.ThreadPcodeExecutorState;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorStatePiece.JitBytesPcodeExecutorStateSpace;
import ghidra.program.model.address.AddressSpace;
/**
 * The JIT-accelerated emulator's counterpart to {@link ThreadPcodeExecutorState}: a per-thread
 * state that multiplexes a shared portion and a thread-local portion.
 */
public class JitThreadBytesPcodeExecutorState extends ThreadPcodeExecutorState<byte[]>
        implements JitBytesPcodeExecutorState {

    /**
     * Construct a new thread state
     *
     * @param sharedState the shared portion (e.g., ram space)
     * @param localState the local portion (i.e., register, unique spaces)
     */
    public JitThreadBytesPcodeExecutorState(JitDefaultBytesPcodeExecutorState sharedState,
            JitDefaultBytesPcodeExecutorState localState) {
        super(sharedState, localState);
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * Narrowed to the JIT state type; the constructor only accepts that type.
     */
    @Override
    public JitDefaultBytesPcodeExecutorState getSharedState() {
        Object shared = super.getSharedState();
        return (JitDefaultBytesPcodeExecutorState) shared;
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * Narrowed to the JIT state type; the constructor only accepts that type.
     */
    @Override
    public JitDefaultBytesPcodeExecutorState getLocalState() {
        Object local = super.getLocalState();
        return (JitDefaultBytesPcodeExecutorState) local;
    }

    /**
     * Get the state space for the given address space, delegating to the local state for
     * thread-local spaces and to the shared state otherwise.
     */
    @Override
    public JitBytesPcodeExecutorStateSpace getForSpace(AddressSpace space) {
        JitDefaultBytesPcodeExecutorState owner =
            isThreadLocalSpace(space) ? getLocalState() : getSharedState();
        return owner.getForSpace(space);
    }
}

View file

@ -0,0 +1,944 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import static ghidra.pcode.emu.jit.analysis.JitVarScopeModel.maxAddr;
import static ghidra.pcode.emu.jit.analysis.JitVarScopeModel.overlapsLeft;
import static org.objectweb.asm.Opcodes.*;
import java.math.BigInteger;
import java.util.*;
import java.util.Map.Entry;
import org.apache.commons.collections4.iterators.ReverseListIterator;
import org.objectweb.asm.*;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorState;
import ghidra.pcode.emu.jit.JitCompiler;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.type.TypeConversions;
import ghidra.pcode.emu.jit.gen.var.VarGen;
import ghidra.pcode.emu.jit.var.*;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressFactory;
import ghidra.program.model.lang.Endian;
import ghidra.program.model.pcode.Varnode;
/**
* Type variable allocation phase for JIT-accelerated emulation.
*
* <p>
* This implements the Variable Allocation phase of the {@link JitCompiler} using a very simple
* placement and another "voting" algorithm to decide the allocated JVM variable types. We place/map
* variables by their storage varnodes, coalescing them as needed. Coalescing is performed for
* overlapping, but not abutting varnodes. This allocation is anticipated by the
* {@link JitVarScopeModel}, which performs the actual coalescing. Because multiple SSA variables
* will almost certainly occupy the same varnode, we employ another voting system. For example, the
* register {@code RAX} may be re-used many times within a passage. In some cases, it might be used
* to return a floating-point value. In others (and <em>probably</em> more commonly) it will be used
* to return an integral value. The more common case in the passage determines the JVM type of the
* local variable allocated for {@code RAX}. Note that variables which occupy only part of a
* coalesced varnode always vote for a JVM {@code int}, because of the shifting and masking required
* to extract that part.
*
* <p>
* The allocation process is very simple, presuming successful type assignment:
*
* <ol>
* <li>Vote Tabulation</li>
* <li>Index Reservation</li>
* <li>Handler Creation</li>
* </ol>
*
* <h2>Vote Tabulation</h2>
* <p>
* Every SSA variable (excluding constants and memory variables) contributes a vote for the type of
* its allocated local. If the varnode matches exactly, the vote is for the JVM type of the
* variable's assigned p-code type. The type mapping is simple: For integral types, we allocate
* using the smaller JVM type that fits the p-code type. For floating-point types, we allocate using
* the JVM type that exactly matches the p-code type. If the varnode is larger, i.e., because it's
* the result of coalescing, then the vote is for the smaller JVM integer type that fits the full
* varnode. Consider the following p-code:
*
* <pre>
* 1. RAX = FLOAT_ADD RCX, RDX
* 2. EAX = FLOAT_ADD EBX, 0x3f800000:4 # 1.0f
* </pre>
*
* <p>
* Several values and variables are at play here. We tabulate the type assignments and resulting
* votes:
*
* <p>
* <table border="1">
* <tr>
* <th>SSA Var</th>
* <th>Type</th>
* <th>Varnode</th>
* <th>Vote</th>
* </tr>
* <tr>
* <td>{@code RCX}<sub>in</sub></td>
* <td>{@link DoubleJitType#F8 float8}</td>
* <td>{@code RCX}</td>
* <td>{@code double}</td>
* </tr>
* <tr>
* <td>{@code RDX}<sub>in</sub></td>
* <td>{@link DoubleJitType#F8 float8}</td>
* <td>{@code RDX}</td>
* <td>{@code double}</td>
* </tr>
* <tr>
* <td>{@code RAX}<sub>1</sub></td>
* <td>{@link DoubleJitType#F8 float8}</td>
* <td>{@code RAX}</td>
* <td>{@code double}</td>
* </tr>
* <tr>
* <td>{@code EBX}<sub>in</sub></td>
* <td>{@link FloatJitType#F4 float4}</td>
* <td>{@code EBX}</td>
* <td>{@code float}</td>
* </tr>
* <tr>
* <td>{@code 0x3f800000:4}</td>
* <td>{@link FloatJitType#F4 float4}</td>
* </tr>
* <tr>
* <td>{@code EAX}<sub>2</sub></td>
* <td>{@link FloatJitType#F4 float4}</td>
* <td>{@code RAX}</td>
* <td>{@code long}</td>
* </tr>
* </table>
*
* <p>
* The registers {@code RCX}, {@code RDX}, and {@code EBX} are trivially allocated as locals of JVM
* types {@code double}, {@code double}, and {@code float}, respectively. It is also worth noting
* that {@code 0x3f800000} is allocated as a {@code float} constant in the classfile's constant
* pool. Now, we consider {@code RAX}. The varnodes for {@code RAX}<sub>1</sub> and
* {@code EAX}<sub>2</sub> are coalesced to {@code RAX}. {@code RAX}<sub>1</sub> casts its vote for
* {@code double}; whereas, {@code EAX}<sub>2</sub> casts its vote for {@code long}. This is because
* placing {@code EAX}<sub>2</sub>'s value into the larger varnode requires bitwise operators, which
* on the JVM, require integer operands. Thus the votes result in a tie, and favoring integral
* types, we allocate {@code RAX} in a JVM {@code long}.
*
* <h2>Index Reservation</h2>
* <p>
* After all the votes have been tabulated, we go through the results in address order, reserving
* JVM local indices and assigning types. Note that we must reserve two indices for every variable
* of type {@code long} or {@code double}, as specified by the JVM. Each of these reservations is
* tracked in a {@link JvmLocal}. Note that index 0 is already reserved by the JVM for the
* {@code this} ref and index 1 holds the {@code blockId} parameter of
* {@link JitCompiledPassage#run(int) run}, so we start our counting at 2. Also, some portions of
* the code generator may need to allocate additional temporary locals, so we must allow access
* to the next free index after all reservations are complete.
*
* <h2>Handler Creation</h2>
* <p>
* This actually extends a little beyond allocation, but this is a suitable place for it: All SSA
* values are assigned a handler, including constants and memory variables. Variables which access
* the same varnode get the same handler. For varnodes that are allocated in a JVM local, we create
* a handler that generates loads and stores to that local, e.g., {@link Opcodes#ILOAD iload}. For
* constant varnodes, we create a handler that generates {@link Opcodes#LDC ldc} instructions. For
* memory varnodes, we create a handler that generates a sequence of method invocations on the
* {@link JitBytesPcodeExecutorState state}. The code generator will delegate to these handlers in
* order to generate reads and writes of the corresponding variables, as well as to prepare any
* resources to facilitate access, e.g., pre-fetching items from the
* {@link JitBytesPcodeExecutorState state} in the generated constructor.
*
* @implNote There are many artifacts below that anticipate supporting p-code types greater than 8
* bytes in size. One method to support that is to allocate multiple JVM locals per p-code
* varnode. Consider a 16-byte (128-bit) integer. We could allocate 4 JVM {@code int}
* locals and then emit bytecode that performs the gradeschool-style arithmetic. I suspect
* this would perform better than just using refs to {@link BigInteger}, because it avoids
* heap pollution, and also may avoid some unnecessary arithmetic, esp., for the more
* significant portions that get dropped.
* @implNote <b>TODO</b>: It would be nice to detect varnode re-use under a different type and
* generate the appropriate declarations and handlers. This doesn't seem terribly complex,
* and it stands to spare us some casts. What's not clear is whether this offers any real
* run-time benefit.
*/
public class JitAllocationModel {
/**
 * A JVM local variable reserved to hold the value of a varnode
 *
 * @param index the index reserved for this local
 * @param name the human-readable name for this local
 * @param type a type for this local
 * @param vn the varnode whose value this local holds
 */
public record JvmLocal(int index, String name, SimpleJitType type, Varnode vn) {

    /**
     * Emit bytecode into the class constructor.
     *
     * @param gen the code generator
     * @param iv the visitor for the class constructor (not used directly here; only the
     *            generator and varnode are passed along)
     */
    public void generateInitCode(JitCodeGenerator gen, MethodVisitor iv) {
        VarGen.generateValInitCode(gen, vn());
    }

    /**
     * Declare this local in the {@link JitCompiledPassage#run(int) run} method.
     *
     * <p>
     * The declaration covers the span between the two given labels, which the caller places at
     * the top and end of the method.
     *
     * @param gen the code generator
     * @param start a label at the top of the method
     * @param end a label at the end of the method
     * @param rv the visitor for the run method
     */
    public void generateDeclCode(JitCodeGenerator gen, Label start, Label end,
            MethodVisitor rv) {
        String descriptor = Type.getDescriptor(type().javaType());
        rv.visitLocalVariable(name(), descriptor, null, start, end, index());
    }

    /**
     * Emit bytecode to push this local's value onto the JVM stack.
     *
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     */
    public void generateLoadCode(MethodVisitor rv) {
        int loadOp = type().opcodeLoad();
        rv.visitVarInsn(loadOp, index());
    }

    /**
     * Emit bytecode to pop the value on the JVM stack into this local.
     *
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     */
    public void generateStoreCode(MethodVisitor rv) {
        int storeOp = type().opcodeStore();
        rv.visitVarInsn(storeOp, index());
    }

    /**
     * Emit bytecode to bring this varnode into scope.
     *
     * <p>
     * This reads the varnode's value directly from the {@link JitBytesPcodeExecutorState state}
     * and stores it into the local variable.
     *
     * @param gen the code generator
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     */
    public void generateBirthCode(JitCodeGenerator gen, MethodVisitor rv) {
        VarGen.generateValReadCodeDirect(gen, type(), vn(), rv);
        generateStoreCode(rv);
    }

    /**
     * Emit bytecode to take this varnode out of scope.
     *
     * <p>
     * This loads the local variable and writes its value directly back into the
     * {@link JitBytesPcodeExecutorState state}.
     *
     * @param gen the code generator
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     */
    public void generateRetireCode(JitCodeGenerator gen, MethodVisitor rv) {
        generateLoadCode(rv);
        VarGen.generateValWriteCodeDirect(gen, type(), vn(), rv);
    }
}
/**
* A handler that knows how to load and store variable values onto and from the JVM stack.
*
* <p>
* The load and store methods take the p-code type on the stack side, so that implementations can
* convert between that type and the type of the underlying storage.
*/
public interface VarHandler {
/**
* Get the p-code type of the variable this handler handles.
*
* @return the type
*/
JitType type();
/**
* Emit bytecode into the class constructor.
*
* @param gen the code generator
* @param iv the visitor for the class constructor
*/
void generateInitCode(JitCodeGenerator gen, MethodVisitor iv);
/**
* If needed, emit bytecode at the top of the {@link JitCompiledPassage#run(int) run}
* method.
*
* @param gen the code generator
* @param start a label at the top of the method
* @param end a label at the end of the method
* @param rv the visitor for the run method
*/
void generateDeclCode(JitCodeGenerator gen, Label start, Label end, MethodVisitor rv);
/**
* Emit bytecode to load the varnode's value onto the JVM stack.
*
* @param gen the code generator
* @param type the p-code type of the value expected on the JVM stack by the subsequent
* bytecode
* @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
*/
void generateLoadCode(JitCodeGenerator gen, JitType type, MethodVisitor rv);
/**
* Emit bytecode to store the value on the JVM stack into the varnode.
*
* @param gen the code generator
* @param type the p-code type of the value produced on the JVM stack by the preceding
* bytecode
* @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
*/
void generateStoreCode(JitCodeGenerator gen, JitType type, MethodVisitor rv);
}
/**
 * A handler for a p-code variable that lives entirely in one JVM local variable.
 *
 * <p>
 * Loads and stores go straight to the local, with a type conversion between the local's p-code
 * type and the type on the stack.
 */
public interface OneLocalVarHandler extends VarHandler {
    /**
     * Get the local variable into which this p-code variable is allocated
     *
     * @return the local
     */
    JvmLocal local();

    @Override
    default void generateInitCode(JitCodeGenerator gen, MethodVisitor iv) {
        // Generator inits decls directly
    }

    @Override
    default void generateDeclCode(JitCodeGenerator gen, Label start, Label end,
            MethodVisitor rv) {
        // Generator calls decls directly
    }

    @Override
    default void generateLoadCode(JitCodeGenerator gen, JitType type, MethodVisitor rv) {
        // Push the local, then convert from this variable's type to the requested one
        JvmLocal backing = local();
        backing.generateLoadCode(rv);
        JitType held = this.type();
        TypeConversions.generate(gen, held, type, rv);
    }

    @Override
    default void generateStoreCode(JitCodeGenerator gen, JitType type, MethodVisitor rv) {
        // Convert from the given type to this variable's type, then pop into the local
        JitType held = this.type();
        TypeConversions.generate(gen, type, held, rv);
        JvmLocal backing = local();
        backing.generateStoreCode(rv);
    }
}
/**
* The handler for a p-code variable allocated in one JVM {@code int}.
*
* <p>
* The record body is empty: the code generation methods are the defaults inherited from
* {@link OneLocalVarHandler}, and the record accessors supply the local and type.
*
* @param local the JVM local
* @param type the p-code type
*/
public record IntVarAlloc(JvmLocal local, IntJitType type) implements OneLocalVarHandler {}
/**
* The handler for a p-code variable allocated in one JVM {@code long}.
*
* <p>
* The record body is empty: the code generation methods are the defaults inherited from
* {@link OneLocalVarHandler}, and the record accessors supply the local and type.
*
* @param local the JVM local
* @param type the p-code type
*/
public record LongVarAlloc(JvmLocal local, LongJitType type) implements OneLocalVarHandler {}
/**
* The handler for a p-code variable allocated in one JVM {@code float}.
*
* <p>
* The record body is empty: the code generation methods are the defaults inherited from
* {@link OneLocalVarHandler}, and the record accessors supply the local and type.
*
* @param local the JVM local
* @param type the p-code type
*/
public record FloatVarAlloc(JvmLocal local, FloatJitType type) implements OneLocalVarHandler {}
/**
* The handler for a p-code variable allocated in one JVM {@code double}.
*
* <p>
* The record body is empty: the code generation methods are the defaults inherited from
* {@link OneLocalVarHandler}, and the record accessors supply the local and type.
*
* @param local the JVM local
* @param type the p-code type
*/
public record DoubleVarAlloc(JvmLocal local, DoubleJitType type)
implements OneLocalVarHandler {}
/**
 * One piece of a multi-local variable handler.
 *
 * <p>
 * The piece is held in a JVM local. The shift is given in bytes. On load, a positive shift
 * moves the local's value to the right and a negative shift moves it to the left, to place it
 * into position within the loaded value. On store, the directions are reversed.
 *
 * @param local the local variable allocated to this part
 * @param shift the number of bytes and direction to shift
 */
public record MultiLocalPart(JvmLocal local, int shift) {

    /**
     * Pick whichever type has the greater size, preferring the second on a tie.
     */
    private JitType chooseLargerType(JitType t1, JitType t2) {
        if (t1.size() > t2.size()) {
            return t1;
        }
        return t2;
    }

    /**
     * Emit bytecode to load the value from this local and position it in a value on the JVM
     * stack.
     *
     * <p>
     * If multiple parts are to be combined, the caller should emit a bitwise or after all loads
     * but the first.
     *
     * @param gen the code generator
     * @param type the p-code type of the value expected on the stack by the subsequent
     *            bytecode, which may be to load additional parts
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     *
     * @implNote Temporary values are kept in the larger of the local's type and the expected
     *           type; otherwise bits may get dropped while positioning the value.
     */
    public void generateLoadCode(JitCodeGenerator gen, JitType type, MethodVisitor rv) {
        local.generateLoadCode(rv);
        JitType tempType = chooseLargerType(local.type(), type);
        TypeConversions.generate(gen, local.type(), tempType, rv);
        if (shift != 0) {
            boolean toRight = shift > 0;
            int shiftOp = switch (tempType) {
                case IntJitType t -> toRight ? IUSHR : ISHL;
                case LongJitType t -> toRight ? LUSHR : LSHL;
                default -> throw new AssertionError();
            };
            // The shift amount is always an int operand, even for long shifts
            rv.visitLdcInsn(Math.abs(shift) * Byte.SIZE);
            rv.visitInsn(shiftOp);
        }
        TypeConversions.generate(gen, tempType, type, rv);
    }

    /**
     * Emit bytecode to extract this part from the value on the JVM stack and store it in the
     * local variable.
     *
     * <p>
     * The bits of the local outside of this part are preserved: the positioned value is masked,
     * the local's current value is masked with the complement, and the two are or-ed together
     * before storing. If multiple parts are to be stored, the caller should emit a
     * {@link Opcodes#DUP dup} or {@link Opcodes#DUP2 dup2} before all stores but the last.
     *
     * @param gen the code generator
     * @param type the p-code type of the value produced on the stack by the preceding bytecode
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     *
     * @implNote Temporary values are kept in the larger of the local's type and the given type;
     *           otherwise bits may get dropped while positioning the value.
     */
    public void generateStoreCode(JitCodeGenerator gen, JitType type, MethodVisitor rv) {
        JitType tempType = chooseLargerType(local.type(), type);
        TypeConversions.generate(gen, type, tempType, rv);

        // Position the value within the temporary (reverse of the load direction)
        boolean wide = switch (tempType) {
            case IntJitType t -> false;
            case LongJitType t -> true;
            default -> throw new AssertionError();
        };
        if (shift > 0) {
            rv.visitLdcInsn(shift * Byte.SIZE);
            rv.visitInsn(wide ? LSHL : ISHL);
        }
        else if (shift < 0) {
            rv.visitLdcInsn(-shift * Byte.SIZE);
            rv.visitInsn(wide ? LUSHR : IUSHR);
        }

        TypeConversions.generate(gen, tempType, local.type(), rv);

        // Merge into the local: (value & mask) | (local & ~mask)
        switch (local.type()) {
            case IntJitType t -> {
                int field = -1 >>> (Integer.SIZE - Byte.SIZE * type.size());
                int mask = shift > 0
                        ? field << (shift * Byte.SIZE)
                        : field >>> (-shift * Byte.SIZE);
                rv.visitLdcInsn(mask);
                rv.visitInsn(IAND);
                local.generateLoadCode(rv);
                rv.visitLdcInsn(~mask);
                rv.visitInsn(IAND);
                rv.visitInsn(IOR);
                local.generateStoreCode(rv);
            }
            case LongJitType t -> {
                long field = -1L >>> (Long.SIZE - Byte.SIZE * type.size());
                long mask = shift > 0
                        ? field << (shift * Byte.SIZE)
                        : field >>> (-shift * Byte.SIZE);
                rv.visitLdcInsn(mask);
                rv.visitInsn(LAND);
                local.generateLoadCode(rv);
                rv.visitLdcInsn(~mask);
                rv.visitInsn(LAND);
                rv.visitInsn(LOR);
                local.generateStoreCode(rv);
            }
            default -> throw new AssertionError();
        }
    }
}
/**
 * The handler for a variable allocated in a composition of locals
 *
 * <p>
 * This also handles a varnode that is a subpiece of a local variable allocated for a larger
 * varnode. For example, this may handle {@code EAX}, when we have allocated a {@code long} to
 * hold all of {@code RAX}.
 *
 * @param parts the parts describing how the locals are composed
 * @param type the p-code type of the (whole) variable
 */
public record MultiLocalVarHandler(List<MultiLocalPart> parts, JitType type)
        implements VarHandler {

    @Override
    public void generateInitCode(JitCodeGenerator gen, MethodVisitor iv) {
        // Generator calls local inits directly
    }

    @Override
    public void generateDeclCode(JitCodeGenerator gen, Label start, Label end,
            MethodVisitor rv) {
        // Generator calls local decls directly
    }

    /**
     * Load each part positioned within the whole variable's type, or-ing every part after the
     * first into the running value, then convert to the requested type.
     */
    @Override
    public void generateLoadCode(JitCodeGenerator gen, JitType type, MethodVisitor rv) {
        JitType whole = this.type;
        parts.get(0).generateLoadCode(gen, whole, rv);
        for (int i = 1; i < parts.size(); i++) {
            parts.get(i).generateLoadCode(gen, whole, rv);
            int orOp = switch (whole) {
                case IntJitType t -> IOR;
                case LongJitType t -> LOR;
                default -> throw new AssertionError();
            };
            rv.visitInsn(orOp);
        }
        TypeConversions.generate(gen, whole, type, rv);
    }

    /**
     * Convert the value to the whole variable's type, then store it into each part, from the
     * last part back to the first, duplicating the value before every store but the final one.
     */
    @Override
    public void generateStoreCode(JitCodeGenerator gen, JitType type, MethodVisitor rv) {
        JitType whole = this.type;
        TypeConversions.generate(gen, type, whole, rv);
        List<MultiLocalPart> rest = parts.subList(1, parts.size());
        for (int i = rest.size() - 1; i >= 0; i--) {
            int dupOp = switch (whole) {
                case IntJitType t -> DUP;
                case LongJitType t -> DUP2;
                default -> throw new AssertionError();
            };
            rv.visitInsn(dupOp);
            rest.get(i).generateStoreCode(gen, whole, rv);
        }
        parts.get(0).generateStoreCode(gen, whole, rv);
    }
}
/**
* A dummy handler for values/variables that are not allocated in JVM locals
*
* <p>
* It has no type, emits nothing for initialization or declaration, and treats any attempt to
* generate a load or store through it as a programming error.
*/
public enum NoHandler implements VarHandler {
/** Singleton */
INSTANCE;
/**
* {@inheritDoc}
*
* @return always {@code null}, since there is no allocated variable
*/
@Override
public JitType type() {
return null;
}
@Override
public void generateInitCode(JitCodeGenerator gen, MethodVisitor iv) {
// Nothing to initialize
}
@Override
public void generateDeclCode(JitCodeGenerator gen, Label start, Label end,
MethodVisitor rv) {
// Nothing to declare
}
@Override
public void generateLoadCode(JitCodeGenerator gen, JitType type, MethodVisitor rv) {
// There is no local to load from; reaching this is a bug in the caller
throw new AssertionError();
}
@Override
public void generateStoreCode(JitCodeGenerator gen, JitType type, MethodVisitor rv) {
// There is no local to store into; reaching this is a bug in the caller
throw new AssertionError();
}
}
/**
 * The descriptor of a p-code variable
 *
 * <p>
 * This is just a logical grouping of a varnode (broken out into its space id, offset, and size)
 * and its assigned p-code type.
 *
 * @param spaceId the id of the varnode's address space
 * @param offset the varnode's offset within the space
 * @param size the varnode's size in bytes
 * @param type the assigned p-code type
 */
private record VarDesc(int spaceId, long offset, int size, JitType type) {

    /**
     * Create a descriptor from the given varnode and type
     *
     * @param vn the varnode
     * @param type the p-code type
     * @return the descriptor
     */
    static VarDesc fromVarnode(Varnode vn, JitType type) {
        int space = vn.getSpace();
        long off = vn.getOffset();
        int length = vn.getSize();
        return new VarDesc(space, off, length, type);
    }

    /**
     * Derive a name for this variable, to use in the name of allocated local(s)
     *
     * <p>
     * The name encodes the space id, the offset in hex, the size, and the type's short name.
     *
     * @return the name
     */
    public String name() {
        String typeName = type.nm();
        return "s%d_%x_%d_%s".formatted(spaceId, offset, size, typeName);
    }

    /**
     * Convert this descriptor back to a varnode
     *
     * @param factory the address factory for the emulation target language
     * @return the varnode
     */
    public Varnode toVarnode(AddressFactory factory) {
        Address start = factory.getAddressSpace(spaceId).getAddress(offset);
        return new Varnode(start, size);
    }
}
// The models produced by earlier analysis phases, as given to the constructor
private final JitDataFlowModel dfm;
private final JitVarScopeModel vsm;
private final JitTypeModel tm;
// The emulation target language and its endianness, taken from the analysis context
private final SleighLanguage language;
private final Endian endian;
// The next JVM local index not yet reserved
private int nextLocal = 2; // 0:this, 1:blockId in run(int blockId)
// The handler assigned to each value in the use-def graph
private final Map<JitVal, VarHandler> handlers = new HashMap<>();
// Handlers cached per varnode, so variables on the same varnode share one handler
private final Map<Varnode, VarHandler> handlersPerVarnode = new HashMap<>();
// The allocated locals, keyed by the address of the varnode each one holds
private final NavigableMap<Address, JvmLocal> locals = new TreeMap<>();
/**
 * Construct the allocation model.
 *
 * <p>
 * Construction performs the whole allocation; the model is ready for queries on return.
 *
 * @param context the analysis context
 * @param dfm the data flow model
 * @param vsm the variable scope model
 * @param tm the type model
 */
public JitAllocationModel(JitAnalysisContext context, JitDataFlowModel dfm,
        JitVarScopeModel vsm, JitTypeModel tm) {
    this.language = context.getLanguage();
    this.endian = context.getEndian();

    this.dfm = dfm;
    this.vsm = vsm;
    this.tm = tm;

    // Must come last: allocation reads all of the fields above
    allocate();
}
/**
 * Reserve (allocate) one local for the given p-code variable
 *
 * <p>
 * A local whose Java type is {@code long} or {@code double} consumes two indices; any other
 * consumes one.
 *
 * @param name the name of the JVM local
 * @param type the p-code type represented by the local
 * @param desc the variable's descriptor
 * @return the allocated JVM local
 */
private JvmLocal genFreeLocal(String name, SimpleJitType type, VarDesc desc) {
    int index = nextLocal;
    Class<?> javaType = type.javaType();
    boolean twoSlots = javaType == long.class || javaType == double.class;
    nextLocal += twoSlots ? 2 : 1;
    Varnode vn = desc.toVarnode(language.getAddressFactory());
    return new JvmLocal(index, name, type, vn);
}
/**
* Get the next free local index without reserving it
*
* <p>
* This should be used by operator code generators <em>after</em> all the
* {@link JitBytesPcodeExecutorState state} bypassing local variables have been allocated. The
* variables should be scoped to that operator only, so that the ids used are freed for the next
* operator. Calling this does not advance the counter, so repeated calls return the same index
* until another local is reserved.
*
* @return the next id
*/
public int nextFreeLocal() {
return nextLocal;
}
/**
 * Reserve (allocate) several locals for the given p-code variable
 *
 * <p>
 * The given types are taken in the given order for a big-endian language and in reverse for a
 * little-endian one. Locals are then reserved at increasing offsets, starting at the
 * descriptor's offset, each named with the prefix and its position.
 *
 * @param name a prefix to name each JVM local
 * @param types a p-code type that describes what each local stores
 * @param desc the (whole) variable's descriptor
 * @return the allocated JVM locals, in order of increasing address, as an unmodifiable list
 */
private List<JvmLocal> genFreeLocals(String name, List<SimpleJitType> types,
        VarDesc desc) {
    List<SimpleJitType> inAddressOrder = language.isBigEndian() ? types : types.reversed();
    List<JvmLocal> legs = new ArrayList<>(types.size());
    long offset = desc.offset;
    for (SimpleJitType legType : inAddressOrder) {
        int position = legs.size();
        VarDesc legDesc = new VarDesc(desc.spaceId, offset, legType.size(), legType);
        legs.add(genFreeLocal(name + "_" + position, legType, legDesc));
        offset += legType.size();
    }
    return List.copyOf(legs);
}
/**
 * A contest for assigning a type to a varnode
 *
 * <p>
 * Because several SSA variables can share one varnode, we let each cast a vote to determine the
 * JVM type of the local(s) allocated to it.
 *
 * @param map the tally of votes per type
 *
 * @implNote <b>TODO</b>: This type contest could receive more detailed information from the
 *           type model, but perhaps that's more work than it's worth. I would have to
 *           communicate all votes, not just the winner....
 */
record TypeContest(Map<JitType, Integer> map) {
    /**
     * Start a new contest
     */
    public TypeContest() {
        this(new HashMap<>());
    }

    /**
     * Cast a vote for the given type
     *
     * <p>
     * The vote is tallied under the type's {@code ext()} form.
     *
     * @param type the type
     */
    public void vote(JitType type) {
        map.merge(type.ext(), 1, Integer::sum);
    }

    /**
     * Choose the winner, favoring integral types
     *
     * <p>
     * Among the types tied for the most votes, the one with the least {@code pref()} wins.
     *
     * @return the winning type
     */
    public JitType winner() {
        int mostVotes = Collections.max(map.values());
        return map.entrySet()
                .stream()
                .filter(e -> e.getValue() == mostVotes)
                .map(Map.Entry::getKey)
                .min(Comparator.comparing(JitType::pref))
                .get();
    }
}
// The vote tallies, keyed by coalesced varnode
private final Map<Varnode, TypeContest> typeContests = new HashMap<>();
/**
 * Create a handler for the variable stored by the one given local
 *
 * <p>
 * The kind of handler is selected by the local's type: int, long, float, or double. Any other
 * type is a programming error.
 *
 * @param local the local
 * @return the handler
 */
private OneLocalVarHandler createOneLocalHandler(JvmLocal local) {
    OneLocalVarHandler handler = switch (local.type()) {
        case IntJitType i -> new IntVarAlloc(local, i);
        case LongJitType l -> new LongVarAlloc(local, l);
        case FloatJitType f -> new FloatVarAlloc(local, f);
        case DoubleJitType d -> new DoubleVarAlloc(local, d);
        default -> throw new AssertionError();
    };
    return handler;
}
/**
 * Create a handler for a multi-part or subpiece varnode
 *
 * <p>
 * The locals involved are those from the one at or below the varnode's address through the
 * varnode's maximum address. Each becomes a part whose shift (in bytes) is computed relative to
 * <em>that part's own</em> local: for little endian, the varnode's address minus the local's
 * address; for big endian, the local's maximum address minus the varnode's maximum address.
 * For the first (or only) local, this is the same value as computing it from the floor entry.
 *
 * @param vn the varnode
 * @return a handler to access the value of the given varnode, as allocated in one or more
 *         locals.
 */
private VarHandler createComplicatedHandler(Varnode vn) {
    Entry<Address, JvmLocal> leftEntry = locals.floorEntry(vn.getAddress());
    if (leftEntry == null) {
        // Fail with a description instead of a bare NullPointerException
        throw new AssertionError("No local allocated at or below " + vn);
    }
    assert overlapsLeft(leftEntry.getValue().vn, vn);
    Address min = leftEntry.getKey();
    NavigableMap<Address, JvmLocal> sub = locals.subMap(min, true, maxAddr(vn), true);
    List<MultiLocalPart> parts = new ArrayList<>();
    for (Entry<Address, JvmLocal> entry : sub.entrySet()) {
        JvmLocal local = entry.getValue();
        // Each part needs its own shift. Using the leftmost local's shift for all of them would
        // position every part after the first incorrectly.
        int offset = (int) switch (endian) {
            case BIG -> maxAddr(local.vn).subtract(maxAddr(vn));
            case LITTLE -> vn.getAddress().subtract(entry.getKey());
        };
        parts.add(new MultiLocalPart(local, offset));
    }
    return new MultiLocalVarHandler(parts, JitTypeBehavior.INTEGER.type(vn.getSize()));
}
/**
 * Get (creating if necessary) the handler for the given variable's varnode.
 *
 * <p>
 * Handlers are cached per varnode, so variables on the same varnode share one handler.
 *
 * @param vv the variable
 * @return the handler
 */
private VarHandler getOrCreateHandlerForVarnodeVar(JitVarnodeVar vv) {
    return handlersPerVarnode.computeIfAbsent(vv.varnode(), this::newHandlerForVarnode);
}

/**
 * Create the handler for a varnode: a one-local handler when a local is allocated at the
 * varnode's address for exactly that varnode, or a multi-part/subpiece handler otherwise.
 *
 * @param vn the varnode
 * @return the new handler
 */
private VarHandler newHandlerForVarnode(Varnode vn) {
    JvmLocal exact = locals.get(vn.getAddress());
    boolean wholeLocal = exact != null && exact.vn().equals(vn);
    return wholeLocal ? createOneLocalHandler(exact) : createComplicatedHandler(vn);
}
/**
 * Get (creating if necessary) the handler for the given value
 *
 * <p>
 * Constants and memory variables get the dummy handler. Memory variables are checked before
 * the general varnode-variable case. Any other kind of value is a programming error.
 *
 * @param v the value
 * @return a handler for the value's varnode, if it is a register or unique; otherwise, the
 *         dummy handler
 */
private VarHandler createHandler(JitVal v) {
    boolean notInLocal = v instanceof JitConstVal || v instanceof JitMemoryVar;
    if (notInLocal) {
        return NoHandler.INSTANCE;
    }
    if (!(v instanceof JitVarnodeVar vv)) {
        throw new AssertionError();
    }
    return getOrCreateHandlerForVarnodeVar(vv);
}
/**
 * Perform the actual allocations
 *
 * <p>
 * This runs the three steps in order: vote tabulation, index reservation, and handler creation.
 */
private void allocate() {
    tabulateVotes();
    reserveLocals();
    assignHandlers();
}

/**
 * Let every varnode variable, other than memory variables, vote on the type of its coalesced
 * varnode. A variable whose varnode equals the coalesced varnode votes for its own type from
 * the type model; any other votes for the integer type of the coalesced varnode's size.
 */
private void tabulateVotes() {
    for (JitVal v : dfm.allValues()) {
        if (!(v instanceof JitVarnodeVar vv) || v instanceof JitMemoryVar) {
            continue;
        }
        Varnode vn = vv.varnode();
        Varnode coalesced = vsm.getCoalesced(vn);
        TypeContest contest = typeContests.computeIfAbsent(coalesced, k -> new TypeContest());
        if (!vn.equals(coalesced)) {
            contest.vote(JitTypeBehavior.INTEGER.type(coalesced.getSize()));
        }
        else {
            contest.vote(tm.typeOf(v));
        }
    }
}

/**
 * Go through the contests in address order, reserving one local for a simple winning type, or
 * one local per leg for a multi-precision integer type.
 */
private void reserveLocals() {
    List<Map.Entry<Varnode, TypeContest>> inAddressOrder = typeContests.entrySet()
            .stream()
            .sorted(Comparator.comparing(e -> e.getKey().getAddress()))
            .toList();
    for (Map.Entry<Varnode, TypeContest> contest : inAddressOrder) {
        Varnode vn = contest.getKey();
        VarDesc desc = VarDesc.fromVarnode(vn, contest.getValue().winner());
        switch (desc.type()) {
            case SimpleJitType t -> locals.put(vn.getAddress(),
                genFreeLocal(desc.name(), t, desc));
            case MpIntJitType t -> genFreeLocals(desc.name(), t.legTypes(), desc)
                    .forEach(leg -> locals.put(leg.vn().getAddress(), leg));
            default -> throw new AssertionError();
        }
    }
}

/**
 * Assign a handler to every value, visiting them in the data flow model's sorted order.
 */
private void assignHandlers() {
    for (JitVal v : dfm.allValuesSorted()) {
        VarHandler handler = createHandler(v);
        handlers.put(v, handler);
    }
}
/**
* Get the handler for the given value (constant or variable in the use-def graph)
*
* <p>
* This is a plain map lookup, so a value that was never assigned a handler yields {@code null}.
*
* @param v the value
* @return the handler, or {@code null} if none is recorded for the value
*/
public VarHandler getHandler(JitVal v) {
return handlers.get(v);
}
/**
* Get all of the locals allocated
*
* <p>
* The locals are kept in a sorted map keyed by address, and this returns that map's values
* collection directly (not a copy), so they are iterated in order of increasing address.
*
* @return the locals
*/
public Collection<JvmLocal> allLocals() {
return locals.values();
}
/**
 * Get all of the locals allocated for the given varnode
 *
 * <p>
 * The range searched starts at the nearest local at or below the varnode's address (or at the
 * varnode's address, if there is none) and ends at the varnode's maximum address, inclusive.
 * NOTE(review): the nearest lower local is included without checking that it actually overlaps
 * the varnode; this presumably relies on callers passing only allocated (coalesced) varnodes
 * -- confirm.
 *
 * @implNote This is used by the code generator to birth and retire the local variables, given
 *           that scope is analyzed in terms of varnodes.
 * @param vn the varnode
 * @return the locals
 */
public Collection<JvmLocal> localsForVn(Varnode vn) {
    Address start = vn.getAddress();
    Address floor = locals.floorKey(start);
    Address from = floor == null ? start : floor;
    return locals.subMap(from, true, maxAddr(vn), true).values();
}
}

View file

@ -0,0 +1,105 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.pcode.emu.jit.*;
import ghidra.pcode.emu.jit.JitPassage.AddrCtx;
import ghidra.program.model.lang.Endian;
import ghidra.program.model.pcode.PcodeOp;
/**
 * State shared among the phases of the translation process: the compiler configuration, the
 * passage being translated, and that passage's language and endianness.
 *
 * @see JitCompiler
 */
public class JitAnalysisContext {
	private final JitConfiguration config;
	private final JitPassage passage;
	private final SleighLanguage language;
	private final Endian endian;

	/**
	 * Construct a new context from the given configuration and source passage
	 *
	 * <p>
	 * The language is taken from the passage, and the endianness is derived from the language.
	 *
	 * @param config the JIT compiler's configuration
	 * @param passage the passage selected for translation
	 */
	public JitAnalysisContext(JitConfiguration config, JitPassage passage) {
		SleighLanguage lang = passage.getLanguage();
		this.config = config;
		this.passage = passage;
		this.language = lang;
		if (lang.isBigEndian()) {
			this.endian = Endian.BIG;
		}
		else {
			this.endian = Endian.LITTLE;
		}
	}

	/**
	 * Get the JIT compiler configuration given at construction
	 *
	 * @return the configuration
	 */
	public JitConfiguration getConfiguration() {
		return this.config;
	}

	/**
	 * Get the source passage given at construction
	 *
	 * @return the passage
	 */
	public JitPassage getPassage() {
		return this.passage;
	}

	/**
	 * Get the language of the translation source (i.e., emulation target), as reported by the
	 * passage
	 *
	 * @return the language
	 */
	public SleighLanguage getLanguage() {
		return this.language;
	}

	/**
	 * Get the endianness of the translation source, i.e., emulation target
	 *
	 * @return {@link Endian#BIG} if the language is big endian, otherwise {@link Endian#LITTLE}
	 */
	public Endian getEndian() {
		return this.endian;
	}

	/**
	 * Check if the given p-code op is the first of an instruction.
	 *
	 * <p>
	 * This simply delegates to the passage.
	 *
	 * @param op the op to check
	 * @return the address-context pair, as reported by the passage
	 * @see JitPassage#getOpEntry(PcodeOp)
	 */
	public AddrCtx getOpEntry(PcodeOp op) {
		AddrCtx entry = passage.getOpEntry(op);
		return entry;
	}

	/**
	 * Get the error message for a given p-code op
	 *
	 * <p>
	 * This simply delegates to the passage.
	 *
	 * @param op the p-code op generating the error
	 * @return the message
	 * @see JitPassage#getErrorMessage(PcodeOp)
	 */
	public String getErrorMessage(PcodeOp op) {
		String message = passage.getErrorMessage(op);
		return message;
	}
}

View file

@ -0,0 +1,586 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.util.*;
import ghidra.pcode.emu.jit.*;
import ghidra.pcode.emu.jit.JitCompiler.Diag;
import ghidra.pcode.emu.jit.JitPassage.*;
import ghidra.pcode.emu.jit.decode.DecoderForOneStride;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.exec.PcodeProgram;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.SequenceNumber;
/**
* The control flow analysis for JIT-accelerated emulation.
*
* <p>
* This implements the Control Flow Analysis phase of the {@link JitCompiler}. Some rudimentary
* analysis is performed during passage decoding &mdash; note the {@link BlockSplitter} is exported
* for use in {@link DecoderForOneStride}. This is necessary to evaluate whether an instruction
* (especially an inject-instrumented instruction) has fall-through. Without that information, the
* decoder cannot know whether it has reached the end of its stride. Note that the decoder records
* all the branches it encounters and includes them as metadata in the passage. Because branches
* need to record the source and target p-code op, the decoder is well suited. Additionally, it has
* to compute these anyway, and we'd rather avoid duplicative work by this analyzer.
*
* <p>
* The decoded passage contains a good deal of information, but the primary inputs at this point are
* the ordered list of p-code ops and the branches. This model's primary responsibility is to break
* the passage down into basic blocks at the p-code level. Even though the p-code ops have all been
* concatenated together when constructing the passage, we know, by definition, that each stride
* will end with an unconditional branch (or else a synthesized {@link ExitPcodeOp}). Note also that
* {@link JitPassage#getBranches()} only includes the non-fall-through branches, because these are
* all that are recorded by the decoder. Thus, it is also this model's responsibility to create the
* fall-through branches. These will occur to represent the "false" case of any conditional
* branches, and to represent "unconditional fall through."
*
* <p>
* The algorithm for this is fairly straightforward and has been implemented primarily in
* {@link BlockSplitter}. Most everything else in this class is data management and the types
* representing the model.
*
* <p>
* <b>NOTE:</b> It is technically possible for a userop to branch, but this analysis does not
* consider that. Instead, the emulator will decide how to handle those. Conventionally, I'd rather
* a userop <em>never</em> perform control flow. Instead, I'd rather see things like
* <code>pc = my_control_op(); goto [pc];</code>.
*/
public class JitControlFlowModel {
/**
 * An exception thrown when control flow might run off the edge of the passage.
 *
 * <p>
 * By definition a passage is a collection of strides, and each stride is terminated by some op
 * without fall through (or else a synthesized {@link ExitPcodeOp}). In particular, the last
 * stride cannot end in fall through: there would be no op for it to fall through to. While
 * this should never happen, it is easy to allow it by accident during development. The control
 * flow analysis detects this as it finishes splitting the passage into blocks. If the final
 * block has fall through, the passage is said to have "unterminated flow," and this exception
 * is thrown. It is thrown during translation rather than at execution of the passage, because
 * it represents an assertion failure in the translation process, i.e., the decoder produced an
 * unsound passage.
 */
public static class UnterminatedFlowException extends IllegalArgumentException {
	private static final String MESSAGE = "Final block cannot fall through";

	/**
	 * Construct the exception with its fixed message
	 */
	public UnterminatedFlowException() {
		super(MESSAGE);
	}
}
/**
 * A flow from one block to another
 *
 * <p>
 * This wraps an {@link IntBranch} together with the two blocks it connects, so that both ends
 * can be identified quickly. Note that to connect two blocks in the passage, the branch must
 * by definition be an {@link IntBranch}.
 *
 * <p>
 * If this flow represents entry into the passage, then {@link #from()} and {@link #branch()}
 * may be null.
 *
 * @param from the block from which execution flows. In the case of a non-fall-through branch,
 *            the block should end with the branching p-code op. For conditional fall-through,
 *            it should end with the {@link PcodeOp#CBRANCH} op. For unconditional
 *            fall-through, it could end with any op having fall through.
 * @param to the block to which execution flows. The block must start with the
 *            {@link IntBranch#to() target op} of the branch.
 * @param branch the branch effecting the flow of execution
 */
public record BlockFlow(JitBlock from, JitBlock to, IntBranch branch) {
	/**
	 * Create an entry flow to the given block
	 *
	 * <p>
	 * The resulting flow has neither a source block nor a branch.
	 *
	 * @param to the block to which execution flows
	 * @return the flow
	 */
	public static BlockFlow entry(JitBlock to) {
		JitBlock noSource = null;
		IntBranch noBranch = null;
		return new BlockFlow(noSource, to, noBranch);
	}
}
/**
 * A basic block of p-code
 *
 * <p>
 * This follows the formal definition of a basic block, but at the p-code level. All flows into
 * the block enter at its first op, and all flows out of the block exit at its last op. The
 * block also contains information about these flows as well as branches out of the passage via
 * this block.
 */
public static class JitBlock extends PcodeProgram {
	// These collections start out mutable so they can be populated during analysis. They are
	// replaced by unmodifiable views in cook().
	private Map<IntBranch, BlockFlow> flowsFrom = new HashMap<>();
	private Map<IntBranch, BlockFlow> flowsTo = new HashMap<>();
	private List<IntBranch> branchesFrom = new ArrayList<>();
	private List<IntBranch> branchesTo = new ArrayList<>();
	private List<Branch> branchesOut = new ArrayList<>();

	private final int instructions;
	private final int trailingOps;

	/**
	 * Construct a new block
	 *
	 * <p>
	 * The given ops are copied. The constructor also counts the decoded ops that start an
	 * instruction, and the decoded ops following the last such start.
	 *
	 * @param program the program (i.e., passage) from which this block is derived
	 * @param code the subset of ops, in execution order, comprising this block
	 */
	public JitBlock(PcodeProgram program, List<PcodeOp> code) {
		super(program, List.copyOf(code));
		int instructionStarts = 0;
		int opsSinceStart = 0;
		for (PcodeOp op : code) {
			if (!(op instanceof DecodedPcodeOp dec)) {
				// Only decoded ops participate in either count
				continue;
			}
			if (dec.isInstructionStart()) {
				instructionStarts++;
				opsSinceStart = 0;
			}
			else {
				opsSinceStart++;
			}
		}
		this.instructions = instructionStarts;
		this.trailingOps = opsSinceStart;
	}

	@Override
	protected String getHead() {
		return super.getHead() + "[start=" + start() + "]";
	}

	@Override
	public String toString() {
		return getHead();
	}

	/**
	 * Get the first p-code op in this block
	 *
	 * @return the first p-code op
	 */
	public PcodeOp first() {
		return code.getFirst();
	}

	/**
	 * Get the sequence number of the first op
	 *
	 * <p>
	 * This is used for display and testing purposes only.
	 *
	 * @return the sequence number
	 */
	public SequenceNumber start() {
		return code.getFirst().getSeqnum();
	}

	/**
	 * Get the sequence number of the last op
	 *
	 * <p>
	 * This is used for display and testing purposes only.
	 *
	 * @return the sequence number
	 */
	public SequenceNumber end() {
		return code.getLast().getSeqnum();
	}

	/**
	 * Convert our collections to immutable ones
	 */
	private void cook() {
		flowsFrom = Collections.unmodifiableMap(flowsFrom);
		flowsTo = Collections.unmodifiableMap(flowsTo);
		branchesFrom = Collections.unmodifiableList(branchesFrom);
		branchesTo = Collections.unmodifiableList(branchesTo);
		branchesOut = Collections.unmodifiableList(branchesOut);
	}

	/**
	 * Get (internal) flows leaving this block
	 *
	 * @return the flows, keyed by branch
	 */
	public Map<IntBranch, BlockFlow> flowsFrom() {
		return flowsFrom;
	}

	/**
	 * Get (internal) flows entering this block
	 *
	 * @return the flows, keyed by branch
	 */
	public Map<IntBranch, BlockFlow> flowsTo() {
		return flowsTo;
	}

	/**
	 * Get internal branches leaving this block
	 *
	 * @return the list of branches
	 */
	public List<IntBranch> branchesFrom() {
		return branchesFrom;
	}

	/**
	 * Get internal branches entering this block
	 *
	 * @return the list of branches
	 */
	public List<IntBranch> branchesTo() {
		return branchesTo;
	}

	/**
	 * Get branches leaving the passage from this block
	 *
	 * @return the list of branches
	 */
	public List<Branch> branchesOut() {
		return branchesOut;
	}

	/**
	 * If this block has fall through, find the block into which it falls
	 *
	 * @return the block, or {@code null} if no flow leaving this block is a fall-through
	 */
	public JitBlock getFallFrom() {
		return flowsFrom.values()
				.stream()
				.filter(f -> f.branch.isFall())
				.findAny()
				.map(f -> f.to)
				.orElse(null);
	}

	/**
	 * Check if there is an internal non-fall-through branch to this block
	 *
	 * <p>
	 * This is used by the {@link JitCodeGenerator} to determine whether or not a block's
	 * bytecode needs to be labeled.
	 *
	 * @return true if this block is targeted by a branch
	 */
	public boolean hasJumpTo() {
		return flowsTo.values().stream().anyMatch(f -> !f.branch.isFall());
	}

	/**
	 * Get the target block for the given internal branch, assuming it's from this block
	 *
	 * @param branch the branch
	 * @return the target block, or {@code null} if no flow from this block is recorded for the
	 *         given branch
	 */
	public JitBlock getTargetBlock(IntBranch branch) {
		BlockFlow flow = flowsFrom.get(branch);
		// Honor the documented "or null" contract instead of failing on an unknown branch
		return flow == null ? null : flow.to();
	}

	/**
	 * Get the number of instructions represented in this block
	 *
	 * <p>
	 * This may get dicey as blocks are not necessarily split on instruction boundaries.
	 * Nevertheless, we seek to count the number of instructions executed at runtime, so that we
	 * can replay an execution, step in reverse, etc. What we actually do here is count the
	 * number of ops which are the first op produced by a decoded instruction.
	 *
	 * @see JitCompiledPassage#count(int, int)
	 * @see JitPcodeThread#count(int, int)
	 * @return the instruction count
	 */
	public int instructionCount() {
		return instructions;
	}

	/**
	 * Get the number of trailing ops in this block
	 *
	 * <p>
	 * It is possible a block represents only partial execution of an instruction. Though
	 * {@link #instructionCount()} will count this partial instruction, we can tell how far we
	 * got into it by examining this value. With this, we should be able to replay an execution
	 * to exactly the same p-code op step. The value is the number of decoded ops following the
	 * last instruction-starting op in this block; the starting op itself is not counted.
	 *
	 * @return the trailing op count
	 */
	public int trailingOpCount() {
		return trailingOps;
	}
}
/**
* A class that splits a sequence of ops and associated branches into basic blocks.
*
* <p>
* This is the kernel of control flow analysis. It first indexes the branches by source and
* target op. Note that only non-fall-through branches are known at this point. Then, it
* traverses the list of ops. A split occurs following an op that is a branch source and/or
* preceding an op that is a branch target. A block is constructed when such a split point is
* encountered. In the case of a branch source, the branch is added to the newly constructed
* block. As traversal proceeds to the next op, it checks if the immediately-preceding block
* should have fall through (conditional or unconditional) by examining its last op. It adds a
* new fall-through branch if so. The end of the p-code op list is presumed a split point. If
* that final block "should have" fall through, an {@link UnterminatedFlowException} is thrown.
*
* <p>
* Once all the splitting is done, we have the blocks and all the branches (internal or
* external) that leave each block. We then compute all the branches (internal) that enter each
* block and the associated flows in both directions.
*/
public static class BlockSplitter {
private final PcodeProgram program;
private final Map<PcodeOp, Branch> branches = new HashMap<>();
private final Map<PcodeOp, IntBranch> branchesByTarget = new HashMap<>();
private final SequencedMap<PcodeOp, JitBlock> blocks = new LinkedHashMap<>();
private List<PcodeOp> partialBlock = new ArrayList<>();
private JitBlock lastBlock = null;
/**
* Construct a new block splitter to process the given program
*
* <p>
* No analysis is performed in the constructor. The client must call
* {@link #addBranches(Collection)} and then {@link #splitBlocks()}.
*
* @param program the program, i.e., list of p-code ops
*/
public BlockSplitter(PcodeProgram program) {
this.program = program;
}
/**
* Notify the splitter of the given branches before analysis
*
* <p>
* The splitter immediately indexes the given branches by source and target op.
*
* @param branches the branches
*/
public void addBranches(Collection<? extends Branch> branches) {
for (Branch b : branches) {
this.branches.put(b.from(), b);
if (b instanceof IntBranch ib) {
this.branchesByTarget.put(ib.to(), ib);
}
}
}
private JitBlock makeBlock() {
if (!partialBlock.isEmpty()) {
lastBlock = new JitBlock(program, partialBlock);
partialBlock = new ArrayList<>();
blocks.put(lastBlock.first(), lastBlock);
return lastBlock;
}
return null;
}
private boolean needsFallthrough(JitBlock block) {
if (block.branchesFrom.isEmpty() && block.branchesOut.isEmpty()) {
return true;
}
if (block.branchesFrom.size() == 1) {
return JitPassage.hasFallthrough(block.branchesFrom.getFirst().from());
}
if (block.branchesOut.size() == 1) {
return JitPassage.hasFallthrough(block.branchesOut.getFirst().from());
}
throw new AssertionError();
}
private void checkForFallthrough(PcodeOp op) {
if (lastBlock == null) {
return;
}
if (needsFallthrough(lastBlock)) {
lastBlock.branchesFrom.add(new IntBranch(lastBlock.getCode().getLast(), op, true));
}
lastBlock = null;
}
private void fillFlows() {
for (JitBlock from : blocks.values()) {
for (Branch branch : from.branchesFrom) {
if (branch instanceof IntBranch ib) {
JitBlock to = Objects.requireNonNull(blocks.get(ib.to()));
to.branchesTo.add(ib);
BlockFlow flow = new BlockFlow(from, to, ib);
from.flowsFrom.put(ib, flow);
to.flowsTo.put(ib, flow);
}
}
}
}
private void cook() {
for (JitBlock block : blocks.values()) {
block.cook();
}
}
private IntBranch getBranchTo(PcodeOp to) {
return branchesByTarget.get(to);
}
private Branch getBranchFrom(PcodeOp from) {
return branches.get(from);
}
private void doWork() {
if (program.getCode().isEmpty()) {
throw new IllegalArgumentException("No code to analyze");
}
for (PcodeOp op : program.getCode()) {
// This op would be after the block from the last iteration
checkForFallthrough(op);
IntBranch branchTo = getBranchTo(op);
if (branchTo != null) {
makeBlock();
// This op would be after the block we just made
checkForFallthrough(op);
}
partialBlock.add(op);
Branch branchFrom = getBranchFrom(op);
if (branchFrom != null) {
makeBlock();
// NB. lastBlock cannot be null, we just added the op
if (branchFrom instanceof IntBranch ib) {
lastBlock.branchesFrom.add(ib);
}
else {
lastBlock.branchesOut.add(branchFrom);
}
/**
* Do not checkForFallthrough, because the current op is already in the block
*/
}
}
makeBlock();
if (needsFallthrough(lastBlock)) {
/**
* I'm making it the decoder's responsibility to provide a sane program. We can
* catch missing control flow at the very end, but we cannot do so at the end of
* other blocks. If they have fall-through, they'll (perhaps erroneously) fall
* through to the next block that happens to be there. Thus, it is up to the
* decoder, if it decodes any incomplete strides, that is must synthesize the
* appropriate control-flow ops.
*/
throw new UnterminatedFlowException();
}
fillFlows();
cook();
}
private SequencedMap<PcodeOp, JitBlock> getBlocks() {
return blocks;
}
/**
* Perform the actual analysis
*
* @return the resulting split blocks, keyed by {@link JitBlock#start()}
*/
public SequencedMap<PcodeOp, JitBlock> splitBlocks() {
doWork();
return getBlocks();
}
}
private final JitPassage passage;
private final SequencedMap<PcodeOp, JitBlock> blocks;

/**
 * Construct the control flow model.
 *
 * <p>
 * Analysis is performed as part of constructing the model: the passage is taken from the
 * context, and then {@link #analyze()} is invoked to compute the blocks.
 *
 * @param context the analysis context
 */
public JitControlFlowModel(JitAnalysisContext context) {
	JitPassage source = context.getPassage();
	// The passage must be assigned first, because analyze() reads it
	this.passage = source;
	this.blocks = analyze();
}
/**
 * Perform the analysis.
 *
 * <p>
 * This feeds the passage and its recorded branches to a {@link BlockSplitter} and returns the
 * blocks it produces.
 *
 * @return the resulting blocks, keyed by {@link JitBlock#first()}
 */
protected SequencedMap<PcodeOp, JitBlock> analyze() {
	BlockSplitter splitter = new BlockSplitter(passage);
	var knownBranches = passage.getBranches().values();
	splitter.addBranches(knownBranches);
	SequencedMap<PcodeOp, JitBlock> result = splitter.splitBlocks();
	return result;
}
/**
 * Get the basic blocks
 *
 * @return the values view of the map of blocks
 */
public Collection<JitBlock> getBlocks() {
	Collection<JitBlock> all = blocks.values();
	return all;
}
/**
 * For diagnostics: Dump the results to stderr
 *
 * <p>
 * For each block, this prints the branches and flows entering it, then its formatted p-code,
 * then the branches and flows leaving it, and finally the branches out of the passage.
 *
 * @see Diag#PRINT_CFM
 */
public void dumpResult() {
	System.err.println("STAGE: ControlFlow");
	for (JitBlock block : blocks.values()) {
		System.err.println("");
		System.err.println("Block: " + block);
		dumpSection("Branches to:", block.branchesTo);
		dumpSection("Flows to:", block.flowsTo.values());
		System.err.println(block.format(true));
		dumpSection("Branches from:", block.branchesFrom);
		dumpSection("Flows from:", block.flowsFrom.values());
		dumpSection("Branches out:", block.branchesOut);
	}
}

/**
 * Print a title line followed by one line per entry, to stderr
 *
 * @param title the title line
 * @param entries the entries to print, each prefixed by a space
 */
private static void dumpSection(String title, Iterable<?> entries) {
	System.err.println(title);
	for (Object entry : entries) {
		System.err.println(" " + entry);
	}
}
}

View file

@ -0,0 +1,398 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;
import ghidra.pcode.emu.jit.analysis.JitDataFlowState.MiniDFState;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.*;
import ghidra.pcode.exec.ConcretionError;
import ghidra.pcode.exec.PcodeArithmetic;
import ghidra.pcode.opbehavior.OpBehaviorFactory;
import ghidra.pcode.opbehavior.OpBehaviorSubpiece;
import ghidra.pcode.utils.Utils;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Endian;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.Varnode;
/**
* A p-code arithmetic for interpreting p-code and constructing a use-def graph
*
* <p>
* This is used for intra-block data flow analysis. We leverage the same API as is used for concrete
* p-code interpretation, but we use it for an abstraction. The type of the interpretation is
* {@code T:=}{@link JitVal}, which can consist of constants and variables in the use-def graph. The
* arithmetic must be provided to the {@link JitDataFlowExecutor}. The intra-block portions of the
* use-def graph are populated as each block is interpreted by the executor.
*
* <p>
* The general strategy for each of the arithmetic operations is to 1) generate the output SSA
* variable for the op, 2) generate the op node for the generated output and given inputs, 3) enter
* the op into the use-def graph as the definition of its output, 4) record the inputs as used by
* the new op, and finally 5) return the generated output.
*
* <p>
* There should only need to be one of these per data flow model, not per block.
*/
public class JitDataFlowArithmetic implements PcodeArithmetic<JitVal> {
/** The behavior of {@link PcodeOp#SUBPIECE}, used to evaluate subpieces of constants */
private static final OpBehaviorSubpiece OB_SUBPIECE =
	(OpBehaviorSubpiece) OpBehaviorFactory.getOpBehavior(PcodeOp.SUBPIECE);

private final JitDataFlowModel dfm;
private final Endian endian;

/**
 * Construct the arithmetic
 *
 * <p>
 * The endianness is taken from the given context.
 *
 * @param context the analysis context
 * @param dfm the owning data flow model
 */
public JitDataFlowArithmetic(JitAnalysisContext context, JitDataFlowModel dfm) {
	this.endian = context.getEndian();
	this.dfm = dfm;
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This is the endianness obtained from the analysis context at construction.
 */
@Override
public Endian getEndian() {
	return this.endian;
}
/**
 * Remove {@code amt} bytes from the right of the <em>varnode</em>.
 *
 * <p>
 * "Right" is considered with respect to the machine endianness. If it is little endian, then
 * the bytes are shaved from the <em>left</em> of the value. This should be used when getting
 * values from the state to remove pieces from off-cut values. It should be applied before the
 * pieces are ordered according to machine endianness.
 *
 * <p>
 * The output varnode keeps the input's address and is {@code amt} bytes shorter.
 *
 * @param in1Vn the varnode representing the input
 * @param amt the number of bytes to remove
 * @param in1 the input (really a value read from the state)
 * @return the resulting value
 */
public JitVal truncFromRight(Varnode in1Vn, int amt, JitVal in1) {
	int keptSize = in1Vn.getSize() - amt;
	Varnode outVn = new Varnode(in1Vn.getAddress(), keptSize);
	// In big endian, the dropped bytes are the value's low-order ones, so shift past them
	int offset = endian.isBigEndian() ? amt : 0;
	return subpiece(outVn, offset, in1);
}
/**
 * Remove {@code amt} bytes from the left of the <em>varnode</em>.
 *
 * <p>
 * "Left" is considered with respect to the machine endianness. If it is little endian, then
 * the bytes are shaved from the <em>right</em> of the value. This should be used when getting
 * values from the state to remove pieces from off-cut values. It should be applied before the
 * pieces are ordered according to machine endianness.
 *
 * <p>
 * The output varnode starts {@code amt} bytes after the input's address and is {@code amt}
 * bytes shorter.
 *
 * @param in1Vn the varnode representing the input
 * @param amt the number of bytes to remove
 * @param in1 the input (really a value read from the state)
 * @return the resulting value
 */
public JitVal truncFromLeft(Varnode in1Vn, int amt, JitVal in1) {
	int keptSize = in1Vn.getSize() - amt;
	Varnode outVn = new Varnode(in1Vn.getAddress().add(amt), keptSize);
	// In little endian, the dropped bytes are the value's low-order ones, so shift past them
	int offset = endian.isBigEndian() ? 0 : amt;
	return subpiece(outVn, offset, in1);
}
/**
 * Remove {@code offset} bytes' worth of parts from the right (end) of the list
 *
 * <p>
 * Whole parts are removed from the end until at least {@code offset} bytes are gone. If that
 * overshoots, the last part removed is replaced by what remains of it after shaving off only
 * the bytes that still needed removing.
 *
 * @param parts the parts to modify in place
 * @param offset the number of bytes to remove
 */
private void removeOffsetFromRight(List<JitVal> parts, int offset) {
	if (offset <= 0) {
		// Nothing to remove; leave the parts untouched
		return;
	}
	JitVal p;
	do {
		p = parts.remove(parts.size() - 1);
		offset -= p.size();
	}
	while (offset > 0);
	if (offset < 0) {
		/*
		 * We removed -offset bytes too many, so exactly that many bytes of p must survive.
		 * The number to shave is therefore the rest of p, i.e., p.size() + offset.
		 */
		JitVal np = shaveFromRight(p.size() + offset, p);
		parts.add(np);
		offset += np.size();
		assert offset == 0;
	}
}
/**
 * Remove parts from the left (start) of the list until the remainder totals {@code size} bytes
 *
 * <p>
 * Parts are accumulated from the end of the list until they total at least {@code size} bytes.
 * If the total overshoots, the leftmost accumulated part is replaced by what remains of it
 * after shaving the excess from its left. All parts to the left of that one are removed.
 *
 * @param parts the parts to modify in place
 * @param size the desired total size in bytes
 */
private void removeFromLeftToSize(List<JitVal> parts, int size) {
	int actualSize = 0;
	JitVal p;
	int i = parts.size();
	do {
		p = parts.get(--i);
		actualSize += p.size();
	}
	while (actualSize < size);
	if (actualSize > size) {
		// The part at index i is too big by the excess; shave exactly that much, in place
		JitVal np = shaveFromLeft(actualSize - size, p);
		parts.set(i, np);
		actualSize -= p.size();
		actualSize += np.size();
		assert actualSize == size;
	}
	// Drop everything to the left of the leftmost kept part
	parts.subList(0, i).clear();
}
/**
 * Try to produce a simplified {@link JitSynthSubPieceOp} or {@link JitCatenateOp}
 *
 * <p>
 * This takes an input, subpiece offset, and output variable. If the input variable is the
 * result of another subpiece, the result can be a single simplified subpiece. Similarly, if the
 * input is the result of a catenation, then the result can be a simplified catenation, or
 * possibly a single one of its parts.
 *
 * <p>
 * If either of these situations applies, this returns a non-null result. Unless that result
 * is a single part of the catenation, it is added to the use-def graph with the given output
 * variable as its output. Otherwise, the result is null and the caller should create a new
 * subpiece op.
 *
 * @param out the output variable
 * @param offset the subpiece offset (number of bytes shifted right)
 * @param v the input value
 * @return the output of the simplified sub-graph, or null if no simplification applies
 */
private JitVal trySimplifiedSubPiece(JitOutVar out, int offset, JitVal v) {
	if (v instanceof JitOutVar vOut) {
		var definition = vOut.definition();
		if (definition instanceof JitSynthSubPieceOp subsub) {
			subsub.unlink();
			// A subpiece of a subpiece is one subpiece at the sum of the offsets
			int combined = offset + subsub.offset();
			JitSynthSubPieceOp merged = new JitSynthSubPieceOp(out, combined, subsub.v());
			return dfm.notifyOp(merged).out();
		}
		if (definition instanceof JitCatenateOp cat) {
			cat.unlink();
			List<JitVal> remaining = new ArrayList<>(cat.parts());
			removeOffsetFromRight(remaining, offset);
			removeFromLeftToSize(remaining, out.size());
			assert !remaining.isEmpty();
			if (remaining.size() == 1) {
				// Context should already be notified
				return remaining.get(0);
			}
			JitCatenateOp smaller = new JitCatenateOp(out, remaining);
			return dfm.notifyOp(smaller).out();
		}
	}
	return null;
}
/**
 * Construct the result of taking the subpiece
 *
 * <p>
 * An output variable is generated for the given varnode. If the input is another subpiece or a
 * catenation, the result may be simplified. In particular, the subpiece of a catenation may be
 * a smaller catenation. If no simplification applies, a new synthetic subpiece op is entered
 * into the use-def graph.
 *
 * @param outVn the output varnode
 * @param offset the subpiece offset (number of bytes shifted right)
 * @param v the input value
 * @return the output of the (possibly simplified) sub-graph
 */
private JitVal subpiece(Varnode outVn, int offset, JitVal v) {
	JitOutVar out = dfm.generateOutVar(outVn);
	JitVal simplified = trySimplifiedSubPiece(out, offset, v);
	if (simplified == null) {
		JitSynthSubPieceOp op = new JitSynthSubPieceOp(out, offset, v);
		return dfm.notifyOp(op).out();
	}
	return simplified;
}
/**
 * Compute the varnode occupied by a subpiece of the given varnode
 *
 * @param size the size of the subpiece in bytes
 * @param offset the subpiece offset (number of bytes shifted right)
 * @param whole the varnode of the whole value
 * @return the varnode of the subpiece, positioned according to the machine endianness
 */
private Varnode subPieceVn(int size, int offset, Varnode whole) {
	// In big endian, low-order bytes sit at the high addresses, so measure from the end
	int displacement = endian.isBigEndian() ? whole.getSize() - offset - size : offset;
	return new Varnode(whole.getAddress().add(displacement), size);
}
/**
 * Remove {@code amt} bytes from the right of the value.
 *
 * <p>
 * The value is unaffected by the machine endianness, except to designate the output varnode.
 * The result is the subpiece at offset {@code amt} whose size is {@code amt} bytes less than
 * the input's.
 *
 * @param amt the number of bytes to remove
 * @param in1 the input
 * @return the output
 */
public JitVal shaveFromRight(int amt, JitVal in1) {
	int keptSize = in1.size() - amt;
	return subpiece(keptSize, amt, in1);
}
/**
 * Remove {@code amt} bytes from the left of the value.
 *
 * <p>
 * The value is unaffected by the machine endianness, except to designate the output varnode.
 * The result is the subpiece at offset 0 whose size is {@code amt} bytes less than the
 * input's.
 *
 * @param amt the number of bytes to remove
 * @param in1 the input
 * @return the output
 */
public JitVal shaveFromLeft(int amt, JitVal in1) {
	int keptSize = in1.size() - amt;
	return subpiece(keptSize, 0, in1);
}
/**
 * Compute the subpiece of a value.
 *
 * <p>
 * For a constant, the result is computed immediately as a new constant. For a varnode variable,
 * the output varnode is computed from the input varnode and the subpiece parameters, and the
 * result is added to the use-def graph. This is used to handle variable retrieval when an
 * access only includes parts of a value previously written. Consider the x86 assembly:
 *
 * <pre>
 * MOV RAX, qword ptr [...]
 * MOV dword ptr [...], EAX
 * </pre>
 *
 * <p>
 * The second line reads {@code EAX}, which consists of only the lower part of {@code RAX}.
 * Thus, we synthesize a subpiece op. These are distinct from an actual {@link PcodeOp#SUBPIECE}
 * op, since we sometimes need to filter out synthetic ops.
 *
 * @param size the size of the output variable in bytes
 * @param offset the subpiece offset (number of bytes shifted right)
 * @param v the input value
 * @return the output value
 * @throws UnsupportedOperationException if the input is neither a constant nor a varnode
 *             variable
 */
public JitVal subpiece(int size, int offset, JitVal v) {
	if (v instanceof JitConstVal c) {
		BigInteger shift = BigInteger.valueOf(offset);
		BigInteger piece = OB_SUBPIECE.evaluateBinary(size, v.size(), c.value(), shift);
		return new JitConstVal(size, piece);
	}
	if (!(v instanceof JitVarnodeVar vv)) {
		throw new UnsupportedOperationException("unsupported subpiece of " + v);
	}
	Varnode outVn = subPieceVn(size, offset, vv.varnode());
	return subpiece(outVn, offset, v);
}
/**
 * Construct the catenation of the given values to form the given output varnode.
 *
 * <p>
 * The result is added to the use-def graph. This is used to handle variable retrieval when the
 * pattern of accesses indicates catenation. Consider the x86 assembly:
 *
 * <pre>
 * MOV AH, byte ptr [...]
 * MOV AL, byte ptr [...]
 * MOV word ptr [...], AX
 * </pre>
 *
 * <p>
 * On the third line, the value in {@code AX} is the catenation of whatever values were written
 * into {@code AH} and {@code AL}. Thus, we synthesize a catenation op node in the use-def
 * graph.
 *
 * @param outVn the output varnode
 * @param parts the list of values to catenate, ordered by machine endianness
 * @return the output value
 * @see MiniDFState#getDefinitions(AddressSpace, long, int)
 */
public JitVal catenate(Varnode outVn, List<JitVal> parts) {
	JitCatenateOp op = new JitCatenateOp(dfm.generateOutVar(outVn), parts);
	return dfm.notifyOp(op).out();
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This generates the output variable for the op's output varnode, enters the resulting unary op
 * node into the use-def graph, and returns its output.
 */
@Override
public JitVal unaryOp(PcodeOp op, JitVal in1) {
	var out = dfm.generateOutVar(op.getOutput());
	return dfm.notifyOp(JitOp.unOp(op, out, in1)).out();
}

/**
 * {@inheritDoc}
 *
 * <p>
 * This form is not supported by this arithmetic. It always throws an {@link AssertionError}.
 */
@Override
public JitVal unaryOp(int opcode, int sizeout, int sizein1, JitVal in1) {
	throw new AssertionError();
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This generates the output variable for the op's output varnode, enters the resulting binary
 * op node into the use-def graph, and returns its output.
 */
@Override
public JitVal binaryOp(PcodeOp op, JitVal in1, JitVal in2) {
	var out = dfm.generateOutVar(op.getOutput());
	return dfm.notifyOp(JitOp.binOp(op, out, in1, in2)).out();
}

/**
 * {@inheritDoc}
 *
 * <p>
 * This form is not supported by this arithmetic. It always throws an {@link AssertionError}.
 */
@Override
public JitVal binaryOp(int opcode, int sizeout, int sizein1, JitVal in1, int sizein2,
		JitVal in2) {
	throw new AssertionError();
}
/**
 * {@inheritDoc}
 *
 * <p>
 * We override this to record the {@link JitStoreOp store} op into the use-def graph. As
 * "output" we just return the value of the recorded store op. The executor will call
 * {@link JitDataFlowState#setVar(AddressSpace, JitVal, int, boolean, JitVal) setVal}, but the
 * state will just ignore it, because it will be an indirect memory write.
 */
@Override
public JitVal modBeforeStore(PcodeOp op, AddressSpace space, JitVal inOffset, JitVal inValue) {
	JitStoreOp store = new JitStoreOp(op, space, inOffset, inValue);
	return dfm.notifyOp(store).value();
}

/**
 * {@inheritDoc}
 *
 * <p>
 * This form is not supported by this arithmetic. It always throws an {@link AssertionError}.
 */
@Override
public JitVal modBeforeStore(int sizeinOffset, AddressSpace space, JitVal inOffset,
		int sizeinValue, JitVal inValue) {
	throw new AssertionError();
}
/**
 * {@inheritDoc}
 *
 * <p>
 * We override this to record the {@link JitLoadOp load} op into the use-def graph. For our
 * {@code inValue}, the {@link JitDataFlowState state} will have just returned the
 * {@link JitIndirectMemoryVar#INSTANCE dummy indirect} variable definition. We must not "use"
 * this. Instead, we must take our other parameters to construct the load op and return its
 * output.
 */
@Override
public JitVal modAfterLoad(PcodeOp op, AddressSpace space, JitVal inOffset, JitVal inValue) {
	// NB. inValue is deliberately left unused
	var out = dfm.generateOutVar(op.getOutput());
	JitLoadOp load = new JitLoadOp(op, out, space, inOffset);
	return dfm.notifyOp(load).out();
}

/**
 * {@inheritDoc}
 *
 * <p>
 * This form is not supported by this arithmetic. It always throws an {@link AssertionError}.
 */
@Override
public JitVal modAfterLoad(int sizeinOffset, AddressSpace space, JitVal inOffset,
		int sizeinValue, JitVal inValue) {
	throw new AssertionError();
}
/**
 * {@inheritDoc}
 *
 * <p>
 * The bytes are interpreted as an unsigned integer in the machine endianness, and the result is
 * a constant value having the same size as the given array.
 */
@Override
public JitVal fromConst(byte[] value) {
	int size = value.length;
	boolean bigEndian = endian.isBigEndian();
	BigInteger asInteger = Utils.bytesToBigInteger(value, size, bigEndian, false);
	return JitVal.constant(size, asInteger);
}
/**
 * Convert a value to concrete bytes, which is only possible for constants
 *
 * @param value the value to concretize
 * @param purpose the reason concretization was requested, reported on failure
 * @return the constant's bytes, encoded according to this arithmetic's endianness
 * @throws ConcretionError if the value is not a {@link JitConstVal}
 */
@Override
public byte[] toConcrete(JitVal value, Purpose purpose) {
    if (!(value instanceof JitConstVal constant)) {
        throw new ConcretionError("Cannot concretize " + value, purpose);
    }
    return Utils.bigIntegerToBytes(constant.value(), constant.size(), endian.isBigEndian());
}
/**
 * Get the size of a value, as reported by the value itself
 *
 * @param value the value
 * @return the value's size
 */
@Override
public long sizeOf(JitVal value) {
    final long size = value.size();
    return size;
}
}

View file

@ -0,0 +1,219 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.util.*;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.BlockFlow;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.op.JitPhiOp;
import ghidra.pcode.emu.jit.var.JitMissingVar;
import ghidra.pcode.emu.jit.var.JitVal;
import ghidra.pcode.exec.PcodeExecutor;
import ghidra.pcode.exec.PcodeExecutorStatePiece.Reason;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Register;
import ghidra.program.model.pcode.Varnode;
/**
 * The per-block portion of the data flow analysis done by {@link JitDataFlowModel}
 *
 * <p>
 * The model creates one of these for each basic block in the passage. An analyzer performs the
 * intra-block analysis of its block and carries out its block's share of the inter-block
 * analysis. It also holds, and provides access to, some of the analytic results.
 *
 * @see JitDataFlowModel#getAnalyzer(JitBlock)
 */
public class JitDataFlowBlockAnalyzer {
    private final JitAnalysisContext context;
    private final JitDataFlowModel dfm;
    private final JitBlock block;

    private final JitDataFlowArithmetic arithmetic;
    private final JitDataFlowUseropLibrary library;
    private final JitDataFlowState state;

    private final boolean isEntry;

    /**
     * Construct the analyzer for one block
     *
     * <p>
     * The arithmetic and userop library are shared with the model. The state is created fresh
     * for this block. The block is deemed an entry if the context has an entry recorded for the
     * block's first op.
     *
     * @param context the analysis context
     * @param dfm the data flow model that owns this analyzer
     * @param block the block to analyze
     */
    JitDataFlowBlockAnalyzer(JitAnalysisContext context, JitDataFlowModel dfm, JitBlock block) {
        this.context = context;
        this.dfm = dfm;
        this.block = block;

        this.arithmetic = dfm.getArithmetic();
        this.library = dfm.getLibrary();
        this.state = new JitDataFlowState(context, dfm, block);

        this.isEntry = context.getOpEntry(block.first()) != null;
    }

    /**
     * Perform the intra-block analysis for this block
     *
     * <p>
     * This simply feeds the block's p-code through the analytic interpreter. See
     * {@link JitDataFlowModel}'s section on intra-block analysis.
     */
    void doIntrablock() {
        PcodeExecutor<JitVal> interpreter = new JitDataFlowExecutor(context, dfm, state);
        interpreter.execute(block, library);
    }

    /**
     * The initial entry into the recursive phi option seeking algorithm
     *
     * <p>
     * See {@link JitDataFlowModel}'s section on inter-block analysis. This modifies the given phi
     * op in place, adding to it each option found. There are more details here than are discussed
     * in the data flow model documentation. Keep in mind that a varnode may be partially defined,
     * e.g., when reading {@code RAX}, perhaps only {@code EAX} has been defined. In such cases,
     * we must catenate in the same manner we would when reading the varnode during intra-block
     * analysis. The portions missing a definition generate corresponding phi nodes, which are
     * treated recursively.
     *
     * @param phi the phi op for which we seek options
     */
    void fillPhiFromDeps(JitPhiOp phi) {
        Set<JitBlock> visited = new HashSet<>();
        fillPhiFromDeps(phi, visited);
    }

    /**
     * Fill options in for the given phi op
     *
     * <p>
     * If this block is an entry, the passage input is added as a possible option.
     * <em>Additionally</em>, each upstream block (dependency) is considered as an option,
     * recursively. Recursion naturally terminates if there are no inward flows.
     *
     * @param phi the phi op for which we seek options
     * @param visited the blocks which have already been visited during recursion
     */
    private void fillPhiFromDeps(JitPhiOp phi, Set<JitBlock> visited) {
        if (isEntry) {
            phi.addInputOption();
        }
        block.flowsTo()
                .values()
                .forEach(flow -> dfm.getOrCreateAnalyzer(flow.from())
                        .fillPhiFromBlock(phi, flow, visited));
    }

    /**
     * Consider the given flow as an option for the given phi op, and fill it
     *
     * <p>
     * If this block was already visited, return immediately without further recursion.
     * Otherwise, examine the varnode output state of this block for suitable definitions. Where
     * needed, any gaps (possibly the entire varnode sought) are filled with new phi nodes, which
     * are queued for a subsequent round.
     *
     * @param phi the phi op for which we seek an option
     * @param flow the flow from the block to consider
     * @param visited the blocks which have already been visited during recursion
     */
    private void fillPhiFromBlock(JitPhiOp phi, BlockFlow flow, Set<JitBlock> visited) {
        boolean firstVisit = visited.add(block);
        if (!firstVisit) {
            /**
             * NOTE: There is no need to remove the block before returning. If the definition
             * was not found by this path, it is certainly not going to be found from here by
             * another path.
             */
            return;
        }

        Varnode sought = phi.out().varnode();
        List<JitVal> found = state.getDefinitions(sought);

        if (found.size() == 1) {
            JitVal only = found.get(0);
            if (only instanceof JitMissingVar missing) {
                // Require the chain to have a node in this block
                JitPhiOp upstream = missing.generatePhi(dfm, block);
                dfm.phiQueue.add(upstream);
                state.setVar(missing.varnode(), upstream.out());
                phi.addOption(flow, upstream.out());
                // The queued phi gets filled on a subsequent round, not recursively here
            }
            else {
                phi.addOption(flow, only);
            }
            return;
        }

        /**
         * The definition is pieced together. New phi nodes will be picked up in the next round
         * of filling. Since parts are smaller than the whole, the size of such nodes should
         * shrink until a singular definition is found.
         */
        List<JitVal> parts = state.generatePhis(found, dfm.phiQueue);
        JitVal whole = arithmetic.catenate(sought, parts);
        phi.addOption(flow, whole);
    }

    /**
     * Get a complete catalog of all varnodes read, including overlapping, subregs, etc.
     *
     * @return the set of varnodes, as reported by this block's state
     */
    public Set<Varnode> getVarnodesRead() {
        return state.getVarnodesRead();
    }

    /**
     * Get a complete catalog of all varnodes written, including overlapping, subregs, etc.
     *
     * @return the set of varnodes, as reported by this block's state
     */
    public Set<Varnode> getVarnodesWritten() {
        return state.getVarnodesWritten();
    }

    /**
     * Get an ordered list of all values involved in the latest definition of the given varnode.
     *
     * @see JitDataFlowState#getDefinitions(Varnode)
     * @param varnode the varnode whose definition(s) to retrieve
     * @return the list of values
     */
    public List<JitVal> getOutput(Varnode varnode) {
        return state.getDefinitions(varnode);
    }

    /**
     * Get an ordered list of all values involved in the latest definition of the given register.
     *
     * @see JitDataFlowState#getDefinitions(Register)
     * @param register the register whose definition(s) to retrieve
     * @return the list of values
     */
    public List<JitVal> getOutput(Register register) {
        return state.getDefinitions(register);
    }

    /**
     * Get the latest definition of the given varnode, synthesizing ops if required.
     *
     * <p>
     * NOTE: May produce phi nodes that need additional inter-block analysis
     *
     * @see JitDataFlowModel#analyzeInterblock(Collection)
     * @see JitDataFlowState#getVar(AddressSpace, JitVal, int, boolean, Reason)
     * @param vn the varnode
     * @return the latest definition for the block analyzed
     */
    public JitVal getVar(Varnode vn) {
        return state.getVar(vn, Reason.EXECUTE_READ);
    }
}

View file

@ -0,0 +1,165 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.util.Map;
import java.util.Objects;
import ghidra.pcode.emu.jit.JitPassage.Branch;
import ghidra.pcode.emu.jit.JitPassage.IndBranch;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.JitVal;
import ghidra.pcode.exec.*;
import ghidra.pcode.exec.PcodeExecutorStatePiece.Reason;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.Varnode;
/**
 * A modification to {@link PcodeExecutor} that is specialized for the per-block data flow analysis.
 *
 * <p>
 * Normally, the p-code executor follows all of the control-flow branching, as you would expect in
 * the interpretation-based p-code emulator. For analysis, we do not intend to actually follow
 * branches. These should only ever occur at the end of a basic block, anyway.
 *
 * <p>
 * We do record the branch ops into the graph as {@link JitOp op nodes}. A conditional branch
 * naturally participates in the data flow, as it uses the definition of its predicate varnode.
 * Similarly, indirect branches use the definitions of their target varnodes. Direct branch
 * operations are also added to the use-def graph, even though they do not use any variable
 * definition. Architecturally, the code generator emits JVM bytecode from the op nodes in the
 * use-def graph. For that to work, every p-code op must be entered into it. For bookkeeping, and
 * because the code generator will need them, we look up the {@link Branch} records created by the
 * passage decoder and store them in their respective branch op nodes.
 *
 * <p>
 * This is all accomplished by overriding {@link #executeBranch(PcodeOp, PcodeFrame)} and similar
 * branch execution methods. Additionally, we override {@link #badOp(PcodeOp)} and
 * {@link #onMissingUseropDef(PcodeOp, PcodeFrame, String, PcodeUseropLibrary)}, because the
 * inherited implementations will throw exceptions. We need not throw an exception until/unless we
 * reach such bad code at run time. So, we enter them into the use-def graph as op nodes from which
 * we later generate the code to throw the exception.
 */
class JitDataFlowExecutor extends PcodeExecutor<JitVal> {
    private final JitDataFlowModel dfm;
    private final Map<PcodeOp, Branch> branches;

    /**
     * Construct an executor from the given context
     *
     * @param context the analysis context, namely to get the branches recorded by the passage
     *            decoder
     * @param dfm the data-flow model whose use-def graph to populate
     * @param state the executor state, which tracks varnode definitions during execution
     */
    protected JitDataFlowExecutor(JitAnalysisContext context, JitDataFlowModel dfm,
            PcodeExecutorState<JitVal> state) {
        super(context.getLanguage(), dfm.getArithmetic(), state, Reason.EXECUTE_READ);
        this.dfm = dfm;
        this.branches = context.getPassage().getBranches();
    }

    /**
     * Look up the {@link Branch} record the passage decoder created for the given op
     *
     * @param op the branch, call, or return op
     * @return the record, never {@code null}
     * @throws NullPointerException if there is no record for the op. The message names the op,
     *             so the failure can be traced back to the p-code that caused it.
     */
    private Branch requireBranch(PcodeOp op) {
        return Objects.requireNonNull(branches.get(op),
            () -> "No Branch record in the passage for op: " + op);
    }

    /**
     * Record a branch or call op into the use-def graph
     *
     * <p>
     * We do not need to compute the branch target, because that op was already computed by the
     * passage decoder. Past attempts to perform that computation here failed when dealing with
     * injects and inlined p-code userops. It is much easier to let the decoder do it, because it
     * has a copy of the original p-code. That op is recorded in the {@link Branch} for this op, so
     * just look it up.
     *
     * @param op the op
     * @throws NullPointerException if the passage has no {@link Branch} record for the op
     */
    protected void recordBranch(PcodeOp op) {
        Branch branch = requireBranch(op);
        dfm.notifyOp(new JitBranchOp(op, branch));
    }

    /**
     * Record a conditional branch op into the use-def graph
     *
     * <p>
     * While we can look up the {@link Branch} target as in
     * {@link #executeBranch(PcodeOp, PcodeFrame)}, we must still obtain the predicate's definition
     * and use it.
     *
     * @param op the op
     * @throws NullPointerException if the passage has no {@link Branch} record for the op
     */
    protected void recordConditionalBranch(PcodeOp op) {
        Branch branch = requireBranch(op);
        Varnode condVar = getConditionalBranchPredicate(op);
        JitVal cond = state.getVar(condVar, reason);
        dfm.notifyOp(new JitCBranchOp(op, branch, cond));
    }

    /**
     * Record an indirect branch op into the use-def graph
     *
     * <p>
     * The {@link IndBranch} will have the target decode context, but the address is dynamic. We
     * have to obtain the target varnode's definition and use it.
     *
     * @param op the op
     * @throws NullPointerException if the passage has no {@link Branch} record for the op
     * @throws ClassCastException if the record for the op is not an {@link IndBranch}
     */
    protected void recordIndirectBranch(PcodeOp op) {
        Varnode offVar = getIndirectBranchTarget(op);
        JitVal offset = state.getVar(offVar, reason);
        IndBranch branch = (IndBranch) requireBranch(op);
        dfm.notifyOp(new JitBranchIndOp(op, offset, branch));
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * Rather than follow the branch, record it into the use-def graph.
     */
    @Override
    public void executeBranch(PcodeOp op, PcodeFrame frame) {
        recordBranch(op);
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * Rather than evaluate and follow the branch, record it and its use of the predicate.
     */
    @Override
    public void executeConditionalBranch(PcodeOp op, PcodeFrame frame) {
        recordConditionalBranch(op);
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * Rather than follow the branch, record it and its use of the target offset.
     */
    @Override
    public void executeIndirectBranch(PcodeOp op, PcodeFrame frame) {
        recordIndirectBranch(op);
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * A call is recorded in the same manner as a direct branch.
     */
    @Override
    public void executeCall(PcodeOp op, PcodeFrame frame, PcodeUseropLibrary<JitVal> library) {
        recordBranch(op);
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * An indirect call is recorded in the same manner as an indirect branch.
     */
    @Override
    public void executeIndirectCall(PcodeOp op, PcodeFrame frame) {
        recordIndirectBranch(op);
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * A return is recorded in the same manner as an indirect branch.
     */
    @Override
    public void executeReturn(PcodeOp op, PcodeFrame frame) {
        recordIndirectBranch(op);
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * Instead of failing during analysis, enter a stub op node into the use-def graph.
     */
    @Override
    protected void badOp(PcodeOp op) {
        dfm.notifyOp(JitOp.stubOp(op));
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * Instead of failing during analysis, enter a node denoting the missing userop, along with
     * its name, into the use-def graph.
     */
    @Override
    protected void onMissingUseropDef(PcodeOp op, PcodeFrame frame, String opName,
            PcodeUseropLibrary<JitVal> library) {
        dfm.notifyOp(new JitCallOtherMissingOp(op, opName));
    }
}

View file

@ -0,0 +1,647 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.io.*;
import java.util.*;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.lifecycle.Internal;
import ghidra.pcode.emu.jit.JitCompiler;
import ghidra.pcode.emu.jit.JitCompiler.Diag;
import ghidra.pcode.emu.jit.JitPassage;
import ghidra.pcode.emu.jit.JitPassage.DecodedPcodeOp;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.*;
import ghidra.pcode.emu.jit.var.JitVal.ValUse;
import ghidra.pcode.exec.PcodeExecutorState;
import ghidra.pcode.exec.PcodeExecutorStatePiece.Reason;
import ghidra.pcode.exec.PcodeProgram;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Register;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.Varnode;
/**
* The data flow analysis for JIT-accelerated emulation.
*
* <p>
* This implements the Data Flow Analysis phase of the {@link JitCompiler}. The result is a use-def
* graph. The graph follows Static Single Assignment (SSA) form, in that each definition of a
* variable, even if it's at the same address as a previous definition, is given a unique
* identifier. The graph is bipartite with {@link JitOp ops} on one side and {@link JitVal values}
* on the other. Please note the distinction between a <em>varnode</em> and a <em>variable</em> in
* this context. A <em>varnode</em> refers to the address and size in the machine's state. For
* better or for worse, this is often referred to as a "variable" in other contexts. A
* <em>variable</em> in the SSA sense is a unique "instance" of a varnode with precisely one
* <em>definition</em>. Consider the following x86 assembly:
*
* <pre>
* MOV RAX, qword ptr [...]
* ADD RAX, RDX
* MOV qword ptr [...], RAX
* </pre>
*
* <p>
* Ignoring RAM, there are two varnodes at play, named for the registers they represent: {@code RAX}
* and {@code RDX}. However, there are three variables. The first is an instance of {@code RAX},
* defined by the first {@code MOV} instruction. The second is an instance of {@code RDX}, which is
* implicitly defined as an input to the passage. The third is another instance of {@code RAX},
* defined by the {@code ADD} instruction. These could be given unique names
* {@code RAX}<sub>1</sub>, {@code RDX}<sub>in</sub>, and {@code RAX}<sub>2</sub>, respectively.
* Thus, the {@code ADD} instruction uses {@code RAX}<sub>1</sub> and {@code RDX}<sub>in</sub>, to
* define {@code RAX}<sub>2</sub>. The last {@code MOV} instruction uses {@code RAX}<sub>2</sub>. If
* we plot each instruction and variable in a graph, drawing edges for each use and definition, we
* get a use-def graph.
*
* <p>
* Our analysis produces a use-def graph for the passage's p-code (not instructions) in two steps:
* First, we analyze each basic block independently. There are a lot of nuts and bolts in the
* implementation, but the analysis is achieved by straightforward interpretation of each block's
* p-code ops. Second, we connect the blocks' use-def graphs together using phi nodes where
* appropriate, according to the control flow.
*
* <h2>Intra-block analysis</h2>
* <p>
* For each block, we create a p-code interpreter consisting of a {@link JitDataFlowState} and
* {@link JitDataFlowExecutor}. Both are given this model's {@link JitDataFlowArithmetic}, which
* populates the use-def graph. We then feed the block's p-code into the executor. The block gets a
* fresh {@link JitDataFlowState}, so that its result has no dependency on the interpretation of any
* other block, except in the numbering of variable identifiers; those must be unique across the
* model.
*
* <p>
* During interpretation, varnode accesses generate value nodes. When a constant varnode is
* accessed, it simply creates a {@link JitConstVal}. When an op produces an output, it generates a
* {@link JitOutVar} and places it into the interpreter's {@link JitDataFlowState state} for its
* varnode. When a varnode is read, the interpreter examines its state for the last definition. If
* one is found, the variable is returned, its use noted, and nothing new is generated. Otherwise, a
* {@link JitMissingVar} is generated. Note that the interpreter does not track memory variables in
* its state, because the JIT translator does not seek to optimize these. At run time, such accesses
* will affect the emulator's state immediately. Registers and Sleigh uniques, on the other hand,
* are allocated as JVM locals, so we must know how they are used and defined. Direct memory
* accesses generate {@link JitDirectMemoryVar} and {@link JitMemoryOutVar}. Indirect memory
* accesses are denoted by the {@link JitLoadOp load} and {@link JitStoreOp store} op nodes, not as
* variables. There is a dummy {@link JitIndirectMemoryVar} singleton, so that the state can return
* something when the memory address is not fixed.
*
* <h2>Inter-block analysis</h2>
* <p>
* Up to this point, each block's use-def sub-graph is disconnected from the others'. We define each
* {@link JitMissingVar missing} variable generated during block interpretation as a {@link JitPhiOp
* phi} op. A phi op is said to belong to the block that generated the missing variable. We seek
* options for the phi op by examining the block's inward flows. For each source block, we check the
* most recent definition of the sought varnode. If one is present, the option is added to the phi
* op. Otherwise, we create an option by generating another phi op and taking its output. The new
* phi op belongs to the source block, and we recurse to seek its options. If a cycle is
* encountered, or we encounter a block with no inward flows, we do not recurse. An
* {@link JitInputVar input} variable is generated whenever we encounter a passage entry, indicating
* the variable could be defined outside the passage.
*
* <p>
* Note that the resulting phi ops may not adhere precisely to the formal definition of <em>phi
* node</em>. A phi op may have only one option. The recursive part of the option seeking algorithm
* generates chains of phi ops such that an option must come from an immediately upstream block,
* even if that block does not offer a direct definition. This may produce long chains when a
* varnode use is several block flows removed from a possible definition. We had considered
* simplifying/removing single-option phi ops afterward, but we found it too onerous, and the output
* bytecode is not improved. We do not generate bytecode for phi ops; they are synthetic and only
* used for analysis.
*/
public class JitDataFlowModel {
/**
* Create a list of {@link JitTypeBehavior#ANY ANY}s having the same size as the list of values.
*
* @param inVals the values, e.g., of each parameter to a userop
* @return the list
*/
static List<JitTypeBehavior> allAny(List<JitVal> inVals) {
return inVals.stream().map(v -> JitTypeBehavior.ANY).toList();
}
private final JitAnalysisContext context;
private final JitControlFlowModel cfm;
private final JitPassage passage;
private final SleighLanguage language;
private final JitDataFlowArithmetic arithmetic;
private final JitDataFlowUseropLibrary library;
private int nextVarId = 1;
private final List<JitPhiOp> phiNodes = new ArrayList<>();
private final List<JitSyntheticOp> synthNodes = new ArrayList<>();
private final Map<PcodeOp, JitOp> ops = new HashMap<>();
private final Map<JitBlock, JitDataFlowBlockAnalyzer> analyzers = new HashMap<>();
final SequencedSet<JitPhiOp> phiQueue = new LinkedHashSet<>();
/**
* Construct the data flow model.
*
* <p>
* Analysis is performed as part of constructing the model.
*
* @param context the analysis context
* @param cfm the control flow model
*/
public JitDataFlowModel(JitAnalysisContext context, JitControlFlowModel cfm) {
this.context = context;
this.cfm = cfm;
this.passage = context.getPassage();
this.language = context.getLanguage();
this.arithmetic = new JitDataFlowArithmetic(context, this);
this.library = new JitDataFlowUseropLibrary(context, this);
analyze();
}
/**
* Get the model's arithmetic that places p-code ops into the use-def graph
*
* @return the arithmetic
*/
public JitDataFlowArithmetic getArithmetic() {
return arithmetic;
}
/**
* Get a wrapper library that places userop calls into the use-def graph
*
* @return the library
*/
public JitDataFlowUseropLibrary getLibrary() {
return library;
}
/**
* Get all the phi nodes in the use-def graph.
*
* @return the list of phi nodes
*/
public List<JitPhiOp> phiNodes() {
return phiNodes;
}
/**
* Get all the synthetic op nodes in the use-def graph.
*
* @return the list of synthetic op nodes
*/
public List<JitSyntheticOp> synthNodes() {
return synthNodes;
}
/**
* Generate a unique variable identifier
*
* @return the generated identifier
*/
private int nextVarId() {
return nextVarId++;
}
/**
* Generate a new op output variable for eventual placement in the use-def graph
*
* @param out the varnode describing the corresponding {@link PcodeOp}'s
* {@link PcodeOp#getOutput() output}.
* @return the generated variable
* @see JitDataFlowModel
*/
public JitOutVar generateOutVar(Varnode out) {
if (out.isRegister() || out.isUnique()) {
return new JitLocalOutVar(nextVarId(), out);
}
return new JitMemoryOutVar(nextVarId(), out);
}
/**
* Generate a variable representing a direct memory access
*
* @param vn the varnode, which ought to be neither register nor unique
* @return the variable
*/
public JitDirectMemoryVar generateDirectMemoryVar(Varnode vn) {
return new JitDirectMemoryVar(nextVarId(), vn);
}
/**
* Generate a variable representing an indirect memory access
*
* @param space the address space containing the variable, which out to be neither register nor
* unique
* @param offset another variable describing the (dynamic) offset of the variable in the given
* space
* @param size the number of bytes in the variable
* @param quantize true if the offset should be quantized (as in
* {@link PcodeExecutorState#getVar(AddressSpace, Object, int, boolean, Reason)
* getVar}).
* @return the variable
* @see JitIndirectMemoryVar
* @see JitLoadOp
* @see JitStoreOp
* @implNote because the load and store ops already encode these details (except maybe
* {@code quantize}), this just returns a dummy instance.
*/
public JitIndirectMemoryVar generateIndirectMemoryVar(AddressSpace space, JitVal offset,
int size, boolean quantize) {
return JitIndirectMemoryVar.INSTANCE;
}
/**
* Add the given {@link JitOp} to the use-def graph
*
* @param <T> the type of the node
* @param op the op
* @return the same op
* @see JitDataFlowModel
*/
public <T extends JitOp> T notifyOp(T op) {
op.link();
if (op instanceof JitPhiOp phi) {
phiNodes.add(phi);
synthNodes.add(phi);
}
else if (op instanceof JitSyntheticOp synth) {
// Prevent call of .op()
synthNodes.add(synth);
}
else {
ops.put(Objects.requireNonNull(op.op()), op);
}
return op;
}
/**
* Get the use-def op node for the given p-code op
*
* <p>
* NOTE: When used in testing, if the passage is manufactured from a {@link PcodeProgram}, the
* decoder will re-write the p-code ops as {@link DecodedPcodeOp}s. Be sure to pass an op to
* this method that comes from the resulting {@link JitPassage}, not the original program, or
* else this method will certainly return {@code null}.
*
* @param op the p-code op from the source passage
* @return the node from the use-def graph, if present, or {@code null}
*/
public JitOp getJitOp(PcodeOp op) {
return ops.get(op);
}
/**
* Get all the op nodes, whether from a p-code op or synthesized.
*
* @return the ops.
* @see JitDataFlowModel
*/
Collection<JitOp> allOps() {
Set<JitOp> all = new LinkedHashSet<>();
all.addAll(ops.values());
all.addAll(synthNodes);
return all;
}
/**
* An upward graph traversal for collecting all values in the use-def graph.
*
* @see JitAnalysisContext#allValues()
* @see JitAnalysisContext#allValuesSorted()
*/
protected class ValCollector extends HashSet<JitVal> implements JitOpUpwardVisitor {
public ValCollector() {
for (PcodeOp op : passage.getCode()) {
JitOp jitOp = getJitOp(op);
visitOp(jitOp);
if (jitOp instanceof JitDefOp defOp) {
visitVal(defOp.out());
}
}
}
@Override
public void visitVal(JitVal v) {
if (!add(v)) {
return;
}
JitOpUpwardVisitor.super.visitVal(v);
}
}
/**
* Get all values (and variables) in the use-def graph
*
* @return the set of values
*/
public Set<JitVal> allValues() {
return new ValCollector();
}
/**
* Get the sort key of a given value. Variables get their ID, constants get -2.
*
* @param v the value
* @return the sort key
*/
int idOfVal(JitVal v) {
return v instanceof JitVar vv ? vv.id() : -2;
}
/**
* Same as {@link #allValues()}, but sorted by ID with constants at the top
*
* @return the list of values
*/
public List<JitVal> allValuesSorted() {
return allValues().stream().sorted(Comparator.comparing(this::idOfVal)).toList();
}
protected JitDataFlowBlockAnalyzer getOrCreateAnalyzer(JitBlock block) {
return analyzers.computeIfAbsent(block,
b -> new JitDataFlowBlockAnalyzer(context, this, b));
}
/**
* Get the per-block data flow analyzer for the given basic block
*
* @param block the block
* @return the analyzer
*/
public JitDataFlowBlockAnalyzer getAnalyzer(JitBlock block) {
return analyzers.get(block);
}
/**
* Construct the use-def graph
*/
protected void analyze() {
/**
* Just visit the blocks in any order. Use input placeholders and glue them together
* afterward.
*
* I considered unrolling each loop at least once to avoid certain multi-equals stuff. I
* don't think that'll be necessary. If we pre-load the registers into local variables, then
* we'll always be reading and writing to those locals, so no worries about multi-equals.
*/
for (JitBlock block : cfm.getBlocks()) {
getOrCreateAnalyzer(block).doIntrablock();
}
/**
* Now, work out the inter-block flows.
*/
analyzeInterblock(phiNodes);
}
/**
* Perform the inter-block analysis.
*
* <p>
* This is called by {@link #analyze()} after intra-block analysis.
*
* @implNote This may be called a second time by the {@link JitOpUseModel}, since a variable's
* definition may be several block flows removed from its retirement, which counts as
* a use.
*
* @see JitVarScopeModel
* @see JitOpUseModel
*/
void analyzeInterblock(Collection<JitPhiOp> phis) {
phiQueue.addAll(phis);
while (!phiQueue.isEmpty()) {
JitPhiOp phi = phiQueue.removeFirst();
JitDataFlowBlockAnalyzer analyzer = getOrCreateAnalyzer(phi.block());
analyzer.fillPhiFromDeps(phi);
}
}
/**
* For testing: Get the value(s) in (or intersecting) the given register defined by the given
* block
*
* @param block the block whose p-code to consider
* @param register the register to examine
* @return the list of values (usually variables)
*/
@Internal
List<JitVal> getOutput(JitBlock block, Register register) {
return getAnalyzer(block).getOutput(register);
}
/**
* For diagnostics: Dump the analysis result to stderr
*
* @see Diag#PRINT_DFM
*/
public void dumpResult() {
System.err.println("STAGE: DataFlow");
for (JitBlock block : cfm.getBlocks()) {
System.err.println(" Block: " + block);
for (PcodeOp op : block.getCode()) {
System.err.println(" %s: %s".formatted(op.getSeqnum(), getJitOp(op)));
}
}
}
/**
* For diagnostics: Dump the synthetic ops to stderr
*
* @see Diag#PRINT_SYNTH
*/
public void dumpSynth() {
System.err.println("SYNTHETIC OPS");
for (JitSyntheticOp synthOp : synthNodes) {
System.err.println(" " + synthOp);
}
}
/**
* A diagnostic tool for visualizing the use-def graph.
*
* <p>
* NOTE: This is only as complete as it needed to be for me to diagnose whatever issue I was
* having at the time.
*
* @see JitAnalysisContext#exportGraphviz(File)
*/
protected class GraphvizExporter implements JitOpUpwardVisitor {
final PrintWriter out;
final Set<JitVar> vars = new HashSet<>();
final Set<JitOp> ops = new HashSet<>();
public GraphvizExporter(File outFile) {
try (FileOutputStream outStream = new FileOutputStream(outFile);
PrintWriter out = new PrintWriter(outStream)) {
this.out = out;
out.println("digraph DataFlow {");
for (PcodeOp op : passage.getCode()) {
JitOp jitOp = getJitOp(op);
if (jitOp instanceof JitDefOp defOp) {
// Because of direction of visit
visitVal(defOp.out());
}
else {
visitOp(jitOp);
}
}
out.println("}");
}
catch (IOException e) {
throw new RuntimeException(e);
}
}
String opLabel(JitOp op) {
return switch (op) {
case null -> "null";
//case JitSyntheticOp synth -> synth.getClass().getSimpleName();
//default -> op.op().toString();
default -> "%s\n%x".formatted(op.getClass().getSimpleName(),
System.identityHashCode(op));
};
}
@Override
public void visitOp(JitOp op) {
if (!ops.add(op)) {
return;
}
out.println("""
"op%x" [
label = "%s"
shape = "ellipse"
];
""".formatted(
System.identityHashCode(op),
opLabel(op)));
if (op == null) {
return;
}
int i = 0;
for (JitVal input : op.inputs()) {
i++;
if (input instanceof JitVar iv) {
out.println("""
"var%d" -> "op%x" [
headlabel = "[%d]"
];
""".formatted(
iv.id(),
System.identityHashCode(op),
i));
}
else {
out.println("""
"val%x" -> "op%x" [
headlabel = "[%d]"
];
""".formatted(
System.identityHashCode(input),
System.identityHashCode(op),
i));
}
}
if (op instanceof JitDefOp defOp) {
out.println("""
"op%x" -> "var%d" [
taillabel = "out"
];
""".formatted(
System.identityHashCode(op),
defOp.out().id()));
}
JitOpUpwardVisitor.super.visitOp(op);
}
String varLabel(JitVar v) {
return switch (v) {
case JitVarnodeVar vv -> "%s\n%d".formatted(vv.varnode().toString(language),
v.id());
default -> throw new AssertionError();
};
}
/**
* Emit the node for the given value along with its "use" edges and, for an output variable,
* its "def" edge
*
* <p>
* A variable is emitted at most once; a repeat visit returns immediately. Constants are not
* tracked, so a constant is emitted each time it is visited.
*
* @param v the value to emit; must be a {@link JitVar} or a {@link JitConstVal}
* @throws AssertionError if the value is of neither kind
*/
@Override
public void visitVal(JitVal v) {
final String name;
final String label;
if (v instanceof JitVar vv) {
if (!vars.add(vv)) {
// Already emitted
return;
}
name = "var%d".formatted(vv.id());
label = varLabel(vv);
}
else if (v instanceof JitConstVal cv) {
// Constants have no id, so name them by identity hash
name = "val%x".formatted(System.identityHashCode(cv));
label = cv.value().toString();
}
else {
throw new AssertionError();
}
// The node itself
out.println("""
"%s" [
label = "%s"
shape = "box"
];
""".formatted(name, label));
// One back-pointing edge per recorded use of this value
for (ValUse use : v.uses()) {
out.println("""
"%s" -> "op%x" [
dir = "back"
arrowhead = "none"
arrowtail = "crow"
taillabel = "use"
];
""".formatted(name, System.identityHashCode(use.op())));
}
if (v instanceof JitOutVar ov) {
// Edge recording which op the variable names as its definition
out.println("""
"op%x" -> "%s" [
dir = "back"
arrowhead = "none"
arrowtail = "crow"
taillabel = "def"
];
""".formatted(System.identityHashCode(ov.definition()), name));
}
// Continue with the interface's default visit (presumably leading an output variable to its
// defining op -- confirm against JitOpVisitor)
JitOpUpwardVisitor.super.visitVal(v);
}
}
/**
* Generate a graphviz .dot file to visualize the use-def graph.
*
* <p>
* <b>WARNING:</b> This is an internal diagnostic that is only as complete as it needed to be.
*
* <p>
* All of the work is done by the {@link GraphvizExporter} constructor; the constructed object is
* discarded. An {@link IOException} caught by that constructor is rethrown wrapped in a
* {@link RuntimeException}.
*
* @param file the output file
*/
@Internal // for diagnostics
public void exportGraphviz(File file) {
new GraphvizExporter(file);
}
}

View file

@ -0,0 +1,572 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.util.*;
import java.util.Map.Entry;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorState;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.*;
import ghidra.pcode.exec.PcodeArithmetic.Purpose;
import ghidra.pcode.exec.PcodeExecutorState;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Language;
import ghidra.program.model.lang.Register;
import ghidra.program.model.mem.MemBuffer;
import ghidra.program.model.pcode.Varnode;
import ghidra.util.Msg;
/**
* An implementation of {@link PcodeExecutorState} for per-block data flow interpretation
*
* <p>
* In p-code interpretation, this interface's purpose is to store the current value of varnodes in
* the emulation/interpretation state. Here we implement it using {@code T:=}{@link JitVal}, and
* track the latest variable definition of varnodes in the data flow interpretation. The adaptation
* is fairly straightforward, except when varnode accesses do not match their latest definitions
* exactly, e.g., an access of {@code EAX} when the latest definition is for {@code RAX}. Thus, this
* state object may synthesize {@link JitSynthSubPieceOp subpiece} and {@link JitCatenateOp
* catenate} ops to model the "off-cut" use of one or more such definitions. Additionally, in
* preparation for inter-block data flow analysis, when no definition is present for a varnode (or
* part of a varnode) access, this state will synthesize {@link JitPhiOp phi} ops. See
* {@link #setVar(AddressSpace, JitVal, int, boolean, JitVal) setVar} and
* {@link #getVar(AddressSpace, JitVal, int, boolean, Reason) getVar} for details.
*
* <p>
* This state only serves to analyze data flow through register and unique variables. Because we
* know these are only accessible to the thread, we stand to save much execution time by bypassing
* the {@link JitBytesPcodeExecutorState} at run time. We can accomplish this by mapping these
* variables to suitable JVM local variables. Thus, we have one map of entries for register space
* and another for unique space. Accesses to other spaces do not mutate or read from either of those
* maps, but this class may generate a suitable {@link JitVal} for the use-def graph.
*/
public class JitDataFlowState implements PcodeExecutorState<JitVal> {
/**
* A minimal data flow machine state that can be captured by a {@link JitCallOtherOpIf}.
*/
public class MiniDFState {
private final NavigableMap<Long, JitVal> uniqMap;
private final NavigableMap<Long, JitVal> regMap;
/**
* Construct an empty state, i.e., one with no definitions for uniques or registers
*/
MiniDFState() {
this(new TreeMap<>(), new TreeMap<>());
}
/**
* Construct a state backed by the given maps
*
* <p>
* The maps are used as given, not copied. Each is keyed by offset within its space.
*
* @param uniqMap the definitions in the unique space
* @param regMap the definitions in the register space
*/
MiniDFState(NavigableMap<Long, JitVal> uniqMap, NavigableMap<Long, JitVal> regMap) {
this.uniqMap = uniqMap;
this.regMap = regMap;
}
/**
 * Select the definition map for the given address space
 *
 * @param space the address space
 * @return the unique map or the register map, or {@code null} if the space is neither
 */
NavigableMap<Long, JitVal> mapFor(AddressSpace space) {
	return space.isUniqueSpace() ? uniqMap : space.isRegisterSpace() ? regMap : null;
}
/**
 * Compute where a definition entry ends
 *
 * @param entry the entry, keyed by its starting offset
 * @return the starting offset plus the size of the entry's value, i.e., the exclusive upper
 *         offset
 */
protected static long endOf(Entry<Long, JitVal> entry) {
	long start = entry.getKey();
	int length = entry.getValue().size();
	return start + length;
}
/**
 * Clear all definition entries in the given per-space map for the given varnode
 *
 * <p>
 * Any entries completely covered by the given varnode (including an exact match) are removed
 * from the map. Those partially covered are replaced by subpieces of their former selves such
 * that no part within the cleared varnode remains defined. An entry that extends past both
 * ends of the varnode is split into two remnants, one on each side.
 *
 * @param map the map to modify
 * @param varnode the varnode whose definitions to remove
 */
protected void doClear(NavigableMap<Long, JitVal> map, Varnode varnode) {
	AddressSpace space = varnode.getAddress().getAddressSpace();
	long offset = varnode.getOffset();
	int size = varnode.getSize();
	long end = offset + size;

	/**
	 * Collect the entry at both ends before removal, in case the clear is cutting a hole in the
	 * middle of one entry. I.e., it could be the same entry at both ends. An entry that ends at
	 * or before the boundary in question does not straddle it, and so is not of interest.
	 */
	Entry<Long, JitVal> truncLeftEntry = map.lowerEntry(offset);
	if (truncLeftEntry != null && endOf(truncLeftEntry) <= offset) {
		truncLeftEntry = null;
	}
	Entry<Long, JitVal> truncRightEntry = map.lowerEntry(end);
	if (truncRightEntry != null && endOf(truncRightEntry) <= end) {
		truncRightEntry = null;
	}

	/**
	 * Replace the right entry first. If it's the same entry as the left, and we remove by key,
	 * then we might remove the replacement on the left, if it were done first.
	 */
	if (truncRightEntry != null) {
		long entStart = truncRightEntry.getKey();
		map.remove(entStart);
		/**
		 * The remnant is [end, endOf), so the bytes to shave are those in [entStart, end). The
		 * amount is the number of bytes removed, as at every other truncation call site.
		 */
		int shave = (int) (end - entStart);
		JitVal entVal = truncRightEntry.getValue();
		Varnode truncVn = new Varnode(space.getAddress(entStart), entVal.size());
		JitVal truncVal = arithmetic.truncFromLeft(truncVn, shave, entVal);
		map.put(end, truncVal);
	}
	if (truncLeftEntry != null) {
		long entStart = truncLeftEntry.getKey();
		map.remove(entStart);
		// The remnant is [entStart, offset), so the bytes to shave are those in [offset, endOf)
		int shave = (int) (endOf(truncLeftEntry) - offset);
		JitVal entVal = truncLeftEntry.getValue();
		Varnode truncVn = new Varnode(space.getAddress(entStart), entVal.size());
		JitVal truncVal = arithmetic.truncFromRight(truncVn, shave, entVal);
		map.put(entStart, truncVal);
	}

	/**
	 * At this point, no part of the ends should be in the key range [start,end), so clear that
	 * submap
	 */
	map.subMap(offset, end).clear();
}
/**
 * The implementation of {@link #set(Varnode, JitVal)} for a given address space
 *
 * <p>
 * Existing definitions overlapping the varnode are cleared first. If the value is an output
 * variable defined by a {@link JitCatenateOp catenation}, each part is entered separately at
 * its running offset. Otherwise, the value is entered whole at the varnode's offset.
 *
 * @param map the map to modify for the given space
 * @param varnode the varnode whose value to define
 * @param val the varnode's new definition
 */
protected void doSet(NavigableMap<Long, JitVal> map, Varnode varnode, JitVal val) {
	doClear(map, varnode);
	long base = varnode.getOffset();
	if (!(val instanceof JitOutVar out) || !(out.definition() instanceof JitCatenateOp cat)) {
		map.put(base, val);
		return;
	}
	/**
	 * NOTE: Do not filter phi nodes here. Perhaps if we're certain it's for the same varnode we
	 * could, but not sure there's any benefit to doing so. TODO: Determine whether there's any
	 * benefit. NOTE: While the phi nodes are linked after the fact, they are generated (but
	 * empty) during p-code interpretation.
	 *
	 * Also, we can't necessarily unlink cat here. Something else may use it. May need to prune
	 * afterward.
	 */
	int consumed = 0;
	for (JitVal part : cat.iterParts(language.isBigEndian())) {
		map.put(base + consumed, part);
		consumed += part.size();
	}
}
/**
 * Set one or more definition entries for the given varnode to the given value
 *
 * <p>
 * Ordinarily, this just sets the one varnode to the given value; however, if the given value
 * is the output of a {@link JitCatenateOp catenation}, then each input part is entered into
 * the map separately, rather than the catenation's output. This avoids nested catenations.
 * A varnode in neither the unique nor the register space is silently ignored.
 *
 * @param varnode the varnode
 * @param val the value
 */
public void set(Varnode varnode, JitVal val) {
	NavigableMap<Long, JitVal> target = mapFor(varnode.getAddress().getAddressSpace());
	if (target != null) {
		doSet(target, varnode, val);
	}
}
/**
* The implementation of {@link #getDefinitions(AddressSpace, long, int)} for a given
* address space
*
* <p>
* The result lists, in order of increasing offset, the values defined within the requested
* range. Any stretch of the range without a definition is represented by a
* {@link JitMissingVar}. Entries sticking out of the range are truncated rather than listed
* whole.
*
* @param map the map of values for the given space
* @param space the address space
* @param offset the offset within the space
* @param size the size of the varnode
* @return the list of values
*/
protected List<JitVal> doGetDefinitions(NavigableMap<Long, JitVal> map, AddressSpace space,
long offset, int size) {
List<JitVal> result = new ArrayList<>();
// An entry starting before the range may still reach into it
Entry<Long, JitVal> preEntry = map.lowerEntry(offset);
// The lowest offset not yet accounted for in the result
long cursor = offset;
if (preEntry != null) {
if (endOf(preEntry) > offset) {
// Keep only the part at and after offset
JitVal preVal = preEntry.getValue();
Varnode preVn = new Varnode(space.getAddress(preEntry.getKey()), preVal.size());
int shave = (int) (offset - preEntry.getKey());
JitVal truncVal = arithmetic.truncFromLeft(preVn, shave, preVal);
// NOTE(review): If this entry also extends past offset + size, nothing here trims its
// right side, so the listed value would be larger than requested -- confirm whether
// such an access (e.g., a byte from the middle of a wider definition) can occur.
cursor = endOf(preEntry);
result.add(truncVal);
}
}
long end = offset + size;
// Entries starting within the range, in order of offset
for (Entry<Long, JitVal> entry : map.subMap(offset, end).entrySet()) {
if (entry.getKey() > cursor) {
// Nothing is defined between the cursor and this entry
result.add(new JitMissingVar(
new Varnode(space.getAddress(cursor), (int) (entry.getKey() - cursor))));
}
if (endOf(entry) > end) {
// Keep only the part before end. No later entry can start within the range.
JitVal postVal = entry.getValue();
Varnode postVn = new Varnode(space.getAddress(entry.getKey()), postVal.size());
int shave = (int) (endOf(entry) - end);
JitVal truncVal = arithmetic.truncFromRight(postVn, shave, postVal);
cursor = end;
result.add(truncVal);
break;
}
result.add(entry.getValue());
cursor = endOf(entry);
}
if (end > cursor) {
// Nothing is defined from the cursor to the end of the range
result.add(
new JitMissingVar(new Varnode(space.getAddress(cursor), (int) (end - cursor))));
}
assert !result.isEmpty();
return result;
}
/**
 * Get an ordered list of all values involved in the latest definition of the given varnode.
 *
 * <p>
 * In the simplest case, the list consists of exactly one SSA variable whose varnode exactly
 * matches that requested. In other cases, e.g., when only a subregister is defined, the list
 * may have several entries, some of which may be {@link JitMissingVar missing}.
 *
 * <p>
 * The list is ordered according to machine endianness. That is for little endian, the values
 * are ordered from least to most significant parts of the varnode defined. This is congruent
 * with how {@link JitDataFlowArithmetic#catenate(Varnode, List)} expects parts to be listed.
 *
 * @param space the address space of the varnode, which must be unique or register
 * @param offset the offset of the varnode
 * @param size the size in bytes of the varnode
 * @return the list of values
 * @throws AssertionError if the space is neither unique nor register
 */
public List<JitVal> getDefinitions(AddressSpace space, long offset, int size) {
	NavigableMap<Long, JitVal> defsInSpace = mapFor(space);
	if (defsInSpace != null) {
		return doGetDefinitions(defsInSpace, space, offset, size);
	}
	throw new AssertionError("What is this space?: " + space);
}
/**
 * Get an ordered list of all values involved in the latest definition of the given varnode.
 *
 * <p>
 * This unpacks the varnode's space, offset, and size, and delegates.
 *
 * @see #getDefinitions(AddressSpace, long, int)
 * @param varnode the varnode whose definitions to retrieve
 * @return the list of values
 */
public List<JitVal> getDefinitions(Varnode varnode) {
	return getDefinitions(varnode.getAddress().getAddressSpace(), varnode.getOffset(),
		varnode.getSize());
}
/**
 * Get an ordered list of all values involved in the latest definition of the given register.
 *
 * <p>
 * This unpacks the register's space, offset, and size in bytes, and delegates.
 *
 * @see #getDefinitions(AddressSpace, long, int)
 * @param register the register whose definitions to retrieve
 * @return the list of values
 */
public List<JitVal> getDefinitions(Register register) {
	AddressSpace regSpace = register.getAddressSpace();
	long regOffset = register.getOffset();
	int regSize = register.getNumBytes();
	return getDefinitions(regSpace, regOffset, regSize);
}
/**
 * Replace missing variables with phi nodes, mutating the given list in place
 *
 * <p>
 * For each {@link JitMissingVar} in the list, a phi op is generated for the block under
 * analysis, its output takes the missing variable's place in the list, and that output is
 * entered as the definition of the missing variable's varnode.
 *
 * @param defs the definitions
 * @param phiQueue if non-null, a collection to receive each phi op generated
 * @return the same list, modified
 */
protected List<JitVal> generatePhis(List<JitVal> defs, Collection<JitPhiOp> phiQueue) {
	for (ListIterator<JitVal> it = defs.listIterator(); it.hasNext();) {
		if (!(it.next() instanceof JitMissingVar missing)) {
			continue;
		}
		JitPhiOp phi = missing.generatePhi(dfm, block);
		if (phiQueue != null) {
			phiQueue.add(phi);
		}
		it.set(phi.out());
		set(missing.varnode(), phi.out());
	}
	return defs;
}
/**
 * Get the value of the given varnode
 *
 * <p>
 * This is the implementation of
 * {@link JitDataFlowState#getVar(AddressSpace, JitVal, int, boolean, Reason)}, but only for
 * uniques and registers. Missing parts of the definition are replaced by phi outputs. If a
 * single value remains, it is returned as is; otherwise the parts are catenated.
 *
 * @param varnode the varnode
 * @return the value
 */
public JitVal getVar(Varnode varnode) {
	List<JitVal> parts = generatePhis(getDefinitions(varnode), null);
	return parts.size() == 1 ? parts.get(0) : arithmetic.catenate(varnode, parts);
}
/**
 * Copy this mini state
 *
 * <p>
 * The two maps are copied, so later changes to this state's entries do not show in the copy.
 * The values within the maps are shared, not copied.
 *
 * @return the copy
 */
public MiniDFState copy() {
	NavigableMap<Long, JitVal> uniqCopy = new TreeMap<>(uniqMap);
	NavigableMap<Long, JitVal> regCopy = new TreeMap<>(regMap);
	return new MiniDFState(uniqCopy, regCopy);
}
}
private final JitDataFlowModel dfm;
private final JitBlock block;
private final Language language;
private final JitDataFlowArithmetic arithmetic;
private final MiniDFState mini = new MiniDFState();
private final Set<Varnode> varnodesRead = new HashSet<>();
private final Set<Varnode> varnodesWritten = new HashSet<>();
/**
 * Construct a state
 *
 * <p>
 * The language is taken from the context, and the arithmetic from the data flow model.
 *
 * @param context the analysis context
 * @param dfm the data flow model whose use-def graph to populate
 * @param block the block being analyzed (to which generated phi ops belong)
 */
JitDataFlowState(JitAnalysisContext context, JitDataFlowModel dfm, JitBlock block) {
	this.language = context.getLanguage();
	this.arithmetic = dfm.getArithmetic();
	this.dfm = dfm;
	this.block = block;
}
/**
* {@inheritDoc}
*
* @return the language obtained from the analysis context at construction
*/
@Override
public Language getLanguage() {
return language;
}
/**
* {@inheritDoc}
*
* @return the arithmetic obtained from the data flow model at construction
*/
@Override
public JitDataFlowArithmetic getArithmetic() {
return arithmetic;
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This and {@link #getVar(AddressSpace, JitVal, int, boolean, Reason)} are where we connect the
 * interpretation to the maps of definitions we keep in this state. We examine the varnode's
 * type first. We can't write to a constant, and that shouldn't be allowed anyway, so we warn if
 * we observe that. We ignore any indirect writes, because those are denoted by
 * {@link JitStoreOp store} ops. We also don't do much here with direct writes to memory. The
 * writes to such variables are handled by {@link JitMemoryOutVar}. Such output variables are
 * actually passed in as {@code val} here, but are only stored in a map if they are register or
 * unique variables. Every direct write, whatever its space, is recorded in the catalog of
 * varnodes written.
 */
@Override
public void setVar(AddressSpace space, JitVal offset, int size, boolean quantize,
		JitVal val) {
	/**
	 * We use this only to log possible storage bypasses. All uniques will be bypassed.
	 * Registers must be written, but it is safe to bypass subsequent loads. Actually, perhaps
	 * with a pre-load of register values and a try-finally to write them, we can optimize
	 * register access, too. Might also make sense to do that for uniques, just for debugging
	 * purposes.
	 *
	 * Memory must be written. Unless we can determine for sure the memory is non-volatile, we
	 * must presume volatile, so no bypassing is allowed. TODO: We might consider assuming
	 * stack-based accesses are non-volatile, but I'm not sure that is appropriate either.
	 * Technically one thread may launch another, providing a ref to a stack variable it knows
	 * will live for the entire thread's life.
	 */
	if (space.isConstantSpace()) {
		Msg.warn(this, "Witnessed write to constant space! Ignoring.");
		return;
	}
	// Only a constant offset is a direct access. Don't attempt bypass for anything indirect.
	if (offset instanceof JitConstVal direct) {
		// NB. There should never be need to quantize in regs or uniqs.
		Varnode target = new Varnode(space.getAddress(direct.value().longValue()), size);
		varnodesWritten.add(target);
		mini.set(target, val);
	}
}
/**
* Get an ordered list of all values involved in the latest definition of the given varnode.
*
* <p>
* This delegates to the state's current {@link MiniDFState}.
*
* @see MiniDFState#getDefinitions(AddressSpace, long, int)
* @param varnode the varnode whose definitions to retrieve
* @return the list of values
*/
public List<JitVal> getDefinitions(Varnode varnode) {
return mini.getDefinitions(varnode);
}
/**
* Get an ordered list of all values involved in the latest definition of the given register.
*
* <p>
* This delegates to the state's current {@link MiniDFState}.
*
* @see MiniDFState#getDefinitions(AddressSpace, long, int)
* @param register the register whose definitions to retrieve
* @return the list of values
*/
public List<JitVal> getDefinitions(Register register) {
return mini.getDefinitions(register);
}
/**
* Replace missing variables with phi nodes, mutating the given list in place
*
* <p>
* This delegates to the state's current {@link MiniDFState}.
*
* @param defs the definitions
* @param phiQueue if non-null, a set to receive each phi op generated
* @return the same list, modified
*/
List<JitVal> generatePhis(List<JitVal> defs, SequencedSet<JitPhiOp> phiQueue) {
return mini.generatePhis(defs, phiQueue);
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This and {@link #setVar(AddressSpace, JitVal, int, boolean, JitVal)} are where we connect the
 * interpretation to the maps of definitions we keep in this state. We examine the varnode's
 * type first. If it's a constant or memory variable, it just returns the appropriate
 * {@link JitConstVal}, {@link JitDirectMemoryVar}, or {@link JitIndirectMemoryVar}. If it's a
 * register or unique, then we retrieve the latest definition(s) as in
 * {@link MiniDFState#getDefinitions(AddressSpace, long, int)}. In the simple case of an exact
 * definition, we return it. Otherwise, this synthesizes the appropriate op(s), enters them into
 * the use-def graph, and returns the final output. A register or unique read is also recorded
 * in the catalog of varnodes read.
 *
 * @throws AssertionError if the offset is not a constant for an access to the constant space or
 *             to a non-memory space
 */
@Override
public JitVal getVar(AddressSpace space, JitVal offset, int size, boolean quantize,
		Reason reason) {
	// Non-null only when the access is direct, i.e., at a constant offset
	JitConstVal direct = offset instanceof JitConstVal c ? c : null;

	if (space.isConstantSpace()) {
		if (direct == null) {
			throw new AssertionError("Non-constant constant?");
		}
		// Re-use the given constant when it is already of the requested size
		return direct.size() == size ? offset : new JitConstVal(size, direct.value());
	}
	if (space.isMemorySpace()) {
		if (direct == null) {
			return dfm.generateIndirectMemoryVar(space, offset, size, quantize);
		}
		Varnode memVn = new Varnode(space.getAddress(direct.value().longValue()), size);
		return dfm.generateDirectMemoryVar(memVn);
	}
	if (direct == null) {
		throw new AssertionError("Indirect non-memory access?");
	}
	Varnode localVn = new Varnode(space.getAddress(direct.value().longValue()), size);
	varnodesRead.add(localVn);
	return mini.getVar(localVn);
}
/**
* Not supported by this state
*
* @throws UnsupportedOperationException always
*/
@Override
public Map<Register, JitVal> getRegisterValues() {
throw new UnsupportedOperationException();
}
/**
* Not supported by this state
*
* @throws UnsupportedOperationException always
*/
@Override
public MemBuffer getConcreteBuffer(Address address, Purpose purpose) {
throw new UnsupportedOperationException();
}
/**
* Not supported by this state
*
* @throws UnsupportedOperationException always
*/
@Override
public void clear() {
throw new UnsupportedOperationException();
}
/**
* Not supported by this state
*
* @throws UnsupportedOperationException always
*/
@Override
public PcodeExecutorState<JitVal> fork() {
throw new UnsupportedOperationException();
}
/**
* Get a complete catalog of all varnodes read, including overlapping, subregs, etc.
*
* <p>
* A varnode is added by each register or unique read via
* {@link #getVar(AddressSpace, JitVal, int, boolean, Reason)}. NOTE: This returns the state's
* own set, not a copy.
*
* @return the set of varnodes
*/
public Set<Varnode> getVarnodesRead() {
return varnodesRead;
}
/**
* Get a complete catalog of all varnodes written, including overlapping, subregs, etc.
*
* <p>
* A varnode is added by each direct (constant-offset) write via
* {@link #setVar(AddressSpace, JitVal, int, boolean, JitVal)}. NOTE: This returns the state's
* own set, not a copy.
*
* @return the set of varnodes
*/
public Set<Varnode> getVarnodesWritten() {
return varnodesWritten;
}
/**
* Capture the current state of intra-block analysis.
*
* <p>
* This may be required for follow-up op-use analysis by a {@link JitCallOtherOpIf} invoked
* using the standard strategy. All live varnodes <em>at the time of the call</em> must be
* considered used.
*
* <p>
* The capture is a copy of the unique and register definition maps, so definitions entered
* into this state afterward do not appear in it.
*
* @return the captured state
*/
public MiniDFState captureState() {
return mini.copy();
}
}

View file

@ -0,0 +1,275 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.lang.reflect.Method;
import java.lang.reflect.Parameter;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorState;
import ghidra.pcode.emu.jit.decode.DecoderUseropLibrary;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.JitOutVar;
import ghidra.pcode.emu.jit.var.JitVal;
import ghidra.pcode.exec.*;
import ghidra.pcode.exec.AnnotatedPcodeUseropLibrary.PcodeUserop;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.Varnode;
/**
* A wrapper around a userop library that places {@link PcodeOp#CALLOTHER callother} ops into the
* use-def graph
*
* <p>
* This is the library provided to
* {@link JitDataFlowExecutor#execute(PcodeProgram, PcodeUseropLibrary)} to cooperate with in the
* population of the use-def graph. The Sleigh compiler is very permissive when it comes to userop
* invocations. Notably, there's no way to declare the "prototype" or "signature" of the userop.
* Invocations can have any number of input operands and an optional output operand. Because the
* use-def graph takes careful notice of variables and their defining ops, there are two possible
* op nodes: {@link JitCallOtherOp} when no output operand is given and {@link JitCallOtherDefOp}
* when an output operand is given.
*
* <p>
* We employ several different strategies to handle a p-code userop:
*
* <ul>
* <li><b>Standard</b>: Invocation of the userop in the same fashion as the interpreted p-code
* emulator. Any live variables have to be written into the {@link JitBytesPcodeExecutorState state}
* before the invocation and then read back out afterward. If the userop accesses the state
* directly, we must use this strategy. Most userops whose implementations precede the
* introduction of JIT acceleration can be supported with this strategy, so long as they don't
* manipulate the emulator/executor directly in some unsupported way.</li>
* <li><b>Inlining</b>: The inclusion of the userop's p-code directly at its call site, replacing
* the {@link PcodeOp#CALLOTHER} op. This is implemented in the decoder by
* {@link DecoderUseropLibrary}. This strategy is only applicable to userops defined using Sleigh
* and/or p-code.</li>
* <li><b>Direct</b>: The direct invocation of the userop's defining Java method in the generated
* JVM bytecode. This is applicable when the method's parameters and return type are primitives that
* each map to a {@link JitTypeBehavior}. The input values can be passed directly in, which works
* well when the inputs are registers or uniques allocated in JVM locals. The return value can be
* handled similarly.</li>
* </ul>
*
* <p>
* The default strategy for all userops is Standard. Implementors should set the attributes of
* {@link PcodeUserop} and adjust the parameters of the userop's method accordingly. To allow
* inlining, set {@link PcodeUserop#canInline() canInline}. To allow direct invocation, set
* {@link PcodeUserop#functional()} and ensure all the parameter types and return type are
* supported. Supported types include primitives other than {@code char}. The return type may be
* {@code void}. No matter the strategy, userops may be subject to removal by the
* {@link JitOpUseModel}. To permit removal, clear {@link PcodeUserop#hasSideEffects()}. The default
* prevents removal. For the inline strategy, each op from the inlined userop is analyzed
* separately, so the userop could be partially culled. An inlined userop cannot have side effects,
* and so the attribute is ignored.
*/
public class JitDataFlowUseropLibrary implements PcodeUseropLibrary<JitVal> {
/**
* The wrapper of a specific userop definition
*/
protected class WrappedUseropDefinition implements PcodeUseropDefinition<JitVal> {
private final PcodeUseropDefinition<Object> decOp;
/**
* Wrap the given userop definition
*
* @param decOp the definition to wrap, to which most methods of this wrapper delegate
*/
public WrappedUseropDefinition(PcodeUseropDefinition<Object> decOp) {
this.decOp = decOp;
}
/**
* {@inheritDoc}
*
* @return the name of the wrapped userop
*/
@Override
public String getName() {
return decOp.getName();
}
/**
* {@inheritDoc}
*
* @return the input count of the wrapped userop
*/
@Override
public int getInputCount() {
return decOp.getInputCount();
}
/**
* Not supported by this wrapper
*
* <p>
* NOTE(review): Presumably the analysis only ever invokes the overload accepting the
* {@link PcodeOp}, so reaching this one indicates a bug -- confirm against the executor.
*
* @throws AssertionError always
*/
@Override
public void execute(PcodeExecutor<JitVal> executor, PcodeUseropLibrary<JitVal> library,
Varnode outVar, List<Varnode> inVars) {
throw new AssertionError();
}
/**
 * If the number of arguments matches the userop's Java method, map each argument value to the
 * type behavior for its corresponding parameter.
 *
 * <p>
 * This is used by the {@link JitTypeModel} to assign types to JVM locals in order to reduce the
 * number of type casts. In the case of direct invocation, this enters type information from
 * the userop's Java definition into the analysis.
 *
 * <p>
 * If the parameter count doesn't match, we just map the arguments to
 * {@link JitTypeBehavior#ANY} and let the error surface at run time. We need not throw the
 * exception until/unless the invocation is actually executed. Similarly, if the userop has no
 * Java method, or any parameter's type is not supported, we just map all arguments to
 * {@link JitTypeBehavior#ANY}, because the generator will apply standard invocation, which
 * does not benefit from type analysis.
 *
 * @param inVals the input arguments
 * @return an unmodifiable list giving, for each argument in order, its type behavior
 */
private List<JitTypeBehavior> getInputTypes(List<JitVal> inVals) {
	// A mismatch includes a declared count of -1 (variadic)
	Method method = getInputCount() == inVals.size() ? decOp.getJavaMethod() : null;
	if (method == null) {
		return JitDataFlowModel.allAny(inVals);
	}
	Parameter[] parameters = method.getParameters();
	List<JitTypeBehavior> types = new ArrayList<>();
	for (int index = 0; index < inVals.size(); index++) {
		JitTypeBehavior type = JitTypeBehavior.forJavaType(parameters[index].getType());
		if (type == null) {
			// One unsupported parameter rules out the whole signature
			return JitDataFlowModel.allAny(inVals);
		}
		types.add(type);
	}
	return Collections.unmodifiableList(types);
}
/**
 * Get the type behavior from the userop's Java method
 *
 * <p>
 * If the userop is not backed by a Java method, or its return type is not supported (i.e.,
 * {@link JitTypeBehavior#forJavaType(Class)} yields {@code null} for it), this returns
 * {@link JitTypeBehavior#ANY}. Thus, the result is never {@code null}.
 *
 * @return the type behavior
 */
private JitTypeBehavior getReturnType() {
	Method method = decOp.getJavaMethod();
	if (method == null) {
		return JitTypeBehavior.ANY;
	}
	JitTypeBehavior type = JitTypeBehavior.forJavaType(method.getReturnType());
	// Unsupported types come back null; honor the documented fallback instead of leaking it
	return type == null ? JitTypeBehavior.ANY : type;
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This "execution" is part of the intra-block analysis. This is the analytic interpretation of
 * the invocation, not the actual run time invocation. This derives type information about the
 * userop from the Java method and selects the appropriate {@link JitCallOtherOpIf callother} op
 * to enter into the use-def graph. If an output operand is given, then this generates an output
 * node defined by a {@link JitCallOtherDefOp}. Otherwise, it generates a (sink)
 * {@link JitCallOtherOp}. Either op captures the state of the analysis as of the invocation,
 * i.e., before the output (if any) is defined.
 *
 * @implNote When inlining a userop, the decoder leaves the original callother op in place. This
 *           is for branch bookkeeping. Thus, we ask the decoder-wrapped version of the userop
 *           if it was inlined. If so, we enter a {@link JitNopOp nop} node into the use-def
 *           graph. The node will still contain the original callother op, but the generator
 *           will not emit any code.
 * @implNote <b>TODO</b>: Maybe float types shouldn't be size cast as ints and then bitcast to
 *           the requested type. Either that, or we need to develop an overloading system for
 *           userops, or to require the user to be very careful about which to invoke for what
 *           (float) operand sizes. <b>TODO</b>: I don't know what the actual behavior is here.
 *           We should add test cases for this.
 * @implNote <b>TODO</b>: I think userop libraries may need to be able to hook this point. Not
 *           sure to what extent we should allow them control of code generation. But consider
 *           a syscall library. It might like to try to concretize, e.g., RAX, and just hard
 *           code the invoked userop in the generated code.
 */
@Override
public void execute(PcodeExecutor<JitVal> executor, PcodeUseropLibrary<JitVal> library,
		PcodeOp op) {
	if (decOp.canInlinePcode()) {
		dfm.notifyOp(new JitNopOp(op));
		return;
	}
	JitDataFlowState state = (JitDataFlowState) executor.getState();

	// Input 0 is skipped; the userop's arguments are the remaining inputs
	Varnode[] operands = op.getInputs();
	List<JitVal> args = new ArrayList<>();
	for (int i = 1; i < operands.length; i++) {
		args.add(state.getVar(operands[i], executor.getReason()));
	}
	List<JitVal> inVals = Collections.unmodifiableList(args);
	List<JitTypeBehavior> inTypes = getInputTypes(inVals);

	Varnode outVn = op.getOutput();
	if (outVn != null) {
		JitOutVar out = dfm.generateOutVar(outVn);
		dfm.notifyOp(new JitCallOtherDefOp(op, out, getReturnType(), decOp, inVals, inTypes,
			state.captureState()));
		state.setVar(outVn, out);
		return;
	}
	dfm.notifyOp(new JitCallOtherOp(op, decOp, inVals, inTypes, state.captureState()));
}
/**
* {@inheritDoc}
*
* @return the answer of the wrapped userop
*/
@Override
public boolean isFunctional() {
return decOp.isFunctional();
}
/**
* {@inheritDoc}
*
* @return the answer of the wrapped userop
*/
@Override
public boolean hasSideEffects() {
return decOp.hasSideEffects();
}
/**
* {@inheritDoc}
*
* @return the answer of the wrapped userop
*/
@Override
public boolean canInlinePcode() {
return decOp.canInlinePcode();
}
/**
* {@inheritDoc}
*
* @return the Java method of the wrapped userop, which may be {@code null}
*/
@Override
public Method getJavaMethod() {
return decOp.getJavaMethod();
}
/**
* {@inheritDoc}
*
* @return the defining library of the wrapped userop, not this wrapper's library
*/
@Override
public PcodeUseropLibrary<?> getDefiningLibrary() {
return decOp.getDefiningLibrary();
}
}
private final JitDataFlowModel dfm;
private final Map<String, PcodeUseropDefinition<JitVal>> userops;
/**
* Construct a wrapper library
*
* @param context the context from which the decoder's userop wrapper library is retrieved
* @param dfm the data flow model whose use-def graph to populate.
* @implNote Each time this is constructed, it has to traverse the wrapped userop library and
* create a wrapper for each individual userop. For a large library, this could get
* expensive, and it currently must happen for every passage compiled. Part of the
* cause for this requirement is the reference to the data flow mode used by each
* userop wrapper.
*/
public JitDataFlowUseropLibrary(JitAnalysisContext context, JitDataFlowModel dfm) {
this.dfm = dfm;
this.userops = context.getPassage()
.getDecodeLibrary()
.getUserops()
.values()
.stream()
.map(WrappedUseropDefinition::new)
.collect(Collectors.toUnmodifiableMap(d -> d.getName(), d -> d));
}
/**
* {@inheritDoc}
*
* @return the unmodifiable map, by name, of the wrappers created at construction
*/
@Override
public Map<String, PcodeUseropDefinition<JitVal>> getUserops() {
return userops;
}
}

View file

@ -0,0 +1,95 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.JitOutVar;
import ghidra.pcode.emu.jit.var.JitVal;
/**
* A visitor that traverses the use-def graph upward, that is from uses toward definitions
*/
public interface JitOpUpwardVisitor extends JitOpVisitor {
@Override
default void visitUnOp(JitUnOp op) {
visitVal(op.u());
}
@Override
default void visitBinOp(JitBinOp op) {
visitVal(op.l());
visitVal(op.r());
}
@Override
default void visitStoreOp(JitStoreOp op) {
visitVal(op.offset());
visitVal(op.value());
}
@Override
default void visitLoadOp(JitLoadOp op) {
visitVal(op.offset());
}
@Override
default void visitCallOtherOp(JitCallOtherOp otherOp) {
for (JitVal v : otherOp.args()) {
visitVal(v);
}
}
@Override
default void visitCallOtherDefOp(JitCallOtherDefOp otherOp) {
for (JitVal v : otherOp.args()) {
visitVal(v);
}
}
@Override
default void visitCatenateOp(JitCatenateOp op) {
for (JitVal p : op.parts()) {
visitVal(p);
}
}
@Override
default void visitPhiOp(JitPhiOp op) {
for (JitVal opt : op.options().values()) {
visitVal(opt);
}
}
@Override
default void visitSubPieceOp(JitSynthSubPieceOp op) {
visitVal(op.v());
}
@Override
default void visitCBranchOp(JitCBranchOp op) {
visitVal(op.cond());
}
@Override
default void visitBranchIndOp(JitBranchIndOp op) {
visitVal(op.target());
}
@Override
default void visitOutVar(JitOutVar v) {
visitOp(v.definition());
}
}

View file

@ -0,0 +1,333 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.util.*;
import ghidra.pcode.emu.jit.JitCompiler;
import ghidra.pcode.emu.jit.JitCompiler.Diag;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.BlockFlow;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitDataFlowState.MiniDFState;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.JitMissingVar;
import ghidra.pcode.emu.jit.var.JitVal;
import ghidra.pcode.exec.AnnotatedPcodeUseropLibrary.PcodeUserop;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.Varnode;
/**
 * The operator output use analysis for JIT-accelerated emulation.
 *
 * <p>
 * This is the Operation Elimination phase of the {@link JitCompiler}, implemented as a simple
 * graph traversal. It computes the set of {@link JitOp ops} whose outputs are (or could be) used
 * by a downstream op: every "sink" op plus every op a sink depends on.
 *
 * <p>
 * Some sinks are easy to spot, because they directly affect memory, control flow, or some other
 * aspect of the emulated machine:
 *
 * <ul>
 * <li><b>Memory outputs</b> - any p-code op whose output operand is a memory varnode.</li>
 * <li><b>Store ops</b> - a {@link JitStoreOp store} op.</li>
 * <li><b>Branch ops</b> - one of {@link JitBranchOp branch}, {@link JitCBranchOp cbranch}, or
 * {@link JitBranchIndOp branchind}.</li>
 * <li><b>User ops with side effects</b> - a {@link JitCallOtherOpIf callother} to a method where
 * {@link PcodeUserop#hasSideEffects() hasSideEffects}{@code =true}.</li>
 * <li><b>Errors</b> - e.g., {@link JitUnimplementedOp unimplemented}, {@link JitCallOtherMissingOp
 * missing userop}.</li>
 * </ul>
 *
 * <p>
 * These are identified via {@link JitOp#canBeRemoved()}: an op that returns {@code false} is a
 * sink.
 *
 * <p>
 * A second class of sinks is the definitions of SSA variables that could be retired. Retirement
 * can result from exiting the passage, flowing to a block with fewer live variables, or invoking a
 * userop with the Standard strategy (see {@link JitDataFlowUseropLibrary}). The
 * {@link JitVarScopeModel scope} analysis has already run, so the retired varnodes are known. To
 * map those to SSA variables, we must consider where the retirement happens. For block
 * transitions, it is always at the end of the block, so
 * {@link JitDataFlowBlockAnalyzer#getVar(Varnode)} applies. For userops, the intra-block analysis
 * state is captured into {@link JitCallOtherOpIf#dfState()} <em>at the time of invocation</em>, so
 * {@link MiniDFState#getVar(Varnode)} applies. The defining op of each retired SSA variable is
 * considered used.
 *
 * <p>
 * Retirement due to block flow needs some care. Suppose an op defines a variable in a block that
 * ends with a conditional branch. The analyzer cannot know which flow will be taken, so it must
 * allow for either. If the variable goes out of scope and is retired along <em>any</em> of the
 * flows, its defining op is considered used.
 *
 * <p>
 * What remains is an upward traversal of the use-def graph, collecting every dependency of every
 * sink. All of those dependencies are considered used.
 *
 * @implNote The {@link JitOpUpwardVisitor} permits seeding of values (constants and variables) and
 *           ops. Thus, we seed using the non-{@link JitOp#canBeRemoved() removable} ops, and the
 *           retireable SSA variables. We need not look up the variables' defining ops ourselves,
 *           since the visitor does that.
 */
public class JitOpUseModel {
	private final JitAnalysisContext context;
	private final JitControlFlowModel cfm;
	private final JitDataFlowModel dfm;
	private final JitVarScopeModel vsm;

	/** Every op found to be used; populated only when elimination is enabled */
	private final Set<JitOp> used = new HashSet<>();

	/**
	 * Construct the operator use model
	 *
	 * <p>
	 * The analysis runs immediately, but only if the configuration asks for unused operations to
	 * be removed.
	 *
	 * @param context the analysis context
	 * @param cfm the control flow model
	 * @param dfm the data flow model
	 * @param vsm the variable scope model
	 */
	public JitOpUseModel(JitAnalysisContext context, JitControlFlowModel cfm,
			JitDataFlowModel dfm, JitVarScopeModel vsm) {
		this.context = context;
		this.cfm = cfm;
		this.dfm = dfm;
		this.vsm = vsm;

		boolean eliminate = context.getConfiguration().removeUnusedOperations();
		if (eliminate) {
			analyze();
		}
	}

	/**
	 * The implementation of the graph traversal
	 *
	 * <p>
	 * This specializes the upward use-def visitor so that each op reached via
	 * {@link #visitOp(JitOp)}, {@link #visitVal(JitVal)}, etc., is recorded in
	 * {@link JitOpUseModel#used}, along with its dependencies.
	 */
	class OpUseCollector implements JitOpUpwardVisitor {
		final JitBlock block;
		final JitDataFlowBlockAnalyzer analyzer;

		/**
		 * Construct a collector for the given block
		 *
		 * @param block the block whose ops are being examined
		 */
		public OpUseCollector(JitBlock block) {
			this.block = block;
			this.analyzer = dfm.getAnalyzer(block);
		}

		@Override
		public void visitOp(JitOp op) {
			// Only ascend the first time an op is marked; its dependencies are already covered
			if (used.add(op)) {
				JitOpUpwardVisitor.super.visitOp(op);
			}
		}

		@Override
		public void visitMissingVar(JitMissingVar missingVar) {
			throw new AssertionError("missing: " + missingVar);
		}

		/**
		 * Visit a varnode that could be retired upon exiting a block
		 *
		 * <p>
		 * This applies both to leaving the passage altogether and to flowing into another block.
		 * Every definition of the varnode (including just-generated phi nodes) is visited.
		 *
		 * @param vn the retireable varnode
		 */
		void visitRetireable(Varnode vn) {
			for (JitVal definition : analyzer.getOutput(vn)) {
				visitVal(definition);
			}
		}

		/**
		 * Visit a varnode that will be retired before calling a userop
		 *
		 * <p>
		 * This applies only when the userop is invoked using the Standard strategy.
		 *
		 * @see JitDataFlowUseropLibrary
		 * @param vn the retired varnode
		 * @param callother the callother op
		 */
		void visitCallOtherRetireable(Varnode vn, JitCallOtherOpIf callother) {
			for (JitVal definition : callother.dfState().getDefinitions(vn)) {
				visitVal(definition);
			}
		}
	}

	/**
	 * Get the varnodes that will be retired before the given callother
	 *
	 * @param block the block containing the callother
	 * @param op the callother op
	 * @return the block's live varnodes, or empty, depending on the callother invocation strategy.
	 */
	private Set<Varnode> getCallOtherRetireVarnodes(JitBlock block, JitCallOtherOpIf op) {
		// Should not see inline-replaced ops here
		if (!op.userop().isFunctional()) {
			return vsm.getLiveVars(block);
		}
		return Set.of();
	}

	/**
	 * Get the varnodes that could be retired upon leaving this block
	 *
	 * <p>
	 * If the block has an {@link JitBlock#branchesOut() exit} branch, then every live varnode
	 * could be retired. Otherwise, the result is the union of the varnodes retired along each flow
	 * {@link JitBlock#flowsFrom() from} the block. Every block must have some way of leaving,
	 * i.e., {@link JitBlock#branchesOut()} and {@link JitBlock#flowsFrom()} cannot both be empty.
	 *
	 * @implNote Retired varnodes are a difference of live sets, so rather than union the
	 *           per-flow differences, this intersects the live sets of all flow destinations and
	 *           subtracts that from this block's live set.
	 *
	 * @param block the block to examine
	 * @return the set of varnodes that could be retired
	 */
	private Set<Varnode> getCouldRetireVarnodes(JitBlock block) {
		if (!block.branchesOut().isEmpty()) {
			return vsm.getLiveVars(block);
		}

		// Varnodes that remain live no matter which flow is taken
		Set<Varnode> liveOnEveryFlow = null;
		for (BlockFlow flow : block.flowsFrom().values()) {
			Set<Varnode> liveAtDest = vsm.getLiveVars(flow.to());
			if (liveOnEveryFlow == null) {
				liveOnEveryFlow = new HashSet<>(liveAtDest);
			}
			else {
				liveOnEveryFlow.retainAll(liveAtDest);
			}
		}
		if (liveOnEveryFlow == null) {
			throw new AssertionError();
			// or just return Set.of()?
		}

		Set<Varnode> couldRetire = new HashSet<>(vsm.getLiveVars(block));
		couldRetire.removeAll(liveOnEveryFlow);
		return couldRetire;
	}

	/**
	 * Perform the analysis
	 *
	 * <p>
	 * The first pass backfills any phi nodes that data flow analysis might not have generated,
	 * then hands the new ones to inter-block analysis. The second pass collects the sinks and runs
	 * the upward traversal from each. Traversal can stop at any op already marked used, because
	 * its dependencies were marked at the same time. Visit order does not matter, so blocks and
	 * ops are simply processed as encountered.
	 */
	private void analyze() {
		/**
		 * I want every value that could get written back out to the state, either because it's
		 * retired, or because the output operand is memory. I also need inputs to branches or to
		 * callother's, since those may have side effects depending on those inputs.
		 */
		Set<JitPhiOp> phisBefore = Set.copyOf(dfm.phiNodes());
		for (JitBlock block : cfm.getBlocks()) {
			for (PcodeOp pcodeOp : block.getCode()) {
				if (dfm.getJitOp(pcodeOp) instanceof JitCallOtherOpIf callother) {
					for (Varnode vn : getCallOtherRetireVarnodes(block, callother)) {
						// We only want the side effect: Adds needed phi.
						callother.dfState().getVar(vn); // Visit is later
					}
				}
			}
			for (Varnode vn : getCouldRetireVarnodes(block)) {
				dfm.getAnalyzer(block).getVar(vn); // Visit is later
			}
		}

		// Whatever phi nodes appeared during the pass above still need inter-block analysis
		Set<JitPhiOp> extraPhis = new LinkedHashSet<>(dfm.phiNodes());
		extraPhis.removeAll(phisBefore);
		dfm.analyzeInterblock(extraPhis);

		for (JitBlock block : cfm.getBlocks()) {
			OpUseCollector collector = new OpUseCollector(block);

			// Locate memory outputs, stores, branches, callothers
			for (PcodeOp pcodeOp : block.getCode()) {
				JitOp jitOp = dfm.getJitOp(pcodeOp);
				if (jitOp instanceof JitCallOtherOpIf callother) {
					for (Varnode vn : getCallOtherRetireVarnodes(block, callother)) {
						collector.visitCallOtherRetireable(vn, callother);
					}
				}
				if (!jitOp.canBeRemoved()) {
					collector.visitOp(jitOp);
				}
			}

			// Compute retire-able variables
			for (Varnode vn : getCouldRetireVarnodes(block)) {
				collector.visitRetireable(vn);
			}
		}
	}

	/**
	 * Check whether the given op node is used.
	 *
	 * <p>
	 * A used op cannot be eliminated. When the configuration disables removal of unused
	 * operations, every op is reported as used.
	 *
	 * @param op the op to check
	 * @return true if used, i.e., non-removable
	 */
	public boolean isUsed(JitOp op) {
		return !context.getConfiguration().removeUnusedOperations() || used.contains(op);
	}

	/**
	 * For diagnostics: Dump the analysis result to stderr
	 *
	 * @see Diag#PRINT_OUM
	 */
	public void dumpResult() {
		System.err.println("STAGE: OpUse");
		for (JitBlock block : cfm.getBlocks()) {
			JitDataFlowBlockAnalyzer analyzer = dfm.getAnalyzer(block);
			System.err.println(" Block: " + block);

			for (Varnode vn : getCouldRetireVarnodes(block)) {
				for (JitVal definition : analyzer.getOutput(vn)) {
					System.err.println(" Could retire: " + definition);
				}
			}

			for (PcodeOp pcodeOp : block.getCode()) {
				JitOp jitOp = dfm.getJitOp(pcodeOp);
				if (isUsed(jitOp)) {
					continue;
				}
				System.err.println(" Removed: %s: %s".formatted(pcodeOp.getSeqnum(), jitOp));
			}
		}
	}
}

View file

@ -0,0 +1,270 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.*;
/**
 * A visitor for traversing the use-def graph
 *
 * <p>
 * The default implementations here do nothing other than discern the type of an op or variable and
 * dispatch the invocation appropriately. To traverse the graph upward, consider
 * {@link JitOpUpwardVisitor}. Note no "downward" visitor is currently provided, because it was not
 * needed.
 */
public interface JitOpVisitor {
	/**
	 * Visit an op node
	 *
	 * <p>
	 * The default implementation dispatches this to the type-specific {@code visit} method.
	 *
	 * @param op the op visited
	 * @throws NullPointerException if {@code op} is null
	 * @throws AssertionError if the op's type is not recognized
	 */
	default void visitOp(JitOp op) {
		switch (op) {
			case null -> throw new NullPointerException("null op");
			case JitUnOp unOp -> visitUnOp(unOp);
			case JitBinOp binOp -> visitBinOp(binOp);
			case JitStoreOp storeOp -> visitStoreOp(storeOp);
			case JitLoadOp loadOp -> visitLoadOp(loadOp);
			case JitCallOtherOp otherOp -> visitCallOtherOp(otherOp);
			case JitCallOtherDefOp otherOp -> visitCallOtherDefOp(otherOp);
			case JitCallOtherMissingOp otherOp -> visitCallOtherMissingOp(otherOp);
			case JitCatenateOp catOp -> visitCatenateOp(catOp);
			case JitPhiOp phiOp -> visitPhiOp(phiOp);
			case JitSynthSubPieceOp pieceOp -> visitSubPieceOp(pieceOp);
			case JitBranchOp branchOp -> visitBranchOp(branchOp);
			case JitCBranchOp cBranchOp -> visitCBranchOp(cBranchOp);
			case JitBranchIndOp branchIndOp -> visitBranchIndOp(branchIndOp);
			case JitUnimplementedOp unimplOp -> visitUnimplementedOp(unimplOp);
			case JitNopOp nopOp -> visitNopOp(nopOp);
			default -> throw new AssertionError("Unrecognized op: " + op);
		}
	}

	/**
	 * Visit a {@link JitUnOp}
	 *
	 * @param unOp the op visited
	 */
	default void visitUnOp(JitUnOp unOp) {
	}

	/**
	 * Visit a {@link JitBinOp}
	 *
	 * @param binOp the op visited
	 */
	default void visitBinOp(JitBinOp binOp) {
	}

	/**
	 * Visit a {@link JitStoreOp}
	 *
	 * @param storeOp the op visited
	 */
	default void visitStoreOp(JitStoreOp storeOp) {
	}

	/**
	 * Visit a {@link JitLoadOp}
	 *
	 * @param loadOp the op visited
	 */
	default void visitLoadOp(JitLoadOp loadOp) {
	}

	/**
	 * Visit a {@link JitCallOtherOp}
	 *
	 * @param otherOp the op visited
	 */
	default void visitCallOtherOp(JitCallOtherOp otherOp) {
	}

	/**
	 * Visit a {@link JitCallOtherDefOp}
	 *
	 * @param otherOp the op visited
	 */
	default void visitCallOtherDefOp(JitCallOtherDefOp otherOp) {
	}

	/**
	 * Visit a {@link JitCallOtherMissingOp}
	 *
	 * @param otherOp the op visited
	 */
	default void visitCallOtherMissingOp(JitCallOtherMissingOp otherOp) {
	}

	/**
	 * Visit a {@link JitCatenateOp}
	 *
	 * @param catOp the op visited
	 */
	default void visitCatenateOp(JitCatenateOp catOp) {
	}

	/**
	 * Visit a {@link JitPhiOp}
	 *
	 * @param phiOp the op visited
	 */
	default void visitPhiOp(JitPhiOp phiOp) {
	}

	/**
	 * Visit a {@link JitSynthSubPieceOp}
	 *
	 * @param pieceOp the op visited
	 */
	default void visitSubPieceOp(JitSynthSubPieceOp pieceOp) {
	}

	/**
	 * Visit a {@link JitBranchOp}
	 *
	 * @param branchOp the op visited
	 */
	default void visitBranchOp(JitBranchOp branchOp) {
	}

	/**
	 * Visit a {@link JitCBranchOp}
	 *
	 * @param cBranchOp the op visited
	 */
	default void visitCBranchOp(JitCBranchOp cBranchOp) {
	}

	/**
	 * Visit a {@link JitBranchIndOp}
	 *
	 * @param branchIndOp the op visited
	 */
	default void visitBranchIndOp(JitBranchIndOp branchIndOp) {
	}

	/**
	 * Visit a {@link JitUnimplementedOp}
	 *
	 * @param unimplOp the op visited
	 */
	default void visitUnimplementedOp(JitUnimplementedOp unimplOp) {
	}

	/**
	 * Visit a {@link JitNopOp}
	 *
	 * @param nopOp the op visited
	 */
	default void visitNopOp(JitNopOp nopOp) {
	}

	/**
	 * Visit a {@link JitVal}
	 *
	 * <p>
	 * The default implementation dispatches this to the type-specific {@code visit} method.
	 *
	 * @param v the value visited
	 * @throws NullPointerException if {@code v} is null
	 * @throws AssertionError if the value's type is not recognized
	 */
	default void visitVal(JitVal v) {
		switch (v) {
			// Same exception a null selector would raise anyway, but with a message, as in visitOp
			case null -> throw new NullPointerException("null val");
			case JitConstVal constVal -> visitConstVal(constVal);
			case JitVar jVar -> visitVar(jVar);
			default -> throw new AssertionError("Unrecognized val: " + v);
		}
	}

	/**
	 * Visit a {@link JitVar}
	 *
	 * <p>
	 * The default implementation dispatches this to the type-specific {@code visit} method.
	 *
	 * @param v the variable visited
	 * @throws NullPointerException if {@code v} is null
	 * @throws AssertionError if the variable's type is not recognized
	 */
	default void visitVar(JitVar v) {
		switch (v) {
			// Same exception a null selector would raise anyway, but with a message, as in visitOp
			case null -> throw new NullPointerException("null var");
			case JitInputVar inputVar -> visitInputVar(inputVar);
			case JitMissingVar missingVar -> visitMissingVar(missingVar);
			case JitOutVar outVar -> visitOutVar(outVar);
			case JitDirectMemoryVar dirMemVar -> visitDirectMemoryVar(dirMemVar);
			case JitIndirectMemoryVar indMemVar -> visitIndirectMemoryVar(indMemVar);
			default -> throw new AssertionError("Unrecognized var: " + v);
		}
	}

	/**
	 * Visit a {@link JitConstVal}
	 *
	 * @param constVal the constant visited
	 */
	default void visitConstVal(JitConstVal constVal) {
	}

	/**
	 * Visit a {@link JitDirectMemoryVar}
	 *
	 * @param dirMemVar the variable visited
	 */
	default void visitDirectMemoryVar(JitDirectMemoryVar dirMemVar) {
	}

	/**
	 * Visit a {@link JitIndirectMemoryVar}
	 *
	 * <p>
	 * NOTE: These should not ordinarily appear in the use-def graph. There is only the one
	 * {@link JitIndirectMemoryVar#INSTANCE}, and it's used as a temporary dummy. Indirect memory
	 * access is instead modeled by the {@link JitLoadOp}.
	 *
	 * @param indMemVar the variable visited
	 * @throws AssertionError always, in this default implementation
	 */
	default void visitIndirectMemoryVar(JitIndirectMemoryVar indMemVar) {
		throw new AssertionError("Unexpected indirect memory var: " + indMemVar);
	}

	/**
	 * Visit a {@link JitInputVar}
	 *
	 * @param inputVar the variable visited
	 */
	default void visitInputVar(JitInputVar inputVar) {
	}

	/**
	 * Visit a {@link JitMissingVar}
	 *
	 * @param missingVar the variable visited
	 */
	default void visitMissingVar(JitMissingVar missingVar) {
	}

	/**
	 * Visit a {@link JitOutVar}
	 *
	 * @param outVar the variable visited
	 */
	default void visitOutVar(JitOutVar outVar) {
	}
}

View file

@ -0,0 +1,529 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import static org.objectweb.asm.Opcodes.*;
import java.util.*;
import org.objectweb.asm.Opcodes;
/**
 * The p-code type of an operand.
 *
 * <p>
 * A type is an integer or floating-point value of a specific size in bytes. All values and
 * variables in p-code are just bit vectors. The operators interpret those vectors according to a
 * {@link JitTypeBehavior}. While types only technically belong to the operands, we also talk about
 * values, variables, and varnodes being assigned types, so that we can allocate suitable JVM
 * locals.
 */
public interface JitType {
	/**
	 * Compare two types by preference. The type with the more preferred behavior then smaller size
	 * is preferred.
	 *
	 * @param t1 the first type
	 * @param t2 the second type
	 * @return as in {@link Comparator#compare(Object, Object)}
	 */
	static int compare(JitType t1, JitType t2) {
		int c;
		c = Integer.compare(t1.pref(), t2.pref());
		if (c != 0) {
			return c;
		}
		c = Integer.compare(t1.size(), t2.size());
		if (c != 0) {
			return c;
		}
		return 0;
	}

	/**
	 * Identify the p-code type that is exactly represented by the given JVM type.
	 *
	 * <p>
	 * This is used during Direct userop invocation to convert the arguments and return value.
	 * Note that {@code boolean} maps to {@link IntJitType#I4}, the same as {@code int}.
	 *
	 * @param cls the primitive class (not boxed)
	 * @return the p-code type
	 * @throws IllegalArgumentException if the class is not one of {@code boolean}, {@code byte},
	 *             {@code short}, {@code int}, {@code long}, {@code float}, or {@code double}
	 * @see JitDataFlowUseropLibrary
	 */
	public static JitType forJavaType(Class<?> cls) {
		if (cls == boolean.class) {
			return IntJitType.I4;
		}
		if (cls == byte.class) {
			return IntJitType.I1;
		}
		if (cls == short.class) {
			return IntJitType.I2;
		}
		if (cls == int.class) {
			return IntJitType.I4;
		}
		if (cls == long.class) {
			return LongJitType.I8;
		}
		if (cls == float.class) {
			return FloatJitType.F4;
		}
		if (cls == double.class) {
			return DoubleJitType.F8;
		}
		throw new IllegalArgumentException();
	}

	/**
	 * A p-code type that can be represented in a single JVM variable.
	 */
	public interface SimpleJitType extends JitType {
		/**
		 * The JVM type of the variable that can represent a p-code variable of this type
		 *
		 * @return the primitive class (not boxed)
		 */
		Class<?> javaType();

		/**
		 * The JVM opcode to load a local variable of this type onto the stack
		 *
		 * @return the opcode
		 */
		int opcodeLoad();

		/**
		 * The JVM opcode to store a local variable of this type from the stack
		 *
		 * @return the opcode
		 */
		int opcodeStore();

		/**
		 * Re-apply the {@link JitTypeBehavior#INTEGER integer} behavior to this type
		 *
		 * <p>
		 * This may be slightly faster than {@code JitTypeBehavior.INTEGER.resolve(this)}, because
		 * each type can pick its int type directly, and integer types can just return {@code this}.
		 *
		 * @return this type as an int
		 */
		SimpleJitType asInt();
	}

	/**
	 * The p-code types for integers of size 1 through 4, i.e., that fit in a JVM int.
	 *
	 * @param size the size in bytes
	 */
	public record IntJitType(int size) implements SimpleJitType {
		/** {@code int1}: a 1-byte integer */
		public static final IntJitType I1 = new IntJitType(1);
		/** {@code int2}: a 2-byte integer */
		public static final IntJitType I2 = new IntJitType(2);
		/** {@code int3}: a 3-byte integer */
		public static final IntJitType I3 = new IntJitType(3);
		/** {@code int4}: a 4-byte integer */
		public static final IntJitType I4 = new IntJitType(4);

		/**
		 * Get the type for an integer of the given size 1 through 4
		 *
		 * @param size the size in bytes
		 * @return the type
		 * @throws IllegalArgumentException for any size <em>not</em> 1 through 4
		 */
		public static IntJitType forSize(int size) {
			return switch (size) {
				case 1 -> I1;
				case 2 -> I2;
				case 3 -> I3;
				case 4 -> I4;
				default -> throw new IllegalArgumentException("size:" + size);
			};
		}

		/**
		 * Compact constructor to check the size
		 *
		 * @param size the size in bytes
		 */
		public IntJitType {
			assert 0 < size && size <= Integer.BYTES;
		}

		@Override
		public int pref() {
			return 0;
		}

		@Override
		public String nm() {
			return "i";
		}

		@Override
		public Class<?> javaType() {
			return int.class;
		}

		@Override
		public int opcodeLoad() {
			return ILOAD;
		}

		@Override
		public int opcodeStore() {
			return ISTORE;
		}

		@Override
		public IntJitType ext() {
			return I4;
		}

		@Override
		public IntJitType asInt() {
			return this;
		}
	}

	/**
	 * The p-code types for integers of size 5 through 8, i.e., that fit in a JVM long.
	 *
	 * @param size the size in bytes
	 */
	public record LongJitType(int size) implements SimpleJitType {
		/** {@code int5}: a 5-byte integer */
		public static final LongJitType I5 = new LongJitType(5);
		/** {@code int6}: a 6-byte integer */
		public static final LongJitType I6 = new LongJitType(6);
		/** {@code int7}: a 7-byte integer */
		public static final LongJitType I7 = new LongJitType(7);
		/** {@code int8}: a 8-byte integer */
		public static final LongJitType I8 = new LongJitType(8);

		/**
		 * Get the type for an integer of the given size 5 through 8
		 *
		 * @param size the size in bytes
		 * @return the type
		 * @throws IllegalArgumentException for any size <em>not</em> 5 through 8
		 */
		public static LongJitType forSize(int size) {
			return switch (size) {
				case 5 -> I5;
				case 6 -> I6;
				case 7 -> I7;
				case 8 -> I8;
				default -> throw new IllegalArgumentException("size:" + size);
			};
		}

		/**
		 * Compact constructor to check the size
		 *
		 * @param size the size in bytes
		 */
		public LongJitType {
			// NOTE(review): this admits sizes 1 through 4 as well, which is looser than the
			// documented 5-through-8 range. Confirm no caller relies on that before tightening.
			assert 0 < size && size <= Long.BYTES;
		}

		@Override
		public int pref() {
			return 1;
		}

		@Override
		public String nm() {
			return "l";
		}

		@Override
		public Class<?> javaType() {
			return long.class;
		}

		@Override
		public int opcodeLoad() {
			return LLOAD;
		}

		@Override
		public int opcodeStore() {
			return LSTORE;
		}

		@Override
		public LongJitType ext() {
			return I8;
		}

		@Override
		public LongJitType asInt() {
			return this;
		}
	}

	/**
	 * The p-code type for floating-point of size 4, i.e., that fits in a JVM float.
	 */
	public enum FloatJitType implements SimpleJitType {
		/** {@code float4}: a 4-byte float */
		F4;

		@Override
		public int pref() {
			return 2;
		}

		@Override
		public String nm() {
			return "f";
		}

		@Override
		public int size() {
			return Float.BYTES;
		}

		@Override
		public Class<?> javaType() {
			return float.class;
		}

		@Override
		public int opcodeLoad() {
			return FLOAD;
		}

		@Override
		public int opcodeStore() {
			return FSTORE;
		}

		@Override
		public FloatJitType ext() {
			return this;
		}

		@Override
		public IntJitType asInt() {
			return IntJitType.I4;
		}
	}

	/**
	 * The p-code type for floating-point of size 8, i.e., that fits in a JVM double.
	 */
	public enum DoubleJitType implements SimpleJitType {
		/** {@code float8}: a 8-byte float */
		F8;

		@Override
		public int pref() {
			return 3;
		}

		@Override
		public String nm() {
			return "d";
		}

		@Override
		public int size() {
			return Double.BYTES;
		}

		@Override
		public Class<?> javaType() {
			return double.class;
		}

		@Override
		public int opcodeLoad() {
			return DLOAD;
		}

		@Override
		public int opcodeStore() {
			return DSTORE;
		}

		@Override
		public DoubleJitType ext() {
			return this;
		}

		@Override
		public LongJitType asInt() {
			return LongJitType.I8;
		}
	}

	/**
	 * <b>WIP</b>: The p-code types for integers of size 9 and greater.
	 *
	 * @param size the size in bytes
	 */
	public record MpIntJitType(int size) implements JitType {
		// NOTE(review): plain HashMap; confirm types are never requested from concurrent threads
		private static final Map<Integer, MpIntJitType> FOR_SIZES = new HashMap<>();

		/**
		 * Get the type for an integer of the given size 9 or greater
		 *
		 * <p>
		 * Instances are cached per size.
		 *
		 * @implNote The size is not validated here; callers are expected to pass 9 or greater.
		 *
		 * @param size the size in bytes
		 * @return the type
		 */
		public static MpIntJitType forSize(int size) {
			return FOR_SIZES.computeIfAbsent(size, MpIntJitType::new);
		}

		@Override
		public int pref() {
			return 4;
		}

		@Override
		public String nm() {
			return "I";
		}

		/**
		 * The total number of JVM int variables ("legs") required to store the int
		 *
		 * @return the total number of legs
		 */
		public int legsAlloc() {
			return (size + Integer.BYTES - 1) / Integer.BYTES;
		}

		/**
		 * The number of legs that are filled
		 *
		 * @return the number of whole legs
		 */
		public int legsWhole() {
			return size / Integer.BYTES;
		}

		/**
		 * The number of bytes filled in the last leg, if partial
		 *
		 * @return the number of bytes in the partial leg, or 0 if all legs are whole
		 */
		public int partialSize() {
			return size % Integer.BYTES;
		}

		/**
		 * Get the p-code type that describes the part of the variable in each leg
		 *
		 * <p>
		 * Each whole leg will have the type {@link IntJitType#I4}, and the partial leg, if
		 * applicable, will have its appropriate smaller integer type. The partial leg, when
		 * present, is the first element. The list always has {@link #legsAlloc()} non-null
		 * elements.
		 *
		 * @return the list of types, each fitting in a JVM int.
		 */
		public List<SimpleJitType> legTypes() {
			IntJitType[] types = new IntJitType[legsAlloc()];
			int i = 0;
			if (partialSize() != 0) {
				types[i++] = IntJitType.forSize(partialSize());
			}
			/**
			 * Fill every remaining slot. Bounding this by legsWhole() would stop one short
			 * whenever the partial leg has taken slot 0, leaving the final element null.
			 */
			Arrays.fill(types, i, types.length, IntJitType.I4);
			return Arrays.asList(types);
		}

		@Override
		public MpIntJitType ext() {
			return MpIntJitType.forSize(legsAlloc() * Integer.BYTES);
		}
	}

	/**
	 * <b>WIP</b>: The p-code types for floats of size other than 4 and 8
	 *
	 * @param size the size in bytes
	 */
	public record MpFloatJitType(int size) implements JitType {
		// NOTE(review): plain HashMap; confirm types are never requested from concurrent threads
		private static final Map<Integer, MpFloatJitType> FOR_SIZES = new HashMap<>();

		/**
		 * Get the type for a float of the given size other than 4 and 8
		 *
		 * <p>
		 * Instances are cached per size.
		 *
		 * @implNote The size is not validated here; callers are expected not to pass 4 or 8.
		 *
		 * @param size the size in bytes
		 * @return the type
		 */
		public static MpFloatJitType forSize(int size) {
			return FOR_SIZES.computeIfAbsent(size, MpFloatJitType::new);
		}

		@Override
		public int pref() {
			return 5;
		}

		@Override
		public String nm() {
			return "F";
		}

		@Override
		public MpFloatJitType ext() {
			return this;
		}
	}

	/**
	 * The preference for this type. Smaller is more preferred.
	 *
	 * @return the preference
	 */
	public int pref();

	/**
	 * Part of the name of a JVM local variable allocated for this type
	 *
	 * @return the "type" part of a JVM local's name
	 */
	public String nm();

	/**
	 * The size of this type
	 *
	 * @return the size in bytes
	 */
	public int size();

	/**
	 * Extend this p-code type to the p-code type that fills its entire host JVM type.
	 *
	 * <p>
	 * This is useful, e.g., when multiplying two {@link IntJitType#I3 int3} values using
	 * {@link Opcodes#IMUL imul}: the result might be an {@link IntJitType#I4 int4} and so may need
	 * additional conversion.
	 *
	 * @return the extended type
	 */
	JitType ext();
}

View file

@ -0,0 +1,182 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.util.Comparator;
import java.util.Objects;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.op.JitCopyOp;
import ghidra.pcode.emu.jit.op.JitPhiOp;
/**
* The behavior/requirement for an operand's type.
*
* @see JitTypeModel
*/
public enum JitTypeBehavior {
/**
* No type requirement or interpretation.
*/
ANY {
/**
* {@inheritDoc}
*
* <p>
* If no type is specified, we default to ints.
*/
@Override
public JitType type(int size) {
return INTEGER.type(size);
}
@Override
public JitType resolve(JitType varType) {
return varType;
}
},
/**
* The bits are interpreted as an integer.
*/
INTEGER {
@Override
public JitType type(int size) {
assert size > 0;
return switch (size) {
case 1, 2, 3, 4 -> IntJitType.forSize(size);
case 5, 6, 7, 8 -> LongJitType.forSize(size);
default -> MpIntJitType.forSize(size);
};
}
@Override
public JitType resolve(JitType varType) {
return type(varType.size());
}
},
/**
* The bits are interpreted as a floating-point value.
*/
FLOAT {
@Override
public JitType type(int size) {
return switch (size) {
case Float.BYTES -> FloatJitType.F4;
case Double.BYTES -> DoubleJitType.F8;
default -> MpFloatJitType.forSize(size);
};
}
@Override
public JitType resolve(JitType varType) {
return type(varType.size());
}
},
/**
* For {@link JitCopyOp} and {@link JitPhiOp}: No type requirement or interpretation, but there
* is an implication that the output has the same interpretation as the inputs.
*/
COPY {
@Override
public JitType type(int size) {
throw new AssertionError();
}
@Override
public JitType resolve(JitType varType) {
return ANY.resolve(varType);
}
},
;
/**
* Compare two behaviors by preference. The behavior with the smaller ordinal is preferred.
*
* @param b1 the first behavior
* @param b2 the second behavior
* @return as in {@link Comparator#compare(Object, Object)}
*/
public static int compare(JitTypeBehavior b1, JitTypeBehavior b2) {
return Objects.compare(b1, b2, JitTypeBehavior::compareTo);
}
/**
* Apply this behavior to a value of the given size to determine its type
*
* @param size the size of the value in bytes
* @return the resulting type
* @throws AssertionError if the type is not applicable, and such an invocation was not expected
*/
public abstract JitType type(int size);
/**
* Re-apply this behavior to an existing type
*
* <p>
* For {@link #ANY} and {@link #COPY} the result is the given type. For {@link #INTEGER} and
* {@link #FLOAT} the result is this behavior applied to the size of the given type, as in
* {@link #type(int)}.
*
* @param varType the type
* @return the resulting type
*/
public abstract JitType resolve(JitType varType);
/**
 * Derive the type behavior from a Java language type.
 *
 * <p>
 * This is used on userops declared with Java primitives for parameters. To work with the
 * {@link JitTypeModel}, we need to specify the type behavior of each operand. We aim to select
 * behaviors such that the model allocates JVM locals whose JVM types match the userop method's
 * parameters. This optimizes type conversions during Direct invocation.
 *
 * <p>
 * The integral primitives and {@code boolean} yield {@link #INTEGER}; {@code float} and
 * {@code double} yield {@link #FLOAT}. For {@code char}, {@code void}, and any non-primitive
 * class, the result is {@code null}.
 *
 * @param cls the primitive class (not boxed)
 * @return the p-code type behavior, or {@code null} if there is none for the given class
 * @see JitDataFlowUseropLibrary
 */
public static JitTypeBehavior forJavaType(Class<?> cls) {
	boolean integral = cls == byte.class || cls == short.class || cls == int.class ||
		cls == long.class || cls == boolean.class;
	if (integral) {
		return INTEGER;
	}
	boolean floating = cls == float.class || cls == double.class;
	if (floating) {
		return FLOAT;
	}
	boolean unmapped = cls == char.class || cls == void.class;
	if (!unmapped && cls.isPrimitive()) {
		// Every primitive is enumerated above, so this would be a primitive we do not know
		throw new AssertionError();
	}
	return null;
}
}

View file

@ -0,0 +1,401 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.util.*;
import java.util.Map.Entry;
import org.objectweb.asm.Opcodes;
import ghidra.pcode.emu.jit.JitCompiler;
import ghidra.pcode.emu.jit.analysis.JitType.FloatJitType;
import ghidra.pcode.emu.jit.analysis.JitType.IntJitType;
import ghidra.pcode.emu.jit.op.*;
import ghidra.pcode.emu.jit.var.JitOutVar;
import ghidra.pcode.emu.jit.var.JitVal;
import ghidra.pcode.emu.jit.var.JitVal.ValUse;
import ghidra.program.model.pcode.PcodeOp;
/**
* The type analysis for JIT-accelerated emulation.
*
* <p>
* This implements the Type Assignment phase of the {@link JitCompiler} using a very basic "voting"
* algorithm. The result is an assignment of type to each {@link JitVal}. To be clear, at this
* phase, we're assigning types to variables (and constants) in the use-def graph, not varnodes.
* Later we do another bit of "voting" to determine the type of each JVM local allocated to a
* varnode. Perhaps we could be more direct, but in anticipation of future optimizations, we keep
* this analysis at the per-variable level. This is partly an artifact of exploration before
* deciding to allocate by varnode instead of by variable.
*
* <h2>Types in P-code and the JVM</h2>
* <p>
* P-code (and Sleigh) is a relatively type free language. Aside from size, variables have no type;
* they are just bit vectors. The operators are typed and cast the bits as required. This aligns
* well with most machine architectures. Registers are just bit vectors, and the instructions
* interpret them according to some type. In contrast, JVM variables have a type: {@code int},
* {@code long}, {@code float}, {@code double}, or a reference. Conversions between JVM types must
* be explicit, so we must attend to certain aspects of p-code types when consuming operands
* allocated in JVM locals. There are three aspects to consider when translating p-code types to the
* JVM: behavior, size, and signedness.
*
* <h3>Behavior: Integer vs. Float</h3>
* <p>
* The JVM has two integral types {@code int} and {@code long} of 4 and 8 bytes respectively. P-code
* has one integral type of no specified size. Or rather, it has many integral types: 1-byte int,
* 2-byte int, 3-byte int, and so on. We thus describe p-code operands as having a type
* {@link JitTypeBehavior behavior}: <em>integral</em> or <em>floating-point</em>. Note there are
* two ancillary behaviors <em>any</em> and <em>copy</em> to describe the operands of truly typeless
* operators, like {@link JitCopyOp}.
*
* <h3>Size</h3>
* <p>
* When paired with a varnode's size, we have enough information to start mapping p-code types to
* JVM types. For float types, p-code only supports specific sizes defined by IEEE 754: 2-byte
* half-precision, 4-byte single-precision, 8-byte double-precision, 10-byte extended-precision,
* 16-byte quadruple-precision, and 32-byte octuple-precision. Some p-code types map precisely to
* JVM counterparts: The 4- and 8-byte integer types map precisely to the JVM's {@code int} and
* {@code long} types. Similarly, the 4- and 8-byte float types map precisely to {@code float} and
* {@code double}. <b>TODO</b>: The JIT translator does not currently support integral types greater
* than 8 bytes (64 bits) in size nor floating-point types other than 4 and 8 bytes (single and
* double precision) in size.
*
* <h3>Signedness</h3>
* <p>
* All floating-point types are signed, whether in p-code or in the JVM, so there's little to
* consider in terms of mapping. Some p-code operators have signed operands, some have unsigned
* operands, and others have no signedness at all. In contrast, no JVM bytecodes are strictly
* unsigned. They are either signed or have no signedness. It was a choice of the Java language
* designers that all variables would be signed, and this is a consequence of that choice. In time,
* "unsigned" operations were introduced in the form of static methods, e.g.,
* {@link Integer#compareUnsigned(int, int)} and {@link Long#divideUnsigned(long, long)}. Note that
* at the bit level, unsigned multiplication is the same as signed, and so no "unsigned multiply"
* method was provided. This actually aligns well with p-code in that, for this aspect of
* signedness, the variables are all the same. Instead the operations apply the type interpretation.
* Thus, we need not consider signedness when allocating JVM locals.
*
* <h2>Conversions and Casts</h2>
* <p>
* Conversions between JVM primitive types must be explicit in the emitted bytecode, even if the
* intent is just to re-cast the bits. This is not the case for p-code. Conversions in p-code need
* only be explicit when they mutate the actual bits. Consider the following p-code:
*
* <pre>
* $U00:4 = FLOAT_ADD r0, r1
* r2 = INT_2COMP $U00:4
* </pre>
*
* <p>
* The naive translation to bytecode:
*
* <pre>
* FLOAD 1 # r0
* FLOAD 2 # r1
* FADD
* FSTORE 3 # $U00:4
* LDC 0
* ILOAD 3 # $U00:4
* ISUB
* ISTORE 4 # r2
* </pre>
*
* <p>
* Will cause an error when loading the class. This is because the local variable 3 must be one of
* {@code int} or {@code float}, and the bytecode must declare which, so either the {@code FSTORE 3}
* or the {@code ILOAD 3} will fail the JVM's type checker. To resolve this, we could assign the
* type {@code float} to local variable 3, and change the erroneous {@code ILOAD 3} to:
*
* <pre>
* FLOAD 3
* INVOKESTATIC {@link Float#floatToRawIntBits(float)}
* </pre>
*
* <p>
* At this point, the bit-vector contents of {@code $U00:4} are on the stack, but for all the JVM
* cares, they are now an {@code int}. We must assign a JVM type to each local we allocate and
* place bitwise type casts wherever the generated bytecodes would cause type disagreement. We would
* like to assign JVM types in a way that reduces the number of {@code INVOKESTATIC} bytecodes
* emitted. One could argue that we should instead seek to reduce the number of {@code INVOKESTATIC}
* bytecodes actually executed, but I pray the JVM's JIT compiler can recognize calls to
* {@link Float#floatToRawIntBits(float)} and similar and emit no native code for them, i.e., they
* ought to have zero run-time cost.
*
* <p>
* Size conversions cause a similar need for explicit conversions, for two reasons: 1) Any
* conversion between JVM {@code int} and {@code long} still requires specific bytecodes. Neither
* platform supports implicit conversion between {@code float} and {@code double}. 2) We allocate
* the smaller JVM integral type to accommodate each p-code integral type, so we must apply masks in
* some cases to ensure values do not exceed their p-code varnode size. Luckily, p-code also
* requires explicit conversions between sizes, e.g., using {@link PcodeOp#INT_ZEXT zext}. However,
* we often have to perform temporary conversions in order to meet the type/size requirements of JVM
* bytecodes.
*
* <p>
* Consider {@code r2 = INT_MULT r0, r1} where the registers are all 5 bytes. Thus, the registers
* are allocated as JVM locals of type {@code long}. We load {@code r0} and {@code r1} onto the
* stack, and then we emit an {@link Opcodes#LMUL}. Technically, the result is another JVM
* {@code long}, which maps to an 8-byte p-code integer. Thus, we must apply a mask to "convert" the
* result to a 5-byte p-code integer before storing the result in {@code r2}'s JVM local.
*
* <h2>Type Assignment</h2>
* <p>
* Given that only behavior and size require any explicit conversions, we omit signedness from the
* formal definition of p-code {@link JitType type}. It is just the behavior applied to a size,
* e.g., {@link IntJitType#I3 int3}.
*
* <p>
* We use a fairly straightforward voting algorithm that examines how each variable definition is
* used. The type of an operand is trivially determined by examining the behavior of each operand,
* as specified by the p-code opcode; and the size of the input varnode, specified by the specific
* p-code op instance. For example, the p-code op {@code $U00:4 = FLOAT_ADD r0, r1} has an output
* operand of {@link FloatJitType#F4 float4}. Thus, it casts a vote that {@code $U00:4} should be
* that type. However, the subsequent op {@code r2 = INT_2COMP $U00:4} casts a vote for
* {@link IntJitType#I4 int4}. We prefer an {@code int} when tied, so we assign {@code $U00:4} the
* type {@code int4}.
*
* <p>
* This becomes complicated in the face of typeless ops, namely {@link JitCopyOp copy} and
* {@link JitPhiOp phi}. Again, we'd like to reduce the number of casts we have to emit in the
* bytecode. Consider the op {@code r1 = COPY r0}. This should emit a load followed immediately by a
* store, but the JVM will require both the source and destination locals to have the same type.
* Otherwise, a cast is necessary. The votes regarding {@code r0} will thus need to incorporate the
* votes regarding {@code r1} and vice versa.
*
* <p>
* Our algorithm is a straightforward queued traversal of the use-def graph until convergence.
* First, we initialize a queue with all values (variables and constants) in the graph and
* initialize all type assignments to {@link JitTypeBehavior#ANY any}. We then process each value in
* the queue until it is empty. A value receives votes from its uses as required by each operand.
* {@link JitTypeBehavior#INTEGER integer} and {@link JitTypeBehavior#FLOAT float} behaviors count as 1
* vote for that behavior. The {@link JitTypeBehavior#ANY any} behavior contributes 0 votes. If the
* behavior is {@link JitTypeBehavior#COPY copy}, then we know the use is either a {@link JitCopyOp
* copy} or {@link JitPhiOp phi} op, so we fetch its output value. The op casts its vote for the
* tentative type of that output value. Similar is done for the value's defining op, if applicable.
* If it's a copy or phi, we start a sub contest where each input/option casts a vote for its
* tentative type. The defining op's vote is cast according to the winner of the sub contest. Ties
* favor {@link JitTypeBehavior#INTEGER integer}. The final winner is computed and the tentative
* type assignment is updated. If there are no votes, the tentative assignment is
* {@link JitTypeBehavior#ANY}.
*
* <p>
* When an update changes the tentative type assignment of a value, then all its neighbors are added
* back to the queue. Neighbors are those values connected to this one via a copy or phi. When the
* queue is empty, the tentative type assignments are made final. Any assignment that remains
* {@link JitTypeBehavior#ANY any} is treated as if {@link JitTypeBehavior#INTEGER int}.
* <b>TODO</b>: Prove that this algorithm always terminates.
*
* @implNote We do all the bookkeeping in terms of {@link JitTypeBehavior} and wait to resolve the
* actual type until the final assignment.
*/
public class JitTypeModel {
	/**
	 * A tally of votes used to decide a single type assignment
	 *
	 * <p>
	 * Votes for {@link JitTypeBehavior#ANY} and {@link JitTypeBehavior#COPY} are discarded, so
	 * those two never accumulate a count.
	 *
	 * @param counts the running count for each candidate (should just be empty initially)
	 */
	protected record Contest(Map<JitTypeBehavior, Integer> counts) {
		/**
		 * Start a new contest with no votes
		 */
		public Contest() {
			this(new HashMap<>());
		}

		/**
		 * Add some number of votes for the given candidate
		 *
		 * @param candidate the candidate type
		 * @param c the number of votes cast
		 */
		private void vote(JitTypeBehavior candidate, int c) {
			boolean abstains =
				candidate == JitTypeBehavior.ANY || candidate == JitTypeBehavior.COPY;
			if (!abstains) {
				counts.merge(candidate, c, Integer::sum);
			}
		}

		/**
		 * Add a single vote for the given candidate
		 *
		 * @param candidate the candidate type
		 */
		public void vote(JitTypeBehavior candidate) {
			vote(candidate, 1);
		}

		/**
		 * Order two candidate entries so that the "greater" one is the winner
		 *
		 * <p>
		 * The entry with more votes is greater. When the counts are equal, the comparison of the
		 * candidates themselves is negated: {@link JitTypeBehavior#INTEGER} has a lower ordinal
		 * than {@link JitTypeBehavior#FLOAT}, but it must compare as greater here so that
		 * {@link #winner()}, which seeks the maximum, prefers it.
		 *
		 * @param ent1 the first candidate-vote entry
		 * @param ent2 the second candidate-vote entry
		 * @return negative if the <em>second</em> wins, positive if the <em>first</em> wins. 0
		 *         should never result, unless we're comparing a candidate with itself.
		 */
		public static int compareCandidateEntries(Entry<JitTypeBehavior, Integer> ent1,
				Entry<JitTypeBehavior, Integer> ent2) {
			int byVotes = Integer.compare(ent1.getValue(), ent2.getValue());
			if (byVotes != 0) {
				return byVotes;
			}
			// Negated, so that INT is preferred to FLOAT
			return -JitTypeBehavior.compare(ent1.getKey(), ent2.getKey());
		}

		/**
		 * Determine the winner of the contest
		 *
		 * @return the winner, or {@link JitTypeBehavior#ANY} if there are no entries
		 */
		public JitTypeBehavior winner() {
			Entry<JitTypeBehavior, Integer> best = null;
			for (Entry<JitTypeBehavior, Integer> ent : counts.entrySet()) {
				// Only a strictly greater entry displaces the current best
				if (best == null || compareCandidateEntries(best, ent) < 0) {
					best = ent;
				}
			}
			if (best == null) {
				return JitTypeBehavior.ANY;
			}
			JitTypeBehavior key = best.getKey();
			return key == null ? JitTypeBehavior.ANY : key;
		}
	}

	private final JitDataFlowModel dfm;
	private final SequencedSet<JitVal> queue = new LinkedHashSet<>();
	private final Map<JitVal, JitTypeBehavior> assignments = new HashMap<>();

	/**
	 * Construct the type model
	 *
	 * <p>
	 * The analysis runs immediately, as part of construction.
	 *
	 * @param dfm the data flow model whose use-def graph to process
	 */
	public JitTypeModel(JitDataFlowModel dfm) {
		this.dfm = dfm;
		analyze();
	}

	/**
	 * Determine the vote cast by one use of a value
	 *
	 * <p>
	 * Ordinarily, this is the behavior the use requires. If the use requires
	 * {@link JitTypeBehavior#COPY} and its op defines an output, the vote is instead the
	 * tentative assignment of that output.
	 *
	 * @param use the use
	 * @return the behavior voted for
	 */
	private JitTypeBehavior voteOfUse(ValUse use) {
		JitTypeBehavior required = use.type();
		if (required != JitTypeBehavior.COPY) {
			return required;
		}
		if (use.op() instanceof JitDefOp def) {
			return assignments.get(def.out());
		}
		return required;
	}

	/**
	 * Determine the vote cast by the op defining a variable
	 *
	 * <p>
	 * Ordinarily, this is the behavior of the op's output. If that is
	 * {@link JitTypeBehavior#COPY}, a sub contest is held in which each input votes for its
	 * tentative assignment, and the vote is the winner of that sub contest.
	 *
	 * @param def the defining op
	 * @return the behavior voted for
	 */
	private JitTypeBehavior voteOfDefinition(JitDefOp def) {
		JitTypeBehavior produced = def.type();
		if (produced != JitTypeBehavior.COPY) {
			return produced;
		}
		Contest subContest = new Contest();
		for (JitVal upstream : def.inputs()) {
			subContest.vote(assignments.get(upstream));
		}
		return subContest.winner();
	}

	/**
	 * Compute the new tentative assignment for the given value
	 *
	 * <p>
	 * This tallies the votes from each of the value's uses (downstream) and, if the value is an
	 * output variable, from its defining op (upstream), then selects the winner.
	 *
	 * @param v the value
	 * @return the new assignment
	 */
	protected JitTypeBehavior computeNewAssignment(JitVal v) {
		Contest tally = new Contest();
		// Downstream votes
		for (ValUse use : v.uses()) {
			tally.vote(voteOfUse(use));
		}
		// Upstream vote
		if (v instanceof JitOutVar out) {
			tally.vote(voteOfDefinition(out.definition()));
		}
		return tally.winner();
	}

	/**
	 * Re-add the given value's neighbors to the processing queue.
	 *
	 * <p>
	 * Neighbors are the values connected to the given one through an operand requiring
	 * {@link JitTypeBehavior#COPY}, e.g., via {@link JitCopyOp} or {@link JitPhiOp}. Such ops may
	 * change their vote now that this value's tentative type has changed. The queue is a
	 * {@link SequencedSet}, so a value already waiting in it is not added a second time.
	 *
	 * @param v the value whose neighbors to re-process
	 */
	protected void queueNeighbors(JitVal v) {
		for (ValUse use : v.uses()) {
			if (use.type() != JitTypeBehavior.COPY) {
				continue;
			}
			if (use.op() instanceof JitDefOp def) {
				queue.add(def.out());
			}
		}
		if (v instanceof JitOutVar out) {
			JitDefOp def = out.definition();
			if (def.type() == JitTypeBehavior.COPY) {
				queue.addAll(def.inputs());
			}
		}
	}

	/**
	 * Perform the analysis
	 *
	 * <p>
	 * Every value is queued and tentatively assigned {@link JitTypeBehavior#ANY}. Then, values
	 * are taken from the front of the queue one at a time, and a contest is run to update each
	 * one's assignment. Whenever the assignment changes, the value's neighbors (if any) are
	 * queued again. The analysis ends when the queue is empty.
	 */
	protected void analyze() {
		for (JitVal v : dfm.allValues()) {
			queue.add(v);
			assignments.put(v, JitTypeBehavior.ANY);
		}
		while (!queue.isEmpty()) {
			JitVal next = queue.removeFirst();
			JitTypeBehavior updated = computeNewAssignment(next);
			JitTypeBehavior previous = assignments.put(next, updated);
			if (previous != updated) {
				queueNeighbors(next);
			}
		}
	}

	/**
	 * Get the final type assignment for the given value
	 *
	 * <p>
	 * The assigned behavior is applied to the value's size to produce the type.
	 *
	 * @param v the value
	 * @return the value's assigned type
	 */
	public JitType typeOf(JitVal v) {
		JitTypeBehavior behavior = assignments.get(v);
		return behavior.type(v.size());
	}
}

View file

@ -0,0 +1,531 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.analysis;
import java.util.*;
import java.util.Map.Entry;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorState;
import ghidra.pcode.emu.jit.JitCompiler;
import ghidra.pcode.emu.jit.JitCompiler.Diag;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.BlockFlow;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.program.model.address.Address;
import ghidra.program.model.lang.Register;
import ghidra.program.model.pcode.Varnode;
import ghidra.util.MathUtilities;
/**
* The variable scope analysis of JIT-accelerated emulation.
*
* <p>
* This implements the Variable Scope Analysis phase of the {@link JitCompiler}. The result provides
* the set of in-scope (alive) varnodes for each basic block. The design of this analysis, and the
* shortcuts we take, are informed by the design of downstream phases. In particular, we do not
* intend to allocate each SSA variable. There are often many, many such variables, and attempting
* to allocate them to as few target resources, e.g., JVM locals, as possible is <em>probably</em> a
* complicated and expensive algorithm. I don't think we'd gain much from it either. Instead, we'll
* just allocate by varnode. To do that, though, we still have to consider that some varnodes
* overlap and otherwise alias others. If we are able to handle all that aliasing in place, then we
* need not generate code for the synthetic ops. One might ask, well then why do any of the Data
* Flow Analysis in the first place? 1) We still need data flow to inform the selection of JVM local
* types. We have not measured the run-time cost of the bitwise casts, but we do know the bytecode
* for each cast occupies space, counted against the 65,535-byte max. 2) We also need data flow to
* inform operation elimination, which removes many wasted flag computations.
*
* <p>
* To handle the aliasing, we coalesce overlapping varnodes. For example, {@code EAX} will get
* coalesced with {@code RAX}, but {@code BH} <em>will not</em> get coalesced with {@code BL},
* assuming no other part of {@code RBX} is accessed. The {@link JitDataFlowModel} records all
* varnodes accessed in the course of its intra-block analysis. Only those actually accessed are
* considered. We then compute scope in terms of these coalesced varnodes. For example, if both
* {@code RAX} and {@code EAX} are used by a passage, then an access of {@code EAX} causes
* {@code RAX} to remain in scope.
*
* <p>
* The decision to compute scope on a block-by-block basis instead of op-by-op is for simplicity. We
* intend to birth and retire variables along block transitions by considering what variables are
* coming into or leaving scope on the flow edge. <em>Birthing</em> is just reading a variable's
* value from the run-time {@link JitBytesPcodeExecutorState state} into its allocated JVM local.
* Conversely, <em>retiring</em> is writing the value back out to the state. There's little to be
* gained by retiring a variable midway through a block as opposed to the end of the block. Perhaps
* if one giant block handles a series of variables in sequence, we could have used a single JVM
* local to allocate each, but we're already committed to allocating a JVM local per (coalesced)
* varnode. So, while that may ensure only one variable is alive at a time, the number of JVM locals
* required remains the same. Furthermore, the amount of bytecode emitted remains the same, but at
* different locations in the block. The case where this might be worth considering is a userop
* invocation, because all live variables must be forcefully retired.
*
* <p>
* We then consider what common cases we want to ensure are optimized, when we're limited to a
* block-by-block analysis. One that comes to mind is a function with an early bail. Consider the
* following C source:
*
* <pre>
* int func(my_struct* ptr) {
* if (ptr == NULL) {
* return ERR;
* }
* // Do some serious work
* return ptr->v;
* }
* </pre>
*
* <p>
* Often, the C compiler will group all the returns into one final basic block, so we might get the
* following p-code:
*
* <pre>
* 1 RSP = INT_SUB RSP, 0x20:8
* 2 $U00:1 = INT_EQUAL RDI, 0:8 # RDI is ptr
* 3 CBRANCH &lt;err&gt;, $U00:1
*
* 4 # Do some serious work
* 5 $U10:8 = INT_ADD RDI, 0xc:8 # Offset to field v
* 6 EAX = LOAD [ram] $U10:8
* 7 BRANCH &lt;exit&gt;
* &lt;err&gt;
* 8 EAX = COPY 0xffffffff:4
* &lt;exit&gt;
* 9 RSP = INT_ADD RSP, 0x20:8
* 10 RIP = LOAD [ram] RSP
* 11 RSP = INT_ADD RSP, 8:8
* 12 RETURN RIP
* </pre>
*
* <p>
* Note that I've elided the actual x86 machine code and all of the noise generated by C compilation
* and p-code lifting, and I've presumed the decoded passage contains exactly the example function.
* The result is your typical if-else diamond. We'll place the error case on the left:
*
* <pre>
* +---------+
* | 1--3 |
* | CBRANCH |
* +-T-----F-+
* / \
* / \
* +--------+ +--------+
* | 8 | | 4--7 |
* | (fall) | | BRANCH |
* +--------+ +--------+
* \ /
* \ /
* +---------+
* | 9--12 |
* | RETURN |
* +---------+
* </pre>
*
* <p>
* Suppose the "serious work" on line 4 accesses several varnodes: RBX, RCX, RDX, and RSI. If
* execution follows the error path, we'd rather not birth any of those variables. Thus, we might
* like the result of the scope analysis to be:
*
* <p>
* <table border="1">
* <tr>
* <th>Block</th>
* <th>Live Vars</th>
* </tr>
* <tr>
* <td>1&ndash;3</td>
* <td>RDI, RSP, $U00:1</td>
* </tr>
* <tr>
* <td>4&ndash;7</td>
* <td>EAX, RBX, RCX, RDI, RDX, RSI, RSP, $U10:8</td>
* </tr>
* <tr>
* <td>8</td>
* <td>EAX, RSP</td>
* </tr>
* <tr>
* <td>9&ndash;12</td>
* <td>RIP, RSP</td>
* </tr>
* </table>
*
* <p>
* This can be achieved rather simply: Define two sets for each block, the upward view and the
* downward view. The first corresponds to all varnodes that could be accessed before entering this
* block or while in it. The second corresponds to all varnodes that could be accessed while in this
* block or after leaving it. The upward view is computed by initializing each set to the varnodes
* accessed by its block. Then we "push" each set upward by adding its elements into the set for
* each block with flows into this one, until the sets converge. The downward sets are similarly
* computed, independently of the upward sets. The result is the intersection of these sets, per
* block. The algorithm is somewhat intuitive in that we accrue live variables as we move toward the
* "body" of the control flow graph, and they begin to drop off as we approach an exit. The accrual
* is captured by the downward set, and the drop off is captured by intersection with the upward
* set. This will also prevent retirement and rebirth of variables. Essentially, if we are between
* two accesses of a varnode, then that varnode is alive. Consider {@code RSP} from the example
* above. The algorithm considers it alive in blocks 4&ndash;7 and 8, despite the fact neither
* actually accesses it. Nevertheless, we'd rather generate one birth upon entering block 1&ndash;3,
* keep it alive in the body, and then generate one retirement upon leaving block 9&ndash;12.
*
* <p>
* One notable effect of this algorithm is that all blocks in a loop will have the same variables in
* scope.... I think this is okay. We'll birth the relevant variables upon entering the loop, keep
* them all alive during loop execution, and then retire them (unless they're accessed downstream)
* upon leaving.
*
* @implNote <b>TODO</b>: There's some nonsense to figure out with types. It would be nice if we
* could allow variables of different types to occupy the same location at different
* times. This can be the case, e.g., if a register is used as a temporary location for
* copying values around. If there are times when it's treated as an int and other times
* when it's treated as a float, we could avoid unnecessary Java type conversions.
* However, this would require us to track liveness with types, and at that granularity,
* it could get unwieldy. My inclination is to just consider location liveness and then
* have the allocator decide what type to assign the local variable for that location
* based on some voting system. This is not the best, because some access sites are
* executed more often than others, but it'll suffice.
*/
public class JitVarScopeModel {
/**
 * Selects the direction-specific parts of a {@link ScopeInfo} when computing the upward and
 * downward views.
 */
enum Which {
	/**
	 * The upward view: sets move against the direction of control flow
	 */
	UP {
		@Override
		Collection<JitBlock> getFlows(ScopeInfo info) {
			// Take the flows arriving at this block, and follow each back to its origin
			var incoming = info.block.flowsTo().values();
			return incoming.stream().map(BlockFlow::from).toList();
		}

		@Override
		Set<Varnode> getLive(ScopeInfo info) {
			return info.liveUp;
		}

		@Override
		Set<Varnode> getQueued(ScopeInfo info) {
			return info.queuedUp;
		}
	},
	/**
	 * The downward view: sets move along the direction of control flow
	 */
	DOWN {
		@Override
		Collection<JitBlock> getFlows(ScopeInfo info) {
			// Take the flows leaving this block, and follow each to its destination
			var outgoing = info.block.flowsFrom().values();
			return outgoing.stream().map(BlockFlow::to).toList();
		}

		@Override
		Set<Varnode> getLive(ScopeInfo info) {
			return info.liveDn;
		}

		@Override
		Set<Varnode> getQueued(ScopeInfo info) {
			return info.queuedDn;
		}
	};

	/**
	 * Get the blocks toward which the given block's set is pushed
	 *
	 * @param info the intermediate analytic result for the block whose set to push
	 * @return the blocks into which our set will be unioned
	 */
	abstract Collection<JitBlock> getFlows(ScopeInfo info);

	/**
	 * Get the set, for this view, of varnodes currently considered live in the given block
	 *
	 * @param info the intermediate analytic result for the block whose set to get
	 * @return the set of live varnodes
	 */
	abstract Set<Varnode> getLive(ScopeInfo info);

	/**
	 * Get the varnodes, for this view, still waiting to be added to the given block's set
	 *
	 * @param info the intermediate analytic result for the given block
	 * @return the set of queued live varnodes
	 */
	abstract Set<Varnode> getQueued(ScopeInfo info);
}
/**
 * Holds the working sets and the final result of the analysis for one block
 */
private class ScopeInfo {
	private final JitBlock block;
	private final Set<Varnode> liveUp = new HashSet<>();
	private final Set<Varnode> liveDn = new HashSet<>();
	private final Set<Varnode> queuedUp = new HashSet<>();
	private final Set<Varnode> queuedDn = new HashSet<>();
	private final Set<Varnode> liveVars = new LinkedHashSet<>();
	private final Set<Varnode> liveVarsImm = Collections.unmodifiableSet(liveVars);

	/**
	 * Construct the result for the given block
	 *
	 * <p>
	 * Both views are seeded with the (coalesced) varnodes the block reads and writes.
	 *
	 * @param block the block
	 */
	public ScopeInfo(JitBlock block) {
		this.block = block;
		JitDataFlowBlockAnalyzer analyzer = dfm.getAnalyzer(block);
		seed(analyzer.getVarnodesRead());
		seed(analyzer.getVarnodesWritten());
	}

	/**
	 * Queue the coalesced form of each given varnode in both views
	 *
	 * <p>
	 * Varnodes for which {@link Varnode#isAddress()} is true are left out.
	 *
	 * @param accessed the varnodes accessed by this block
	 */
	private void seed(Iterable<? extends Varnode> accessed) {
		for (Varnode vn : accessed) {
			if (vn.isAddress()) {
				continue;
			}
			Varnode whole = getCoalesced(vn);
			queuedUp.add(whole);
			queuedDn.add(whole);
		}
	}

	/**
	 * Push this block's queue for the given view
	 *
	 * <p>
	 * The queued varnodes are offered to each block in the view's direction. Those a block does
	 * not already have live are added to that block's queue, and if its queue grows as a result,
	 * the block is scheduled to be processed again. Finally, the queued varnodes become live in
	 * this block, and this block's queue is emptied.
	 *
	 * @param which which view (direction)
	 */
	private void push(Which which) {
		Set<Varnode> pending = which.getQueued(this);
		if (pending.isEmpty()) {
			return;
		}
		for (JitBlock neighbor : which.getFlows(this)) {
			ScopeInfo target = infos.get(neighbor);
			Set<Varnode> alreadyLive = which.getLive(target);
			Set<Varnode> fresh = new HashSet<>(pending);
			fresh.removeIf(alreadyLive::contains);
			boolean grew = which.getQueued(target).addAll(fresh);
			if (grew) {
				blockQueue.add(target);
			}
		}
		which.getLive(this).addAll(pending);
		pending.clear();
	}

	/**
	 * Finish the analytic computation for this block
	 *
	 * <p>
	 * The result is the intersection of the upward and downward views, ordered by address. A
	 * variable is alive in a block if the block accesses it, or if the block lies between (in
	 * terms of possible control-flow paths) two others that access it.
	 */
	private void finish() {
		List<Varnode> byAddress = new ArrayList<>(liveUp);
		byAddress.sort(Comparator.comparing(Varnode::getAddress));
		liveVars.addAll(byAddress);
		liveVars.retainAll(liveDn);
	}
}
// The control flow model, which provides the blocks to analyze
private final JitControlFlowModel cfm;
// The data flow model, which provides the varnodes read and written by each block
private final JitDataFlowModel dfm;
// The coalesced varnodes, each keyed by its minimum address
private final NavigableMap<Address, Varnode> coalesced = new TreeMap<>();
// The (intermediate) analytic result for each block
private final Map<JitBlock, ScopeInfo> infos = new HashMap<>();
// The blocks waiting to have their queued varnodes pushed; being a set, each appears at most once
private final SequencedSet<ScopeInfo> blockQueue = new LinkedHashSet<>();
/**
* Construct the model
*
* <p>
* The analysis is performed immediately, as part of construction.
*
* @param cfm the control flow model
* @param dfm the data flow model
*/
public JitVarScopeModel(JitControlFlowModel cfm, JitDataFlowModel dfm) {
this.cfm = cfm;
this.dfm = dfm;
// Both models are assigned first, because the analysis reads from them
analyze();
}
/**
 * Compute the address of the last byte of a varnode
 *
 * @param varnode the node
 * @return the max address (inclusive)
 */
static Address maxAddr(Varnode varnode) {
	Address first = varnode.getAddress();
	int lastOffset = varnode.getSize() - 1;
	return first.add(lastOffset);
}
/**
 * Test whether two varnodes overlap, given that the first starts no later than the second
 *
 * @param left the left varnode (having lower address)
 * @param right the right varnode (having higher address)
 * @return true if they overlap (not counting abutting), false otherwise.
 */
static boolean overlapsLeft(Varnode left, Varnode right) {
	Address leftMax = maxAddr(left);
	Address rightMin = right.getAddress();
	// The max is inclusive, so reaching the right's first byte already counts as overlap
	return leftMax.compareTo(rightMin) >= 0;
}
/**
* Merge the given varnode into the map of coalesced varnodes
*
* <p>
* The map is keyed by minimum address. The given varnode's range is extended to include the
* existing entries it overlaps on either side, the entries starting within the range are
* removed, and a single varnode spanning the extended range is put in their place. If a single
* existing entry already spans exactly that range, the map is left unchanged.
*
* @param varnode the varnode to incorporate
*/
private void coalesceVarnode(Varnode varnode) {
Address min = varnode.getAddress();
Address max = maxAddr(varnode);
// Extend leftward if the nearest entry starting at or before us reaches into this varnode
Entry<Address, Varnode> leftEntry = coalesced.floorEntry(min);
if (leftEntry != null && overlapsLeft(leftEntry.getValue(), varnode)) {
min = leftEntry.getKey();
}
// Extend rightward to the end of the last entry starting at or before our last byte. If
// that entry ends before we do, cmax leaves max as is.
Entry<Address, Varnode> rightEntry = coalesced.floorEntry(max);
if (rightEntry != null) {
max = MathUtilities.cmax(max, maxAddr(rightEntry.getValue()));
}
Varnode exists = leftEntry == null ? null : leftEntry.getValue();
Varnode existsRight = rightEntry == null ? null : rightEntry.getValue();
// Both lookups found the same entry, and it already spans exactly [min, max]
if (exists == existsRight && exists != null && exists.getAddress().equals(min) &&
maxAddr(exists).equals(max)) {
return; // no change
}
// Remove the entries starting in [min, this varnode's own max], then add the merged one.
// NOTE(review): the upper bound is not the extended max. This appears to rely on existing
// entries never overlapping one another, so that none can start between this varnode's
// last byte and the extended max -- confirm.
coalesced.subMap(min, true, maxAddr(varnode), true).clear();
coalesced.put(min, new Varnode(min, (int) max.subtract(min) + 1));
}
/**
 * Collect every varnode read or written by any block and coalesce those that are not memory
 * addresses, per {@link Varnode#isAddress()}.
 */
private void coalesceVarnodes() {
    Set<Varnode> seen = new HashSet<>();
    for (JitBlock blk : cfm.getBlocks()) {
        seen.addAll(dfm.getAnalyzer(blk).getVarnodesRead());
        seen.addAll(dfm.getAnalyzer(blk).getVarnodesWritten());
    }
    for (Varnode vn : seen) {
        if (vn.isAddress()) {
            // Not subject to coalescing. See getCoalesced, which returns these as is.
            continue;
        }
        coalesceVarnode(vn);
    }
}
/**
 * Look up the coalesced varnode that contains the given varnode
 *
 * <p>
 * A varnode for which {@link Varnode#isAddress()} is true is returned as is. Otherwise, the
 * result is the coalesced entry starting at or before the given varnode's address. Often, that
 * is a varnode equal to the one given.
 *
 * @param part the varnode
 * @return the coalesced varnode
 */
public Varnode getCoalesced(Varnode part) {
    if (!part.isAddress()) {
        Varnode whole = coalesced.floorEntry(part.getAddress()).getValue();
        assert overlapsLeft(whole, part);
        return whole;
    }
    return part;
}
/**
 * Take the next block from the queue and perform its push in the given direction
 *
 * <p>
 * The push may re-queue blocks whose sets it affected.
 *
 * @param which which view is being computed (direction)
 * @return true if at least one block remains in the queue afterward
 */
private boolean pushNext(Which which) {
    if (!blockQueue.isEmpty()) {
        blockQueue.removeFirst().push(which);
        return !blockQueue.isEmpty();
    }
    return false;
}
/**
 * Perform the analysis.
 *
 * <p>
 * After coalescing the varnodes, every block is queued and the upward set is computed by
 * pushing until the queue is empty. An affected block is re-added to the queue, so an empty
 * queue means the computation has converged. All blocks are then queued again, and the downward
 * set is computed in the same fashion. Finally, each block's result is finished.
 */
private void analyze() {
    coalesceVarnodes();
    for (JitBlock blk : cfm.getBlocks()) {
        ScopeInfo created = new ScopeInfo(blk);
        infos.put(blk, created);
        blockQueue.add(created);
    }

    boolean more;
    do {
        more = pushNext(Which.UP);
    }
    while (more);

    blockQueue.addAll(infos.values());
    do {
        more = pushNext(Which.DOWN);
    }
    while (more);

    infos.values().forEach(ScopeInfo::finish);
}
/**
 * Get all of the coalesced varnodes, ordered by address
 *
 * @return a view of the varnodes
 */
public Iterable<Varnode> coalescedVarnodes() {
    return this.coalesced.values();
}
/**
 * Get the varnodes computed as live for the given block
 *
 * @param block the block, which must be one that was analyzed
 * @return the live varnodes
 */
public Set<Varnode> getLiveVars(JitBlock block) {
    ScopeInfo info = infos.get(block);
    return info.liveVarsImm;
}
/**
 * For diagnostics: Dump the analysis result to stderr
 *
 * <p>
 * For each block, this prints the block followed by the sorted names of its live varnodes. A
 * varnode for which the language has a register at the same address and size is printed by
 * register name. A unique is printed as {@code $U<offset>:<size>}. Any other varnode is printed
 * as {@code <space>:<offset>:<size>}. Offsets are printed in hexadecimal and sizes in decimal.
 *
 * @see Diag#PRINT_VSM
 */
public void dumpResult() {
    System.err.println("STAGE: VarLiveness");
    for (JitBlock block : cfm.getBlocks()) {
        System.err.println(" Block: " + block);
        // A sorted set, so the names print in a stable order
        Set<String> liveNames = new TreeSet<>();
        for (Varnode vn : infos.get(block).liveVarsImm) {
            Register register = block.getLanguage().getRegister(vn.getAddress(), vn.getSize());
            if (register != null) {
                liveNames.add(register.getName());
            }
            else if (vn.isUnique()) {
                liveNames.add("$U%x:%d".formatted(vn.getOffset(), vn.getSize()));
            }
            else {
                // Print the actual size. The format previously hard-coded ":4" and so ignored
                // the size argument.
                liveNames.add("%s:%x:%d".formatted(vn.getAddress().getAddressSpace().getName(),
                    vn.getOffset(), vn.getSize()));
            }
        }
        System.err.println(" Live: " + liveNames);
    }
}
}

View file

@ -0,0 +1,32 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.decode;
import java.util.List;
import ghidra.pcode.emu.jit.JitPassage.AddrCtx;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.pcode.PcodeOp;
/**
 * The result of decoding one stride: a run of contiguous instructions connected by fall
 * through, together with the p-code ops they emitted
 *
 * @param start the address and contextreg value that seeded this stride
 * @param instructions the instructions, in the order decoded
 * @param ops the p-code ops, in the order decoded and emitted
 * @see JitPassageDecoder
 */
record DecodedStride(
        AddrCtx start,
        List<Instruction> instructions,
        List<PcodeOp> ops) {
}

View file

@ -0,0 +1,563 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.decode;
import java.math.BigInteger;
import java.util.*;
import ghidra.app.plugin.processors.sleigh.SleighParserContext;
import ghidra.app.util.PseudoInstruction;
import ghidra.pcode.emu.PcodeMachine;
import ghidra.pcode.emu.jit.JitPassage.*;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.*;
import ghidra.pcode.emu.jit.analysis.JitDataFlowState;
import ghidra.pcode.emu.jit.op.JitNopOp;
import ghidra.pcode.exec.*;
import ghidra.program.disassemble.Disassembler;
import ghidra.program.model.address.Address;
import ghidra.program.model.lang.*;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.listing.ProgramContext;
import ghidra.program.model.mem.MemoryAccessException;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.util.Msg;
/**
* The p-code interpreter used during passage decode
*
* <p>
* Aside from branches, this interpreter simply logs each op, so that they get collected into the
* greater stride and passage. It does "rewrite" the ops, so that we can easily recover the input
* context, especially when the op is emitted from a user inject. For branches, this interpreter
* creates the appropriate {@link Branch} records and notifies the passage decoder of new seeds.
*
* <p>
* This executor also implements the {@link DisassemblerContext} to track context changes, namely
* uses of {@code globalset}. This is kept in {@link #futCtx}. <b>TODO</b>: Should {@link #futCtx}
* be moved into the passage decoder to ensure it persists for more than a single instruction? I'm
* not sure whether or not that is already taken care of by the {@link Disassembler}.
*
* @implNote I had considered using a {@link JitDataFlowState} here, but that's Not a Good Idea,
* because a stride is not generally a <em>basic block</em>. A "stride" is just a
* contiguous run of instructions with fall-through. If there is a jump into the middle of
* it, any value analysis (e.g., constant folding) would be meaningless. Were we to put
* this in there, the temptation may be to have userop libraries attempt constant
* resolution, esp., for syscall numbers. While that may work, if only because syscall
* numbers are conventionally set in the same basic block as the invocation, there's no
* guarantee that's the case. And there may be other use cases where this is totally
* wrong. Instead, we should use as barren an executor here as possible. We do incorporate
* injects here, because they may affect control flow, which the decoder must consider.
*
* @implNote <b>WARNING</b>: This executor has no {@link PcodeExecutorState state} object. Care must
* be taken to ensure we override any method that assumes we have one, and that we don't
* invoke any method from the superclass that assumes we have one.
*
*/
class DecoderExecutor extends PcodeExecutor<Object>
implements DisassemblerContextAdapter {
/** The decoder for the stride to which this step belongs */
private final DecoderForOneStride stride;
/** The address and contextreg value of the instruction for this step */
final AddrCtx at;
/** The instruction for this step, or {@code null} if none has been set */
private PseudoInstruction instruction;
/** The terminal nop, created only if an internal branch targets the end of the p-code */
private NopPcodeOp termNop;
/** The pre-computed "flow" context, not accounting for {@code globalset} */
private RegisterValue flow;
/** The context changes collected by {@link #setFutureRegisterValue(Address, RegisterValue)} */
private final Map<Address, RegisterValue> futCtx = new HashMap<>();
/** The re-written ops logged during this step, in the order they were stepped */
final List<PcodeOp> opsForThisStep = new ArrayList<>();
/** The branch records collected during this step */
private final List<Branch> branchesForThisStep = new ArrayList<>();
/** The re-written op for each original op, so that re-writing retains identity */
private final Map<PcodeOp, DecodedPcodeOp> rewrites = new HashMap<>();
/**
 * Create the interpreter for one instruction step
 *
 * <p>
 * The superclass is given the language only. There is no state, arithmetic, or reason.
 *
 * @param stride the stride being decoded
 * @param at the address and contextreg value of the instruction
 * @param instruction the instruction, or {@code null} if it is not yet known
 */
DecoderExecutor(DecoderForOneStride stride, AddrCtx at, PseudoInstruction instruction) {
    super(stride.decoder.thread.getLanguage(), null, null, null);
    this.at = at;
    this.stride = stride;
    // Must come last: computing the flow context reads both fields above
    setInstruction(instruction);
}
/**
 * Create the interpreter before its instruction is known
 *
 * <p>
 * The decoder must supply the instruction via {@link #setInstruction(PseudoInstruction)} as
 * soon as it becomes available. That happens either 1) because the step resulted in a simple
 * instruction, or 2) because a user inject caused the instruction to be decoded.
 *
 * @param stride the stride being decoded
 * @param at the address and contextreg value of the instruction
 */
DecoderExecutor(DecoderForOneStride stride, AddrCtx at) {
    this(stride, at, (PseudoInstruction) null);
}
/**
 * Wrap the given op as a {@link DecodedPcodeOp} carrying the given address/contextreg value
 *
 * <p>
 * An op that is already a {@link DecodedPcodeOp}, e.g., a {@link DecodeErrorPcodeOp} or
 * {@link NopPcodeOp}, is returned as is. In that case, it is asserted that the op was decoded
 * at the same address and context.
 *
 * @param at the address and decode context
 * @param op the original p-code op
 * @return the equivalent op, re-written
 */
static DecodedPcodeOp rewriteOp(AddrCtx at, PcodeOp op) {
    if (!(op instanceof DecodedPcodeOp dec)) {
        return new DecodedPcodeOp(at, op);
    }
    assert dec.getAt().equals(at);
    return dec;
}
/**
 * Re-write the given op for this step's address and decode context
 *
 * <p>
 * Each interpreter serves a single instruction step, so the target is already known. The
 * result is remembered per original op. Re-writing the same op again returns the same object,
 * which retains identity among the re-written ops.
 *
 * @param op the op to re-write
 * @return the equivalent op, re-written
 */
DecodedPcodeOp rewrite(PcodeOp op) {
    DecodedPcodeOp rewritten = rewrites.computeIfAbsent(op, orig -> rewriteOp(this.at, orig));
    return rewritten;
}
/**
 * Set the current instruction and pre-compute the resulting "flow" context.
 *
 * <p>
 * The flow context is the input context for the next decoded instruction, not accounting for
 * {@code globalset}. It starts from zero, takes the language's default value at this address,
 * and then takes the flowing bits of this step's input context. The instruction's context
 * commits are then processed, so that {@code globalset} can be accounted for when a branch or
 * fall-through target is derived.
 *
 * <p>
 * If there is no input context, no instruction, or the instruction is a decode error, the
 * flow context is just the input context and no commits are processed.
 *
 * @param instruction the instruction
 */
void setInstruction(PseudoInstruction instruction) {
    this.instruction = instruction;

    boolean hasContext = at.rvCtx != null;
    boolean decoded = instruction != null && !(instruction instanceof DecodeErrorInstruction);
    if (!hasContext || !decoded) {
        this.flow = at.rvCtx;
        return;
    }

    Register contextreg = stride.decoder.contextreg;
    ProgramContext defaults = stride.decoder.defaultContext;
    RegisterValue zeroed = new RegisterValue(contextreg, BigInteger.ZERO);
    RegisterValue withDefaults =
        zeroed.combineValues(defaults.getDefaultValue(contextreg, at.address));
    this.flow = withDefaults.combineValues(defaults.getFlowValue(at.rvCtx));
    processContextChanges();
}
/**
 * Decode the instruction at this step's address and make it the current instruction
 *
 * <p>
 * Decoding can be delayed if there is a user inject at the target address. In that case, this
 * may be invoked by {@link DecoderUseropLibrary#emu_exec_decoded(PcodeExecutor)} or
 * {@link DecoderUseropLibrary#emu_skip_decoded(PcodeExecutor)}.
 *
 * @return the decoded instruction, which may be a {@link DecodeErrorInstruction}
 */
PseudoInstruction decodeInstruction() {
    PseudoInstruction decoded = stride.decoder.decodeInstruction(at.address, at.rvCtx);
    setInstruction(decoded);
    return decoded;
}
/**
 * Apply the current instruction's context commits to this executor
 *
 * <p>
 * A {@link MemoryAccessException} is not expected here and is re-thrown as an
 * {@link AssertionError}.
 */
private void processContextChanges() {
    try {
        ((SleighParserContext) instruction.getParserContext()).applyCommits(this);
    }
    catch (MemoryAccessException e) {
        throw new AssertionError(e);
    }
}
/**
 * Interpret the given program using the userop library provided by the passage decoder
 *
 * @param program the p-code to interpret
 */
public void execute(PcodeProgram program) {
    PcodeUseropLibrary<Object> library = stride.passage.library();
    execute(program, library);
}
/**
 * {@inheritDoc}
 *
 * @implNote This is where a "terminal nop," if one was needed, gets appended. A jump to (it
 *           should never be past) the end of the program requires one. Rather than work out
 *           which op follows this instruction, so that such jumps can target it, a special
 *           nop is created and the jump targets that instead. It is added to the ops once the
 *           end of the p-code program proper is reached.
 */
@Override
public void finish(PcodeFrame frame, PcodeUseropLibrary<Object> library) {
    super.finish(frame, library);
    if (termNop == null) {
        return;
    }
    opsForThisStep.add(termNop);
}
/**
 * {@inheritDoc}
 *
 * <p>
 * Only branching ops really need interpreting here. {@link PcodeOp#CALLOTHER callother} is
 * interpreted as well, in case we're able to inline a p-code userop. Even when the userop is
 * inlined, the {@code callother} op is retained, because internal jumps may target it. It is
 * easier to keep it in the books and {@link JitNopOp nop} it out later than to substitute the
 * first inlined op. Worse, if the inlined userop emits no p-code, substitution would get
 * especially difficult.
 *
 * <p>
 * {@link PcodeOp#UNIMPLEMENTED unimplemented} is interpreted too, because it requires an
 * {@link ErrBranch} record. Every other op is added to the decoded passage, but not (yet)
 * interpreted.
 */
@Override
public void stepOp(PcodeOp op, PcodeFrame frame, PcodeUseropLibrary<Object> library) {
    DecodedPcodeOp decoded = rewrite(op);
    int opcode = decoded.getOpcode();
    /**
     * NOTE: Every op must be logged, including an inlined CALLOTHER, because an internal jump
     * may refer to it. Snuffing the op later is easier than substituting the references.
     */
    opsForThisStep.add(decoded);
    switch (opcode) {
        case PcodeOp.BRANCH, PcodeOp.CBRANCH, PcodeOp.CALL, PcodeOp.BRANCHIND,
                PcodeOp.CALLIND, PcodeOp.RETURN, PcodeOp.CALLOTHER, PcodeOp.UNIMPLEMENTED ->
            super.stepOp(decoded, frame, library);
        default -> {
            // Logged only
        }
    }
}
/**
 * {@inheritDoc}
 *
 * <p>
 * The condition is disregarded. A conditional branch is treated exactly like an unconditional
 * one, because the decoder need only collect branch targets to seed additional strides.
 */
@Override
public void executeConditionalBranch(PcodeOp op, PcodeFrame frame) {
    this.doExecuteBranch(op, frame);
}
/**
 * {@inheritDoc}
 *
 * <p>
 * Deliberately does nothing. This prevents an attempt to write the PC to the
 * {@link #getState() state}, which is {@code null} for this executor.
 */
@Override
protected void branchToOffset(PcodeOp op, long offset, PcodeFrame frame) {
    // Intentionally empty: there is no state in which to record the counter
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This variant always fails with an {@link AssertionError}.
 */
@Override
protected void branchToOffset(PcodeOp op, Object offset, PcodeFrame frame) {
    throw new AssertionError();
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This creates an {@link ExtBranch} record to the target address, with the context derived for
 * that target, and collects it for this instruction step. The record is first used to check
 * for fall through. Then, the passage decoder is notified, which either adds it to the seed
 * queue or converts it to an {@link IntBranch} record.
 *
 * @see #checkFallthroughAndAccumulate(PcodeProgram)
 */
@Override
protected void branchToAddress(PcodeOp op, Address target) {
    AddrCtx to = takeTargetContext(target);
    branchesForThisStep.add(new ExtBranch(op, to));
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This creates an {@link IntBranch} record and collects it for this instruction step. If the
 * target is exactly the end of the frame's code, the record targets the terminal nop, which is
 * created on first need. Otherwise, it targets the re-written op at the target index. The
 * record is first used to check for fall through. Then, the passage decoder is notified, which
 * collects the records for later passage-wide control flow analysis.
 *
 * @see #checkFallthroughAndAccumulate(PcodeProgram)
 */
@Override
protected void branchInternal(PcodeOp op, PcodeFrame frame, int relative) {
    int tgtSeq = op.getSeqnum().getTime() + relative;
    final PcodeOp target;
    if (tgtSeq != frame.getCode().size()) {
        target = rewrite(frame.getCode().get(tgtSeq));
    }
    else {
        if (termNop == null) {
            termNop = new NopPcodeOp(at, tgtSeq);
        }
        target = termNop;
    }
    branchesForThisStep.add(new IntBranch(op, target, false));
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This creates an {@link IndBranch} record carrying the pre-computed flow context and collects
 * it for this instruction step. The record is first used to check for fall through. Then, the
 * passage decoder is notified, which collects the records for later passage-wide control flow
 * analysis.
 *
 * @see #checkFallthroughAndAccumulate(PcodeProgram)
 */
@Override
protected void doExecuteIndirectBranch(PcodeOp op, PcodeFrame frame) {
    IndBranch branch = new IndBranch(op, flow);
    branchesForThisStep.add(branch);
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This creates an {@link ErrBranch} record and collects it for this instruction step. For a
 * decode error, the message is that of the error. Otherwise, the message reports an
 * unimplemented instruction. The record is first used to check for fall through. Then, the
 * passage decoder is notified, which collects the records for later passage-wide control flow
 * analysis. In most (all?) cases, this is the only op emitted by the instruction (decode error,
 * unimplemented instruction), and so there is certainly no fall through.
 *
 * @see #checkFallthroughAndAccumulate(PcodeProgram)
 */
@Override
protected void badOp(PcodeOp op) {
    String message = instruction instanceof DecodeErrorInstruction err
            ? err.getMessage()
            : "Encountered an unimplemented instruction at " + at + " (" + instruction + ")";
    branchesForThisStep.add(new ErrBranch(op, message));
}
/**
 * {@inheritDoc}
 *
 * <p>
 * This creates an {@link ErrBranch} record naming the missing userop and collects it for this
 * instruction step. The record is first used to check for fall through. Then, the passage
 * decoder is notified, which collects the records for later passage-wide control flow
 * analysis. In contrast to {@link #badOp(PcodeOp)}, an instruction that calls a missing userop
 * may still have fall through.
 */
@Override
protected void onMissingUseropDef(PcodeOp op, PcodeFrame frame, String opName,
        PcodeUseropLibrary<Object> library) {
    String message = "Sleigh userop '%s' is not in the library".formatted(opName);
    branchesForThisStep.add(new ErrBranch(op, message));
}
/**
 * Record a context change to take effect at the given address
 *
 * <p>
 * Values for registers other than the processor context are ignored. Multiple values for the
 * same address are combined, the later value being combined into the earlier.
 *
 * @param address the address where the change applies
 * @param value the value to set
 */
@Override
public void setFutureRegisterValue(Address address, RegisterValue value) {
    if (value.getRegister().isProcessorContext()) {
        futCtx.merge(address, value, RegisterValue::combineValues);
    }
}
/**
 * Derive the contextreg value at the given target address (branch or fall through).
 *
 * <p>
 * An instruction's constructors may use {@code globalset} to place context changes at specific
 * addresses. Those changes arrive via {@link #setFutureRegisterValue(Address, RegisterValue)}
 * through some chain of method invocations started by
 * {@link #setInstruction(PseudoInstruction)}. When the interpreter encounters a branch op, the
 * op specifies the target address, and the context for that branch must be derived as well.
 * The result is the pre-computed "flow" context, combined with any change recorded for the
 * target address.
 *
 * @param target the target address
 * @return the target address and contextreg value
 */
public AddrCtx takeTargetContext(Address target) {
    /** Look up only. Do not remove, in case multiple branches have the same target address */
    RegisterValue pending = futCtx.get(target);
    if (pending == null) {
        return new AddrCtx(flow, target);
    }
    return new AddrCtx(flow.combineValues(pending), target);
}
/**
 * After p-code interpretation, check if the instruction has fall through, notify the stride
 * decoder of the instruction's ops, and notify the passage of the instruction's branches.
 *
 * <p>
 * To determine whether there's fall through, this performs a miniature control flow analysis on
 * just this step's p-code ops. This is required because a user inject can be very complex, and
 * need not obey all of the usual control flow checks imposed by the Sleigh semantic compiler.
 * In particular {@link Instruction#hasFallthrough()} is not sufficient, for at least two
 * reasons: 1) The aforementioned user inject possibilities, 2) We do not consider a
 * {@link PcodeOp#CALL call} or {@link PcodeOp#CALLIND callind} as having fall through.
 *
 * <p>
 * To use control flow analysis as a means of checking for fall through, we append a special
 * "probe" {@link ExitPcodeOp} along with an {@link ExtBranch} record to {@link AddrCtx#NOWHERE
 * nowhere}. The probe thus serves the secondary purpose of preventing any complaints from the
 * analyzer about unterminated control flow. We then perform the analysis, borrowing
 * {@link BlockSplitter} from {@link JitControlFlowModel}. In practice, this seems fast enough.
 * Because the splitter keeps the blocks in the original order, the first op will certainly be
 * in the first block, and the probe op will certainly be in the last block. We perform a simple
 * reachability test between the two. The step has fall through if and only if a path is found.
 *
 * <p>
 * Two cases are decided without the analysis: A decode error never falls through, and a step
 * that logged no ops always does.
 *
 * @param from the instruction's or inject's p-code
 * @return true if the step falls through.
 */
public boolean checkFallthroughAndAccumulate(PcodeProgram from) {
// A decode error contributes its ops and error branches as is. Any other record is a bug.
if (instruction instanceof DecodeErrorInstruction) {
stride.opsForStride.addAll(opsForThisStep);
for (Branch branch : branchesForThisStep) {
switch (branch) {
case ErrBranch eb -> stride.passage.otherBranches.put(eb.from(), eb);
default -> throw new AssertionError();
}
}
return false;
}
// Nothing was logged, so there is nothing to accumulate
if (opsForThisStep.isEmpty()) {
return true;
}
// Append the probe op and its branch. NOTE: This mutates this step's lists.
ExitPcodeOp probeOp = new ExitPcodeOp(AddrCtx.NOWHERE);
opsForThisStep.add(probeOp);
ExtBranch probeBranch = new ExtBranch(probeOp, AddrCtx.NOWHERE);
branchesForThisStep.add(probeBranch);
PcodeProgram program = new PcodeProgram(from, opsForThisStep);
BlockSplitter splitter = new BlockSplitter(program);
splitter.addBranches(branchesForThisStep);
SequencedMap<PcodeOp, JitBlock> blocks = splitter.splitBlocks();
// The first block contains the first op. The last block contains the probe.
JitBlock entry = blocks.firstEntry().getValue();
JitBlock exit = blocks.lastEntry().getValue();
Set<JitBlock> reachable = new HashSet<>();
collectReachable(reachable, entry);
for (JitBlock block : blocks.values()) {
for (PcodeOp op : block.getCode()) {
// Compare by identity to exclude exactly the probe appended above
if (op != probeOp) {
stride.opsForStride.add(op);
}
}
// Records marked as fall through are not passed on to the passage
for (IntBranch branch : block.branchesFrom()) {
if (!branch.isFall()) {
stride.passage.internalBranches.put(branch.from(), branch);
}
}
// Direct branches are offered to the passage as flows. All others are just recorded.
for (Branch branch : block.branchesOut()) {
if (branch != probeBranch) {
switch (branch) {
case ExtBranch eb -> stride.passage.flowTo(eb);
default -> stride.passage.otherBranches.put(branch.from(), branch);
}
}
}
}
// There is fall through if and only if the probe's block is reachable from the entry
return reachable.contains(exit);
}
/**
 * The reachability test mentioned in {@link #checkFallthroughAndAccumulate(PcodeProgram)}
 *
 * <p>
 * Adds {@code cur} and every block reachable from it to the given mutable set. A block already
 * in the set is not visited again, so cycles terminate.
 *
 * @param into a mutable set for collecting reachable blocks
 * @param cur the source block, or an intermediate during recursion
 */
private void collectReachable(Set<JitBlock> into, JitBlock cur) {
    if (into.add(cur)) {
        for (BlockFlow edge : cur.flowsFrom().values()) {
            collectReachable(into, edge.to());
        }
    }
}
/**
 * Compute the fall-through address
 *
 * <p>
 * This computes the "next" address, i.e., the one following the instruction's maximum address,
 * whether or not the instruction actually has fall through. The caller should check for fall
 * through first.
 *
 * @return the next address
 * @implNote If no instruction was actually decoded during this step, and the decoder is asking
 *           about fall through, then the user very likely made an error in specifying an
 *           inject's control flow, in which case the counter will not advance. To get the same
 *           effect, a warning is logged and the current address is returned. The decoder
 *           and/or translator ought to recognize this and ensure the resulting infinite loop
 *           can be interrupted.
 * @see PcodeMachine#inject(Address, String)
 */
Address getAdvancedAddress() {
    if (instruction == null) {
        Msg.warn(this, "An inject may have forgotten control flow.");
        return at.address;
    }
    return instruction.getMaxAddress().next();
}
/**
 * Add an instruction to the stride being decoded
 *
 * <p>
 * For addresses without injects, every decoded instruction ought to be included in the stride.
 * For an address with an inject, a decoded instruction should only be included if it is
 * actually interpreted, i.e., its ops are included.
 *
 * @param instruction the decoded instruction
 */
void addInstruction(PseudoInstruction instruction) {
    List<Instruction> collected = stride.instructions;
    collected.add(instruction);
}
}

View file

@ -0,0 +1,169 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.decode;
import java.util.*;
import java.util.Map.Entry;
import org.apache.commons.collections4.MapUtils;
import ghidra.pcode.emu.jit.JitConfiguration;
import ghidra.pcode.emu.jit.JitPassage;
import ghidra.pcode.emu.jit.JitPassage.*;
import ghidra.pcode.exec.PcodeUseropLibrary;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.pcode.PcodeOp;
/**
* The decoder for a single passage
*
* <p>
* This is a sort of "mutable" passage or passage "builder" that is used while the passage is being
* decoded. Once complete, this provides an immutable (or at least it's supposed to be) decoded
* {@link Passage}.
*/
class DecoderForOnePassage {
/** The thread's passage decoder */
private final JitPassageDecoder decoder;
/** The seed for this passage */
private final AddrCtx seed;
/** The maximum-ish number of p-code ops to emit */
private final int maxOps;
/** The maximum number of instructions, taken from the machine's configuration */
private final int maxInstrs;
/** The maximum number of strides, taken from the machine's configuration */
private final int maxStrides;
/** The branches to ops within this passage, keyed by the op causing the branch */
final Map<PcodeOp, IntBranch> internalBranches = new HashMap<>();
/** The queue of direct branches whose targets were not yet decoded, in the order queued */
final SequencedMap<PcodeOp, ExtBranch> externalBranches = new LinkedHashMap<>();
/** All other branch records, keyed by the op causing the branch */
final Map<PcodeOp, Branch> otherBranches = new HashMap<>();
/** The op to target for each decoded address and contextreg value (populated elsewhere) */
final Map<AddrCtx, PcodeOp> firstOps = new HashMap<>();
/** The strides decoded so far */
final List<DecodedStride> strides = new ArrayList<>();
/** The total number of ops in the strides decoded so far */
private int opCount = 0;
/** The total number of instructions in the strides decoded so far */
private int instructionCount = 0;
/**
 * Create the decoder for one passage
 *
 * <p>
 * The instruction and stride quotas come from the machine's configuration. The queue of
 * external branches is primed with an entry op that branches to the seed.
 *
 * @param decoder the thread's passage decoder
 * @param seed the seed for this passage
 * @param maxOps the maximum-ish number of p-code ops to emit
 */
DecoderForOnePassage(JitPassageDecoder decoder, AddrCtx seed, int maxOps) {
    this.decoder = decoder;
    this.seed = seed;
    this.maxOps = maxOps;

    JitConfiguration config = decoder.thread.getMachine().getConfiguration();
    this.maxInstrs = config.maxPassageInstructions();
    this.maxStrides = config.maxPassageStrides();

    EntryPcodeOp entryOp = new EntryPcodeOp(seed);
    ExtBranch entryBranch = new ExtBranch(entryOp, seed);
    externalBranches.put(entryOp, entryBranch);
}
/**
 * Implements the actual decode loop
 *
 * <p>
 * While every quota (ops, instructions, strides) has room, this takes the oldest queued
 * external branch. If its target is already an entry point of the thread, the branch is
 * recorded as is. Otherwise, a stride is decoded at the target, and the branch is recorded as
 * an internal branch to the first op at that target. The loop also ends when the queue is
 * empty.
 */
void decodePassage() {
    while (true) {
        boolean withinQuota = opCount < maxOps && instructionCount < maxInstrs &&
            strides.size() < maxStrides;
        if (!withinQuota) {
            return;
        }
        Entry<PcodeOp, ExtBranch> polled = externalBranches.pollFirstEntry();
        if (polled == null) {
            return;
        }
        ExtBranch next = polled.getValue();
        AddrCtx start = next.to();
        if (!decoder.thread.hasEntry(start)) {
            decodeStride(start);
            PcodeOp to = Objects.requireNonNull(firstOps.get(start));
            internalBranches.put(next.from(), new IntBranch(next.from(), to, false));
            continue;
        }
        otherBranches.put(next.from(), next);
    }
}
/**
 * Record that a direct branch was encountered.
 *
 * <p>
 * If the target is already decoded, an {@link IntBranch} record to its first op is created,
 * and we're done. Otherwise, the given {@link ExtBranch} record is queued. Multiple direct
 * branches to the same target still get separate entries, since the queue is keyed by their
 * {@link Branch#from() from} ops, which differ. Records still queued when decoding terminates
 * (probably because of a quota) are examined by {@link #finish()}. Those whose targets have
 * since been decoded are converted to {@link IntBranch} records there.
 *
 * @param eb the branch record, giving the op causing the control flow and its target
 */
void flowTo(ExtBranch eb) {
    if (!firstOps.containsKey(eb.to())) {
        externalBranches.put(eb.from(), eb);
        return;
    }
    PcodeOp target = firstOps.get(eb.to());
    IntBranch ib = new IntBranch(eb.from(), target, false);
    internalBranches.put(ib.from(), ib);
}
/**
 * Decode one stride from the given seed, add it to this passage, and count it toward the op
 * and instruction quotas.
 *
 * @param start the starting address and context
 */
private void decodeStride(AddrCtx start) {
    DecoderForOneStride strideDecoder = new DecoderForOneStride(decoder, this, start);
    DecodedStride decoded = strideDecoder.decode();
    opCount += decoded.ops().size();
    instructionCount += decoded.instructions().size();
    strides.add(decoded);
}
/**
 * Sort out the result and create the decoded passage
 *
 * <p>
 * The strides are sorted by their seeds, and their code and instructions are each concatenated
 * in that order. All of the branch records are combined into one map. (They can still be
 * distinguished by type.) A queued {@link ExtBranch} record whose target has been decoded is
 * converted to an {@link IntBranch} record. The rest are kept as is.
 *
 * <p>
 * NOTE: The combined map is the {@link #otherBranches} map itself, which this fills in place.
 *
 * @return the passage
 */
JitPassage finish() {
    strides.sort(Comparator.comparing(DecodedStride::start));
    List<PcodeOp> code = strides.stream()
            .map(DecodedStride::ops)
            .flatMap(List::stream)
            .toList();
    List<Instruction> instructions = strides.stream()
            .map(DecodedStride::instructions)
            .flatMap(List::stream)
            .toList();

    Map<PcodeOp, Branch> branches = otherBranches;
    branches.putAll(internalBranches);
    for (ExtBranch eb : externalBranches.values()) {
        Branch resolved = firstOps.containsKey(eb.to())
                ? new IntBranch(eb.from(), firstOps.get(eb.to()), false)
                : eb;
        branches.put(eb.from(), resolved);
    }
    return new JitPassage(decoder.thread.getLanguage(), seed, code, decoder.library,
        instructions, branches, MapUtils.invertMap(firstOps));
}
/**
 * Get the userop library held by the thread's passage decoder
 *
 * @return the decoder-wrapped library
 */
PcodeUseropLibrary<Object> library() {
    return this.decoder.library;
}
}

View file

@ -0,0 +1,186 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.decode;
import java.util.ArrayList;
import java.util.List;
import ghidra.app.util.PseudoInstruction;
import ghidra.pcode.emu.jit.JitPassage.*;
import ghidra.pcode.exec.PcodeProgram;
import ghidra.program.model.listing.Instruction;
import ghidra.program.model.pcode.PcodeOp;
/**
* The decoder for a single stride.
*
* <p>
* This starts at a given seed and proceeds linearly until it hits an instruction without fall
* through. It may also stop if it encounters an existing entry point or an erroneous user inject.
*
* @see JitPassageDecoder
*/
public class DecoderForOneStride {
/**
 * The outcome of one decode step
 *
 * <p>
 * This may also represent an error encountered while trying to decode an instruction.
 *
 * @param executor the p-code interpreter, which retains some state
 * @param program the resulting p-code
 */
record StepResult(DecoderExecutor executor, PcodeProgram program) {
    /**
     * Check whether the result falls through, accumulate its instructions and ops, and apply
     * any control-flow effects. This delegates to the interpreter, passing it the p-code.
     *
     * @return true if the result falls through.
     * @see DecoderExecutor#checkFallthroughAndAccumulate(PcodeProgram)
     */
    boolean checkFallthroughAndAccumulate() {
        return executor().checkFallthroughAndAccumulate(program());
    }

    /**
     * Compute the fall-through target
     *
     * <p>
     * <b>NOTE</b>: Call this only after checking that the result actually has fall through.
     * Otherwise, it blindly computes the address and context immediately after the
     * instruction.
     *
     * @return the next address to decode
     */
    AddrCtx next() {
        var fallThrough = executor().getAdvancedAddress();
        return executor().takeTargetContext(fallThrough);
    }
}
/** The thread's passage decoder */
final JitPassageDecoder decoder;
/** The decoder for the passage containing this stride */
final DecoderForOnePassage passage;
/** The seed that started this stride */
private final AddrCtx start;
/** The instructions decoded so far, in order */
final List<Instruction> instructions = new ArrayList<>();
/** The p-code ops accumulated so far, in order */
final List<PcodeOp> opsForStride = new ArrayList<>();
/**
* Construct a stride decoder
*
* @param decoder the thread's passage decoder
* @param passage the decoder for this specific passage
* @param start the seed to start this stride
*/
public DecoderForOneStride(JitPassageDecoder decoder, DecoderForOnePassage passage,
AddrCtx start) {
this.decoder = decoder;
this.passage = passage;
this.start = start;
}
/**
* Finish decoding and create the stride
*
* @return the stride
*/
DecodedStride toStride() {
return new DecodedStride(start, instructions, opsForStride);
}
/**
* "Step" the decoder an instruction
*
* <p>
* This will attempt to decode the instruction at the given address (and contextreg value). If
* the given address is already a known entry point (for the entire emulator), then this returns
* {@code null} and the stride should be terminated. Otherwise, this checks for a user inject or
* then decodes an instruction. The resulting p-code (which may represent a decode error) is
* interpreted, and the first op is saved, in case it is targeted by a direct branch. As a
* special case, if the inject and/or instruction emits no p-code, we synthesize a
* {@link NopPcodeOp nop}, so that we can enter something into our books.
*
* @param at the address of the instruction to decode
* @return the result
*/
private StepResult stepAddrCtx(AddrCtx at) {
/**
* Avoid duplicate translation when we encounter an existing entry point. Just encode an
* exit branch.
*/
if (decoder.thread.hasEntry(at)) {
ExitPcodeOp exitOp = new ExitPcodeOp(at);
opsForStride.add(exitOp);
passage.otherBranches.put(exitOp, new ExtBranch(exitOp, at));
return null;
}
DecoderExecutor executor = new DecoderExecutor(this, at);
PcodeProgram program = decoder.thread.getInject(at.address);
if (program == null) {
PseudoInstruction instruction = executor.decodeInstruction();
instructions.add(instruction);
program = PcodeProgram.fromInstruction(instruction, false);
}
executor.execute(program);
if (executor.opsForThisStep.isEmpty()) {
NopPcodeOp nop = new NopPcodeOp(at, 0);
passage.firstOps.put(at, nop);
opsForStride.add(nop);
}
else {
passage.firstOps.put(at, executor.opsForThisStep.getFirst());
}
return new StepResult(executor, program);
}
/**
* Decode the stride.
*
* @return the decoded stride
*/
public DecodedStride decode() {
AddrCtx at = start;
while (true) {
if (passage.firstOps.containsKey(at)) {
return toStride();
}
StepResult result = stepAddrCtx(at);
if (result == null || !result.checkFallthroughAndAccumulate()) {
return toStride();
}
AddrCtx next = result.next();
if (at.equals(next)) {
// Would happen because of inject without control flow
ExitPcodeOp exitOp = new ExitPcodeOp(at);
opsForStride.add(exitOp);
passage.otherBranches.put(exitOp, new ExtBranch(exitOp, at));
return toStride();
}
at = next;
}
/**
* NOTE: If we impose a max instruction count within the stride, be sure to add the
* "external branch" that falls-through to the next instruction outside the passage.
*/
}
}

View file

@ -0,0 +1,202 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.decode;
import java.lang.reflect.Method;
import java.util.List;
import ghidra.app.util.PseudoInstruction;
import ghidra.pcode.emu.DefaultPcodeThread.PcodeEmulationLibrary;
import ghidra.pcode.emu.jit.op.JitNopOp;
import ghidra.pcode.exec.*;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.Varnode;
/**
 * The decoder's wrapper around the emulator's userop library
 *
 * <p>
 * This library serves two purposes: 1) to override {@link PcodeEmulationLibrary#emu_exec_decoded()}
 * and {@link PcodeEmulationLibrary#emu_skip_decoded()}, and 2) to check and inline p-code userops
 * that {@link ghidra.pcode.exec.PcodeUseropLibrary.PcodeUseropDefinition#canInlinePcode() allow}
 * it.
 *
 * <p>
 * The first purpose is met by declaring the two userops here using the usual annotations. Both
 * are easily inlinable, so they are marked as such. They are, however, separate from the wrappers
 * used for the second purpose, so the inlining must be implemented in the userop methods
 * themselves. The marking is still applied for informational purposes and because the translator
 * needs to know.
 *
 * <p>
 * The second purpose is met by accepting the emulator's userop library and individually wrapping
 * each of its userops, excluding the two we override. Each userop's attributes pass through
 * unchanged, but when executed, the wrapper checks whether the userop allows inlining. If so, the
 * userop's p-code is fed into the decoder's interpreter. This effectively inlines the op, control
 * flow ops and all, into the passage. Note we do not actually replace the
 * {@link PcodeOp#CALLOTHER callother} op, for bookkeeping purposes. Instead we will map it to a
 * {@link JitNopOp nop} during translation.
 */
public class DecoderUseropLibrary extends AnnotatedPcodeUseropLibrary<Object> {

	/**
	 * The wrapper around one of the emulator's userops
	 */
	protected class WrappedUseropDefinition implements PcodeUseropDefinition<Object> {
		private final PcodeUseropDefinition<byte[]> rtOp;

		/**
		 * Wrap the given userop
		 *
		 * @param rtOp the actual userop, as defined by the user or emulator
		 */
		public WrappedUseropDefinition(PcodeUseropDefinition<byte[]> rtOp) {
			this.rtOp = rtOp;
		}

		@Override
		public String getName() {
			return rtOp.getName();
		}

		@Override
		public int getInputCount() {
			return rtOp.getInputCount();
		}

		/**
		 * Not supported by the wrapper; this form always fails with an {@link AssertionError}.
		 */
		@Override
		public void execute(PcodeExecutor<Object> executor, PcodeUseropLibrary<Object> library,
				Varnode outVar, List<Varnode> inVars) {
			throw new AssertionError();
		}

		/**
		 * {@inheritDoc}
		 *
		 * @implNote If the userop can be inlined, we assume the delegate's {@code execute} method
		 *           simply produces p-code and feeds it to the executor. If that is true, then the
		 *           target type {@code <T>} does not matter, so we cast everything to raw types.
		 *           Thus, the user is responsible to apply the {@link #canInlinePcode()} attribute
		 *           correctly.
		 */
		@Override
		@SuppressWarnings("unchecked")
		public void execute(PcodeExecutor<Object> executor, PcodeUseropLibrary<Object> library,
				PcodeOp op) {
			if (!rtOp.canInlinePcode()) {
				// Nothing to do. CALLOTHER is logged and will be compiled later.
				return;
			}
			// Erase the type parameter so the byte[]-typed delegate accepts our executor.
			@SuppressWarnings("rawtypes")
			PcodeExecutor erasedExecutor = executor;
			@SuppressWarnings("rawtypes")
			PcodeUseropLibrary erasedLibrary = library;
			rtOp.execute(erasedExecutor, erasedLibrary, op);
		}

		@Override
		public boolean isFunctional() {
			return rtOp.isFunctional();
		}

		@Override
		public boolean hasSideEffects() {
			return rtOp.hasSideEffects();
		}

		@Override
		public boolean canInlinePcode() {
			return rtOp.canInlinePcode();
		}

		@Override
		public Method getJavaMethod() {
			return rtOp.getJavaMethod();
		}

		@Override
		public PcodeUseropLibrary<?> getDefiningLibrary() {
			return rtOp.getDefiningLibrary();
		}
	}

	/**
	 * Wrap the given userop library
	 *
	 * <p>
	 * Every userop of the given library is wrapped and registered under its own name, except
	 * where this library already has a userop by that name.
	 *
	 * @param rtLib the actual library provided by the user or emulator
	 */
	public DecoderUseropLibrary(PcodeUseropLibrary<byte[]> rtLib) {
		for (PcodeUseropDefinition<byte[]> rtOp : rtLib.getUserops().values()) {
			String name = rtOp.getName();
			// Allow our annotations to override stuff in rtLib
			if (!ops.containsKey(name)) {
				ops.put(name, new WrappedUseropDefinition(rtOp));
			}
		}
	}

	/**
	 * The replacement for {@link PcodeEmulationLibrary#emu_exec_decoded()}.
	 *
	 * <p>
	 * The one built into the emulator would have the thread interpret the decoded instruction
	 * directly. While this might "work," it totally misses the purpose of JIT translation. We
	 * instead inline the userop's p-code into the rest of the passage, by having the decoder
	 * interpret the p-code instead. We also need to ensure the decoded instruction is added into
	 * the passage.
	 *
	 * <p>
	 * Note that the {@link PcodeOp#CALLOTHER callother} op will be mapped to a {@link JitNopOp nop}
	 * during translation because we have set {@code canInline}.
	 *
	 * @param executor the decoder's executor
	 */
	@PcodeUserop(canInline = true)
	public void emu_exec_decoded(@OpExecutor PcodeExecutor<Object> executor) {
		DecoderExecutor decoderExec = (DecoderExecutor) executor;
		PseudoInstruction decoded = decoderExec.decodeInstruction();
		decoderExec.addInstruction(decoded);
		decoderExec.execute(PcodeProgram.fromInstruction(decoded, false));
	}

	/**
	 * The replacement for {@link PcodeEmulationLibrary#emu_skip_decoded()}.
	 *
	 * <p>
	 * The one built into the emulator would have the thread drop and skip the decoded instruction
	 * directly. This would not have the intended effect, because the decoder is the thing that
	 * needs to skip and advance to the next address. We instead "inline" nothing, but we must still
	 * decode the instruction. Because the executor provides the decode routine, it can internally
	 * work out fall through. We will <em>not</em> add the instruction to the passage, though,
	 * because we will not have the executor interpret any of the instruction's p-code. As for fall
	 * through, the {@link DecoderExecutor#checkFallthroughAndAccumulate(PcodeProgram)} routine just
	 * does its usual. If the inject falls through, {@link DecoderExecutor#getAdvancedAddress()}
	 * considers the decoded instruction, even though it was never interpreted.
	 *
	 * <p>
	 * Note that the {@link PcodeOp#CALLOTHER callother} op will still be mapped to a
	 * {@link JitNopOp nop} during translation because we have set {@code canInline}.
	 *
	 * @param executor the decoder's executor
	 */
	@PcodeUserop(canInline = true)
	public void emu_skip_decoded(@OpExecutor PcodeExecutor<Object> executor) {
		// Decode only; the result is deliberately neither recorded nor interpreted.
		((DecoderExecutor) executor).decodeInstruction();
	}
}

View file

@ -0,0 +1,167 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.decode;
import ghidra.app.util.PseudoInstruction;
import ghidra.pcode.emu.InstructionDecoder;
import ghidra.pcode.emu.jit.*;
import ghidra.pcode.emu.jit.JitPassage.*;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.BlockSplitter;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.exec.DecodePcodeExecutionException;
import ghidra.program.model.address.Address;
import ghidra.program.model.lang.Register;
import ghidra.program.model.lang.RegisterValue;
import ghidra.program.model.listing.ProgramContext;
import ghidra.program.model.pcode.PcodeOp;
/**
 * The decoder of a {@link JitPassage} to support JIT-accelerated p-code emulation.
 *
 * <p>
 * When the emulator encounters an address (and contextreg value) that it has not previously
 * translated, it must decode a passage seeded at that required entry point. It must then translate
 * the passage, collect all the resulting entry points, and finally invoke the passage's
 * {@link JitCompiledPassage#run(int) run} method for the required entry point.
 *
 * <h2>Decoding a Passage</h2>
 * <p>
 * Decode starts with a single seed, which is the entry point required by the emulator. As such,
 * that seed <em>must</em> be among the entry points exported by the translator. Decode occurs one
 * stride at a time. Starting with the seed, we decode a stride by disassembling linearly until: 1)
 * we encounter an instruction without fall through, 2) there's already an entry point to a
 * translated passage at an encountered address, or 3) a user injection fails to specify control
 * flow. Case 1 is the normal expected case. For example, when the decoder encounters an
 * unconditional branch, the stride is terminated. Case 2 is meant to reduce duplicative
 * translations, but it does come at some cost during decode time. Suppose execution branches into
 * the middle of a previously translated basic block. (Note that basic blocks are only broken apart
 * using branches <em>in the same passage</em>, so it is possible some branch encountered later
 * would jump into another passage's basic block.) That previously translated passage will not have
 * exposed an entry point at that branch target, so the emulator will begin decoding using the
 * branch target as the seed. Ideally, the resulting passage will consist of a single stride that
 * terminates at an existing entry point. The emulator will translate and execute the passage, which
 * should exit at that entry point, where the emulator can then continue execution. Case 3 is just
 * to ensure execution does not get caught in a translated infinite loop. There will still be an
 * infinite loop, but it can be interrupted while execution is in the emulator's logic rather than
 * the translated logic.
 *
 * <p>
 * As the stride decoder processes each instruction, it interprets its p-code, along with any
 * generated by user injects, to collect branch targets. For direct branches ({@link PcodeOp#BRANCH
 * branch}, {@link PcodeOp#CBRANCH cbranch}, and {@link PcodeOp#CALL call}), the target address (and
 * appropriate contextreg value) is added to the queue of seeds, unless that target is already
 * decoded in this passage. A bit of control flow analysis is required to determine whether each
 * instruction (with user injects) has fall through. We borrow the {@link BlockSplitter} from the
 * {@link JitControlFlowModel} to accomplish this. We append a "probe" p-code op at the very end,
 * and then once we have the (miniature) control flow graph, we check if there's a path from
 * instruction start to the probe op. If there is, then we can fall through, so decode proceeds to
 * the next instruction. If not, the stride is terminated, so the decoder starts a new stride at the
 * next seed, unless we've met the p-code op, instruction, or stride {@link JitConfiguration quota}.
 *
 * <p>
 * The seed queue is a list of {@link ExtBranch} records. Each stride is decoded by removing a seed
 * from that queue, decoding instructions, emitting ops, and then creating an {@link IntBranch}
 * record targeting the first op of the newly-decoded instruction. The {@link Branch#from() from}
 * field is taken from the seed {@link ExtBranch} record. Decode will likely terminate before this
 * queue is emptied, in which case, those remaining external branches will become part of the
 * passage's {@link JitPassage#getBranches() branches}. Direct branches to instructions already
 * included in the passage, p-code relative branches, and queued external branches to instructions
 * which have since been decoded all become {@link IntBranch} records, too. For indirect branches
 * ({@link PcodeOp#BRANCHIND branchind}, {@link PcodeOp#CALLIND callind}, and {@link PcodeOp#RETURN
 * return}), we create {@link IndBranch} records. For error cases (e.g.,
 * {@link PcodeOp#UNIMPLEMENTED unimplemented}), we create {@link ErrBranch} records.
 *
 * @implNote The process described above is actually implemented in {@link DecoderForOnePassage}.
 *           This class just keeps the configuration and some other trappings, and instantiates an
 *           actual decoder upon requesting a seed.
 */
public class JitPassageDecoder {
	final JitPcodeThread thread;
	final InstructionDecoder decoder;
	final ProgramContext defaultContext;
	final Register contextreg;

	final DecoderUseropLibrary library;

	/**
	 * Construct a passage decoder
	 *
	 * <p>
	 * When the thread has no default context, the context register is taken to be
	 * {@link Register#NO_CONTEXT}.
	 *
	 * @param thread the thread whose instruction decoder, context, and userop library to use.
	 */
	public JitPassageDecoder(JitPcodeThread thread) {
		this.thread = thread;
		this.decoder = thread.getDecoder();
		this.defaultContext = thread.getDefaultContext();
		if (defaultContext == null) {
			this.contextreg = Register.NO_CONTEXT;
		}
		else {
			this.contextreg = defaultContext.getBaseContextRegister();
		}
		this.library = new DecoderUseropLibrary(thread.getUseropLibrary());
	}

	/**
	 * Decode a passage starting at the given seed
	 *
	 * <p>
	 * This is a convenience that pairs the address and contextreg value before delegating.
	 *
	 * @param seed the seed address
	 * @param ctxIn the seed contextreg value
	 * @param maxOps the maximum-ish number of p-code ops to emit
	 * @see #decodePassage(AddrCtx, int)
	 * @return the decoded passage
	 */
	public JitPassage decodePassage(Address seed, RegisterValue ctxIn, int maxOps) {
		AddrCtx entry = new AddrCtx(ctxIn, seed);
		return decodePassage(entry, maxOps);
	}

	/**
	 * Decode a passage starting at the given seed
	 *
	 * <p>
	 * We provide a {@code maxOps} parameter so that the configured
	 * {@link JitConfiguration#maxPassageOps() option} can be overridden. In particular, the
	 * bytecode emitter may exceed the maximum size of a Java method, in which case we must abort,
	 * re-decode with fewer ops, and retry. Whether this back off should persist in the
	 * configuration is yet to be determined. Output size can vary wildly depending on the number of
	 * basic blocks, scope transitions, nature of the ops, etc. We ought to be able to provide a
	 * reasonable default value that mostly avoids retries, because each retry essentially wastes an
	 * entire JIT translation. On the other hand, if we choose too small a value, we lose some of
	 * the benefits of translating the control flow and keeping variables in JVM locals.
	 *
	 * @param seed the required entry point, where decode will start
	 * @param maxOps the maximum-ish number of p-code ops to emit
	 * @return the decoded passage
	 */
	public JitPassage decodePassage(AddrCtx seed, int maxOps) {
		// Each request gets its own single-use decoder.
		DecoderForOnePassage passageDecoder = new DecoderForOnePassage(this, seed, maxOps);
		passageDecoder.decodePassage();
		return passageDecoder.finish();
	}

	/**
	 * Decode a single instruction
	 *
	 * <p>
	 * A decode failure is not propagated. It is instead converted into a stand-in instruction
	 * carrying the failure's message.
	 *
	 * @param address the address of the instruction
	 * @param ctx the input decode context
	 * @return the decoded instruction, or a {@link DecodeErrorInstruction}.
	 */
	PseudoInstruction decodeInstruction(Address address, RegisterValue ctx) {
		try {
			return decoder.decodeInstruction(address, ctx);
		}
		catch (DecodePcodeExecutionException decodeFailure) {
			return JitPassage.decodeError(decoder.getLanguage(), address, ctx,
				decodeFailure.getMessage());
		}
	}
}

View file

@ -0,0 +1,74 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import static org.objectweb.asm.Opcodes.ATHROW;
import org.objectweb.asm.Label;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.JitPassage.DecodedPcodeOp;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.program.model.pcode.PcodeOp;
/**
* A requested exception handler
*
* <p>
* When an exception occurs, we must retire all of the variables before we pop the
* {@link JitCompiledPassage#run(int) run} method's frame. We also write out the program counter and
* disassembly context so that the emulator can resume appropriately. After that, we re-throw the
* exception.
*
* <p>
* When the code generator knows the code it's emitting can cause a user exception, e.g., the Direct
* invocation of a userop, and there are live variables in scope, then it should request a handler
* (via {@link JitCodeGenerator#requestExceptionHandler(DecodedPcodeOp, JitBlock)}) and surround the
* code in a {@code try-catch} on {@link Throwable} directing it to this handler.
*
* @param op the op which may cause an exception
* @param block the block containing the op
* @param label the label at the start of the handler
*/
public record ExceptionHandler(PcodeOp op, JitBlock block, Label label) {
/**
* Construct a handler, generating a new label
*
* <p>
* The label is fresh and not yet placed. It is placed when
* {@link #generateRunCode(JitCodeGenerator, MethodVisitor)} emits the handler.
*
* @param op the op which may cause an exception
* @param block the block containing the op
*/
public ExceptionHandler(PcodeOp op, JitBlock block) {
this(op, block, new Label());
}
/**
* Emit the handler's code into the {@link JitCompiledPassage#run(int) run} method.
*
* <p>
* The bracketed comments in the body track the JVM operand stack after each step. The handler
* is entered with the caught exception on the stack, and it remains there while the passage
* exit code is generated, so that it can be re-thrown at the end.
*
* @param gen the code generator
* @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
*/
public void generateRunCode(JitCodeGenerator gen, MethodVisitor rv) {
// Place the handler's label here, so this is where the try-catch directs control.
rv.visitLabel(label);
// [exc]
// The callback emits a push of the offset of the op's address. Presumably the generator uses
// it as the program counter to write out -- confirm against generatePassageExit.
gen.generatePassageExit(block, () -> {
rv.visitLdcInsn(gen.getAddressForOp(op).getOffset());
}, gen.getExitContext(op), rv);
// [exc]
// Re-throw the original exception.
rv.visitInsn(ATHROW);
// []
}
}

View file

@ -0,0 +1,87 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import static ghidra.pcode.emu.jit.gen.GenConsts.*;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorStatePiece.JitBytesPcodeExecutorStateSpace;
import ghidra.program.model.address.Address;
/**
* A field request for a pre-fetched page from the {@link JitBytesPcodeExecutorStateSpace}.
*
* <p>
* The field is used for direct memory accesses. For those, the address space and fixed address is
* given in the p-code, so we are able to pre-fetch the page and access it directly at run time.
*
* @param address the address contained by the page to pre-fetch
*/
public record FieldForArrDirect(Address address) implements InstanceFieldReq {
/**
* {@inheritDoc}
*
* <p>
* The name is {@code arrDir_}, followed by the address space's name, an underscore, and the
* offset in hexadecimal, e.g., {@code arrDir_ram_600000}.
*/
@Override
public String name() {
return "arrDir_%s_%x".formatted(address.getAddressSpace().getName(),
address.getOffset());
}
/**
* {@inheritDoc}
*
* <p>
* Consider the address {@code ram:00600000}. The declaration is equivalent to:
*
* <pre>
* private final byte[] arrDir_ram_600000;
* </pre>
*
* <p>
* And the initialization is equivalent to:
*
* <pre>
* arrDir_ram_600000 =
* state.getForSpace(ADDRESS_FACTORY.getAddressSpace(ramId)).getDirect(0x600000);
* </pre>
*
* <p>
* The bracketed comments in the body track the JVM operand stack after each step.
*/
@Override
public void generateInitCode(JitCodeGenerator gen, ClassVisitor cv, MethodVisitor iv) {
cv.visitField(ACC_PRIVATE | ACC_FINAL, name(), TDESC_BYTE_ARR, null, null);
// [...]
// The object reference for the PUTFIELD at the end
iv.visitVarInsn(ALOAD, 0);
// [...,this]
gen.generateLoadJitStateSpace(address.getAddressSpace(), iv);
// [...,this,jitspace]
iv.visitLdcInsn(address.getOffset());
// [...,this,jitspace,offset:LONG]
iv.visitMethodInsn(INVOKEVIRTUAL, NAME_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE,
"getDirect", MDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE__GET_DIRECT, false);
// [...,this,arr]
iv.visitFieldInsn(PUTFIELD, gen.nameThis, name(), TDESC_BYTE_ARR);
// [...]
}
/**
* {@inheritDoc}
*
* <p>
* This emits a read of the field from {@code this}, leaving the pre-fetched array on the stack.
*/
@Override
public void generateLoadCode(JitCodeGenerator gen, MethodVisitor rv) {
// [...]
rv.visitVarInsn(ALOAD, 0);
// [...,this]
rv.visitFieldInsn(GETFIELD, gen.nameThis, name(),
TDESC_BYTE_ARR);
// [...,arr]
}
}

View file

@ -0,0 +1,77 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import static ghidra.pcode.emu.jit.gen.GenConsts.*;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.program.model.lang.Language;
import ghidra.program.model.lang.RegisterValue;
/**
* A field request for a pre-constructed contextreg value
*
* <p>
* A {@code null} context is tolerated by the code generation methods: no field is declared for it,
* and loading it just pushes {@code null}.
*
* @param ctx the contextreg value to pre-construct, possibly {@code null}
*/
record FieldForContext(RegisterValue ctx) implements StaticFieldReq {
/**
* {@inheritDoc}
*
* <p>
* The name is {@code CTX_} followed by the unsigned value in hexadecimal, e.g.,
* {@code CTX_80000000}.
*
* <p>
* NOTE: Unlike the generation methods, this dereferences {@code ctx} without a null check, so
* it must not be called when {@code ctx} is {@code null}.
*/
@Override
public String name() {
return "CTX_%s".formatted(ctx.getUnsignedValue().toString(16));
}
/**
* {@inheritDoc}
*
* <p>
* Consider the context value 0x80000000. The code is equivalent to:
*
* <pre>
* private static final {@link RegisterValue} CTX_80000000 = {@link JitCompiledPassage#createContext(Language, String) createContext}(LANGUAGE, "80000000");
* </pre>
*
* <p>
* Nothing is declared or emitted when the context is {@code null}. The bracketed comments in
* the body track the JVM operand stack after each step.
*/
@Override
public void generateClinitCode(JitCodeGenerator gen, ClassVisitor cv, MethodVisitor sv) {
if (ctx == null) {
// No field to create. generateLoadCode pushes a null constant instead.
return;
}
cv.visitField(ACC_PRIVATE | ACC_STATIC | ACC_FINAL, name(), TDESC_REGISTER_VALUE, null,
null);
// []
sv.visitFieldInsn(GETSTATIC, gen.nameThis, "LANGUAGE", TDESC_LANGUAGE);
// [language]
// The value is passed as the same hex string used in the field's name.
sv.visitLdcInsn(ctx.getUnsignedValue().toString(16));
// [language,ctx:STR]
sv.visitMethodInsn(INVOKESTATIC, NAME_JIT_COMPILED_PASSAGE, "createContext",
MDESC_JIT_COMPILED_PASSAGE__CREATE_CONTEXT, true);
// [ctx:RV]
sv.visitFieldInsn(PUTSTATIC, gen.nameThis, name(), TDESC_REGISTER_VALUE);
// []
}
/**
* {@inheritDoc}
*
* <p>
* This pushes {@code null} for a {@code null} context, or else the value of the static field.
*/
@Override
public void generateLoadCode(JitCodeGenerator gen, MethodVisitor rv) {
// [...]
if (ctx == null) {
rv.visitInsn(ACONST_NULL);
}
else {
rv.visitFieldInsn(GETSTATIC, gen.nameThis, name(), TDESC_REGISTER_VALUE);
}
// [...,ctx]
}
}

View file

@ -0,0 +1,101 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import static ghidra.pcode.emu.jit.gen.GenConsts.*;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.JitPassage.AddrCtx;
import ghidra.pcode.emu.jit.JitPassage.ExtBranch;
import ghidra.pcode.emu.jit.JitPcodeThread;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage.EntryPoint;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage.ExitSlot;
import ghidra.program.model.lang.RegisterValue;
/**
* A field request for an {@link ExitSlot}.
*
* <p>
* One of these is allocated per {@link ExtBranch#to()}. At run time, the first time a branch is
* encountered from this passage to the given target, the slot calls
* {@link JitPcodeThread#getEntry(AddrCtx) getEntry}{@code (target)} and keeps the reference. Each
* subsequent encounter uses the kept reference. This reference is what gets returned by
* {@link JitCompiledPassage#run(int)}, so now the thread already has in hand the next
* {@link EntryPoint} to execute.
*
* @param target the target address-contextreg pair of the branch exiting via this slot
*/
public record FieldForExitSlot(AddrCtx target) implements InstanceFieldReq {
/**
* {@inheritDoc}
*
* <p>
* The name is {@code exit_}, followed by the target address's offset in hexadecimal, an
* underscore, and the target's {@code biCtx} in hexadecimal, e.g.,
* {@code exit_401234_80000000}.
*/
@Override
public String name() {
return "exit_%x_%s".formatted(target.address.getOffset(), target.biCtx.toString(16));
}
/**
* {@inheritDoc}
*
* <p>
* Consider the target {@code (ram:00401234,ctx=80000000)}. The declaration is equivalent to:
*
* <pre>
* private final {@link ExitSlot} exit_401234_80000000;
* </pre>
*
* <p>
* And the initialization is equivalent to:
*
* <pre>
* exit_401234_80000000 = {@link JitCompiledPassage#createExitSlot(long, RegisterValue) createExitSlot}(0x401234, CTX_80000000);
* </pre>
*
* <p>
* Note that this method will ensure the {@code CTX_...} field is allocated and loads its value
* as needed.
*
* <p>
* The bracketed comments in the body track the JVM operand stack after each step.
*/
@Override
public void generateInitCode(JitCodeGenerator gen, ClassVisitor cv, MethodVisitor iv) {
// Request the context field up front, so that its value can be loaded below.
FieldForContext ctxField = gen.requestStaticFieldForContext(target.rvCtx);
cv.visitField(ACC_PRIVATE | ACC_FINAL, name(), TDESC_EXIT_SLOT, null, null);
// []
iv.visitVarInsn(ALOAD, 0);
// [this]
// One reference is the receiver of createExitSlot; the other is the object for PUTFIELD.
iv.visitInsn(DUP);
// [this,this]
iv.visitLdcInsn(target.address.getOffset());
// [this,this,target:LONG]
ctxField.generateLoadCode(gen, iv);
// [this,this,target:LONG,ctx:RV]
iv.visitMethodInsn(INVOKEINTERFACE, NAME_JIT_COMPILED_PASSAGE, "createExitSlot",
MDESC_JIT_COMPILED_PASSAGE__CREATE_EXIT_SLOT, true);
// [this,slot]
iv.visitFieldInsn(PUTFIELD, gen.nameThis, name(), TDESC_EXIT_SLOT);
// []
}
/**
* {@inheritDoc}
*
* <p>
* This emits a read of the field from {@code this}, leaving the exit slot on the stack.
*/
@Override
public void generateLoadCode(JitCodeGenerator gen, MethodVisitor rv) {
// []
rv.visitVarInsn(ALOAD, 0);
// [this]
rv.visitFieldInsn(GETFIELD, gen.nameThis, name(), TDESC_EXIT_SLOT);
// [slot]
}
}

View file

@ -0,0 +1,84 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import static ghidra.pcode.emu.jit.gen.GenConsts.TDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorStatePiece.JitBytesPcodeExecutorStateSpace;
import ghidra.program.model.address.AddressSpace;
/**
* A field request for a pre-fetched {@link JitBytesPcodeExecutorStateSpace}
*
* <p>
* The field is used for indirect memory accesses. For those, the address space is given in the
* p-code, but the offset must be computed at run time. Thus, we can pre-fetch the state space, but
* not any particular page.
*
* @param space the address space of the state space to pre-fetch
*/
public record FieldForSpaceIndirect(AddressSpace space) implements InstanceFieldReq {
/**
* {@inheritDoc}
*
* <p>
* The name is {@code spaceInd_} followed by the address space's name, e.g.,
* {@code spaceInd_ram}.
*/
@Override
public String name() {
return "spaceInd_" + space.getName();
}
/**
* {@inheritDoc}
*
* <p>
* Consider the "ram" space. The declaration is equivalent to:
*
* <pre>
* private final {@link JitBytesPcodeExecutorStateSpace} spaceInd_ram;
* </pre>
*
* <p>
* And the initialization is equivalent to:
*
* <pre>
* spaceInd_ram = state.getForSpace(ADDRESS_FACTORY.getAddressSpace(ramId));
* </pre>
*
* <p>
* The bracketed comments in the body track the JVM operand stack after each step.
*/
@Override
public void generateInitCode(JitCodeGenerator gen, ClassVisitor cv, MethodVisitor iv) {
cv.visitField(ACC_PRIVATE | ACC_FINAL, name(),
TDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE, null, null);
// [...]
// The object reference for the PUTFIELD at the end
iv.visitVarInsn(ALOAD, 0);
// [...,this]
gen.generateLoadJitStateSpace(space, iv);
// [...,this,jitspace]
iv.visitFieldInsn(PUTFIELD, gen.nameThis, name(),
TDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE);
// [...]
}
/**
* {@inheritDoc}
*
* <p>
* This emits a read of the field from {@code this}, leaving the state space on the stack.
*/
@Override
public void generateLoadCode(JitCodeGenerator gen, MethodVisitor rv) {
// [...]
rv.visitVarInsn(ALOAD, 0);
// [...,this]
rv.visitFieldInsn(GETFIELD, gen.nameThis, name(),
TDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE);
// [...,jitspace]
}
}

View file

@ -0,0 +1,87 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import static ghidra.pcode.emu.jit.gen.GenConsts.*;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitDataFlowUseropLibrary;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.exec.PcodeUseropLibrary.PcodeUseropDefinition;
/**
 * A request for an instance field holding a pre-fetched userop definition
 *
 * <p>
 * Such fields are needed to invoke userops using the Standard or Direct strategies.
 *
 * @param userop the definition to pre-fetch
 * @see JitDataFlowUseropLibrary
 */
public record FieldForUserop(PcodeUseropDefinition<?> userop) implements InstanceFieldReq {
	@Override
	public String name() {
		return "userop_%s".formatted(userop.getName());
	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * Taking the userop {@code syscall()} as an example, the emitted declaration corresponds to:
	 *
	 * <pre>
	 * private final {@link PcodeUseropDefinition} userop_syscall;
	 * </pre>
	 *
	 * <p>
	 * and the emitted constructor code corresponds to:
	 *
	 * <pre>
	 * userop_syscall = {@link JitCompiledPassage#getUseropDefinition(String) getUseropDefinition}("syscall");
	 * </pre>
	 */
	@Override
	public void generateInitCode(JitCodeGenerator gen, ClassVisitor cv, MethodVisitor iv) {
		final String fieldName = name();

		// Declaration goes into the class definition
		cv.visitField(ACC_PRIVATE | ACC_FINAL, fieldName, TDESC_PCODE_USEROP_DEFINITION, null,
			null);

		// Initialization goes into the constructor
		// stack: []
		iv.visitVarInsn(ALOAD, 0);
		// stack: [this]
		iv.visitInsn(DUP);
		// stack: [this,this] -- one ref is the PUTFIELD target, the other the call receiver
		iv.visitLdcInsn(userop.getName());
		// stack: [this,this,name]
		iv.visitMethodInsn(INVOKEINTERFACE, NAME_JIT_COMPILED_PASSAGE, "getUseropDefinition",
			MDESC_JIT_COMPILED_PASSAGE__GET_USEROP_DEFINITION, true);
		// stack: [this,userop]
		iv.visitFieldInsn(PUTFIELD, gen.nameThis, fieldName, TDESC_PCODE_USEROP_DEFINITION);
		// stack: []
	}

	@Override
	public void generateLoadCode(JitCodeGenerator gen, MethodVisitor rv) {
		final String fieldName = name();
		// stack: []
		rv.visitVarInsn(ALOAD, 0);
		// stack: [this]
		rv.visitFieldInsn(GETFIELD, gen.nameThis, fieldName, TDESC_PCODE_USEROP_DEFINITION);
		// stack: [userop]
	}
}

View file

@ -0,0 +1,85 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import static ghidra.pcode.emu.jit.gen.GenConsts.*;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitDataFlowUseropLibrary;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.var.VarGen;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressFactory;
import ghidra.program.model.pcode.Varnode;
/**
 * A field request for a pre-constructed varnode
 *
 * <p>
 * These are used to invoke userops using the Standard strategy.
 *
 * @param vn the varnode to pre-construct
 * @see JitDataFlowUseropLibrary
 */
public record FieldForVarnode(Varnode vn) implements StaticFieldReq {
	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * The name has the form {@code VARNODE_<SPACE>_<offset>_<size>}, where the space name is
	 * upper-cased, the offset is rendered as unsigned hexadecimal, and the size is in decimal.
	 */
	@Override
	public String name() {
		Address addr = vn.getAddress();
		/*
		 * Upper-case with the root locale so that the generated identifier does not depend on
		 * the default locale of the host JVM (e.g., the Turkish dotted/dotless i).
		 */
		String spaceName = addr.getAddressSpace().getName().toUpperCase(java.util.Locale.ROOT);
		return "VARNODE_%s_%s_%s".formatted(spaceName,
			Long.toUnsignedString(addr.getOffset(), 16), vn.getSize());
	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * Consider the varnode (ram:00400000,4). The code is equivalent to:
	 *
	 * <pre>
	 * private static final {@link Varnode} VARNODE_RAM_400000_4 = {@link JitCompiledPassage#createVarnode(AddressFactory, String, long, int) createVarnode}(ADDRESS_FACTORY, "ram", 0x400000, 4);
	 * </pre>
	 */
	@Override
	public void generateClinitCode(JitCodeGenerator gen, ClassVisitor cv, MethodVisitor sv) {
		String fieldName = name();
		Address addr = vn.getAddress();

		// Declaration goes into the class definition
		cv.visitField(ACC_PRIVATE | ACC_STATIC | ACC_FINAL, fieldName, TDESC_VARNODE, null, null);

		// Initialization goes into the static initializer
		// stack: []
		sv.visitFieldInsn(GETSTATIC, gen.nameThis, "ADDRESS_FACTORY", TDESC_ADDRESS_FACTORY);
		// stack: [factory]
		sv.visitLdcInsn(addr.getAddressSpace().getName());
		// stack: [factory,spaceName]
		sv.visitLdcInsn(addr.getOffset());
		// stack: [factory,spaceName,offset:LONG]
		sv.visitLdcInsn(vn.getSize());
		// stack: [factory,spaceName,offset:LONG,size:INT]
		sv.visitMethodInsn(INVOKESTATIC, NAME_JIT_COMPILED_PASSAGE, "createVarnode",
			MDESC_JIT_COMPILED_PASSAGE__CREATE_VARNODE, true);
		// stack: [varnode]
		sv.visitFieldInsn(PUTSTATIC, gen.nameThis, fieldName, TDESC_VARNODE);
		// stack: []
	}

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * To clarify, this <em>does not</em> load a varnode's current value onto the JVM stack. That is
	 * done by {@link VarGen}. This loads a ref to the {@link Varnode} instance. Also, it's not
	 * precisely the same instance as given, but a re-construction of it as a plain {@link Varnode},
	 * i.e., just the (space,offset,size) triple.
	 */
	@Override
	public void generateLoadCode(JitCodeGenerator gen, MethodVisitor rv) {
		rv.visitFieldInsn(GETSTATIC, gen.nameThis, name(), TDESC_VARNODE);
	}
}

View file

@ -0,0 +1,41 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
/**
* A field request for a pre-fetched or pre-constructed element
*
* <p>
* A request knows the name of its field and how to emit bytecode that loads that field onto the
* JVM stack. Emitting the field's declaration and initialization is left to the sub-interfaces
* {@link InstanceFieldReq} and {@link StaticFieldReq}.
*/
interface FieldReq {
/**
* Derive a suitable name for the field
*
* <p>
* Implementations use this same name both when declaring the field and when loading it.
*
* @return the name
*/
String name();
/**
* Emit code to load the field onto the JVM stack
*
* @param gen the code generator
* @param rv the visitor often for the {@link JitCompiledPassage#run(int) run} method, but could
* be the static initializer or constructor
*/
void generateLoadCode(JitCodeGenerator gen, MethodVisitor rv);
}

View file

@ -0,0 +1,223 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.reflect.TypeLiteral;
import org.objectweb.asm.Type;
import ghidra.generic.util.datastruct.SemisparseByteArray;
import ghidra.pcode.emu.jit.*;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorStatePiece.JitBytesPcodeExecutorStateSpace;
import ghidra.pcode.emu.jit.JitPassage.AddrCtx;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage.EntryPoint;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage.ExitSlot;
import ghidra.pcode.error.LowlevelError;
import ghidra.pcode.exec.*;
import ghidra.pcode.exec.PcodeUseropLibrary.PcodeUseropDefinition;
import ghidra.program.model.address.*;
import ghidra.program.model.lang.Language;
import ghidra.program.model.lang.RegisterValue;
import ghidra.program.model.pcode.Varnode;
/**
* Various constants (namely class names, type descriptors, method descriptors, etc.) used during
* bytecode generation.
*
* <p>
* The constants follow a prefix convention:
* <ul>
* <li>{@code TDESC_*}: a type descriptor, obtained from {@link Type#getDescriptor(Class)}</li>
* <li>{@code TSIG_*}: a generic type signature, obtained from
* {@link JitJvmTypeUtils#typeToSignature(java.lang.reflect.Type)}</li>
* <li>{@code MDESC_<OWNER>__<METHOD>}: a method descriptor, obtained from
* {@link Type#getMethodDescriptor(Type, Type...)}. {@code $INIT} apparently denotes a constructor.
* Those of the form {@code MDESC_$*} describe a general shape rather than one named method.</li>
* <li>{@code NAME_*}: an internal class name, obtained from {@link Type#getInternalName(Class)}</li>
* </ul>
*/
// NOTE(review): presumably suppresses warnings for the undocumented constants below -- confirm
@SuppressWarnings("javadoc")
public interface GenConsts {
// Re-exported from SemisparseByteArray
public static final int BLOCK_SIZE = SemisparseByteArray.BLOCK_SIZE;
// ---- Type descriptors ----
public static final String TDESC_ADDRESS = Type.getDescriptor(Address.class);
public static final String TDESC_ADDRESS_FACTORY = Type.getDescriptor(AddressFactory.class);
public static final String TDESC_ADDRESS_SPACE = Type.getDescriptor(AddressSpace.class);
public static final String TDESC_BYTE_ARR = Type.getDescriptor(byte[].class);
public static final String TDESC_EXIT_SLOT = Type.getDescriptor(ExitSlot.class);
public static final String TDESC_JIT_BYTES_PCODE_EXECUTOR_STATE =
Type.getDescriptor(JitBytesPcodeExecutorState.class);
public static final String TDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE =
Type.getDescriptor(JitBytesPcodeExecutorStateSpace.class);
public static final String TDESC_JIT_PCODE_THREAD = Type.getDescriptor(JitPcodeThread.class);
public static final String TDESC_LANGUAGE = Type.getDescriptor(Language.class);
public static final String TDESC_LIST = Type.getDescriptor(List.class);
public static final String TDESC_PCODE_USEROP_DEFINITION =
Type.getDescriptor(PcodeUseropDefinition.class);
public static final String TDESC_REGISTER_VALUE = Type.getDescriptor(RegisterValue.class);
public static final String TDESC_STRING = Type.getDescriptor(String.class);
public static final String TDESC_VARNODE = Type.getDescriptor(Varnode.class);
// ---- Generic type signatures ----
public static final String TSIG_LIST_ADDRCTX =
JitJvmTypeUtils.typeToSignature(new TypeLiteral<List<AddrCtx>>() {}.value);
// ---- Method descriptors ----
public static final String MDESC_ADDR_CTX__$INIT = Type.getMethodDescriptor(Type.VOID_TYPE,
Type.getType(RegisterValue.class), Type.getType(Address.class));
public static final String MDESC_ADDRESS_FACTORY__GET_ADDRESS_SPACE =
Type.getMethodDescriptor(Type.getType(AddressSpace.class), Type.INT_TYPE);
public static final String MDESC_ADDRESS_SPACE__GET_ADDRESS =
Type.getMethodDescriptor(Type.getType(Address.class), Type.LONG_TYPE);
public static final String MDESC_ARRAY_LIST__$INIT = Type.getMethodDescriptor(Type.VOID_TYPE);
// NOTE: The void (String) form is private....
public static final String MDESC_ASSERTION_ERROR__$INIT =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.getType(Object.class));
public static final String MDESC_DOUBLE__DOUBLE_TO_RAW_LONG_BITS =
Type.getMethodDescriptor(Type.LONG_TYPE, Type.DOUBLE_TYPE);
public static final String MDESC_DOUBLE__IS_NAN =
Type.getMethodDescriptor(Type.BOOLEAN_TYPE, Type.DOUBLE_TYPE);
public static final String MDESC_DOUBLE__LONG_BITS_TO_DOUBLE =
Type.getMethodDescriptor(Type.DOUBLE_TYPE, Type.LONG_TYPE);
public static final String MDESC_FLOAT__FLOAT_TO_RAW_INT_BITS =
Type.getMethodDescriptor(Type.INT_TYPE, Type.FLOAT_TYPE);
public static final String MDESC_FLOAT__INT_BITS_TO_FLOAT =
Type.getMethodDescriptor(Type.FLOAT_TYPE, Type.INT_TYPE);
public static final String MDESC_FLOAT__IS_NAN =
Type.getMethodDescriptor(Type.BOOLEAN_TYPE, Type.FLOAT_TYPE);
public static final String MDESC_ILLEGAL_ARGUMENT_EXCEPTION__$INIT =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.getType(String.class));
public static final String MDESC_INTEGER__BIT_COUNT =
Type.getMethodDescriptor(Type.INT_TYPE, Type.INT_TYPE);
public static final String MDESC_INTEGER__COMPARE_UNSIGNED =
Type.getMethodDescriptor(Type.INT_TYPE, Type.INT_TYPE, Type.INT_TYPE);
public static final String MDESC_INTEGER__NUMBER_OF_LEADING_ZEROS =
Type.getMethodDescriptor(Type.INT_TYPE, Type.INT_TYPE);
public static final String MDESC_INTEGER__TO_UNSIGNED_LONG =
Type.getMethodDescriptor(Type.LONG_TYPE, Type.INT_TYPE);
public static final String MDESC_JIT_BYTES_PCODE_EXECUTOR_STATE__GET_LANGUAGE =
Type.getMethodDescriptor(Type.getType(Language.class));
public static final String MDESC_JIT_BYTES_PCODE_EXECUTOR_STATE__GET_SPACE_FOR =
Type.getMethodDescriptor(Type.getType(JitBytesPcodeExecutorStateSpace.class),
Type.getType(AddressSpace.class));
public static final String MDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE__GET_DIRECT =
Type.getMethodDescriptor(Type.getType(byte[].class), Type.LONG_TYPE);
public static final String MDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE__READ =
Type.getMethodDescriptor(Type.getType(byte[].class), Type.LONG_TYPE, Type.INT_TYPE);
public static final String MDESC_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE__WRITE =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.LONG_TYPE, Type.getType(byte[].class),
Type.INT_TYPE, Type.INT_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__CONV_OFFSET2_TO_LONG =
Type.getMethodDescriptor(Type.LONG_TYPE, Type.INT_TYPE, Type.INT_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__COUNT =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.INT_TYPE, Type.INT_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__CREATE_CONTEXT =
Type.getMethodDescriptor(Type.getType(RegisterValue.class), Type.getType(Language.class),
Type.getType(String.class));
public static final String MDESC_JIT_COMPILED_PASSAGE__CREATE_DECODE_ERROR =
Type.getMethodDescriptor(Type.getType(DecodePcodeExecutionException.class),
Type.getType(String.class), Type.LONG_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__CREATE_EXIT_SLOT =
Type.getMethodDescriptor(Type.getType(ExitSlot.class), Type.LONG_TYPE,
Type.getType(RegisterValue.class));
public static final String MDESC_JIT_COMPILED_PASSAGE__CREATE_VARNODE =
Type.getMethodDescriptor(Type.getType(Varnode.class), Type.getType(AddressFactory.class),
Type.getType(String.class), Type.LONG_TYPE, Type.INT_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__GET_CHAINED =
Type.getMethodDescriptor(Type.getType(EntryPoint.class), Type.getType(ExitSlot.class));
public static final String MDESC_JIT_COMPILED_PASSAGE__GET_LANGUAGE =
Type.getMethodDescriptor(Type.getType(Language.class), Type.getType(String.class));
public static final String MDESC_JIT_COMPILED_PASSAGE__GET_USEROP_DEFINITION =
Type.getMethodDescriptor(Type.getType(PcodeUseropDefinition.class),
Type.getType(String.class));
public static final String MDESC_JIT_COMPILED_PASSAGE__INVOKE_USEROP =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.getType(PcodeUseropDefinition.class),
Type.getType(Varnode.class), Type.getType(Varnode[].class));
public static final String MDESC_JIT_COMPILED_PASSAGE__READ_INTX =
Type.getMethodDescriptor(Type.INT_TYPE, Type.getType(byte[].class), Type.INT_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__READ_LONGX =
Type.getMethodDescriptor(Type.LONG_TYPE, Type.getType(byte[].class), Type.INT_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__RETIRE_COUNTER_AND_CONTEXT =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.LONG_TYPE, Type.getType(RegisterValue.class));
public static final String MDESC_JIT_COMPILED_PASSAGE__S_CARRY_INT_RAW =
Type.getMethodDescriptor(Type.INT_TYPE, Type.INT_TYPE, Type.INT_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__S_CARRY_LONG_RAW =
Type.getMethodDescriptor(Type.LONG_TYPE, Type.LONG_TYPE, Type.LONG_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__WRITE_INTX =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.INT_TYPE, Type.getType(byte[].class),
Type.INT_TYPE);
public static final String MDESC_JIT_COMPILED_PASSAGE__WRITE_LONGX =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.LONG_TYPE, Type.getType(byte[].class),
Type.INT_TYPE);
public static final String MDESC_JIT_PCODE_THREAD__GET_STATE =
Type.getMethodDescriptor(Type.getType(JitThreadBytesPcodeExecutorState.class));
public static final String MDESC_LANGUAGE__GET_ADDRESS_FACTORY =
Type.getMethodDescriptor(Type.getType(AddressFactory.class));
public static final String MDESC_LANGUAGE__GET_DEFAULT_SPACE =
Type.getMethodDescriptor(Type.getType(AddressSpace.class));
public static final String MDESC_LIST__ADD =
Type.getMethodDescriptor(Type.BOOLEAN_TYPE, Type.getType(Object.class));
public static final String MDESC_LONG__BIT_COUNT =
Type.getMethodDescriptor(Type.INT_TYPE, Type.LONG_TYPE);
public static final String MDESC_LONG__COMPARE_UNSIGNED =
Type.getMethodDescriptor(Type.INT_TYPE, Type.LONG_TYPE, Type.LONG_TYPE);
public static final String MDESC_LONG__NUMBER_OF_LEADING_ZEROS =
Type.getMethodDescriptor(Type.INT_TYPE, Type.LONG_TYPE);
public static final String MDESC_LOW_LEVEL_ERROR__$INIT =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.getType(String.class));
public static final String MDESC_PCODE_USEROP_DEFINITION__GET_DEFINING_LIBRARY =
Type.getMethodDescriptor(Type.getType(PcodeUseropLibrary.class));
public static final String MDESC_SLEIGH_LINK_EXCEPTION__$INIT =
Type.getMethodDescriptor(Type.VOID_TYPE, Type.getType(String.class));
// ---- Method descriptor shapes not tied to one owner/method ----
public static final String MDESC_$DOUBLE_UNOP =
Type.getMethodDescriptor(Type.DOUBLE_TYPE, Type.DOUBLE_TYPE);
public static final String MDESC_$FLOAT_UNOP =
Type.getMethodDescriptor(Type.FLOAT_TYPE, Type.FLOAT_TYPE);
public static final String MDESC_$INT_BINOP =
Type.getMethodDescriptor(Type.INT_TYPE, Type.INT_TYPE, Type.INT_TYPE);
public static final String MDESC_$LONG_BINOP =
Type.getMethodDescriptor(Type.LONG_TYPE, Type.LONG_TYPE, Type.LONG_TYPE);
// In the SHIFT_xy names, J is a long and I is an int: x is the value (and result), y the amount
public static final String MDESC_$SHIFT_JJ =
Type.getMethodDescriptor(Type.LONG_TYPE, Type.LONG_TYPE, Type.LONG_TYPE);
public static final String MDESC_$SHIFT_JI =
Type.getMethodDescriptor(Type.LONG_TYPE, Type.LONG_TYPE, Type.INT_TYPE);
public static final String MDESC_$SHIFT_IJ =
Type.getMethodDescriptor(Type.INT_TYPE, Type.INT_TYPE, Type.LONG_TYPE);
public static final String MDESC_$SHIFT_II =
Type.getMethodDescriptor(Type.INT_TYPE, Type.INT_TYPE, Type.INT_TYPE);
// ---- Internal class names ----
public static final String NAME_ADDR_CTX = Type.getInternalName(AddrCtx.class);
public static final String NAME_ADDRESS = Type.getInternalName(Address.class);
public static final String NAME_ADDRESS_FACTORY = Type.getInternalName(AddressFactory.class);
public static final String NAME_ADDRESS_SPACE = Type.getInternalName(AddressSpace.class);
public static final String NAME_ARRAY_LIST = Type.getInternalName(ArrayList.class);
public static final String NAME_ASSERTION_ERROR = Type.getInternalName(AssertionError.class);
public static final String NAME_DOUBLE = Type.getInternalName(Double.class);
public static final String NAME_EXIT_SLOT = Type.getInternalName(ExitSlot.class);
public static final String NAME_FLOAT = Type.getInternalName(Float.class);
public static final String NAME_ILLEGAL_ARGUMENT_EXCEPTION =
Type.getInternalName(IllegalArgumentException.class);
public static final String NAME_INTEGER = Type.getInternalName(Integer.class);
public static final String NAME_JIT_BYTES_PCODE_EXECUTOR_STATE =
Type.getInternalName(JitBytesPcodeExecutorState.class);
public static final String NAME_JIT_BYTES_PCODE_EXECUTOR_STATE_SPACE =
Type.getInternalName(JitBytesPcodeExecutorStateSpace.class);
public static final String NAME_JIT_COMPILED_PASSAGE =
Type.getInternalName(JitCompiledPassage.class);
public static final String NAME_JIT_PCODE_THREAD = Type.getInternalName(JitPcodeThread.class);
public static final String NAME_LANGUAGE = Type.getInternalName(Language.class);
public static final String NAME_LIST = Type.getInternalName(List.class);
public static final String NAME_LONG = Type.getInternalName(Long.class);
public static final String NAME_LOW_LEVEL_ERROR = Type.getInternalName(LowlevelError.class);
public static final String NAME_MATH = Type.getInternalName(Math.class);
public static final String NAME_OBJECT = Type.getInternalName(Object.class);
public static final String NAME_PCODE_USEROP_DEFINITION =
Type.getInternalName(PcodeUseropDefinition.class);
public static final String NAME_SLEIGH_LINK_EXCEPTION =
Type.getInternalName(SleighLinkException.class);
public static final String NAME_THROWABLE = Type.getInternalName(Throwable.class);
public static final String NAME_VARNODE = Type.getInternalName(Varnode.class);
}

View file

@ -0,0 +1,37 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.MethodVisitor;
/**
* An instance field request initialized in the class constructor
*
* <p>
* In addition to the name and load code required by {@link FieldReq}, an implementation emits the
* field's declaration and the constructor code that assigns it.
*
* @see FieldForSpaceIndirect
* @see FieldForUserop
*/
interface InstanceFieldReq extends FieldReq {
/**
* Emit the field declaration and its initialization bytecode
*
* <p>
* The declaration is emitted into the class definition, and the initialization code is emitted
* into the class constructor.
*
* @param gen the code generator
* @param cv the visitor for the class definition
* @param iv the visitor for the class constructor
*/
void generateInitCode(JitCodeGenerator gen, ClassVisitor cv, MethodVisitor iv);
}

View file

@ -0,0 +1,37 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.MethodVisitor;
/**
* A static field request initialized in the class initializer
*
* <p>
* In addition to the name and load code required by {@link FieldReq}, an implementation emits the
* field's declaration and the static-initializer code that assigns it.
*
* @see FieldForVarnode
*/
interface StaticFieldReq extends FieldReq {
/**
* Emit the field declaration and its initialization bytecode
*
* <p>
* The declaration is emitted into the class definition, and the initialization code is
* emitted into the class initializer.
*
* @param gen the code generator
* @param cv the visitor for the class definition
* @param sv the visitor for the class (static) initializer
*/
void generateClinitCode(JitCodeGenerator gen, ClassVisitor cv, MethodVisitor sv);
}

View file

@ -0,0 +1,90 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitBinOp;
/**
 * An extension offering conveniences and shared implementations for binary p-code operators
 *
 * @param <T> the class of p-code op node in the use-def graph
 */
public interface BinOpGen<T extends JitBinOp> extends OpGen<T> {

	/**
	 * Hook for emitting code between the reads of the left and right operands
	 *
	 * <p>
	 * It is called right after the code pushing the left operand has been emitted, and before any
	 * code pushing the right operand. An implementation may use it to manipulate the left operand
	 * as needed to set up the operation. By default, nothing is emitted.
	 *
	 * @param gen the code generator
	 * @param op the operator
	 * @param lType the actual type of the left operand
	 * @param rType the actual type of the right operand
	 * @param rv the method visitor
	 * @return the new actual type of the left operand
	 */
	default JitType afterLeft(JitCodeGenerator gen, T op, JitType lType, JitType rType,
			MethodVisitor rv) {
		// No manipulation by default, so the left operand keeps its type
		return lType;
	}

	/**
	 * Emit the code for the operator itself
	 *
	 * <p>
	 * Both operands are on the stack when this is called. Once it returns, the caller emits the
	 * code that moves the result from the stack into the destination operand.
	 *
	 * @param gen the code generator
	 * @param op the operator
	 * @param block the block containing the operator
	 * @param lType the actual type of the left operand
	 * @param rType the actual type of the right operand
	 * @param rv the method visitor
	 * @return the actual type of the result
	 */
	JitType generateBinOpRunCode(JitCodeGenerator gen, T op, JitBlock block, JitType lType,
			JitType rType, MethodVisitor rv);

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * The default sequence is: load the left operand, call the
	 * {@link #afterLeft(JitCodeGenerator, JitBinOp, JitType, JitType, MethodVisitor) after-left}
	 * hook, load the right operand, call
	 * {@link #generateBinOpRunCode(JitCodeGenerator, JitBinOp, JitBlock, JitType, JitType, MethodVisitor)
	 * generate-binop}, and lastly write the destination operand.
	 */
	@Override
	default void generateRunCode(JitCodeGenerator gen, T op, JitBlock block, MethodVisitor rv) {
		JitType leftType = gen.generateValReadCode(op.l(), op.lType());

		// The hook needs the right operand's type before that operand has been read
		JitType expectedRight = op.rType().resolve(gen.getTypeModel().typeOf(op.r()));
		leftType = afterLeft(gen, op, leftType, expectedRight, rv);

		JitType actualRight = gen.generateValReadCode(op.r(), op.rType());
		assert actualRight == expectedRight;

		JitType resultType = generateBinOpRunCode(gen, op, block, leftType, expectedRight, rv);
		gen.generateVarWriteCode(op.out(), resultType);
	}
}

View file

@ -0,0 +1,125 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.lifecycle.Unfinished.TODO;
import static org.objectweb.asm.Opcodes.ILOAD;
import static org.objectweb.asm.Opcodes.ISTORE;
import org.objectweb.asm.*;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.type.TypeConversions;
import ghidra.pcode.emu.jit.op.JitBinOp;
/**
* An extension for bitwise binary operators
*
* @param <T> the class of p-code op node in the use-def graph
*/
public interface BitwiseBinOpGen<T extends JitBinOp> extends BinOpGen<T> {
/**
* The JVM opcode to implement this operator with int operands on the stack.
*
* @return the opcode
*/
int intOpcode();
/**
* The JVM opcode to implement this operator with long operands on the stack.
*
* @return the opcode
*/
int longOpcode();
/**
* <b>WIP</b>: The implementation for multi-precision ints.
*
* @param gen the code generator
* @param type the type of each operand, including the reuslt
* @param mv the visitor for the {@link JitCompiledPassage#run(int) run} method
*/
default void generateMpIntBinOp(JitCodeGenerator gen, MpIntJitType type,
MethodVisitor mv) {
/**
* We need temp locals to get things in order. Read in right operand, do the op as we pop
* each left op. Then push it all back.
*
* No masking of the result is required, since both operands should already be masked, and
* the bitwise op cannot generate bits of more significance.
*/
// [lleg1,...,llegN,rleg1,rlegN] (N is least-significant leg)
int legCount = type.legsAlloc();
int firstIndex = gen.getAllocationModel().nextFreeLocal();
Label start = new Label();
Label end = new Label();
mv.visitLabel(start);
for (int i = 0; i < legCount; i++) {
mv.visitLocalVariable("result" + i, Type.getDescriptor(int.class), null, start, end,
firstIndex + i);
mv.visitVarInsn(ISTORE, firstIndex + i);
// NOTE: More significant legs have higher indices (reverse of stack)
}
for (int i = 0; i < legCount; i++) {
// [lleg1,...,llegN:INT]
mv.visitVarInsn(ILOAD, firstIndex + i);
// [lleg1,...,llegN:INT,rlegN:INT]
mv.visitInsn(intOpcode());
// [lleg1,...,olegN:INT]
mv.visitVarInsn(ISTORE, firstIndex + i);
// [lleg1,...]
}
// Push it all back, in reverse order
for (int i = 0; i < legCount; i++) {
mv.visitVarInsn(ILOAD, firstIndex + legCount - i - 1);
}
mv.visitLabel(end);
}
@Override
default JitType afterLeft(JitCodeGenerator gen, T op, JitType lType, JitType rType,
MethodVisitor rv) {
return TypeConversions.forceUniformZExt(lType, rType, rv);
}
/**
* {@inheritDoc}
*
* <p>
* This implementation reduces the need to just the JVM opcode. We simply ensure both operands
* have the same size and JVM type, select and emit the correct opcode, and return the type of
* the result.
*/
@Override
default JitType generateBinOpRunCode(JitCodeGenerator gen, T op, JitBlock block, JitType lType,
JitType rType, MethodVisitor rv) {
rType = TypeConversions.forceUniformZExt(rType, lType, rv);
switch (rType) {
case IntJitType t -> rv.visitInsn(intOpcode());
case LongJitType t -> rv.visitInsn(longOpcode());
case MpIntJitType t when t.size() == lType.size() -> generateMpIntBinOp(gen, t, rv);
case MpIntJitType t -> TODO("MpInt of differing sizes");
default -> throw new AssertionError();
}
return lType;
}
}

View file

@ -0,0 +1,44 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.IAND;
import static org.objectweb.asm.Opcodes.LAND;
import ghidra.pcode.emu.jit.op.JitBoolAndOp;
import ghidra.pcode.opbehavior.OpBehaviorBoolAnd;
/**
* The generator for a {@link JitBoolAndOp bool_and}.
*
* @implNote It is the responsibility of the slaspec author to ensure boolean values are 0 or 1.
* This allows us to use bitwise logic instead of having to check for any non-zero value,
* just like {@link OpBehaviorBoolAnd}. Thus, this is identical to {@link IntAndOpGen}.
*/
public enum BoolAndOpGen implements BitwiseBinOpGen<JitBoolAndOp> {
/** The generator singleton */
GEN;
/**
* {@inheritDoc}
*
* @return always {@code IAND}
*/
@Override
public int intOpcode() {
return IAND;
}
/**
* {@inheritDoc}
*
* @return always {@code LAND}
*/
@Override
public int longOpcode() {
return LAND;
}
}

View file

@ -0,0 +1,59 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.IXOR;
import static org.objectweb.asm.Opcodes.LXOR;
import org.objectweb.asm.MethodVisitor;
import ghidra.lifecycle.Unfinished;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitBoolNegateOp;
import ghidra.pcode.opbehavior.OpBehaviorBoolNegate;
/**
 * Code generator for the {@link JitBoolNegateOp bool_negate} p-code op.
 *
 * <p>
 * The operand is expected to already be on the JVM stack. Negation is emitted as an exclusive-or
 * of that operand with the constant 1, using the int or long form according to the operand's type.
 *
 * @implNote Keeping boolean values confined to 0 or 1 is the slaspec author's responsibility.
 *           Given that, plain bitwise logic is enough and no "any non-zero value" test is needed,
 *           which is the same approach taken by {@link OpBehaviorBoolNegate}.
 */
public enum BoolNegateOpGen implements UnOpGen<JitBoolNegateOp> {
    /** The one and only instance of this generator */
    GEN;

    /**
     * Emit the exclusive-or with 1 that flips the boolean operand.
     *
     * @param gen the code generator (not used here)
     * @param op the p-code op (not used here)
     * @param block the block containing the op (not used here)
     * @param uType the JVM type of the operand on the stack
     * @param rv the method visitor receiving the bytecode
     * @return the same type as the operand, since the result replaces it on the stack
     */
    @Override
    public JitType generateUnOpRunCode(JitCodeGenerator gen, JitBoolNegateOp op, JitBlock block,
            JitType uType, MethodVisitor rv) {
        switch (uType) {
            case IntJitType intType -> {
                // [...,bool:INT] -> [...,bool^1:INT]
                rv.visitLdcInsn(1);
                rv.visitInsn(IXOR);
            }
            case LongJitType longType -> {
                // [...,bool:LONG] -> [...,bool^1:LONG]
                rv.visitLdcInsn(1L);
                rv.visitInsn(LXOR);
            }
            case MpIntJitType mpType -> Unfinished.TODO("MpInt");
            default -> throw new AssertionError();
        }
        return uType;
    }
}

View file

@ -0,0 +1,44 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.IOR;
import static org.objectweb.asm.Opcodes.LOR;
import ghidra.pcode.emu.jit.op.JitBoolOrOp;
import ghidra.pcode.opbehavior.OpBehaviorBoolOr;
/**
 * Code generator for the {@link JitBoolOrOp bool_or} p-code op.
 *
 * @implNote Keeping boolean values confined to 0 or 1 is the slaspec author's responsibility.
 *           Given that, plain bitwise logic is enough and no "any non-zero value" test is needed,
 *           which is the same approach taken by {@link OpBehaviorBoolOr}. As a consequence, this
 *           generator is identical to {@link IntOrOpGen}.
 */
public enum BoolOrOpGen implements BitwiseBinOpGen<JitBoolOrOp> {
    /** The one and only instance of this generator */
    GEN;

    /**
     * Supply the {@code long}-operand form of bitwise OR.
     *
     * @return {@code LOR}
     */
    @Override
    public int longOpcode() {
        return LOR;
    }

    /**
     * Supply the {@code int}-operand form of bitwise OR.
     *
     * @return {@code IOR}
     */
    @Override
    public int intOpcode() {
        return IOR;
    }
}

View file

@ -0,0 +1,44 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.IXOR;
import static org.objectweb.asm.Opcodes.LXOR;
import ghidra.pcode.emu.jit.op.JitBoolXorOp;
import ghidra.pcode.opbehavior.OpBehaviorBoolXor;
/**
 * Code generator for the {@link JitBoolXorOp bool_xor} p-code op.
 *
 * @implNote Keeping boolean values confined to 0 or 1 is the slaspec author's responsibility.
 *           Given that, plain bitwise logic is enough and no "any non-zero value" test is needed,
 *           which is the same approach taken by {@link OpBehaviorBoolXor}. As a consequence, this
 *           generator is identical to {@link IntXorOpGen}.
 */
public enum BoolXorOpGen implements BitwiseBinOpGen<JitBoolXorOp> {
    /** The one and only instance of this generator */
    GEN;

    /**
     * Supply the {@code long}-operand form of bitwise XOR.
     *
     * @return {@code LXOR}
     */
    @Override
    public int longOpcode() {
        return LXOR;
    }

    /**
     * Supply the {@code int}-operand form of bitwise XOR.
     *
     * @return {@code IXOR}
     */
    @Override
    public int intOpcode() {
        return IXOR;
    }
}

View file

@ -0,0 +1,57 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.ACONST_NULL;
import static org.objectweb.asm.Opcodes.ARETURN;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.JitPcodeThread;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.LongJitType;
import ghidra.pcode.emu.jit.gen.*;
import ghidra.pcode.emu.jit.gen.type.TypeConversions;
import ghidra.pcode.emu.jit.op.JitBranchIndOp;
/**
 * Code generator for the {@link JitBranchIndOp branchind} p-code op.
 *
 * <p>
 * The emitted code reads the branch target from the operand, widens it to a JVM long, and retires
 * it to the program counter together with the current flow context and the live variables.
 * Afterward, it returns {@code null}, which tells the {@link JitPcodeThread thread} to loop back
 * to its <b>Fetch</b> step for the new counter.
 */
public enum BranchIndOpGen implements OpGen<JitBranchIndOp> {
    /** The one and only instance of this generator */
    GEN;

    /**
     * Emit the passage exit for an indirect branch, followed by a {@code null} return.
     *
     * @param gen the code generator
     * @param op the indirect branch op
     * @param block the block containing the op
     * @param rv the method visitor receiving the bytecode
     */
    @Override
    public void generateRunCode(JitCodeGenerator gen, JitBranchIndOp op, JitBlock block,
            MethodVisitor rv) {
        gen.generatePassageExit(block, () -> {
            // Stack on entry: [...]
            JitType readType = gen.generateValReadCode(op.target(), op.targetType());
            // Stack now: [...,target:?]
            TypeConversions.generateToLong(readType, LongJitType.I8, rv);
            // Stack now: [...,target:LONG]
        }, op.branch().flowCtx(), rv);

        // No chained entry is known for a computed target, so hand back null.
        rv.visitInsn(ACONST_NULL);
        rv.visitInsn(ARETURN);
    }
}

View file

@ -0,0 +1,97 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.pcode.emu.jit.gen.GenConsts.MDESC_JIT_COMPILED_PASSAGE__GET_CHAINED;
import static ghidra.pcode.emu.jit.gen.GenConsts.NAME_JIT_COMPILED_PASSAGE;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.Label;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.JitPassage.*;
import ghidra.pcode.emu.jit.JitPcodeThread;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.gen.*;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.var.VarGen;
import ghidra.pcode.emu.jit.op.JitBranchOp;
/**
 * Code generator for the {@link JitBranchOp branch} p-code op.
 *
 * <p>
 * What gets emitted depends on the kind of branch record attached to the op:
 * <ul>
 * <li>{@link IntBranch}: the label of the target block is looked up, then a block transition is
 * emitted, followed by a {@link #GOTO goto} to that label.</li>
 * <li>{@link ExtBranch}: code is emitted to retire the target to the program counter, along with
 * the target context and live variables. Then code is emitted to request the chained entry point
 * from the target's exit slot and return it, so that the {@link JitPcodeThread thread} can
 * immediately execute the chained passage entry.</li>
 * </ul>
 */
public enum BranchOpGen implements OpGen<JitBranchOp> {
    /** The one and only instance of this generator */
    GEN;

    /**
     * Emit code that exits the passage via a direct branch
     *
     * <p>
     * This covers the {@link ExtBranch} record case. It is package-visible so that other
     * generators in this package can emit the same exit sequence.
     *
     * @param gen the code generator
     * @param exit the target causing us to exit
     * @param block the block containing the op
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     */
    static void generateExtBranchCode(JitCodeGenerator gen, AddrCtx exit, JitBlock block,
            MethodVisitor rv) {
        // The exit slot field is requested before any exit code is emitted.
        FieldForExitSlot exitSlot = gen.requestFieldForExitSlot(exit);

        gen.generatePassageExit(block, () -> {
            // Stack on entry: [...]
            rv.visitLdcInsn(exit.address.getOffset());
            // Stack now: [...,target:LONG]
        }, exit.rvCtx, rv);
        // Stack now: []

        exitSlot.generateLoadCode(gen, rv);
        // Stack now: [slot]
        rv.visitMethodInsn(INVOKESTATIC, NAME_JIT_COMPILED_PASSAGE, "getChained",
            MDESC_JIT_COMPILED_PASSAGE__GET_CHAINED, true);
        // Stack now: [chained:ENTRY]
        rv.visitInsn(ARETURN);
    }

    /**
     * Emit the block transition and jump for the {@link IntBranch} record case
     *
     * @param gen the code generator
     * @param branch the branch record
     * @param block the block containing the op
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     */
    private static void generateIntBranchCode(JitCodeGenerator gen, IntBranch branch,
            JitBlock block, MethodVisitor rv) {
        JitBlock destination = block.getTargetBlock(branch);
        Label destinationLabel = gen.labelForBlock(destination);
        VarGen.computeBlockTransition(gen, block, destination).generate(rv);
        rv.visitJumpInsn(GOTO, destinationLabel);
    }

    /**
     * Dispatch on the branch record's kind and emit the corresponding code.
     *
     * @param gen the code generator
     * @param op the branch op
     * @param block the block containing the op
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     */
    @Override
    public void generateRunCode(JitCodeGenerator gen, JitBranchOp op, JitBlock block,
            MethodVisitor rv) {
        switch (op.branch()) {
            case IntBranch internal -> generateIntBranchCode(gen, internal, block, rv);
            case ExtBranch external -> generateExtBranchCode(gen, external.to(), block, rv);
            default -> throw new AssertionError("Branch type confusion");
        }
    }
}

View file

@ -0,0 +1,85 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.Label;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.JitPassage.ExtBranch;
import ghidra.pcode.emu.jit.JitPassage.IntBranch;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.gen.*;
import ghidra.pcode.emu.jit.gen.type.TypeConversions;
import ghidra.pcode.emu.jit.gen.var.VarGen;
import ghidra.pcode.emu.jit.gen.var.VarGen.BlockTransition;
import ghidra.pcode.emu.jit.op.JitCBranchOp;
/**
 * Code generator for the {@link JitCBranchOp cbranch} p-code op.
 *
 * <p>
 * In every case, code to load the condition onto the JVM stack is emitted first.
 *
 * <p>
 * For an {@link IntBranch} record, the label of the target block is looked up and we determine
 * whether a block transition is needed. When it is, an {@link #IFEQ ifeq} is emitted that skips
 * to the fall-through case; it guards the transition and the {@link #GOTO goto} that follows it.
 * When no transition is needed, a lone {@link #IFNE ifne} to the target label is emitted.
 *
 * <p>
 * For an {@link ExtBranch} record, the same code as {@link BranchOpGen} is emitted, guarded by an
 * {@link #IFEQ ifeq} that skips to the fall-through case.
 */
public enum CBranchOpGen implements OpGen<JitCBranchOp> {
    /** The one and only instance of this generator */
    GEN;

    /**
     * Emit the conditional jump for the {@link IntBranch} record case
     *
     * <p>
     * The condition must already be on the stack.
     *
     * @param gen the code generator
     * @param branch the branch record
     * @param block the block containing the op
     * @param rv the method visitor receiving the bytecode
     */
    private static void generateIntCBranchCode(JitCodeGenerator gen, IntBranch branch,
            JitBlock block, MethodVisitor rv) {
        JitBlock destination = block.getTargetBlock(branch);
        Label destinationLabel = gen.labelForBlock(destination);
        BlockTransition transition = VarGen.computeBlockTransition(gen, block, destination);

        if (!transition.needed()) {
            // Nothing to do on the way over, so jump straight to the target when true.
            rv.visitJumpInsn(IFNE, destinationLabel);
            return;
        }

        // Skip the transition and jump when the condition is false.
        Label fallThrough = new Label();
        rv.visitJumpInsn(IFEQ, fallThrough);
        transition.generate(rv);
        rv.visitJumpInsn(GOTO, destinationLabel);
        rv.visitLabel(fallThrough);
    }

    /**
     * Emit the guarded passage exit for the {@link ExtBranch} record case
     *
     * <p>
     * The condition must already be on the stack.
     *
     * @param gen the code generator
     * @param branch the branch record
     * @param block the block containing the op
     * @param rv the method visitor receiving the bytecode
     */
    private static void generateExtCBranchCode(JitCodeGenerator gen, ExtBranch branch,
            JitBlock block, MethodVisitor rv) {
        Label fallThrough = new Label();
        rv.visitJumpInsn(IFEQ, fallThrough);
        BranchOpGen.generateExtBranchCode(gen, branch.to(), block, rv);
        rv.visitLabel(fallThrough);
    }

    /**
     * Emit code to read the condition, convert it to a boolean, and branch on it.
     *
     * @param gen the code generator
     * @param op the conditional branch op
     * @param block the block containing the op
     * @param rv the method visitor receiving the bytecode
     */
    @Override
    public void generateRunCode(JitCodeGenerator gen, JitCBranchOp op, JitBlock block,
            MethodVisitor rv) {
        JitType condType = gen.generateValReadCode(op.cond(), op.condType());
        TypeConversions.generateIntToBool(condType, rv);

        switch (op.branch()) {
            case IntBranch internal -> generateIntCBranchCode(gen, internal, block, rv);
            case ExtBranch external -> generateExtCBranchCode(gen, external, block, rv);
            default -> throw new AssertionError("Branch type confusion");
        }
    }
}

View file

@ -0,0 +1,60 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.pcode.emu.jit.gen.GenConsts.MDESC_SLEIGH_LINK_EXCEPTION__$INIT;
import static ghidra.pcode.emu.jit.gen.GenConsts.NAME_SLEIGH_LINK_EXCEPTION;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitCallOtherMissingOp;
import ghidra.pcode.exec.SleighLinkException;
/**
 * Code generator for the {@link JitCallOtherMissingOp callother-missing} op.
 *
 * <p>
 * The emitted code first retires the program counter, context, and live variables. It then
 * constructs and throws a {@link SleighLinkException} whose message is determined at generation
 * time.
 */
public enum CallOtherMissingOpGen implements OpGen<JitCallOtherMissingOp> {
    /** The one and only instance of this generator */
    GEN;

    /**
     * Emit the passage exit and the throw of the link exception.
     *
     * @param gen the code generator
     * @param op the callother-missing op
     * @param block the block containing the op
     * @param rv the method visitor receiving the bytecode
     */
    @Override
    public void generateRunCode(JitCodeGenerator gen, JitCallOtherMissingOp op, JitBlock block,
            MethodVisitor rv) {
        // Retire to the address of the op itself.
        gen.generatePassageExit(block, () -> {
            rv.visitLdcInsn(gen.getAddressForOp(op.op()).getOffset());
        }, gen.getExitContext(op.op()), rv);

        String errorText = gen.getErrorMessage(op.op());

        // Stack: [...]
        rv.visitTypeInsn(NEW, NAME_SLEIGH_LINK_EXCEPTION);
        // Stack: [...,error:NEW]
        rv.visitInsn(DUP);
        // Stack: [...,error:NEW,error:NEW]
        rv.visitLdcInsn(errorText);
        // Stack: [...,error:NEW,error:NEW,message]
        rv.visitMethodInsn(INVOKESPECIAL, NAME_SLEIGH_LINK_EXCEPTION, "<init>",
            MDESC_SLEIGH_LINK_EXCEPTION__$INIT, false);
        // Stack: [...,error]
        rv.visitInsn(ATHROW);
    }
}

View file

@ -0,0 +1,249 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.pcode.emu.jit.gen.GenConsts.*;
import static org.objectweb.asm.Opcodes.*;
import java.lang.reflect.Method;
import java.lang.reflect.Parameter;
import org.objectweb.asm.*;
import ghidra.pcode.emu.jit.JitBytesPcodeExecutorState;
import ghidra.pcode.emu.jit.JitPassage.DecodedPcodeOp;
import ghidra.pcode.emu.jit.analysis.*;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.gen.*;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.type.TypeConversions;
import ghidra.pcode.emu.jit.gen.var.VarGen;
import ghidra.pcode.emu.jit.gen.var.VarGen.BlockTransition;
import ghidra.pcode.emu.jit.op.JitCallOtherDefOp;
import ghidra.pcode.emu.jit.op.JitCallOtherOpIf;
import ghidra.pcode.emu.jit.var.JitVal;
import ghidra.pcode.exec.PcodeUseropLibrary.PcodeUseropDefinition;
import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.model.pcode.Varnode;
/**
* The generator for a {@link JitCallOtherOpIf callother}.
*
* <p>
* This checks if Direct invocation is possible. If so, it emits code using
* {@link #generateRunCodeUsingDirectStrategy(JitCodeGenerator, JitCallOtherOpIf, JitBlock, MethodVisitor)}.
* If not, it emits code using
* {@link #generateRunCodeUsingRetirementStrategy(JitCodeGenerator, PcodeOp, JitBlock, PcodeUseropDefinition, MethodVisitor)}.
* Direct invocation is possible when the userop is {@link PcodeUseropDefinition#isFunctional()
* functional} and all of its parameters and return type have a supported primitive type.
* ({@code char} is not supported.) Regarding the invocation strategies, see
* {@link JitDataFlowUseropLibrary} and note that the Inline strategy is already handled by this
* point.
*
* <p>
* For the Standard strategy, we emit code to retire the program counter, decode context, and all
* live variables. We then request a field to hold the userop and emit code to load it. We then emit
* code to prepare its arguments and place them on the stack, namely the output varnode and an array
* for the input varnodes. We request a field for each varnode and emit code to load them as needed.
* For the array, we emit code to construct and fill it. We then emit code to invoke
* {@link JitCompiledPassage#invokeUserop(PcodeUseropDefinition, Varnode, Varnode[])}. The userop
* definition handles retrieving all of its inputs and writing the output, directly to the
* {@link JitBytesPcodeExecutorState state}. Thus, we now need only to emit code to re-birth all the
* live variables. If any errors occur, execution is interrupted as usual, and our state is
* consistent.
*
* <p>
* For the Direct strategy, we wish to avoid retirement and re-birth, so we request an
* {@link ExceptionHandler}. We request a field for the userop, just as in the Standard strategy,
* but we emit code to invoke {@link PcodeUseropDefinition#getDefiningLibrary()} instead. We can use
* {@link PcodeUseropDefinition#getJavaMethod()} <em>at generation time</em> to reflect its Java
* definition. We then emit code to cast the library and load each of the operands onto the JVM
* stack. We then emit the invocation of the Java method, guarded by the exception handler. We then
* have to consider whether the userop has an output operand and whether its definition returns a
* value. If both are true, we emit code to write the result. If neither is true, we're done. If a
* result is returned, but no output operand is provided, we <em>must</em> still emit a {@link #POP
* pop}.
*/
public enum CallOtherOpGen implements OpGen<JitCallOtherOpIf> {
/** The generator singleton */
GEN;
/**
* Emit code to implement the Standard strategy (see the class documentation)
*
* <p>
* The emitted sequence is: block transition (retire variables), retire the program counter and
* context, invoke {@code invokeUserop} on the compiled passage, then the inverse of the block
* transition (re-birth variables).
*
* @param gen the code generator
* @param op the p-code op
* @param block the block containing the op
* @param userop the userop definition, wrapped by the {@link JitDataFlowUseropLibrary}
* @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
*/
public static void generateRunCodeUsingRetirementStrategy(JitCodeGenerator gen, PcodeOp op,
JitBlock block, PcodeUseropDefinition<?> userop, MethodVisitor rv) {
/**
* This is about the simplest (laziest) approach we could take for the moment, but it should
* suffice, depending on the frequency of CALLOTHER executions. We immediately retire all
* variables, then invoke the userop as it would be by the p-code interpreter. It can access
* its variables in the usual fashion. Although not ideal, it can also feed the executor
* (interpreter) ops to execute --- they won't be jitted here. Then, we liven the variables
* back.
*
* NOTE: The output variable should be "alive", so we need not store it into a local. It'll
* be made alive in the return block transition.
*
* NOTE: Direct invocation for functional userops is emitted by
* generateRunCodeUsingDirectStrategy instead. Block retirement and re-birth cannot be
* avoided in this strategy, because the parameters are read from the state instead of from
* the local variables.
*/
// Passing null as the target block computes the transition used to retire the variables
BlockTransition transition = VarGen.computeBlockTransition(gen, block, null);
transition.generate(rv);
// Retire the counter to the address of this op, along with its exit context
gen.generateRetirePcCtx(() -> {
rv.visitLdcInsn(gen.getAddressForOp(op).getOffset());
}, gen.getExitContext(op), rv);
// []
rv.visitVarInsn(ALOAD, 0);
// [this]
gen.requestFieldForUserop(userop).generateLoadCode(gen, rv);
// [this,userop]
if (op.getOutput() == null) {
rv.visitInsn(ACONST_NULL);
}
else {
gen.requestStaticFieldForVarnode(op.getOutput()).generateLoadCode(gen, rv);
}
// [this,userop,outVn]
// The array omits input 0 (presumably the userop identifier), hence the size and the
// loop starting at 1
rv.visitLdcInsn(op.getNumInputs() - 1);
rv.visitTypeInsn(ANEWARRAY, NAME_VARNODE);
// [this,userop,outVn,inVns:ARR]
for (int i = 1; i < op.getNumInputs(); i++) {
// [this,userop,outVn,inVns:ARR]
rv.visitInsn(DUP);
// [this,userop,outVn,inVns:ARR,inVns:ARR]
rv.visitLdcInsn(i - 1);
// [this,userop,outVn,inVns:ARR,inVns:ARR,index]
// Yes, including constants :/
Varnode input = op.getInput(i);
gen.requestStaticFieldForVarnode(input).generateLoadCode(gen, rv);
// [this,userop,outVn,inVns:ARR,inVns:ARR,index,inVn]
rv.visitInsn(AASTORE);
// [this,userop,outVn,inVns:ARR]
}
// [this,userop,outVn,inVns:ARR]
rv.visitMethodInsn(INVOKEINTERFACE, NAME_JIT_COMPILED_PASSAGE, "invokeUserop",
MDESC_JIT_COMPILED_PASSAGE__INVOKE_USEROP, true);
// Re-birth the variables that were retired above
transition.generateInv(rv);
}
/**
* Emit code to implement the Direct strategy (see the class documentation)
*
* <p>
* The userop's Java method is reflected at generation time. The emitted code loads the
* defining library, casts it to the method's declaring class, loads and converts each argument
* to the corresponding parameter type, and invokes the method. Only the invocation instruction
* itself lies within the try-catch range.
*
* @param gen the code generator
* @param op the p-code op use-def node
* @param block the block containing the op
* @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
*/
public static void generateRunCodeUsingDirectStrategy(JitCodeGenerator gen,
JitCallOtherOpIf op, JitBlock block, MethodVisitor rv) {
FieldForUserop useropField = gen.requestFieldForUserop(op.userop());
// Set<Varnode> live = gen.vsm.getLiveVars(block);
/**
* NOTE: It doesn't matter if there are live variables. We still have to "retire" the
* program counter and contextreg if the userop throws an exception.
*/
final Label tryStart = new Label();
final Label tryEnd = new Label();
rv.visitTryCatchBlock(tryStart, tryEnd,
gen.requestExceptionHandler((DecodedPcodeOp) op.op(), block).label(), NAME_THROWABLE);
// []
useropField.generateLoadCode(gen, rv);
// [userop]
rv.visitMethodInsn(INVOKEINTERFACE, NAME_PCODE_USEROP_DEFINITION, "getDefiningLibrary",
MDESC_PCODE_USEROP_DEFINITION__GET_DEFINING_LIBRARY, true);
// [library:PcodeUseropLibrary]
Method method = op.userop().getJavaMethod();
String owningLibName = Type.getInternalName(method.getDeclaringClass());
rv.visitTypeInsn(CHECKCAST, owningLibName);
// [library:OWNING_TYPE]
Parameter[] parameters = method.getParameters();
// Arguments are paired with the Java method's parameters by position
for (int i = 0; i < op.args().size(); i++) {
JitVal arg = op.args().get(i);
Parameter p = parameters[i];
JitType type = gen.generateValReadCode(arg, JitTypeBehavior.ANY);
// boolean parameters get the int-to-bool conversion rather than the general one
if (p.getType() == boolean.class) {
TypeConversions.generateIntToBool(type, rv);
}
else {
TypeConversions.generate(gen, type, JitType.forJavaType(p.getType()), rv);
}
}
// [library,params...]
rv.visitLabel(tryStart);
rv.visitMethodInsn(INVOKEVIRTUAL, owningLibName, method.getName(),
Type.getMethodDescriptor(method), false);
// [return?]
rv.visitLabel(tryEnd);
if (op instanceof JitCallOtherDefOp defOp) {
// There is an output operand: write the returned value to it
gen.generateVarWriteCode(defOp.out(), JitType.forJavaType(method.getReturnType()));
}
else if (method.getReturnType() != void.class) {
// No output operand, but a value was returned: it must be popped off the stack
TypeConversions.generatePop(JitType.forJavaType(method.getReturnType()), rv);
}
}
/**
* Check if the Direct invocation strategy is applicable (see class documentation)
*
* <p>
* It is not applicable if the userop is not functional, if any input's type behavior is
* {@link JitTypeBehavior#ANY}, or if the op has an output whose type behavior is
* {@link JitTypeBehavior#ANY}.
*
* @param op the p-code op use-def node
* @return true if applicable
*/
public static boolean canDoDirectInvocation(JitCallOtherOpIf op) {
if (!op.userop().isFunctional()) {
return false;
}
for (JitTypeBehavior type : op.inputTypes()) {
if (type == JitTypeBehavior.ANY) {
return false;
}
}
if (op instanceof JitCallOtherDefOp defOp) {
if (defOp.type() == JitTypeBehavior.ANY) {
return false;
}
}
return true;
}
/**
* Emit code for the callother using the Direct strategy when applicable, or the Standard
* (retirement) strategy otherwise.
*/
@Override
public void generateRunCode(JitCodeGenerator gen, JitCallOtherOpIf op, JitBlock block,
MethodVisitor rv) {
if (canDoDirectInvocation(op)) {
generateRunCodeUsingDirectStrategy(gen, op, block, rv);
}
else {
generateRunCodeUsingRetirementStrategy(gen, op.op(), block, op.userop(), rv);
}
}
}

View file

@ -0,0 +1,43 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitVarScopeModel;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitCatenateOp;
/**
 * The generator for a {@link JitCatenateOp catenate}.
 *
 * <p>
 * A catenate is a synthetic op, so there is no bytecode to emit for it. This generator ought never
 * to be invoked; if it is, it fails immediately with an {@link AssertionError} rather than
 * silently emitting nothing. The argument here is similar to that of {@link PhiOpGen}.
 *
 * @see JitVarScopeModel
 */
public enum CatenateOpGen implements OpGen<JitCatenateOp> {
    /** The generator singleton */
    GEN;

    /**
     * Always fails, because a synthetic op has no run code to generate.
     *
     * @param gen the code generator (not used)
     * @param op the catenate op (not used)
     * @param block the block containing the op (not used)
     * @param rv the method visitor (not used)
     * @throws AssertionError always
     */
    @Override
    public void generateRunCode(JitCodeGenerator gen, JitCatenateOp op, JitBlock block,
            MethodVisitor rv) {
        throw new AssertionError("Cannot generate synthetic op");
    }
}

View file

@ -0,0 +1,104 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.GOTO;
import org.objectweb.asm.Label;
import org.objectweb.asm.MethodVisitor;
import ghidra.lifecycle.Unfinished;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.gen.type.TypeConversions;
import ghidra.pcode.emu.jit.op.JitFloatTestOp;
/**
 * A common base for generators of floating-point comparison operators
 *
 * <p>
 * Implementors supply only three JVM opcodes; the bytecode template itself is provided by
 * {@link #generateBinOpRunCode(JitCodeGenerator, JitFloatTestOp, JitBlock, JitType, JitType, MethodVisitor)}.
 *
 * @param <T> the class of p-code op node in the use-def graph
 */
public interface CompareFloatOpGen<T extends JitFloatTestOp> extends BinOpGen<T> {

    /**
     * The JVM opcode that compares two {@code float} operands on the stack.
     *
     * @return the opcode
     */
    int fcmpOpcode();

    /**
     * The JVM opcode that compares two {@code double} operands on the stack.
     *
     * @return the opcode
     */
    int dcmpOpcode();

    /**
     * The JVM opcode for the conditional jump taken on the comparison's result.
     *
     * <p>
     * The jump's condition should correspond to the true case of the p-code operator.
     *
     * @return the opcode
     */
    int condOpcode();

    /**
     * {@inheritDoc}
     *
     * <p>
     * This implementation needs just three opcodes from the implementor: 1) the comparison for
     * JVM {@code float} operands, 2) the comparison for JVM {@code double} operands, and 3) the
     * conditional jump on the result of that comparison. The comparison opcode is emitted first.
     * It should leave an int &lt;0, ==0, or &gt;0 on the stack, depending on whether L&lt;R,
     * L==R, or L&gt;R, respectively. The conditional jump is emitted next. Labels are laid out in
     * an if-else pattern, so that either a 1 (true) or a 0 (false) of the appropriate p-code type
     * ends up on the stack.
     *
     * @implNote The Java compiler (Adoptium OpenJDK 21) consistently generates this template,
     *           even though branchless implementations are possible. That could mean one of a few
     *           things: 1) the HotSpot JIT knows how to optimize this pattern, perhaps using
     *           branchless native instructions, 2) branchless optimizations don't yield the
     *           speedup we might expect here, or 3) they didn't care to optimize. <b>TODO</b>:
     *           Investigate in case it's thing 3. We might like to see if branchless JVM
     *           bytecodes can improve performance.
     */
    @Override
    default JitType generateBinOpRunCode(JitCodeGenerator gen, T op, JitBlock block, JitType lType,
            JitType rType, MethodVisitor rv) {
        assert rType == lType;
        JitType resultType = op.type().resolve(gen.getTypeModel().typeOf(op.out()));
        final Label whenTrue = new Label();
        final Label join = new Label();

        // Compare the two operands, leaving an int on the stack
        switch (rType) {
            case FloatJitType floatType -> rv.visitInsn(fcmpOpcode());
            case DoubleJitType doubleType -> rv.visitInsn(dcmpOpcode());
            case MpFloatJitType mpType -> Unfinished.TODO("MpFloat");
            default -> throw new AssertionError();
        }

        // if (cond) push true; else push false;
        rv.visitJumpInsn(condOpcode(), whenTrue);
        TypeConversions.generateLdcFalse(resultType, rv);
        rv.visitJumpInsn(GOTO, join);
        rv.visitLabel(whenTrue);
        TypeConversions.generateLdcTrue(resultType, rv);
        rv.visitLabel(join);
        return resultType;
    }
}

View file

@ -0,0 +1,143 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.pcode.emu.jit.gen.GenConsts.*;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.Label;
import org.objectweb.asm.MethodVisitor;
import ghidra.lifecycle.Unfinished;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.gen.tgt.JitCompiledPassage;
import ghidra.pcode.emu.jit.gen.type.TypeConversions;
import ghidra.pcode.emu.jit.op.JitIntTestOp;
/**
 * A common base for generators of integer comparison operators
 *
 * @param <T> the class of p-code op node in the use-def graph
 */
public interface CompareIntBinOpGen<T extends JitIntTestOp> extends BinOpGen<T> {

    /**
     * Whether the comparison of p-code integers is signed
     *
     * <p>
     * For an unsigned comparison, we emit an invocation of
     * {@link Integer#compareUnsigned(int, int)} or {@link Long#compareUnsigned(long, long)},
     * and then a conditional jump corresponding to this p-code comparison op. For a signed
     * comparison whose type fits in a JVM int, we emit the conditional jump of ints that directly
     * implements this p-code comparison op. For a signed comparison requiring a JVM long, we
     * first emit an {@link #LCMP lcmp}, and then the same opcode that would be used in the
     * unsigned case.
     *
     * @return true if signed, false if not
     */
    boolean isSigned();

    /**
     * The JVM opcode to perform the conditional jump for signed integers.
     *
     * @return the opcode
     */
    int icmpOpcode();

    /**
     * The JVM opcode to perform the conditional jump for unsigned or long integers.
     *
     * <p>
     * This is emitted <em>after</em> the application of {@link #LCMP} or the comparator method.
     *
     * @return the opcode
     */
    int ifOpcode();

    /**
     * Emits bytecode for the JVM int case
     *
     * <p>
     * Signed: a single jump using {@link #icmpOpcode()}. Unsigned: an invocation of
     * {@code compareUnsigned} followed by a jump using {@link #ifOpcode()}.
     *
     * @param lblTrue the target bytecode label for the true case
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     */
    default void generateIntJump(Label lblTrue, MethodVisitor rv) {
        if (!isSigned()) {
            rv.visitMethodInsn(INVOKESTATIC, NAME_INTEGER, "compareUnsigned",
                MDESC_INTEGER__COMPARE_UNSIGNED, false);
            rv.visitJumpInsn(ifOpcode(), lblTrue);
            return;
        }
        rv.visitJumpInsn(icmpOpcode(), lblTrue);
    }

    /**
     * Emits bytecode for the JVM long case
     *
     * <p>
     * The two longs are first reduced to an int, by {@link #LCMP lcmp} when signed or by
     * {@code compareUnsigned} when not. A jump using {@link #ifOpcode()} follows in both cases.
     *
     * @param lblTrue the target bytecode label for the true case
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     */
    default void generateLongJump(Label lblTrue, MethodVisitor rv) {
        if (!isSigned()) {
            rv.visitMethodInsn(INVOKESTATIC, NAME_LONG, "compareUnsigned",
                MDESC_LONG__COMPARE_UNSIGNED, false);
        }
        else {
            rv.visitInsn(LCMP);
        }
        rv.visitJumpInsn(ifOpcode(), lblTrue);
    }

    /**
     * Make the left operand's type uniform with the right's, using
     * {@code TypeConversions.forceUniformZExt}.
     *
     * <p>
     * NOTE(review): the zero-extending conversion is applied whether or not {@link #isSigned()}
     * is true, both here and for the right operand. Confirm this is intended for signed
     * comparisons.
     *
     * @param gen the code generator
     * @param op the p-code op
     * @param lType the type of the left operand
     * @param rType the type of the right operand
     * @param rv the visitor for the {@link JitCompiledPassage#run(int) run} method
     * @return the type returned by the conversion
     */
    @Override
    default JitType afterLeft(JitCodeGenerator gen, T op, JitType lType, JitType rType,
            MethodVisitor rv) {
        return TypeConversions.forceUniformZExt(lType, rType, rv);
    }

    /**
     * {@inheritDoc}
     *
     * <p>
     * This reduces an implementation to three things: a flag for signedness, the opcode for the
     * conditional jump on integer operands, and the opcode for a conditional jump after the
     * comparison of longs. The JVM does not provide conditional jumps on long operands, so the
     * longs must first be compared, pushing an int onto the stack, and the jump is then made on
     * that int. The pattern for unsigned comparison of integers is similar.
     */
    @Override
    default JitType generateBinOpRunCode(JitCodeGenerator gen, T op, JitBlock block, JitType lType,
            JitType rType, MethodVisitor rv) {
        final Label whenTrue = new Label();
        final Label join = new Label();

        // Bring the right operand in line with the left before comparing
        JitType uniformType = TypeConversions.forceUniformZExt(rType, lType, rv);
        switch (uniformType) {
            case IntJitType intType -> generateIntJump(whenTrue, rv);
            case LongJitType longType -> generateLongJump(whenTrue, rv);
            case MpIntJitType mpType -> Unfinished.TODO("MpInt");
            default -> throw new AssertionError();
        }

        // Fall-through pushes false; the jump target pushes true
        JitType resultType = op.type().resolve(gen.getTypeModel().typeOf(op.out()));
        TypeConversions.generateLdcFalse(resultType, rv);
        rv.visitJumpInsn(GOTO, join);
        rv.visitLabel(whenTrue);
        TypeConversions.generateLdcTrue(resultType, rv);
        rv.visitLabel(join);
        return resultType;
    }
}

View file

@ -0,0 +1,42 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.op.JitCopyOp;
/**
 * The generator for a {@link JitCopyOp copy}.
 *
 * <p>
 * A copy needs no bytecode of its own. The unary operator template ({@link UnOpGen}) emits the
 * code that loads the input operand and the code that writes the output operand; between the two,
 * this generator emits nothing and reports the operand's type unchanged.
 */
public enum CopyOpGen implements UnOpGen<JitCopyOp> {
	/** The generator singleton */
	GEN;

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * Emits nothing: the value already on the stack is the result.
	 *
	 * @return {@code uType}, unchanged
	 */
	@Override
	public JitType generateUnOpRunCode(JitCodeGenerator gen, JitCopyOp op, JitBlock block,
			JitType uType, MethodVisitor rv) {
		// Intentionally no bytecode here; the operand passes straight through
		return uType;
	}
}

View file

@ -0,0 +1,54 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.lifecycle.Unfinished.TODO;
import static ghidra.pcode.emu.jit.gen.GenConsts.*;
import static org.objectweb.asm.Opcodes.INVOKESTATIC;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitFloatAbsOp;
/**
 * The generator for a {@link JitFloatAbsOp float_abs}.
 *
 * <p>
 * This plugs into the unary operator template and emits a static call to {@link Math#abs(float)}
 * for JVM floats or {@link Math#abs(double)} for JVM doubles. The operand type is also the result
 * type.
 */
public enum FloatAbsOpGen implements UnOpGen<JitFloatAbsOp> {
	/** The generator singleton */
	GEN;

	/**
	 * Emits the static invocation of {@code Math.abs} with the given method descriptor.
	 *
	 * @param rv the visitor receiving the bytecode
	 * @param descriptor the descriptor selecting the float or the double overload
	 */
	private static void emitAbs(MethodVisitor rv, String descriptor) {
		rv.visitMethodInsn(INVOKESTATIC, NAME_MATH, "abs", descriptor, false);
	}

	/**
	 * {@inheritDoc}
	 *
	 * @return {@code uType}, since the absolute value has the same type as its operand
	 */
	@Override
	public JitType generateUnOpRunCode(JitCodeGenerator gen, JitFloatAbsOp op, JitBlock block,
			JitType uType, MethodVisitor rv) {
		switch (uType) {
			case FloatJitType t -> emitAbs(rv, MDESC_$FLOAT_UNOP);
			case DoubleJitType t -> emitAbs(rv, MDESC_$DOUBLE_UNOP);
			// Multi-precision floats are not implemented yet
			case MpFloatJitType t -> TODO("MpFloat");
			default -> throw new AssertionError();
		}
		return uType;
	}
}

View file

@ -0,0 +1,53 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.lifecycle.Unfinished.TODO;
import static org.objectweb.asm.Opcodes.DADD;
import static org.objectweb.asm.Opcodes.FADD;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitFloatAddOp;
/**
 * The generator for a {@link JitFloatAddOp float_add}.
 *
 * <p>
 * This plugs into the binary operator template. With both operands already on the stack, a single
 * instruction suffices: {@link #FADD} for JVM floats or {@link #DADD} for JVM doubles.
 */
public enum FloatAddOpGen implements BinOpGen<JitFloatAddOp> {
	/** The generator singleton */
	GEN;

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * Both operands are expected to have the same type, which is also the result type.
	 *
	 * @return {@code lType}
	 */
	@Override
	public JitType generateBinOpRunCode(JitCodeGenerator gen, JitFloatAddOp op, JitBlock block,
			JitType lType, JitType rType, MethodVisitor rv) {
		assert rType == lType;
		// Choose the instruction first, then emit it in one place
		final int addOpcode = switch (lType) {
			case FloatJitType t -> FADD;
			case DoubleJitType t -> DADD;
			// Multi-precision floats are not implemented yet
			case MpFloatJitType t -> TODO("MpFloat");
			default -> throw new AssertionError();
		};
		rv.visitInsn(addOpcode);
		return lType;
	}
}

View file

@ -0,0 +1,59 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.lifecycle.Unfinished.TODO;
import static ghidra.pcode.emu.jit.gen.GenConsts.MDESC_$DOUBLE_UNOP;
import static ghidra.pcode.emu.jit.gen.GenConsts.NAME_MATH;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitFloatCeilOp;
/**
 * The generator for a {@link JitFloatCeilOp float_ceil}.
 *
 * <p>
 * This plugs into the unary operator template and emits a static call to
 * {@link Math#ceil(double)}. Since {@code Math} only offers the double overload, a JVM float
 * operand is widened with {@link #F2D} before the call and narrowed with {@link #D2F} after it.
 */
public enum FloatCeilOpGen implements UnOpGen<JitFloatCeilOp> {
	/** The generator singleton */
	GEN;

	/**
	 * Emits the static invocation of {@code Math.ceil} using the double unary descriptor.
	 *
	 * @param rv the visitor receiving the bytecode
	 */
	private static void emitCeil(MethodVisitor rv) {
		rv.visitMethodInsn(INVOKESTATIC, NAME_MATH, "ceil", MDESC_$DOUBLE_UNOP, false);
	}

	/**
	 * {@inheritDoc}
	 *
	 * @return {@code uType}, since the result is converted back to the operand's type
	 */
	@Override
	public JitType generateUnOpRunCode(JitCodeGenerator gen, JitFloatCeilOp op, JitBlock block,
			JitType uType, MethodVisitor rv) {
		switch (uType) {
			case FloatJitType t -> {
				// No float overload of ceil exists: widen, call, narrow
				rv.visitInsn(F2D);
				emitCeil(rv);
				rv.visitInsn(D2F);
			}
			case DoubleJitType t -> emitCeil(rv);
			// Multi-precision floats are not implemented yet
			case MpFloatJitType t -> TODO("MpFloat");
			default -> throw new AssertionError();
		}
		return uType;
	}
}

View file

@ -0,0 +1,53 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.lifecycle.Unfinished.TODO;
import static org.objectweb.asm.Opcodes.DDIV;
import static org.objectweb.asm.Opcodes.FDIV;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitFloatDivOp;
/**
 * The generator for a {@link JitFloatDivOp float_div}.
 *
 * <p>
 * This plugs into the binary operator template. With both operands already on the stack, a single
 * instruction suffices: {@link #FDIV} for JVM floats or {@link #DDIV} for JVM doubles.
 */
public enum FloatDivOpGen implements BinOpGen<JitFloatDivOp> {
	/** The generator singleton */
	GEN;

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * Both operands are expected to have the same type, which is also the result type.
	 *
	 * @return {@code lType}
	 */
	@Override
	public JitType generateBinOpRunCode(JitCodeGenerator gen, JitFloatDivOp op, JitBlock block,
			JitType lType, JitType rType, MethodVisitor rv) {
		assert rType == lType;
		// Choose the instruction first, then emit it in one place
		final int divOpcode = switch (lType) {
			case FloatJitType t -> FDIV;
			case DoubleJitType t -> DDIV;
			// Multi-precision floats are not implemented yet
			case MpFloatJitType t -> TODO("MpFloat");
			default -> throw new AssertionError();
		};
		rv.visitInsn(divOpcode);
		return lType;
	}
}

View file

@ -0,0 +1,47 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.*;
import ghidra.pcode.emu.jit.op.JitFloatEqualOp;
/**
 * The generator for a {@link JitFloatEqualOp float_equal}.
 *
 * <p>
 * The float comparison template, {@link CompareFloatOpGen}, does the work. This enum only names
 * the opcodes to use: {@link #FCMPL} for JVM floats, {@link #DCMPL} for JVM doubles, and
 * {@link #IFEQ} for the conditional jump that follows the comparison.
 */
public enum FloatEqualOpGen implements CompareFloatOpGen<JitFloatEqualOp> {
	/** The generator singleton */
	GEN;

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * Per the JVM specification, {@code ifeq} is taken only when the comparison result is 0, and
	 * the {@code *cmpl} comparisons push -1 when either operand is NaN. A NaN operand therefore
	 * never takes this jump.
	 *
	 * @return {@link #IFEQ}
	 */
	@Override
	public int condOpcode() {
		return IFEQ;
	}

	/**
	 * {@inheritDoc}
	 *
	 * @return {@link #FCMPL}
	 */
	@Override
	public int fcmpOpcode() {
		return FCMPL;
	}

	/**
	 * {@inheritDoc}
	 *
	 * @return {@link #DCMPL}
	 */
	@Override
	public int dcmpOpcode() {
		return DCMPL;
	}
}

View file

@ -0,0 +1,66 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.lifecycle.Unfinished.TODO;
import static org.objectweb.asm.Opcodes.D2F;
import static org.objectweb.asm.Opcodes.F2D;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitFloatFloat2FloatOp;
/**
 * The generator for a {@link JitFloatFloat2FloatOp float_float2float}.
 *
 * <p>
 * This plugs into the unary operator template. It emits {@link #F2D} to widen a JVM float to a
 * double, {@link #D2F} to narrow a JVM double to a float, and nothing at all when the input and
 * output types are already the same.
 */
public enum FloatFloat2FloatOpGen implements UnOpGen<JitFloatFloat2FloatOp> {
	/** The generator singleton */
	GEN;

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * The conversion is selected by the pair (operand type, resolved output type).
	 *
	 * @return the resolved type of the op's output
	 */
	@Override
	public JitType generateUnOpRunCode(JitCodeGenerator gen, JitFloatFloat2FloatOp op,
			JitBlock block, JitType uType, MethodVisitor rv) {
		JitType outType = op.type().resolve(gen.getTypeModel().typeOf(op.out()));
		return switch (uType) {
			case FloatJitType from -> switch (outType) {
				// Same width: the value on the stack is already the result
				case FloatJitType to -> to;
				case DoubleJitType to -> {
					rv.visitInsn(F2D);
					yield to;
				}
				case MpFloatJitType to -> TODO("MpFloat");
				default -> throw new AssertionError();
			};
			case DoubleJitType from -> switch (outType) {
				case FloatJitType to -> {
					rv.visitInsn(D2F);
					yield to;
				}
				// Same width: the value on the stack is already the result
				case DoubleJitType to -> to;
				case MpFloatJitType to -> TODO("MpFloat");
				default -> throw new AssertionError();
			};
			// Multi-precision floats are not implemented yet
			case MpFloatJitType from -> TODO("MpFloat");
			default -> throw new AssertionError();
		};
	}
}

View file

@ -0,0 +1,59 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.lifecycle.Unfinished.TODO;
import static ghidra.pcode.emu.jit.gen.GenConsts.MDESC_$DOUBLE_UNOP;
import static ghidra.pcode.emu.jit.gen.GenConsts.NAME_MATH;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitFloatFloorOp;
/**
 * The generator for a {@link JitFloatFloorOp float_floor}.
 *
 * <p>
 * This plugs into the unary operator template and emits a static call to
 * {@link Math#floor(double)}. Since {@code Math} only offers the double overload, a JVM float
 * operand is widened with {@link #F2D} before the call and narrowed with {@link #D2F} after it.
 */
public enum FloatFloorOpGen implements UnOpGen<JitFloatFloorOp> {
	/** The generator singleton */
	GEN;

	/**
	 * Emits the static invocation of {@code Math.floor} using the double unary descriptor.
	 *
	 * @param rv the visitor receiving the bytecode
	 */
	private static void emitFloor(MethodVisitor rv) {
		rv.visitMethodInsn(INVOKESTATIC, NAME_MATH, "floor", MDESC_$DOUBLE_UNOP, false);
	}

	/**
	 * {@inheritDoc}
	 *
	 * @return {@code uType}, since the result is converted back to the operand's type
	 */
	@Override
	public JitType generateUnOpRunCode(JitCodeGenerator gen, JitFloatFloorOp op, JitBlock block,
			JitType uType, MethodVisitor rv) {
		switch (uType) {
			case FloatJitType t -> {
				// No float overload of floor exists: widen, call, narrow
				rv.visitInsn(F2D);
				emitFloor(rv);
				rv.visitInsn(D2F);
			}
			case DoubleJitType t -> emitFloor(rv);
			// Multi-precision floats are not implemented yet
			case MpFloatJitType t -> TODO("MpFloat");
			default -> throw new AssertionError();
		}
		return uType;
	}
}

View file

@ -0,0 +1,66 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static ghidra.lifecycle.Unfinished.TODO;
import static org.objectweb.asm.Opcodes.*;
import org.objectweb.asm.MethodVisitor;
import ghidra.pcode.emu.jit.analysis.JitControlFlowModel.JitBlock;
import ghidra.pcode.emu.jit.analysis.JitType;
import ghidra.pcode.emu.jit.analysis.JitType.*;
import ghidra.pcode.emu.jit.gen.JitCodeGenerator;
import ghidra.pcode.emu.jit.op.JitFloatInt2FloatOp;
/**
 * The generator for a {@link JitFloatInt2FloatOp float_int2float}.
 *
 * <p>
 * This plugs into the unary operator template and emits exactly one conversion instruction,
 * chosen by the operand type and the resolved output type: {@link #I2F}, {@link #I2D},
 * {@link #L2F}, or {@link #L2D}.
 */
public enum FloatInt2FloatOpGen implements UnOpGen<JitFloatInt2FloatOp> {
	/** The generator singleton */
	GEN;

	/**
	 * {@inheritDoc}
	 *
	 * @return the resolved type of the op's output
	 */
	@Override
	public JitType generateUnOpRunCode(JitCodeGenerator gen, JitFloatInt2FloatOp op, JitBlock block,
			JitType uType, MethodVisitor rv) {
		JitType outType = op.type().resolve(gen.getTypeModel().typeOf(op.out()));
		return switch (uType) {
			case IntJitType from -> switch (outType) {
				case FloatJitType to -> {
					rv.visitInsn(I2F);
					yield to;
				}
				case DoubleJitType to -> {
					rv.visitInsn(I2D);
					yield to;
				}
				case MpFloatJitType to -> TODO("MpInt/Float");
				default -> throw new AssertionError();
			};
			case LongJitType from -> switch (outType) {
				case FloatJitType to -> {
					rv.visitInsn(L2F);
					yield to;
				}
				case DoubleJitType to -> {
					rv.visitInsn(L2D);
					yield to;
				}
				case MpFloatJitType to -> TODO("MpInt/Float");
				default -> throw new AssertionError();
			};
			// Multi-precision operands are not implemented yet
			case MpIntJitType from -> TODO("MpInt/Float");
			default -> throw new AssertionError();
		};
	}
}

View file

@ -0,0 +1,47 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.*;
import ghidra.pcode.emu.jit.op.JitFloatLessEqualOp;
/**
 * The generator for a {@link JitFloatLessEqualOp float_lessequal}.
 *
 * <p>
 * The float comparison template, {@link CompareFloatOpGen}, does the work. This enum only names
 * the opcodes to use: {@link #FCMPG} for JVM floats, {@link #DCMPG} for JVM doubles, and
 * {@link #IFLE} for the conditional jump that follows the comparison.
 */
public enum FloatLessEqualOpGen implements CompareFloatOpGen<JitFloatLessEqualOp> {
	/** The generator singleton */
	GEN;

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * Per the JVM specification, {@code ifle} is taken only when the comparison result is at most
	 * 0, and the {@code *cmpg} comparisons push 1 when either operand is NaN. A NaN operand
	 * therefore never takes this jump.
	 *
	 * @return {@link #IFLE}
	 */
	@Override
	public int condOpcode() {
		return IFLE;
	}

	/**
	 * {@inheritDoc}
	 *
	 * @return {@link #FCMPG}
	 */
	@Override
	public int fcmpOpcode() {
		return FCMPG;
	}

	/**
	 * {@inheritDoc}
	 *
	 * @return {@link #DCMPG}
	 */
	@Override
	public int dcmpOpcode() {
		return DCMPG;
	}
}

View file

@ -0,0 +1,47 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.jit.gen.op;
import static org.objectweb.asm.Opcodes.*;
import ghidra.pcode.emu.jit.op.JitFloatLessOp;
/**
 * The generator for a {@link JitFloatLessOp float_less}.
 *
 * <p>
 * The float comparison template, {@link CompareFloatOpGen}, does the work. This enum only names
 * the opcodes to use: {@link #FCMPG} for JVM floats, {@link #DCMPG} for JVM doubles, and
 * {@link #IFLT} for the conditional jump that follows the comparison.
 */
public enum FloatLessOpGen implements CompareFloatOpGen<JitFloatLessOp> {
	/** The generator singleton */
	GEN;

	/**
	 * {@inheritDoc}
	 *
	 * <p>
	 * Per the JVM specification, {@code iflt} is taken only when the comparison result is
	 * negative, and the {@code *cmpg} comparisons push 1 when either operand is NaN. A NaN
	 * operand therefore never takes this jump.
	 *
	 * @return {@link #IFLT}
	 */
	@Override
	public int condOpcode() {
		return IFLT;
	}

	/**
	 * {@inheritDoc}
	 *
	 * @return {@link #FCMPG}
	 */
	@Override
	public int fcmpOpcode() {
		return FCMPG;
	}

	/**
	 * {@inheritDoc}
	 *
	 * @return {@link #DCMPG}
	 */
	@Override
	public int dcmpOpcode() {
		return DCMPG;
	}
}

Some files were not shown because too many files have changed in this diff Show more