Fix assembling instructions with unknown/don't care context bits

Without this change, if unspecified context bits are provided to the assembler, they default to 0, and the resulting context is used to filter for valid assembly instructions. After this change, unspecified bits remain unspecified throughout the assembly process, possibly yielding more valid assembly results.
This commit is contained in:
Peter Lucia 2024-11-15 12:26:13 -05:00 committed by Dan
parent f5354381cf
commit 69292c546f
9 changed files with 499 additions and 15 deletions

View file

@ -23,6 +23,7 @@ import java.util.concurrent.atomic.AtomicLong;
import ghidra.app.plugin.assembler.sleigh.expr.MaskedLong;
import ghidra.app.plugin.assembler.sleigh.expr.SolverException;
import ghidra.app.plugin.assembler.sleigh.util.AsmUtil;
import ghidra.app.plugin.processors.sleigh.ContextCommit;
import ghidra.app.plugin.processors.sleigh.ContextOp;
import ghidra.app.plugin.processors.sleigh.expression.ContextField;
import ghidra.app.plugin.processors.sleigh.expression.TokenField;
@ -403,6 +404,60 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
return new AssemblyPatternBlock(newOffset, newMask, newVals);
}
/**
 * Overlay another pattern block onto this one
 *
 * <p>
 * No agreement between the two blocks is required. The blocks are first aligned by their
 * offsets. Every bit defined by <code>that</code> is taken from <code>that</code>; every
 * remaining bit is taken from this block. A bit defined by neither block stays undefined in the
 * result. Where both blocks define a bit with differing values, the value from
 * <code>that</code> wins.
 *
 * @see RegisterValue#combineValues(RegisterValue)
 *
 * @param that the other block, whose defined bits take precedence
 * @return the new combined block
 */
public AssemblyPatternBlock assign(AssemblyPatternBlock that) {
	int start = Math.min(this.offset, that.offset);
	int size = Math.max(this.length(), that.length()) - start;
	byte[] outMask = new byte[size];
	byte[] outVals = new byte[size];

	// Lay down this block first, positioned relative to the combined start
	int base = this.offset - start;
	System.arraycopy(this.mask, 0, outMask, base, this.mask.length);
	System.arraycopy(this.vals, 0, outVals, base, this.mask.length);

	// Then overlay the other block: its defined bits replace whatever is already there
	base = that.offset - start;
	for (int i = 0; i < that.mask.length; i++) {
		int pos = base + i;
		byte defined = that.mask[i];
		outMask[pos] |= defined;
		outVals[pos] = (byte) ((outVals[pos] & ~defined) | (that.vals[i] & defined));
	}
	return new AssemblyPatternBlock(start, outMask, outVals);
}
/**
 * Produce a block whose mask is the bitwise complement of this block's mask
 *
 * <p>
 * The offset and the value bytes are handed to the new block as they are; only the mask is
 * recomputed.
 *
 * @return a copy of this pattern block with mask bits inverted
 */
public AssemblyPatternBlock invertMask() {
	byte[] flipped = Arrays.copyOf(this.mask, this.mask.length);
	for (int i = 0; i < flipped.length; i++) {
		flipped[i] ^= (byte) 0xFF;
	}
	return new AssemblyPatternBlock(this.offset, flipped, this.vals);
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
@ -575,6 +630,30 @@ public class AssemblyPatternBlock implements Comparable<AssemblyPatternBlock> {
return MaskedLong.fromMaskAndValue(rmsk >>> cop.getShift(), rval >>> cop.getShift());
}
/**
 * OR the mask bits of a context commit into a copy of this block's mask
 *
 * <p>
 * The commit's 32-bit mask is spread over the four bytes starting at byte index
 * <code>wordIndex * 4</code>, with the least-significant byte of the mask landing on the last
 * of those four bytes. Bytes that fall outside this block's mask array are skipped. The
 * offset and value bytes are passed to the result as they are.
 *
 * @implNote This is used when scraping for valid input contexts to determine which context
 *           variables are passed to the <code>globalset</code> directive.
 *
 * @param cc the context commit
 * @return the result
 */
public AssemblyPatternBlock writeContextCommitMask(ContextCommit cc) {
	byte[] merged = Arrays.copyOf(this.mask, this.mask.length);
	// Position of the commit's word within this block's mask array
	int wordBase = cc.getWordIndex() * 4 - this.offset;
	int bits = cc.getMask();
	// Walk from the last byte of the word to the first, consuming the mask low byte first
	for (int b = 3; b >= 0; b--, bits >>= 8) {
		int pos = wordBase + b;
		if (pos < 0 || pos >= merged.length) {
			continue;
		}
		// The compound assignment keeps only the low 8 bits of the remaining mask
		merged[pos] |= bits;
	}
	return new AssemblyPatternBlock(this.offset, merged, this.vals);
}
/**
* Set all bits read by a given context operation to unknown
*

View file

@ -37,6 +37,14 @@ public interface AssemblyResolvedPatterns extends AssemblyResolution {
*/
AssemblyPatternBlock getContext();
/**
 * Create a copy of this resolution with a new context
 *
 * <p>
 * The copy carries the given context block in place of this resolution's own. Implementations
 * are expected to carry over every other attribute unchanged.
 *
 * @param ctx the new context
 * @return the copy
 */
AssemblyResolvedPatterns withContext(AssemblyPatternBlock ctx);
/**
* Get the length of the instruction encoding
*

View file

@ -560,6 +560,25 @@ public class DefaultAssemblyResolvedPatterns extends AbstractAssemblyResolution
return ctx;
}
/**
 * Prepare a builder that reproduces this resolution, except for its context
 *
 * @param ctx the context block to place in the builder instead of this resolution's own
 * @return a builder populated from this resolution and the given context
 */
protected AbstractAssemblyResolvedPatternsBuilder<?> withContextBuilder(
		AssemblyPatternBlock ctx) {
	var copy = factory.newPatternsBuilder();

	// The one attribute that differs from this resolution
	copy.ctx = ctx;

	// Everything else is carried over as is
	copy.description = description;
	copy.children = children;
	copy.right = right;
	copy.cons = cons;
	copy.ins = ins;
	copy.backfills = backfills;
	copy.forbids = forbids;
	return copy;
}
@Override
public AssemblyResolvedPatterns withContext(AssemblyPatternBlock ctx) {
	// Populate a builder from this resolution with the context swapped, then build the copy
	AbstractAssemblyResolvedPatternsBuilder<?> builder = withContextBuilder(ctx);
	return builder.build();
}
@Override
public MaskedLong readInstruction(int start, int len) {
return ins.readBytes(start, len);

View file

@ -0,0 +1,134 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.plugin.languages.sleigh;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyDefaultContext;
import ghidra.app.plugin.assembler.sleigh.sem.AssemblyPatternBlock;
import ghidra.app.plugin.processors.sleigh.Constructor;
import ghidra.app.plugin.processors.sleigh.ContextChange;
import ghidra.app.plugin.processors.sleigh.ContextCommit;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern;
import ghidra.app.plugin.processors.sleigh.symbol.SubtableSymbol;
/**
 * Scrapes a SLEIGH language for the valid input contexts that affect constructor selection
 *
 * <p>
 * The work is done in two passes over the language's constructors: the first gathers the mask
 * of context bits that can act as input, and the second collects the constructor context
 * constraints restricted to that mask.
 */
public class InputContextScraper {

	private final SleighLanguage language;

	/**
	 * Create a scraper for the given language
	 *
	 * @param language the SLEIGH language to scrape
	 */
	public InputContextScraper(SleighLanguage language) {
		this.language = language;
	}

	/**
	 * Get set of all valid input contexts that affect constructor selection.
	 *
	 * <ol>
	 * <li>Start with mask of the language's default context
	 * <li>Scrape language for <code>globalset</code> context variables and OR their masks into
	 * our mask
	 * <li>Flip bits of our mask to get mask of context variables not used as input
	 * (local/transient)
	 * <li>Check constructor constraints and use mask to get values of relevant input context
	 * variables
	 * </ol>
	 *
	 * @return the set of input contexts collected from the constructor constraints
	 */
	public Set<AssemblyPatternBlock> scrapeInputContexts() {
		// Only the MASK of the default context matters here: a variable that has a default
		// value might be an input variable, even if it is really a local one that was
		// (questionably) given a default. Seeding with the default mask keeps such variables
		// in the result.
		AssemblyPatternBlock seed = new AssemblyDefaultContext(language).getDefault();

		// The default values themselves are irrelevant from here on, so zero them
		Arrays.fill(seed.getVals(), (byte) 0);

		// Pass 1: accumulate the masks of everything committed via globalset
		GlobalSetScraper commits = new GlobalSetScraper(seed);
		SleighLanguages.traverseConstructors(language, commits);

		// Whatever is NOT in the accumulated mask is local/transient, i.e., not input
		AssemblyPatternBlock nonInput = commits.getContextMask().invertMask();

		// Pass 2: collect constructor constraints with the non-input bits removed
		int contextLen = language.getContextBaseRegister().getNumBytes();
		ConstraintScraper constraints = new ConstraintScraper(nonInput, contextLen);
		SleighLanguages.traverseConstructors(language, constraints);
		return constraints.getInputContexts();
	}

	/**
	 * A visitor that ORs the mask of every context commit it encounters into a running mask
	 */
	private static class GlobalSetScraper implements ConstructorEntryVisitor {
		private AssemblyPatternBlock contextMask;

		GlobalSetScraper(AssemblyPatternBlock contextMask) {
			this.contextMask = contextMask;
		}

		public AssemblyPatternBlock getContextMask() {
			return contextMask;
		}

		@Override
		public int visit(SubtableSymbol subtable, DisjointPattern pattern, Constructor cons) {
			for (ContextChange change : cons.getContextChanges()) {
				if (!(change instanceof ContextCommit commit)) {
					continue;
				}
				contextMask = contextMask.writeContextCommitMask(commit);
			}
			return CONTINUE;
		}
	}

	/**
	 * A visitor that collects each constructor's context constraint, with non-input bits masked
	 * out
	 */
	private static class ConstraintScraper implements ConstructorEntryVisitor {
		private final AssemblyPatternBlock nonInputMask;
		private final AssemblyPatternBlock blankContext;
		private final Set<AssemblyPatternBlock> inputContexts = new HashSet<>();

		ConstraintScraper(AssemblyPatternBlock mask, int contextRegLen) {
			nonInputMask = mask;
			blankContext = AssemblyPatternBlock.fromLength(contextRegLen);
		}

		public Set<AssemblyPatternBlock> getInputContexts() {
			return inputContexts;
		}

		@Override
		public int visit(SubtableSymbol subtable, DisjointPattern pattern, Constructor cons) {
			AssemblyPatternBlock constraint =
				AssemblyPatternBlock.fromPattern(pattern, pattern.getLength(true), true);
			if (constraint.getMask().length == 0) {
				return CONTINUE;
			}

			// Combine constraint with blank context to ensure generated context has no shifts
			AssemblyPatternBlock candidate =
				blankContext.combine(constraint).maskOut(nonInputMask);

			// Keep only contexts that still define at least one bit
			if (candidate.getSpecificity() > 0) {
				inputContexts.add(candidate);
			}
			return CONTINUE;
		}
	}
}

View file

@ -51,4 +51,11 @@ public class ContextCommit implements ContextChange {
decoder.closeElement(el);
}
/**
 * Get the index of the context word this commit applies to
 *
 * @return the word index
 */
public int getWordIndex() {
	return this.num;
}
/**
 * Get the mask associated with this commit
 *
 * @return the mask
 */
public int getMask() {
	return this.mask;
}
}