From 1ba91ebb51cad80f5d95414f0cc916e66d093335 Mon Sep 17 00:00:00 2001 From: Gary Hayward Date: Tue, 15 Aug 2023 13:24:23 -0400 Subject: [PATCH] GP-3707 ELF RISCV relocation processing improvements --- .../RISCV_ElfRelocationHandler.java | 239 ++++++++++++------ 1 file changed, 167 insertions(+), 72 deletions(-) diff --git a/Ghidra/Processors/RISCV/src/main/java/ghidra/app/util/bin/format/elf/relocation/RISCV_ElfRelocationHandler.java b/Ghidra/Processors/RISCV/src/main/java/ghidra/app/util/bin/format/elf/relocation/RISCV_ElfRelocationHandler.java index 36b5c2b4ca..52c3d57542 100644 --- a/Ghidra/Processors/RISCV/src/main/java/ghidra/app/util/bin/format/elf/relocation/RISCV_ElfRelocationHandler.java +++ b/Ghidra/Processors/RISCV/src/main/java/ghidra/app/util/bin/format/elf/relocation/RISCV_ElfRelocationHandler.java @@ -13,18 +13,38 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package ghidra.app.util.bin.format.elf.relocation; +import java.util.Iterator; + import ghidra.app.util.bin.format.elf.*; -import ghidra.program.model.address.Address; +import ghidra.program.model.address.*; import ghidra.program.model.listing.Program; import ghidra.program.model.mem.Memory; import ghidra.program.model.mem.MemoryAccessException; +import ghidra.program.model.reloc.*; import ghidra.program.model.reloc.Relocation.Status; -import ghidra.program.model.reloc.RelocationResult; import ghidra.util.exception.NotFoundException; +/** + * See https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc for information on the different riscv elf + * relocation types. Different relocation types are found in different contexts - not all of which are currently handled here. + * The contexts we *attempt* to handle include: + * + * 1. fully linked Elf executables like the Linux `cp` utility which rely on dynamic linking to libraries like libc.so.6 + * 2. object files compiled with or without Position Independent code (`-fpic`) support + * 3. Sharable object libraries like `libc.so.6` + * 3. kernel load modules compiled with position independent code (`-fpic`) support + * + * Keep in mind: + * + * 1. You may find multiple relocations at any single address. + * 2. Many relocations and relocation variants are there to support linker/loader optimizations unneeded by Ghidra. + * 3. Some relocations can only name their target indirectly. R_RISCV_PCREL_LO12_I references a R_RISCV_PCREL_HI20 relocation, + * but needs the symbol referenced by that R_RISCV_PCREL_HI20 in order to compute a PC relative offset. + * 4. Many discrete symbols can share the same symbol name, e.g. `.L0`. These symbol names can include non-printing characters like `".L0^B2"` + * + */ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler { @Override @@ -32,6 +52,84 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler { return elf.e_machine() == ElfConstants.EM_RISCV; } + /** + * Get the adjusted 20 high bits of a 32 bit target. The lower 12 bits will + * be found in a later instruction, using a sign-extended signed addition. If those + * 12 bits will be seen as negative, we need to increment the higher bits by one + * @param target a 32 bit offset + * @return the higher 20 bits possibly incremented by 0x1000 + */ + static int getHi20(int target) { + int target_h = target & 0xfffff000; + // the low order 12 bits are sign-extended before adding, so we may need to adjust the high order 20 bits + if ((target & 0x00000800) == 0x800) { + target_h = target_h + 0x1000; + } + return target_h; + } + + /** + * get the lower 12 bits of a 32 bit target. These will typically be added - not or'ed - to + * a register holding the higher 20 bits. + * @param target a 32 bit offset + * @return the lower 12 bits of target + */ + static int getLo12(int target) { + return (target & 0x00000fff); + } + + /** + * PC relative relocations like R_RISCV_PCREL_LO12_I find their target indirectly, + * using their symbolValue to locate the address of the matching R_RISCV_PCREL_HI20 + * or R_RISCV_GOT_HI20. + * That *HI20 relocation's symbol value points to the actual target. + * This function attempts to locate that actual target by querying Ghidra's Relocation + * and Symbol tables. There can be more than one relocation assigned to a given address, + * so we need to search. + * + * Note that this function probably belongs within ElfRelocationContext, but we have no + * published strategy for integration testing that class. + * + * @return the relocation symbol value associated with the linked relocation type. + */ + static int getSymbolValueIndirect(ElfRelocationContext elfRelocationContext, int hi20Addr) { + + int target; + // Get the relevant Ghidra tables + Program program = elfRelocationContext.getProgram(); + AddressSpace space = program.getAddressFactory().getDefaultAddressSpace(); + RelocationTable relocationTable = program.getRelocationTable(); + + // get the possible address of R_RISCV_PCREL_HI20 relocation holding the target address + Address hi20Address = program.getAddressFactory().getAddress(space.getSpaceID(), hi20Addr); + + // we need an address set with one holding a single address to retrieve multiple relocations + AddressSet relocationAddressSet = new AddressSet(hi20Address); + // get any relocations at this address - hopefully R_RISCV_PCREL_HI20 and likely R_RISCV_RELAX + Iterator relocs = relocationTable.getRelocations(relocationAddressSet); + // search the relocations for a R_RISCV_PCREL_HI20 relocation + while (relocs.hasNext()) { + Relocation rel = relocs.next(); + // there may be other valid relocation types to process + if ((rel.getType() == RISCV_ElfRelocationConstants.R_RISCV_PCREL_HI20) || + (rel.getType() == RISCV_ElfRelocationConstants.R_RISCV_GOT_HI20)) { + int refSymbolIndex = (int) rel.getValues()[0]; +// System.out.println("Matching PCREL_HI20 value is 0x" + Long.toHexString(refSymbolIndex)); + // Note that an elf symbol index is not the same thing as a regular symbol index. + ElfSymbol elfSym = elfRelocationContext.getSymbol(refSymbolIndex); + int targetOffset = (int) elfRelocationContext.getSymbolValue(elfSym); +// System.out.println( +// "PCREL_HI20 Symbol \"" + elfRelocationContext.getSymbolName(refSymbolIndex) + +// "\" found with offset 0x" + Long.toHexString(targetOffset)); + // compute the target offset from the referred auipc instruction + target = targetOffset - hi20Addr; +// System.out.println("PCREL_HI20 Symbol location is 0x" + Long.toHexString(target)); + return target; + } + } + return 0; + } + @Override public RelocationResult relocate(ElfRelocationContext elfRelocationContext, ElfRelocation relocation, @@ -49,7 +147,8 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler { return RelocationResult.SKIPPED; } - long addend = relocation.hasAddend() ? relocation.getAddend() : is32 ? memory.getInt(relocationAddress) : memory.getLong(relocationAddress); + long addend = relocation.hasAddend() ? relocation.getAddend() + : is32 ? memory.getInt(relocationAddress) : memory.getLong(relocationAddress); long offset = relocationAddress.getOffset(); long base = elfRelocationContext.getImageBaseWordAdjustmentOffset(); @@ -59,27 +158,11 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler { long symbolValue = elfRelocationContext.getSymbolValue(sym); String symbolName = elfRelocationContext.getSymbolName(symbolIndex); - //TODO remove debug - switch(type) { - case 2: - case 3: - case 5: - break; - default: -// System.out.println("DEBUG RISCV: " + -// type + " " + relocationAddress + " " + -// String.format("%x", symbolValue) + " " + -// String.format("%x", addend) + " " + -// String.format("%x", offset) + " " + -// String.format("%x", base));// + " " + - //String.format("%x", memory.getInt(relocationAddress))); - break; - } - long value64 = 0; int value32 = 0; short value16 = 0; byte value8 = 0; + int target = 0; int byteLength = 4; // most relocations affect 4-bytes (change if different) @@ -151,7 +234,7 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler { case RISCV_ElfRelocationConstants.R_RISCV_TLS_DTPMOD64: // TLS relocation word64 = S->TLSINDEX - markAsWarning(program, relocationAddress, "R_RISCV_TLS_DTPMOD32", symbolName, + markAsWarning(program, relocationAddress, "R_RISCV_TLS_DTPMOD64", symbolName, symbolIndex, "TODO, needs support ", elfRelocationContext.getLog()); return RelocationResult.UNSUPPORTED; @@ -169,7 +252,7 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler { case RISCV_ElfRelocationConstants.R_RISCV_TLS_TPREL32: // TLS relocation word32 = TLS + S + A + S_TLS_OFFSET - TLS_DTV_OFFSET - markAsWarning(program, relocationAddress, "R_RISCV_TLS_DTREL32", symbolName, + markAsWarning(program, relocationAddress, "R_RISCV_TLS_TPREL32", symbolName, symbolIndex, "TODO, needs support ", elfRelocationContext.getLog()); return RelocationResult.UNSUPPORTED; @@ -181,7 +264,7 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler { case RISCV_ElfRelocationConstants.R_RISCV_BRANCH: // PC-relative branch (B-Type) - int target = (int) (addend + symbolValue - offset); + target = (int) (addend + symbolValue - offset); value32 = ((target & 0x01e) << 7) | ((target & 0x0800) >> 4) | ((target & 0x03e0) << 20) | ((target & 0x1000) << 19) | memory.getInt(relocationAddress); @@ -198,27 +281,18 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler { break; case RISCV_ElfRelocationConstants.R_RISCV_CALL: - // PC-relative call MACRO call,tail (auipc+jalr pair) - target = (int) (addend + symbolValue - offset); - int target_l = target << 20 >> 20; - int target_h = target - target_l; - value32 = (target_h & 0xfffff000) | memory.getInt(relocationAddress); - memory.setInt(relocationAddress, value32); - value32 = ((target_l & 0x00000fff) << 20) | memory.getInt(relocationAddress.add(4)); - memory.setInt(relocationAddress.add(4), value32); - break; + // PC-relative call (PLT) MACRO call,tail (auipc+jalr pair) PIC + // Identical processing in Ghidra as the following case RISCV_ElfRelocationConstants.R_RISCV_CALL_PLT: - // PC-relative call (PLT) MACRO call,tail (auipc+jalr pair) PIC - markAsWarning(program, relocationAddress, "R_RISCV_CALL_PLT", symbolName, - symbolIndex, "TODO, needs support ", elfRelocationContext.getLog()); - return RelocationResult.UNSUPPORTED; - - case RISCV_ElfRelocationConstants.R_RISCV_GOT_HI20: - // PC-relative GOT reference MACRO la - markAsWarning(program, relocationAddress, "R_RISCV_GOT_HI20", symbolName, - symbolIndex, "TODO, needs support ", elfRelocationContext.getLog()); - return RelocationResult.UNSUPPORTED; + // PC-relative call MACRO call,tail (auipc+jalr pair) + target = (int) (addend + symbolValue - offset); + memory.setInt(relocationAddress, + getHi20(target) | memory.getInt(relocationAddress)); + memory.setInt(relocationAddress.add(4), + (getLo12(target) << 20) | memory.getInt(relocationAddress.add(4))); + byteLength = 8; + break; case RISCV_ElfRelocationConstants.R_RISCV_TLS_GOT_HI20: // PC-relative TLS IE GOT offset MACRO la.tls.ie @@ -232,23 +306,42 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler { symbolIndex, "TODO, needs support ", elfRelocationContext.getLog()); return RelocationResult.UNSUPPORTED; + case RISCV_ElfRelocationConstants.R_RISCV_GOT_HI20: + // PC-relative GOT reference MACRO la case RISCV_ElfRelocationConstants.R_RISCV_PCREL_HI20: - // PC-relative reference %pcrel_hi(symbol) (U-Type) - markAsWarning(program, relocationAddress, "R_RISCV_PCREL_HI20", symbolName, - symbolIndex, "TODO, needs support ", elfRelocationContext.getLog()); - return RelocationResult.UNSUPPORTED; + // PC-relative, not tested on 32 bit objects + target = (int) (addend + symbolValue - offset); + memory.setInt(relocationAddress, + getHi20(target) | memory.getInt(relocationAddress)); + break; case RISCV_ElfRelocationConstants.R_RISCV_PCREL_LO12_I: - // PC-relative reference %pcrel_lo(symbol) (I-Type) - markAsWarning(program, relocationAddress, "R_RISCV_PCREL_LO12_I", symbolName, - symbolIndex, "TODO, needs support ", elfRelocationContext.getLog()); - return RelocationResult.UNSUPPORTED; + // PC-relative reference %pcrel_lo(symbol) (I-Type), relative to the cited pc_rel_hi20 + target = getSymbolValueIndirect(elfRelocationContext, (int) symbolValue); + if (target == 0) { + markAsWarning(program, relocationAddress, "R_RISCV_PCREL_LO12_I", symbolName, + symbolIndex, "TODO, needs support ", elfRelocationContext.getLog()); + return RelocationResult.UNSUPPORTED; + } + value32 = ((target & 0x00000fff) << 20) | memory.getInt(relocationAddress); + memory.setInt(relocationAddress, value32); + break; case RISCV_ElfRelocationConstants.R_RISCV_PCREL_LO12_S: // PC-relative reference %pcrel_lo(symbol) (S-Type) - markAsWarning(program, relocationAddress, "R_RISCV_PCREL_LO12_S", symbolName, - symbolIndex, "TODO, needs support ", elfRelocationContext.getLog()); - return RelocationResult.UNSUPPORTED; + // S-type immediates split the 12 bit value into separate 7 bit and 5 bit fields. + // Warning: untested! + target = getSymbolValueIndirect(elfRelocationContext, (int) symbolValue); + if (target == 0) { + markAsWarning(program, relocationAddress, "R_RISCV_PCREL_LO12_I", symbolName, + symbolIndex, + "TODO, needs support ", elfRelocationContext.getLog()); + return RelocationResult.UNSUPPORTED; + } + value32 = ((target & 0x000007f) << 25) | (target & 0x00000f80) | + memory.getInt(relocationAddress); + memory.setInt(relocationAddress, value32); + break; case RISCV_ElfRelocationConstants.R_RISCV_HI20: // Absolute address %hi(symbol) (U-Type) @@ -298,6 +391,8 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler { case RISCV_ElfRelocationConstants.R_RISCV_ADD8: // 8-bit label addition word8 = old + S + A + markAsWarning(program, relocationAddress, "R_RISCV_ADD8", symbolName, symbolIndex, + "TODO, needs support ", elfRelocationContext.getLog()); value8 = memory.getByte(relocationAddress); value8 += (byte) symbolValue; value8 += (byte) addend; @@ -384,34 +479,36 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler { "TODO, needs support ", elfRelocationContext.getLog()); return RelocationResult.UNSUPPORTED; - case RISCV_ElfRelocationConstants.R_RISCV_RVC_BRANCH: + case RISCV_ElfRelocationConstants.R_RISCV_RVC_BRANCH: { // PC-relative branch offset (CB-Type) - target = (short) (addend + symbolValue - offset); + short target_s = (short) (addend + symbolValue - offset); // 15 13 | 12 11 10|9 7| 6 5 4 3 2|1 0 // C.BEQZ offset[8|4:3] src offset[7:6|2:1|5] C1 - value16 = (short) (((target & 0x100) << 4) | ((target & 0x18) << 7) | - ((target & 0xc0) >> 1) | - ((target & 0x06) << 2) | ((target & 0x20) >> 3) | + value16 = (short) (((target_s & 0x100) << 4) | ((target_s & 0x18) << 7) | + ((target_s & 0xc0) >> 1) | + ((target_s & 0x06) << 2) | ((target_s & 0x20) >> 3) | (memory.getShort(relocationAddress) & 0xe383)); - byteLength = 2; memory.setShort(relocationAddress, value16); + byteLength = 2; break; + } - case RISCV_ElfRelocationConstants.R_RISCV_RVC_JUMP: - // PC-relative jump offset (CJ-Type) - target = (short) (addend + symbolValue - offset); + case RISCV_ElfRelocationConstants.R_RISCV_RVC_JUMP: { + short target_s = (short) (addend + symbolValue - offset); // Complicated swizzling going on here. // For details, see The RISC-V Instruction Set Manual Volume I: Unprivileged ISA // 15 13 | 12 11 10 9 8 7 6 5 3 2|1 0 // C.J offset[11| 4|9:8|10|6|7|3:1|5] C1 - value16 = (short) (((target & 0x800) << 1) | ((target & 0x10) << 7) | - ((target & 0x300) << 1) | - ((target & 0x400) >> 2) | ((target & 0x40) << 1) | ((target & 0x80) >> 1) | - ((target & 0x0e) << 2) | ((target & 0x20) >> 3) | + value16 = (short) (((target_s & 0x800) << 1) | ((target_s & 0x10) << 7) | + ((target_s & 0x300) << 1) | + ((target_s & 0x400) >> 2) | ((target_s & 0x40) << 1) | + ((target_s & 0x80) >> 1) | + ((target_s & 0x0e) << 2) | ((target_s & 0x20) >> 3) | (memory.getShort(relocationAddress) & 0xe003)); - byteLength = 2; memory.setShort(relocationAddress, value16); + byteLength = 2; break; + } case RISCV_ElfRelocationConstants.R_RISCV_RVC_LUI: // Absolute address (CI-Type) @@ -444,10 +541,8 @@ public class RISCV_ElfRelocationHandler extends ElfRelocationHandler { return RelocationResult.UNSUPPORTED; case RISCV_ElfRelocationConstants.R_RISCV_RELAX: - // Instruction pair can be relaxed - markAsWarning(program, relocationAddress, "R_RISCV_RELAX", symbolName, symbolIndex, - "TODO, needs support ", elfRelocationContext.getLog()); - return RelocationResult.UNSUPPORTED; + // Instruction pair can be relaxed by the linker/loader- ignore + return RelocationResult.SKIPPED; case RISCV_ElfRelocationConstants.R_RISCV_SUB6: // Local label subtraction