GP-5871 add analyzer option to set charset used by DWARF importer

This commit is contained in:
dev747368 2025-07-25 18:32:34 +00:00
parent 1724227c31
commit 3aa00a125b
10 changed files with 85 additions and 30 deletions

View file

@ -659,7 +659,7 @@ public class BinaryReader {
* @return the string * @return the string
* @exception IOException if an I/O error occurs * @exception IOException if an I/O error occurs
*/ */
private String readNextString(Charset charset, int charLen) throws IOException { public String readNextString(Charset charset, int charLen) throws IOException {
byte[] bytes = readUntilNullTerm(currentIndex, charLen); byte[] bytes = readUntilNullTerm(currentIndex, charLen);
currentIndex += bytes.length + charLen; currentIndex += bytes.length + charLen;
@ -681,7 +681,7 @@ public class BinaryReader {
* @return the string * @return the string
* @exception IOException if an I/O error occurs * @exception IOException if an I/O error occurs
*/ */
private String readNextString(int charCount, Charset charset, int charLen) throws IOException { public String readNextString(int charCount, Charset charset, int charLen) throws IOException {
if (charCount < 0) { if (charCount < 0) {
throw new IllegalArgumentException(String.format("Invalid charCount: %d", charCount)); throw new IllegalArgumentException(String.format("Invalid charCount: %d", charCount));
} }
@ -946,7 +946,7 @@ public class BinaryReader {
* @return the string * @return the string
* @exception IOException if an I/O error occurs * @exception IOException if an I/O error occurs
*/ */
private String readString(long index, int charCount, Charset charset, int charLen) public String readString(long index, int charCount, Charset charset, int charLen)
throws IOException { throws IOException {
if (charCount < 0) { if (charCount < 0) {
throw new IllegalArgumentException(String.format("Invalid charCount: %d", charCount)); throw new IllegalArgumentException(String.format("Invalid charCount: %d", charCount));
@ -968,7 +968,7 @@ public class BinaryReader {
* @return the string * @return the string
* @exception IOException if an I/O error occurs * @exception IOException if an I/O error occurs
*/ */
private String readString(long index, Charset charset, int charLen) throws IOException { public String readString(long index, Charset charset, int charLen) throws IOException {
byte[] bytes = readUntilNullTerm(index, charLen); byte[] bytes = readUntilNullTerm(index, charLen);
String result = new String(bytes, charset); String result = new String(bytes, charset);

View file

@ -15,6 +15,8 @@
*/ */
package ghidra.app.util.bin.format.dwarf; package ghidra.app.util.bin.format.dwarf;
import java.nio.charset.Charset;
import ghidra.app.plugin.core.analysis.AnalysisOptionsUpdater; import ghidra.app.plugin.core.analysis.AnalysisOptionsUpdater;
import ghidra.app.plugin.core.analysis.DWARFAnalyzer; import ghidra.app.plugin.core.analysis.DWARFAnalyzer;
import ghidra.app.services.Analyzer; import ghidra.app.services.Analyzer;
@ -83,11 +85,16 @@ public class DWARFImportOptions {
"Maximum length for a source map entry. Longer lengths will be replaced with 0"; "Maximum length for a source map entry. Longer lengths will be replaced with 0";
private static final String OPTION_COPY_EXTERNAL_DEBUG_FILE_SYMBOLS = private static final String OPTION_COPY_EXTERNAL_DEBUG_FILE_SYMBOLS =
"Copy external debug file symbols"; "Copy External Debug File Symbols";
private static final String OPTION_COPY_EXTERNAL_DEBUG_FILE_SYMBOLS_DESC = private static final String OPTION_COPY_EXTERNAL_DEBUG_FILE_SYMBOLS_DESC =
"Copies symbols (which will typically be mangled) from a found external debug file into " + "Copies symbols (which will typically be mangled) from a found external debug file into " +
"the main program"; "the main program";
private static final String OPTION_CHARSET_NAME = "Debug Strings Charset";
private static final String OPTION_CHARSET_NAME_DESC = """
Charset to use when decoding debug strings (symbols, filenames, etc).
Default is utf-8. Typical values will be 'ascii' or 'utf-8'.""";
//================================================================================================== //==================================================================================================
// Old Option Names - Should stick around for multiple major versions after 10.2 // Old Option Names - Should stick around for multiple major versions after 10.2
//================================================================================================== //==================================================================================================
@ -126,6 +133,7 @@ public class DWARFImportOptions {
private String defaultCC = ""; private String defaultCC = "";
private long maxSourceMapEntryLength = 2000; private long maxSourceMapEntryLength = 2000;
private boolean copyExternalDebugFileSymbols = true; private boolean copyExternalDebugFileSymbols = true;
private String charsetName = "";
/** /**
* Create new instance * Create new instance
@ -453,6 +461,25 @@ public class DWARFImportOptions {
copyExternalDebugFileSymbols = b; copyExternalDebugFileSymbols = b;
} }
/**
 * Returns the charset that should be used when decoding debug strings.
 * <p>
 * The configured charset name is resolved with {@link Charset#forName(String)}. If no name
 * has been configured (null or blank), or if the configured name is malformed or not
 * supported by the running JVM, the supplied default is returned instead.
 *
 * @param defaultCharset charset to fall back to when the configured name is unusable
 * @return the configured {@link Charset}, or {@code defaultCharset}
 */
public Charset getCharset(Charset defaultCharset) {
	if (charsetName == null || charsetName.isBlank()) {
		return defaultCharset;
	}
	try {
		// the name is user-entered option text, tolerate stray surrounding whitespace
		return Charset.forName(charsetName.strip());
	}
	catch (IllegalArgumentException e) {
		// IllegalCharsetNameException and UnsupportedCharsetException are both
		// IllegalArgumentExceptions; a bad option value deliberately falls back to the
		// default. Errors and unrelated failures are no longer swallowed here.
		return defaultCharset;
	}
}
/**
* Returns the raw, unvalidated charset name option value.
* <p>
* The field starts out as an empty string; {@link #getCharset(Charset)} treats a null or
* blank name as "not configured" and returns its default.
*
* @return charset name as stored, possibly empty
*/
public String getCharsetName() {
return charsetName;
}
/**
* Sets the name of the charset used to decode debug strings.
* <p>
* The value is stored as-is without validation; it is only resolved when
* {@link #getCharset(Charset)} is called, which falls back to its default for a null,
* blank or unresolvable name.
*
* @param charsetName charset name, e.g. 'ascii' or 'utf-8'
*/
public void setCharsetName(String charsetName) {
this.charsetName = charsetName;
}
/** /**
* See {@link Analyzer#registerOptions(Options, ghidra.program.model.listing.Program)} * See {@link Analyzer#registerOptions(Options, ghidra.program.model.listing.Program)}
* *
@ -498,6 +525,9 @@ public class DWARFImportOptions {
options.registerOption(OPTION_COPY_EXTERNAL_DEBUG_FILE_SYMBOLS, options.registerOption(OPTION_COPY_EXTERNAL_DEBUG_FILE_SYMBOLS,
isCopyExternalDebugFileSymbols(), null, OPTION_COPY_EXTERNAL_DEBUG_FILE_SYMBOLS_DESC); isCopyExternalDebugFileSymbols(), null, OPTION_COPY_EXTERNAL_DEBUG_FILE_SYMBOLS_DESC);
options.registerOption(OPTION_CHARSET_NAME, getCharsetName(), null,
OPTION_CHARSET_NAME_DESC);
} }
/** /**
@ -529,6 +559,6 @@ public class DWARFImportOptions {
options.getLong(OPTION_MAX_SOURCE_ENTRY_LENGTH, getMaxSourceMapEntryLength())); options.getLong(OPTION_MAX_SOURCE_ENTRY_LENGTH, getMaxSourceMapEntryLength()));
setCopyExternalDebugFileSymbols(options.getBoolean(OPTION_COPY_EXTERNAL_DEBUG_FILE_SYMBOLS, setCopyExternalDebugFileSymbols(options.getBoolean(OPTION_COPY_EXTERNAL_DEBUG_FILE_SYMBOLS,
isCopyExternalDebugFileSymbols())); isCopyExternalDebugFileSymbols()));
setCharsetName(options.getString(OPTION_CHARSET_NAME, getCharsetName()));
} }
} }

View file

@ -19,6 +19,8 @@ import static ghidra.app.util.bin.format.dwarf.DWARFTag.*;
import static ghidra.app.util.bin.format.dwarf.attribs.DWARFAttribute.*; import static ghidra.app.util.bin.format.dwarf.attribs.DWARFAttribute.*;
import java.io.*; import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.*; import java.util.*;
import java.util.Map.Entry; import java.util.Map.Entry;
@ -129,6 +131,7 @@ public class DWARFProgram implements Closeable {
private DWARFSectionProvider sectionProvider; private DWARFSectionProvider sectionProvider;
private StringTable debugStrings; private StringTable debugStrings;
private StringTable lineStrings; private StringTable lineStrings;
private Charset charset;
private int totalAggregateCount; private int totalAggregateCount;
private long programBaseAddressFixup; private long programBaseAddressFixup;
@ -255,10 +258,11 @@ public class DWARFProgram implements Closeable {
this.locationListTable = this.locationListTable =
new DWARFIndirectTable(this.debugLocLists, DWARFCompilationUnit::getLocListsBase); new DWARFIndirectTable(this.debugLocLists, DWARFCompilationUnit::getLocListsBase);
this.charset = importOptions.getCharset(StandardCharsets.UTF_8);
this.debugStrings = this.debugStrings =
StringTable.of(getBinaryReaderFor(DWARFSectionNames.DEBUG_STR, monitor)); StringTable.of(getBinaryReaderFor(DWARFSectionNames.DEBUG_STR, monitor), charset);
this.lineStrings = this.lineStrings =
StringTable.of(getBinaryReaderFor(DWARFSectionNames.DEBUG_LINE_STR, monitor)); StringTable.of(getBinaryReaderFor(DWARFSectionNames.DEBUG_LINE_STR, monitor), charset);
// if there are relocations (already handled by the ghidra loader) anywhere in the // if there are relocations (already handled by the ghidra loader) anywhere in the
// debuginfo or debugrange sections, then we don't need to manually fix up addresses // debuginfo or debugrange sections, then we don't need to manually fix up addresses
@ -1039,6 +1043,13 @@ public class DWARFProgram implements Closeable {
return getAggregate(die); return getAggregate(die);
} }
/**
* {@return charset to use when decoding debug strings}
* <p>
* The value is assigned from {@code importOptions.getCharset(StandardCharsets.UTF_8)}
* just before the debug string tables are created, and the same charset is handed to
* those tables.
*/
public Charset getCharset() {
return charset;
}
/** /**
* Returns a DWARF attribute string value, as specified by a form, offset/index, and the cu. * Returns a DWARF attribute string value, as specified by a form, offset/index, and the cu.
* *

View file

@ -16,6 +16,7 @@
package ghidra.app.util.bin.format.dwarf; package ghidra.app.util.bin.format.dwarf;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.Charset;
import ghidra.app.util.bin.BinaryReader; import ghidra.app.util.bin.BinaryReader;
import ghidra.util.datastruct.WeakValueHashMap; import ghidra.util.datastruct.WeakValueHashMap;
@ -33,23 +34,26 @@ public class StringTable {
* @param reader BinaryReader * @param reader BinaryReader
* @return new instance, or null if reader is null * @return new instance, or null if reader is null
*/ */
public static StringTable of(BinaryReader reader) { public static StringTable of(BinaryReader reader, Charset charset) {
if (reader == null) { if (reader == null) {
return null; return null;
} }
return new StringTable(reader); return new StringTable(reader, charset);
} }
protected BinaryReader reader; protected BinaryReader reader;
protected WeakValueHashMap<Long, String> cache = new WeakValueHashMap<>(); protected WeakValueHashMap<Long, String> cache = new WeakValueHashMap<>();
private Charset charset;
/** /**
* Creates a StringTable * Creates a StringTable
* *
* @param reader {@link BinaryReader} .debug_str or .debug_line_str * @param reader {@link BinaryReader} .debug_str or .debug_line_str
* @param charset {@link Charset} of strings
*/ */
public StringTable(BinaryReader reader) { public StringTable(BinaryReader reader, Charset charset) {
this.reader = reader; this.reader = reader;
this.charset = charset;
} }
/** /**
@ -82,7 +86,7 @@ public class StringTable {
String s = cache.get(offset); String s = cache.get(offset);
if (s == null) { if (s == null) {
s = reader.readUtf8String(offset); s = reader.readString(offset, charset, 1);
cache.put(offset, s); cache.put(offset, s);
} }

View file

@ -79,13 +79,14 @@ public enum DWARFForm {
@Override @Override
public long getSize(DWARFFormContext context) throws IOException { public long getSize(DWARFFormContext context) throws IOException {
long start = context.reader().getPointerIndex(); long start = context.reader().getPointerIndex();
context.reader().readNextUtf8String(); context.reader().readNextString(context.dprog().getCharset(), 1);
return context.reader().getPointerIndex() - start; return context.reader().getPointerIndex() - start;
} }
@Override @Override
public DWARFAttributeValue readValue(DWARFFormContext context) throws IOException { public DWARFAttributeValue readValue(DWARFFormContext context) throws IOException {
return new DWARFStringAttribute(context.reader().readNextUtf8String(), context.def()); String s = context.reader().readNextString(context.dprog().getCharset(), 1);
return new DWARFStringAttribute(s, context.def());
} }
}, },
DW_FORM_block(0x9, DWARFForm.DYNAMIC_SIZE, block) { DW_FORM_block(0x9, DWARFForm.DYNAMIC_SIZE, block) {

View file

@ -16,6 +16,7 @@
package ghidra.app.util.bin.format.dwarf.line; package ghidra.app.util.bin.format.dwarf.line;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List; import java.util.List;
import ghidra.app.util.bin.BinaryReader; import ghidra.app.util.bin.BinaryReader;
@ -32,11 +33,14 @@ public class DWARFFile {
* Reads a DWARFFile entry. * Reads a DWARFFile entry.
* *
* @param reader BinaryReader * @param reader BinaryReader
* @param cu {@link DWARFCompilationUnit}
* @return new DWARFFile, or null if end-of-list was found * @return new DWARFFile, or null if end-of-list was found
* @throws IOException if error reading * @throws IOException if error reading
*/ */
public static DWARFFile readV4(BinaryReader reader) throws IOException { public static DWARFFile readV4(BinaryReader reader, DWARFCompilationUnit cu)
String name = reader.readNextAsciiString(); throws IOException {
Charset charset = cu.getProgram().getCharset();
String name = reader.readNextString(charset, 1);
if (name.length() == 0) { if (name.length() == 0) {
// empty name == end-of-list of files // empty name == end-of-list of files
return null; return null;

View file

@ -16,6 +16,7 @@
package ghidra.app.util.bin.format.dwarf.line; package ghidra.app.util.bin.format.dwarf.line;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -106,19 +107,21 @@ public class DWARFLine {
} }
result.directories.add(new DWARFFile(defaultCompDir)); result.directories.add(new DWARFFile(defaultCompDir));
Charset charset = cu.getProgram().getCharset();
// Read all include directories, which are only a list of names in v4 // Read all include directories, which are only a list of names in v4
String dirName = reader.readNextAsciiString(); String dirName = reader.readNextString(charset, 1);
while (dirName.length() != 0) { while (dirName.length() != 0) {
DWARFFile dir = new DWARFFile(dirName); DWARFFile dir = new DWARFFile(dirName);
dir = fixupDir(dir, defaultCompDir); dir = fixupDir(dir, defaultCompDir);
result.directories.add(dir); result.directories.add(dir);
dirName = reader.readNextAsciiString(); dirName = reader.readNextString(charset, 1);
} }
// Read all files, ending when null (hit empty filename) // Read all files, ending when null (hit empty filename)
DWARFFile file; DWARFFile file;
while ((file = DWARFFile.readV4(reader)) != null) { while ((file = DWARFFile.readV4(reader, cu)) != null) {
result.files.add(file); result.files.add(file);
} }
} }

View file

@ -166,7 +166,7 @@ public final class DWARFLineProgramExecutor implements Closeable {
case DW_LNE_define_file: { case DW_LNE_define_file: {
// this instruction is deprecated in v5+, and not fully supported in this // this instruction is deprecated in v5+, and not fully supported in this
// impl // impl
String sourceFilename = reader.readNextUtf8String(); String sourceFilename = reader.readNextUtf8String(); // TODO: this is not used, but to be 100% should use dwarfprog's charset
int dirIndex = reader.readNextUnsignedVarIntExact(LEB128::unsigned); int dirIndex = reader.readNextUnsignedVarIntExact(LEB128::unsigned);
long lastMod = reader.readNext(LEB128::unsigned); long lastMod = reader.readNext(LEB128::unsigned);
long fileLen = reader.readNext(LEB128::unsigned); long fileLen = reader.readNext(LEB128::unsigned);

View file

@ -23,7 +23,8 @@ import ghidra.app.util.bin.*;
public class MockStringTable extends StringTable { public class MockStringTable extends StringTable {
public MockStringTable(BinaryReader reader) { public MockStringTable(BinaryReader reader) {
super(new BinaryReader(new ByteArrayProvider(new byte[4 * 1024]), true /* LE */)); super(new BinaryReader(new ByteArrayProvider(new byte[4 * 1024]), true /* LE */),
StandardCharsets.UTF_8);
} }
public void add(int index, String s) throws IOException { public void add(int index, String s) throws IOException {

View file

@ -18,6 +18,7 @@ package ghidra.app.util.bin.format.dwarf;
import static org.junit.Assert.*; import static org.junit.Assert.*;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.junit.Test; import org.junit.Test;
@ -44,7 +45,7 @@ public class StringTableTest extends AbstractGenericTest {
/* str3 */ (byte) 'x', (byte) 'y', (byte) '\n', (byte) 0 /* str3 */ (byte) 'x', (byte) 'y', (byte) '\n', (byte) 0
); );
// @formatter:on // @formatter:on
StringTable st = new StringTable(br); StringTable st = new StringTable(br, StandardCharsets.US_ASCII);
assertEquals("ab", st.getStringAtOffset(0)); assertEquals("ab", st.getStringAtOffset(0));
assertEquals("c", st.getStringAtOffset(3)); assertEquals("c", st.getStringAtOffset(3));
@ -62,7 +63,7 @@ public class StringTableTest extends AbstractGenericTest {
/* str3 */ (byte) 'x', (byte) 'y', (byte) '\n', (byte) 0 /* str3 */ (byte) 'x', (byte) 'y', (byte) '\n', (byte) 0
); );
// @formatter:on // @formatter:on
StringTable st = new StringTable(br); StringTable st = new StringTable(br, StandardCharsets.US_ASCII);
assertEquals("ab", st.getStringAtOffset(0)); assertEquals("ab", st.getStringAtOffset(0));
assertEquals("b", st.getStringAtOffset(1)); assertEquals("b", st.getStringAtOffset(1));
@ -79,7 +80,7 @@ public class StringTableTest extends AbstractGenericTest {
/* str3 */ (byte) 'x', (byte) 'y', (byte) '\n', (byte) 0 /* str3 */ (byte) 'x', (byte) 'y', (byte) '\n', (byte) 0
); );
// @formatter:on // @formatter:on
StringTable st = new StringTable(br); StringTable st = new StringTable(br, StandardCharsets.US_ASCII);
try { try {
st.getStringAtOffset(9); st.getStringAtOffset(9);
@ -99,7 +100,7 @@ public class StringTableTest extends AbstractGenericTest {
/* str3 */ (byte) 'x', (byte) 'y', (byte) '\n', (byte) 0 /* str3 */ (byte) 'x', (byte) 'y', (byte) '\n', (byte) 0
); );
// @formatter:on // @formatter:on
StringTable st = new StringTable(br); StringTable st = new StringTable(br, StandardCharsets.US_ASCII);
try { try {
st.getStringAtOffset(-2); st.getStringAtOffset(-2);
@ -113,7 +114,7 @@ public class StringTableTest extends AbstractGenericTest {
@Test @Test
public void testEmptyStrTable() { public void testEmptyStrTable() {
BinaryReader br = br(); BinaryReader br = br();
StringTable st = new StringTable(br); StringTable st = new StringTable(br, StandardCharsets.US_ASCII);
try { try {
st.getStringAtOffset(0); st.getStringAtOffset(0);