GP-5871 add analyzer option to set charset used by DWARF importer

This commit is contained in:
dev747368 2025-07-25 18:32:34 +00:00
parent 1724227c31
commit 3aa00a125b
10 changed files with 85 additions and 30 deletions

View file

@ -659,7 +659,7 @@ public class BinaryReader {
* @return the string
* @exception IOException if an I/O error occurs
*/
private String readNextString(Charset charset, int charLen) throws IOException {
public String readNextString(Charset charset, int charLen) throws IOException {
byte[] bytes = readUntilNullTerm(currentIndex, charLen);
currentIndex += bytes.length + charLen;
@ -681,7 +681,7 @@ public class BinaryReader {
* @return the string
* @exception IOException if an I/O error occurs
*/
private String readNextString(int charCount, Charset charset, int charLen) throws IOException {
public String readNextString(int charCount, Charset charset, int charLen) throws IOException {
if (charCount < 0) {
throw new IllegalArgumentException(String.format("Invalid charCount: %d", charCount));
}
@ -946,7 +946,7 @@ public class BinaryReader {
* @return the string
* @exception IOException if an I/O error occurs
*/
private String readString(long index, int charCount, Charset charset, int charLen)
public String readString(long index, int charCount, Charset charset, int charLen)
throws IOException {
if (charCount < 0) {
throw new IllegalArgumentException(String.format("Invalid charCount: %d", charCount));
@ -968,7 +968,7 @@ public class BinaryReader {
* @return the string
* @exception IOException if an I/O error occurs
*/
private String readString(long index, Charset charset, int charLen) throws IOException {
public String readString(long index, Charset charset, int charLen) throws IOException {
byte[] bytes = readUntilNullTerm(index, charLen);
String result = new String(bytes, charset);

View file

@ -15,6 +15,8 @@
*/
package ghidra.app.util.bin.format.dwarf;
import java.nio.charset.Charset;
import ghidra.app.plugin.core.analysis.AnalysisOptionsUpdater;
import ghidra.app.plugin.core.analysis.DWARFAnalyzer;
import ghidra.app.services.Analyzer;
@ -83,11 +85,16 @@ public class DWARFImportOptions {
"Maximum length for a source map entry. Longer lengths will be replaced with 0";
private static final String OPTION_COPY_EXTERNAL_DEBUG_FILE_SYMBOLS =
"Copy external debug file symbols";
"Copy External Debug File Symbols";
private static final String OPTION_COPY_EXTERNAL_DEBUG_FILE_SYMBOLS_DESC =
"Copies symbols (which will typically be mangled) from a found external debug file into " +
"the main program";
private static final String OPTION_CHARSET_NAME = "Debug Strings Charset";
private static final String OPTION_CHARSET_NAME_DESC = """
Charset to use when decoding debug strings (symbols, filenames, etc).
Default is utf-8. Typical values will be 'ascii' or 'utf-8'.""";
//==================================================================================================
// Old Option Names - Should stick around for multiple major versions after 10.2
//==================================================================================================
@ -126,6 +133,7 @@ public class DWARFImportOptions {
private String defaultCC = "";
private long maxSourceMapEntryLength = 2000;
private boolean copyExternalDebugFileSymbols = true;
private String charsetName = "";
/**
* Create new instance
@ -453,6 +461,25 @@ public class DWARFImportOptions {
copyExternalDebugFileSymbols = b;
}
public Charset getCharset(Charset defaultCharset) {
try {
return charsetName != null && !charsetName.isBlank()
? Charset.forName(charsetName)
: defaultCharset;
}
catch (Throwable th) {
return defaultCharset;
}
}
public String getCharsetName() {
return charsetName;
}
public void setCharsetName(String charsetName) {
this.charsetName = charsetName;
}
/**
* See {@link Analyzer#registerOptions(Options, ghidra.program.model.listing.Program)}
*
@ -498,6 +525,9 @@ public class DWARFImportOptions {
options.registerOption(OPTION_COPY_EXTERNAL_DEBUG_FILE_SYMBOLS,
isCopyExternalDebugFileSymbols(), null, OPTION_COPY_EXTERNAL_DEBUG_FILE_SYMBOLS_DESC);
options.registerOption(OPTION_CHARSET_NAME, getCharsetName(), null,
OPTION_CHARSET_NAME_DESC);
}
/**
@ -529,6 +559,6 @@ public class DWARFImportOptions {
options.getLong(OPTION_MAX_SOURCE_ENTRY_LENGTH, getMaxSourceMapEntryLength()));
setCopyExternalDebugFileSymbols(options.getBoolean(OPTION_COPY_EXTERNAL_DEBUG_FILE_SYMBOLS,
isCopyExternalDebugFileSymbols()));
setCharsetName(options.getString(OPTION_CHARSET_NAME, getCharsetName()));
}
}

View file

@ -19,6 +19,8 @@ import static ghidra.app.util.bin.format.dwarf.DWARFTag.*;
import static ghidra.app.util.bin.format.dwarf.attribs.DWARFAttribute.*;
import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.Map.Entry;
@ -129,6 +131,7 @@ public class DWARFProgram implements Closeable {
private DWARFSectionProvider sectionProvider;
private StringTable debugStrings;
private StringTable lineStrings;
private Charset charset;
private int totalAggregateCount;
private long programBaseAddressFixup;
@ -255,10 +258,11 @@ public class DWARFProgram implements Closeable {
this.locationListTable =
new DWARFIndirectTable(this.debugLocLists, DWARFCompilationUnit::getLocListsBase);
this.charset = importOptions.getCharset(StandardCharsets.UTF_8);
this.debugStrings =
StringTable.of(getBinaryReaderFor(DWARFSectionNames.DEBUG_STR, monitor));
StringTable.of(getBinaryReaderFor(DWARFSectionNames.DEBUG_STR, monitor), charset);
this.lineStrings =
StringTable.of(getBinaryReaderFor(DWARFSectionNames.DEBUG_LINE_STR, monitor));
StringTable.of(getBinaryReaderFor(DWARFSectionNames.DEBUG_LINE_STR, monitor), charset);
// if there are relocations (already handled by the ghidra loader) anywhere in the
// debuginfo or debugrange sections, then we don't need to manually fix up addresses
@ -1039,6 +1043,13 @@ public class DWARFProgram implements Closeable {
return getAggregate(die);
}
/**
* {@return charset to use when decoding debug strings}
*/
public Charset getCharset() {
return charset;
}
/**
* Returns a DWARF attribute string value, as specified by a form, offset/index, and the cu.
*

View file

@ -16,6 +16,7 @@
package ghidra.app.util.bin.format.dwarf;
import java.io.IOException;
import java.nio.charset.Charset;
import ghidra.app.util.bin.BinaryReader;
import ghidra.util.datastruct.WeakValueHashMap;
@ -33,23 +34,26 @@ public class StringTable {
* @param reader BinaryReader
* @return new instance, or null if reader is null
*/
public static StringTable of(BinaryReader reader) {
public static StringTable of(BinaryReader reader, Charset charset) {
if (reader == null) {
return null;
}
return new StringTable(reader);
return new StringTable(reader, charset);
}
protected BinaryReader reader;
protected WeakValueHashMap<Long, String> cache = new WeakValueHashMap<>();
private Charset charset;
/**
* Creates a StringTable
*
* @param reader {@link BinaryReader} .debug_str or .debug_line_str
* @param charset {@link Charset} of strings
*/
public StringTable(BinaryReader reader) {
public StringTable(BinaryReader reader, Charset charset) {
this.reader = reader;
this.charset = charset;
}
/**
@ -82,7 +86,7 @@ public class StringTable {
String s = cache.get(offset);
if (s == null) {
s = reader.readUtf8String(offset);
s = reader.readString(offset, charset, 1);
cache.put(offset, s);
}

View file

@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -79,13 +79,14 @@ public enum DWARFForm {
@Override
public long getSize(DWARFFormContext context) throws IOException {
long start = context.reader().getPointerIndex();
context.reader().readNextUtf8String();
context.reader().readNextString(context.dprog().getCharset(), 1);
return context.reader().getPointerIndex() - start;
}
@Override
public DWARFAttributeValue readValue(DWARFFormContext context) throws IOException {
return new DWARFStringAttribute(context.reader().readNextUtf8String(), context.def());
String s = context.reader().readNextString(context.dprog().getCharset(), 1);
return new DWARFStringAttribute(s, context.def());
}
},
DW_FORM_block(0x9, DWARFForm.DYNAMIC_SIZE, block) {

View file

@ -16,6 +16,7 @@
package ghidra.app.util.bin.format.dwarf.line;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;
import ghidra.app.util.bin.BinaryReader;
@ -32,11 +33,14 @@ public class DWARFFile {
* Reads a DWARFFile entry.
*
* @param reader BinaryReader
* @param cu {@link DWARFCompilationUnit}
* @return new DWARFFile, or null if end-of-list was found
* @throws IOException if error reading
*/
public static DWARFFile readV4(BinaryReader reader) throws IOException {
String name = reader.readNextAsciiString();
public static DWARFFile readV4(BinaryReader reader, DWARFCompilationUnit cu)
throws IOException {
Charset charset = cu.getProgram().getCharset();
String name = reader.readNextString(charset, 1);
if (name.length() == 0) {
// empty name == end-of-list of files
return null;

View file

@ -16,6 +16,7 @@
package ghidra.app.util.bin.format.dwarf.line;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
@ -106,19 +107,21 @@ public class DWARFLine {
}
result.directories.add(new DWARFFile(defaultCompDir));
Charset charset = cu.getProgram().getCharset();
// Read all include directories, which are only a list of names in v4
String dirName = reader.readNextAsciiString();
String dirName = reader.readNextString(charset, 1);
while (dirName.length() != 0) {
DWARFFile dir = new DWARFFile(dirName);
dir = fixupDir(dir, defaultCompDir);
result.directories.add(dir);
dirName = reader.readNextAsciiString();
dirName = reader.readNextString(charset, 1);
}
// Read all files, ending when null (hit empty filename)
DWARFFile file;
while ((file = DWARFFile.readV4(reader)) != null) {
while ((file = DWARFFile.readV4(reader, cu)) != null) {
result.files.add(file);
}
}

View file

@ -166,7 +166,7 @@ public final class DWARFLineProgramExecutor implements Closeable {
case DW_LNE_define_file: {
// this instruction is deprecated in v5+, and not fully supported in this
// impl
String sourceFilename = reader.readNextUtf8String();
String sourceFilename = reader.readNextUtf8String(); // TODO: this is not used, but to be 100% should use dwarfprog's charset
int dirIndex = reader.readNextUnsignedVarIntExact(LEB128::unsigned);
long lastMod = reader.readNext(LEB128::unsigned);
long fileLen = reader.readNext(LEB128::unsigned);

View file

@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -23,7 +23,8 @@ import ghidra.app.util.bin.*;
public class MockStringTable extends StringTable {
public MockStringTable(BinaryReader reader) {
super(new BinaryReader(new ByteArrayProvider(new byte[4 * 1024]), true /* LE */));
super(new BinaryReader(new ByteArrayProvider(new byte[4 * 1024]), true /* LE */),
StandardCharsets.UTF_8);
}
public void add(int index, String s) throws IOException {

View file

@ -18,6 +18,7 @@ package ghidra.app.util.bin.format.dwarf;
import static org.junit.Assert.*;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.junit.Test;
@ -44,7 +45,7 @@ public class StringTableTest extends AbstractGenericTest {
/* str3 */ (byte) 'x', (byte) 'y', (byte) '\n', (byte) 0
);
// @formatter:on
StringTable st = new StringTable(br);
StringTable st = new StringTable(br, StandardCharsets.US_ASCII);
assertEquals("ab", st.getStringAtOffset(0));
assertEquals("c", st.getStringAtOffset(3));
@ -62,7 +63,7 @@ public class StringTableTest extends AbstractGenericTest {
/* str3 */ (byte) 'x', (byte) 'y', (byte) '\n', (byte) 0
);
// @formatter:on
StringTable st = new StringTable(br);
StringTable st = new StringTable(br, StandardCharsets.US_ASCII);
assertEquals("ab", st.getStringAtOffset(0));
assertEquals("b", st.getStringAtOffset(1));
@ -79,7 +80,7 @@ public class StringTableTest extends AbstractGenericTest {
/* str3 */ (byte) 'x', (byte) 'y', (byte) '\n', (byte) 0
);
// @formatter:on
StringTable st = new StringTable(br);
StringTable st = new StringTable(br, StandardCharsets.US_ASCII);
try {
st.getStringAtOffset(9);
@ -99,7 +100,7 @@ public class StringTableTest extends AbstractGenericTest {
/* str3 */ (byte) 'x', (byte) 'y', (byte) '\n', (byte) 0
);
// @formatter:on
StringTable st = new StringTable(br);
StringTable st = new StringTable(br, StandardCharsets.US_ASCII);
try {
st.getStringAtOffset(-2);
@ -113,7 +114,7 @@ public class StringTableTest extends AbstractGenericTest {
@Test
public void testEmptyStrTable() {
BinaryReader br = br();
StringTable st = new StringTable(br);
StringTable st = new StringTable(br, StandardCharsets.US_ASCII);
try {
st.getStringAtOffset(0);