From 0e3e3cccbfbaeb2cb88f727a7a9417abdf9a102d Mon Sep 17 00:00:00 2001 From: Ryan Kurtz Date: Wed, 9 Aug 2023 10:50:32 -0400 Subject: [PATCH] GP-3728: Libraries extracted from the dyld_shared_cache filesystem now contain local symbol information, which reduces the occurrence of "<redacted>" primary symbols --- .../util/bin/format/macho/commands/NList.java | 18 +++++++++ .../macho/commands/SymbolTableCommand.java | 22 +++++++---- .../format/macho/dyld/DyldCacheHeader.java | 3 +- .../dyld/DyldCacheLocalSymbolsEntry.java | 37 ++++++++++++++++--- .../macho/dyld/DyldCacheLocalSymbolsInfo.java | 14 +++++-- .../app/util/opinion/MachoProgramBuilder.java | 16 ++++++-- .../dyldcache/DyldCacheDylibExtractor.java | 36 ++++++++++++++++-- .../ios/dyldcache/DyldCacheFileSystem.java | 2 +- 8 files changed, 121 insertions(+), 27 deletions(-) diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/commands/NList.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/commands/NList.java index 5d3e8c45db..ef4dc3c0fd 100644 --- a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/commands/NList.java +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/commands/NList.java @@ -16,6 +16,7 @@ package ghidra.app.util.bin.format.macho.commands; import java.io.IOException; +import java.util.List; import ghidra.app.util.bin.BinaryReader; import ghidra.app.util.bin.StructConverter; @@ -204,4 +205,21 @@ public class NList implements StructConverter { public String toString() { return string; } + + /** + * Gets the size in bytes of the given {@link NList}s (including associated strings) + * + * @param nlists A {@link List} of {@link NList}s + * @return The size in bytes of the given {@link NList}s (including associated strings) + */ + public static int getSize(List nlists) { + if (!nlists.isEmpty()) { + int totalStringSize = 0; + for (NList nlist : nlists) { + totalStringSize += nlist.getString().length() + 1; // Add 1 for
null terminator + } + return nlists.size() * nlists.get(0).getSize() + totalStringSize; + } + return 0; + } } diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/commands/SymbolTableCommand.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/commands/SymbolTableCommand.java index 46cb9f3c08..2f013c3ead 100644 --- a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/commands/SymbolTableCommand.java +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/commands/SymbolTableCommand.java @@ -123,6 +123,19 @@ public class SymbolTableCommand extends LoadCommand { return symbols; } + /** + * Adds the given {@link List} of {@link NList}s to this symbol/string table, and adjusts the + * affected symbol table load command fields appropriately + * + * @param list The {@link List} of {@link NList}s to add (may be empty) + */ + public void addSymbols(List<NList> list) { + symbols.addAll(list); + nsyms += list.size(); + stroff += list.isEmpty() ? 0 : list.size() * list.get(0).getSize(); + strsize = symbols.stream().mapToInt(e -> e.getString().length() + 1).sum(); + } + public NList getSymbolAt(int index) { if ((index & DynamicSymbolTableConstants.INDIRECT_SYMBOL_LOCAL) != 0 || (index & DynamicSymbolTableConstants.INDIRECT_SYMBOL_ABS) != 0) { @@ -159,14 +172,7 @@ public class SymbolTableCommand extends LoadCommand { @Override public int getLinkerDataSize() { - if (!symbols.isEmpty()) { - int totalStringSize = 0; - for (NList nlist : symbols) { - totalStringSize += nlist.getString().length() + 1; // Add 1 for null terminator - } - return nsyms * symbols.get(0).getSize() + totalStringSize; - } - return 0; + return NList.getSize(symbols); } @Override diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/dyld/DyldCacheHeader.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/dyld/DyldCacheHeader.java index 54bcd10f57..6f434a4581 100644 ---
a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/dyld/DyldCacheHeader.java +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/dyld/DyldCacheHeader.java @@ -777,7 +777,8 @@ public class DyldCacheHeader implements StructConverter { monitor.initialize(1); try { reader.setPointerIndex(localSymbolsOffset); - localSymbolsInfo = new DyldCacheLocalSymbolsInfo(reader, architecture); + boolean use64bitOffsets = imagesOffsetOld == 0; + localSymbolsInfo = new DyldCacheLocalSymbolsInfo(reader, architecture, use64bitOffsets); localSymbolsInfo.parse(log, monitor); monitor.incrementProgress(1); } diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/dyld/DyldCacheLocalSymbolsEntry.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/dyld/DyldCacheLocalSymbolsEntry.java index bfefa8dd19..24f584b59a 100644 --- a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/dyld/DyldCacheLocalSymbolsEntry.java +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/dyld/DyldCacheLocalSymbolsEntry.java @@ -28,29 +28,56 @@ import ghidra.util.exception.DuplicateNameException; * * @see dyld_cache_format.h */ -@SuppressWarnings("unused") public class DyldCacheLocalSymbolsEntry implements StructConverter { - private int dylibOffset; + private long dylibOffset; private int nlistStartIndex; private int nlistCount; + private boolean use64bitOffsets; + /** * Create a new {@link DyldCacheLocalSymbolsEntry}. 
* * @param reader A {@link BinaryReader} positioned at the start of a DYLD local symbols entry + * @param use64bitOffsets True if the DYLD local symbol entries use 64-bit dylib offsets; false + * if they use 32-bit * @throws IOException if there was an IO-related problem creating the DYLD local symbols entry */ - public DyldCacheLocalSymbolsEntry(BinaryReader reader) throws IOException { - dylibOffset = reader.readNextInt(); + public DyldCacheLocalSymbolsEntry(BinaryReader reader, boolean use64bitOffsets) + throws IOException { + this.use64bitOffsets = use64bitOffsets; + + dylibOffset = use64bitOffsets ? reader.readNextLong() : reader.readNextInt(); nlistStartIndex = reader.readNextInt(); nlistCount = reader.readNextInt(); } + /** + * {@return The dylib offset} + */ + public long getDylibOffset() { + return dylibOffset; + } + + /** + * {@return The nlist start index} + */ + public int getNListStartIndex() { + return nlistStartIndex; + } + + /** + * {@return The nlist count} + */ + public int getNListCount() { + return nlistCount; + } + @Override public DataType toDataType() throws DuplicateNameException, IOException { StructureDataType struct = new StructureDataType("dyld_cache_local_symbols_entry", 0); - struct.add(DWORD, "dylibOffset", ""); + struct.add(use64bitOffsets ? 
QWORD : DWORD, "dylibOffset", ""); struct.add(DWORD, "nlistStartIndex", ""); struct.add(DWORD, "nlistCount", ""); struct.setCategoryPath(new CategoryPath(MachConstants.DATA_TYPE_CATEGORY)); diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/dyld/DyldCacheLocalSymbolsInfo.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/dyld/DyldCacheLocalSymbolsInfo.java index 4a230f1b96..807f3be7d0 100644 --- a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/dyld/DyldCacheLocalSymbolsInfo.java +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/macho/dyld/DyldCacheLocalSymbolsInfo.java @@ -56,16 +56,19 @@ public class DyldCacheLocalSymbolsInfo implements StructConverter { private List nlistList; private List localSymbolsEntryList; private boolean is32bit; + private boolean use64bitOffsets; /** * Create a new {@link DyldCacheLocalSymbolsInfo}. * * @param reader A {@link BinaryReader} positioned at the start of a DYLD local symbols info * @param architecture The {@link DyldArchitecture} + * @param use64bitOffsets True if the DYLD local symbol entries use 64-bit dylib offsets; false + * if they use 32-bit * @throws IOException if there was an IO-related problem creating the DYLD local symbols info */ - public DyldCacheLocalSymbolsInfo(BinaryReader reader, DyldArchitecture architecture) - throws IOException { + public DyldCacheLocalSymbolsInfo(BinaryReader reader, DyldArchitecture architecture, + boolean use64bitOffsets) throws IOException { this.reader = reader; this.startIndex = reader.getPointerIndex(); @@ -81,6 +84,8 @@ public class DyldCacheLocalSymbolsInfo implements StructConverter { is32bit = !(architecture.getCpuType() == CpuTypes.CPU_TYPE_ARM_64 || architecture.getCpuType() == CpuTypes.CPU_TYPE_X86_64); + + this.use64bitOffsets = use64bitOffsets; } /** @@ -175,13 +180,14 @@ public class DyldCacheLocalSymbolsInfo implements StructConverter { } } - private void 
parseLocalSymbols(MessageLog log, TaskMonitor monitor) throws CancelledException { + private void parseLocalSymbols(MessageLog log, TaskMonitor monitor) + throws CancelledException { monitor.setMessage("Parsing DYLD local symbol entries..."); monitor.initialize(entriesCount); reader.setPointerIndex(startIndex + entriesOffset); try { for (int i = 0; i < entriesCount; ++i) { - localSymbolsEntryList.add(new DyldCacheLocalSymbolsEntry(reader)); + localSymbolsEntryList.add(new DyldCacheLocalSymbolsEntry(reader, use64bitOffsets)); monitor.checkCancelled(); monitor.incrementProgress(1); } diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/opinion/MachoProgramBuilder.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/opinion/MachoProgramBuilder.java index 97fe494f43..7408285259 100644 --- a/Ghidra/Features/Base/src/main/java/ghidra/app/util/opinion/MachoProgramBuilder.java +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/opinion/MachoProgramBuilder.java @@ -500,6 +500,7 @@ public class MachoProgramBuilder { protected void processSymbolTables(MachHeader header, boolean processExports) throws Exception { monitor.setMessage("Processing symbol tables..."); + SymbolTable symbolTable = program.getSymbolTable(); List commands = header.getLoadCommands(SymbolTableCommand.class); for (SymbolTableCommand symbolTableCommand : commands) { List symbols = symbolTableCommand.getSymbols(); @@ -533,7 +534,7 @@ public class MachoProgramBuilder { } if (processExports && symbol.isExternal()) { - program.getSymbolTable().addExternalEntryPoint(addr); + symbolTable.addExternalEntryPoint(addr); } String string = symbol.getString(); @@ -546,14 +547,19 @@ public class MachoProgramBuilder { markAsThumb(addr); } - if (program.getSymbolTable().getGlobalSymbol(string, addr) != null) { + if (symbolTable.getGlobalSymbol(string, addr) != null) { continue; } try { if (!symbol.isExternal() || processExports) { - program.getSymbolTable().createLabel(addr, string, 
SourceType.IMPORTED); + Symbol primary = symbolTable.getPrimarySymbol(addr); + Symbol newSymbol = + symbolTable.createLabel(addr, string, SourceType.IMPORTED); + if (primary != null && primary.getName().equals("<redacted>")) { + newSymbol.setPrimary(); + } if (symbol.isExternal()) { - program.getSymbolTable().addExternalEntryPoint(addr); + symbolTable.addExternalEntryPoint(addr); } } } @@ -569,6 +575,8 @@ public class MachoProgramBuilder { // from a dyld_shared_cache. If the Mach-O is fully-formed and contains binding information // (found in the DyldChainedFixupsCommand or DyldInfoCommand), thunk analysis properly // associates indirect symbols with their "real" symbol and we shouldn't do anything here. + // We hope to one day include binding information in our extracted dylibs, at which point + // this method can fully go away. if (machoHeader.getFirstLoadCommand(DyldChainedFixupsCommand.class) != null || machoHeader.getFirstLoadCommand(DyldInfoCommand.class) != null) { return; diff --git a/Ghidra/Features/FileFormats/src/main/java/ghidra/file/formats/ios/dyldcache/DyldCacheDylibExtractor.java b/Ghidra/Features/FileFormats/src/main/java/ghidra/file/formats/ios/dyldcache/DyldCacheDylibExtractor.java index b64ecf2aff..caec7980a8 100644 --- a/Ghidra/Features/FileFormats/src/main/java/ghidra/file/formats/ios/dyldcache/DyldCacheDylibExtractor.java +++ b/Ghidra/Features/FileFormats/src/main/java/ghidra/file/formats/ios/dyldcache/DyldCacheDylibExtractor.java @@ -152,6 +152,9 @@ public class DyldCacheDylibExtractor { // the DYLIB we are extracting, resulting in a significantly smaller file.
if (segment == linkEditSegment) { for (LoadCommand cmd : machoHeader.getLoadCommands()) { + if (cmd instanceof SymbolTableCommand symbolTable) { + symbolTable.addSymbols(getLocalSymbols(splitDyldCache)); + } int offset = cmd.getLinkerDataOffset(); int size = cmd.getLinkerDataSize(); if (offset == 0 || size == 0) { @@ -217,6 +220,31 @@ public class DyldCacheDylibExtractor { return new ByteArrayProvider(packed, fsrl); } + /** + * Gets a {@link List} of local {@link NList symbol}s + * + * @param splitDyldCache The {@link SplitDyldCache} + * @return A {@link List} of local {@link NList symbol}s (could be empty) + */ + private List getLocalSymbols(SplitDyldCache splitDyldCache) { + long base = splitDyldCache.getDyldCacheHeader(0).getBaseAddress(); + for (int i = 0; i < splitDyldCache.size(); i++) { + DyldCacheHeader header = splitDyldCache.getDyldCacheHeader(i); + DyldCacheLocalSymbolsInfo info = header.getLocalSymbolsInfo(); + if (info == null) { + continue; + } + for (DyldCacheLocalSymbolsEntry entry : info.getLocalSymbolsEntries()) { + int index = entry.getNListStartIndex(); + int count = entry.getNListCount(); + if (base + entry.getDylibOffset() == textSegment.getVMaddress() && count > 0) { + return info.getNList().subList(index, index + count); + } + } + } + return List.of(); + } + /** * Creates a packed __LINKEDIT segment array * @@ -233,16 +261,14 @@ public class DyldCacheDylibExtractor { for (LoadCommand cmd : packedLinkEditDataStarts.keySet()) { if (cmd instanceof SymbolTableCommand symbolTable && symbolTable.getNumberOfSymbols() > 0) { - byte[] packedSymbolStringTable = new byte[cmd.getLinkerDataSize()]; List symbols = symbolTable.getSymbols(); + byte[] packedSymbolStringTable = new byte[NList.getSize(symbols)]; int nlistIndex = 0; int stringIndex = symbols.get(0).getSize() * symbols.size(); int stringIndexOrig = stringIndex; for (NList nlist : symbols) { - byte[] nlistArray = nlistToArray(nlist, stringIndex); + byte[] nlistArray = nlistToArray(nlist, 
stringIndex - stringIndexOrig); byte[] stringArray = nlist.getString().getBytes(StandardCharsets.US_ASCII); - System.arraycopy(toBytes(stringIndex - stringIndexOrig, 4), 0, nlistArray, - 0, 4); System.arraycopy(nlistArray, 0, packedSymbolStringTable, nlistIndex, nlistArray.length); System.arraycopy(stringArray, 0, packedSymbolStringTable, stringIndex, @@ -394,11 +420,13 @@ public class DyldCacheDylibExtractor { if (cmd.getSymbolOffset() > 0) { long symbolOffset = fixup(cmd.getStartIndex() + 0x8, getLinkEditAdjustment(cmd), 4, linkEditSegment); + set(cmd.getStartIndex() + 0xc, cmd.getNumberOfSymbols(), 4); if (cmd.getStringTableOffset() > 0) { if (cmd.getNumberOfSymbols() > 0) { set(cmd.getStartIndex() + 0x10, symbolOffset + cmd.getNumberOfSymbols() * cmd.getSymbolAt(0).getSize(), 4); + set(cmd.getStartIndex() + 0x14, cmd.getStringTableSize(), 4); } else { set(cmd.getStartIndex() + 0x10, symbolOffset, 4); diff --git a/Ghidra/Features/FileFormats/src/main/java/ghidra/file/formats/ios/dyldcache/DyldCacheFileSystem.java b/Ghidra/Features/FileFormats/src/main/java/ghidra/file/formats/ios/dyldcache/DyldCacheFileSystem.java index a7f1d7395e..b2cfc90afa 100644 --- a/Ghidra/Features/FileFormats/src/main/java/ghidra/file/formats/ios/dyldcache/DyldCacheFileSystem.java +++ b/Ghidra/Features/FileFormats/src/main/java/ghidra/file/formats/ios/dyldcache/DyldCacheFileSystem.java @@ -120,7 +120,7 @@ public class DyldCacheFileSystem extends GFileSystemBase { MessageLog log = new MessageLog(); monitor.setMessage("Opening DYLD cache..."); - splitDyldCache = new SplitDyldCache(provider, false, log, monitor); + splitDyldCache = new SplitDyldCache(provider, true, log, monitor); for (int i = 0; i < splitDyldCache.size(); i++) { DyldCacheHeader header = splitDyldCache.getDyldCacheHeader(i); monitor.setMessage("Find files...");