From c53e21813da51cea5a21ac878c0e5b622f2be0ef Mon Sep 17 00:00:00 2001 From: ghizard <50744617+ghizard@users.noreply.github.com> Date: Mon, 21 Aug 2023 08:17:41 -0400 Subject: [PATCH] GP-3714 - PDB perf: investigate random access of type records --- .../format/pdb2/pdbreader/AbstractPdb.java | 14 ++--- .../util/bin/format/pdb2/pdbreader/TPI.java | 7 +++ .../pdb2/pdbreader/TypeProgramInterface.java | 62 +++++++++++++++++-- 3 files changed, 70 insertions(+), 13 deletions(-) diff --git a/Ghidra/Features/PDB/src/main/java/ghidra/app/util/bin/format/pdb2/pdbreader/AbstractPdb.java b/Ghidra/Features/PDB/src/main/java/ghidra/app/util/bin/format/pdb2/pdbreader/AbstractPdb.java index 2fa6dbe237..b62901398d 100644 --- a/Ghidra/Features/PDB/src/main/java/ghidra/app/util/bin/format/pdb2/pdbreader/AbstractPdb.java +++ b/Ghidra/Features/PDB/src/main/java/ghidra/app/util/bin/format/pdb2/pdbreader/AbstractPdb.java @@ -169,8 +169,7 @@ public abstract class AbstractPdb implements AutoCloseable { * @throws PdbException upon error in processing components * @throws CancelledException upon user cancellation */ - public void deserialize() - throws IOException, PdbException, CancelledException { + public void deserialize() throws IOException, PdbException, CancelledException { // msf should only be null for testing versions of PDB. if (msf == null) { return; @@ -330,6 +329,8 @@ public abstract class AbstractPdb implements AutoCloseable { recordNumber = fixupTypeIndex(recordNumber, typeClass); AbstractMsType msType = getTPI(recordNumber.getCategory()).getRecord(recordNumber.getNumber()); +// AbstractMsType msType = +// getTPI(recordNumber.getCategory()).getRandomAccessRecord(recordNumber.getNumber()); if (!typeClass.isInstance(msType)) { if (!recordNumber.isNoType()) { PdbLog.logGetTypeClassMismatch(msType, typeClass); @@ -494,8 +495,7 @@ public abstract class AbstractPdb implements AutoCloseable { * @throws PdbException upon error in processing components * @throws CancelledException upon user cancellation */ - void deserializeSubstreams() - throws IOException, PdbException, CancelledException { + void deserializeSubstreams() throws IOException, PdbException, CancelledException { if (substreamsDeserialized) { return; @@ -594,8 +594,7 @@ public abstract class AbstractPdb implements AutoCloseable { * @throws PdbException upon error parsing a field * @throws CancelledException upon user cancellation */ - abstract void deserializeDirectory() - throws IOException, PdbException, CancelledException; + abstract void deserializeDirectory() throws IOException, PdbException, CancelledException; /** * Dumps the PDB Directory to {@link Writer}. This package-protected method is for @@ -616,8 +615,7 @@ public abstract class AbstractPdb implements AutoCloseable { * inability to read required bytes * @throws CancelledException upon user cancellation */ - protected PdbByteReader getDirectoryReader() - throws IOException, CancelledException { + protected PdbByteReader getDirectoryReader() throws IOException, CancelledException { return getReaderForStreamNumber(PDB_DIRECTORY_STREAM_NUMBER, 0, MsfStream.MAX_STREAM_LENGTH); } diff --git a/Ghidra/Features/PDB/src/main/java/ghidra/app/util/bin/format/pdb2/pdbreader/TPI.java b/Ghidra/Features/PDB/src/main/java/ghidra/app/util/bin/format/pdb2/pdbreader/TPI.java index 02d0ce0c23..a40145fade 100644 --- a/Ghidra/Features/PDB/src/main/java/ghidra/app/util/bin/format/pdb2/pdbreader/TPI.java +++ b/Ghidra/Features/PDB/src/main/java/ghidra/app/util/bin/format/pdb2/pdbreader/TPI.java @@ -23,4 +23,11 @@ public interface TPI { int getTypeIndexMaxExclusive(); AbstractMsType getRecord(int recordNumber); + + /** + * Random access of {@link AbstractMsType} record indicated by {@code recordNumber} + * @param recordNumber record number of the record to retrieve + * @return {@link AbstractMsType} pertaining to the record number + */ + AbstractMsType getRandomAccessRecord(int recordNumber); } diff --git a/Ghidra/Features/PDB/src/main/java/ghidra/app/util/bin/format/pdb2/pdbreader/TypeProgramInterface.java b/Ghidra/Features/PDB/src/main/java/ghidra/app/util/bin/format/pdb2/pdbreader/TypeProgramInterface.java index 64ae0db649..e773b97127 100644 --- a/Ghidra/Features/PDB/src/main/java/ghidra/app/util/bin/format/pdb2/pdbreader/TypeProgramInterface.java +++ b/Ghidra/Features/PDB/src/main/java/ghidra/app/util/bin/format/pdb2/pdbreader/TypeProgramInterface.java @@ -59,6 +59,10 @@ public abstract class TypeProgramInterface implements TPI { protected int versionNumber = 0; + private record OffLen(int offset, int length) {}; // record type for quick random access + + private List offLenRecords; + //============================================================================================== // API //============================================================================================== @@ -68,8 +72,7 @@ public abstract class TypeProgramInterface implements TPI { * @param recordCategory the RecordCategory of these records * @param streamNumber the stream number that contains the {@link TypeProgramInterface} data */ - public TypeProgramInterface(AbstractPdb pdb, RecordCategory recordCategory, - int streamNumber) { + public TypeProgramInterface(AbstractPdb pdb, RecordCategory recordCategory, int streamNumber) { Objects.requireNonNull(pdb, "pdb cannot be null"); this.pdb = pdb; this.recordCategory = recordCategory; @@ -142,6 +145,40 @@ public abstract class TypeProgramInterface implements TPI { return typeList.get(recordNumber - typeIndexMin); } + @Override + public AbstractMsType getRandomAccessRecord(int recordNumber) { + if (recordNumber < 0 || recordNumber - typeIndexMin > typeList.size()) { + // This should not happen, but we have seen it and cannot yet explain it. + // So, for now, we are creating and returning a new BadMsType. + PdbLog.logBadTypeRecordIndex(this, recordNumber); + BadMsType type = new BadMsType(pdb, 0); + type.setRecordNumber(RecordNumber.make(recordCategory, recordNumber)); + return type; + } + if (recordNumber < typeIndexMin) { + PrimitiveMsType primitive = primitiveTypesByRecordNumber.get(recordNumber); + if (primitive == null) { + primitive = new PrimitiveMsType(pdb, recordNumber); + primitiveTypesByRecordNumber.put(recordNumber, primitive); + } + return primitive; + } + + RecordNumber rn = RecordNumber.make(recordCategory, recordNumber); + OffLen offLen = offLenRecords.get(recordNumber - typeIndexMin); + + try { + PdbByteReader recordReader = + pdb.getReaderForStreamNumber(streamNumber, offLen.offset(), offLen.length()); + return TypeParser.parseRecord(pdb, recordReader, rn); + } + catch (PdbException | IOException | CancelledException e) { + BadMsType badType = new BadMsType(pdb, 0); + badType.setRecordNumber(RecordNumber.make(recordCategory, recordNumber)); + return badType; + } + } + //============================================================================================== // Package-Protected Internals //============================================================================================== @@ -165,7 +202,9 @@ public abstract class TypeProgramInterface implements TPI { // Commented out, but this is currently where we might put this method. See the note // placed within the method (deserializeHashStreams()) for more information about why // we have this commented out. - //hash.deserializeHashStreams(monitor); + //hash.deserializeHashStreams(pdb.getMonitor()); + + createOffLenRecords(reader); deserializeTypeRecords(reader); @@ -254,6 +293,20 @@ public abstract class TypeProgramInterface implements TPI { //============================================================================================== // Internal Data Methods //============================================================================================== + private void createOffLenRecords(PdbByteReader reader) throws PdbException, CancelledException { + int savedIndex = reader.getIndex(); + offLenRecords = new ArrayList<>(); + while (reader.hasMore()) { + pdb.checkCancelled(); + int recordLength = reader.parseUnsignedShortVal(); + // reading offset after parsing length so we have correct offset to read from later + int offset = reader.getIndex(); + reader.skip(recordLength); + offLenRecords.add(new OffLen(offset, recordLength)); + } + reader.setIndex(savedIndex); // restore reader to original state + } + /** * Deserializes the Type Records of this class * @param reader {@link PdbByteReader} from which to deserialize the data @@ -418,8 +471,7 @@ public abstract class TypeProgramInterface implements TPI { if (hashStreamNumberAuxiliary == 0xffff) { return; } - PdbByteReader readerAuxiliary = - pdb.getReaderForStreamNumber(hashStreamNumberAuxiliary); + PdbByteReader readerAuxiliary = pdb.getReaderForStreamNumber(hashStreamNumberAuxiliary); //readerAuxiliary.dump(); }