GT-3578_FIDPerfIssues resize the cache for better fid performance on

larger programs
This commit is contained in:
emteere 2020-03-12 14:42:41 -04:00 committed by caheckman
parent bdbbca30a1
commit 69dd762e4b
2 changed files with 80 additions and 35 deletions

View file

@ -0,0 +1,55 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.feature.fid.service;
import generic.cache.Factory;
import ghidra.feature.fid.hash.FidHashQuad;
import ghidra.program.model.address.Address;
import ghidra.program.model.listing.Function;
import ghidra.util.datastruct.LRUMap;
/**
* The caching factory for FID function hashes. Greatly speeds up processing by memoizing hash
* values for functions which are used repeatedly in different contexts.
*
* NOTE: The function is passed to the factory to create and cache the hash, however the
* function hashes are keyed by the entry point of the function.
*/
public class FIDFixedSizeMRUCachingFactory implements Factory<Function, FidHashQuad> {
	// Previously computed hashes, keyed by function entry point
	private final LRUMap<Address, FidHashQuad> cache;
	// Factory consulted to compute a hash when the cache has no usable entry
	private final Factory<Function, FidHashQuad> delegate;

	/**
	 * Creates a caching factory wrapping the given hash factory.
	 *
	 * @param factory the factory asked to compute a hash on a cache miss
	 * @param size the size handed to the underlying {@link LRUMap}
	 */
	public FIDFixedSizeMRUCachingFactory(Factory<Function, FidHashQuad> factory, int size) {
		this.delegate = factory;
		this.cache = new LRUMap<Address, FidHashQuad>(size);
	}

	/**
	 * Returns the hash for the given function, consulting the cache first and falling back
	 * to the delegate factory on a miss.
	 *
	 * @param func the function to hash; its entry point is used as the cache key
	 * @return the cached or newly computed hash, or null if the delegate returned null
	 */
	@Override
	public FidHashQuad get(Function func) {
		// Use the entry point of the function as the key, instead of the function
		Address entryPoint = func.getEntryPoint();
		FidHashQuad value = cache.get(entryPoint);
		if (value != null) {
			return value;
		}
		value = delegate.get(func);
		if (value != null) {
			// A null entry can never yield a hit (the lookup above treats null as a miss),
			// so storing one would only occupy a cache slot without any benefit.
			cache.put(entryPoint, value);
		}
		return value;
	}
}

View file

@ -15,30 +15,17 @@
*/
package ghidra.feature.fid.service;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.*;
import generic.cache.FixedSizeMRUCachingFactory;
import ghidra.feature.fid.db.FidQueryService;
import ghidra.feature.fid.db.FunctionRecord;
import ghidra.feature.fid.db.LibraryRecord;
import ghidra.feature.fid.db.*;
import ghidra.feature.fid.hash.FidHashQuad;
import ghidra.feature.fid.hash.FidHasher;
import ghidra.feature.fid.plugin.HashLookupListMode;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressIterator;
import ghidra.program.model.listing.Function;
import ghidra.program.model.listing.FunctionIterator;
import ghidra.program.model.listing.FunctionManager;
import ghidra.program.model.listing.Program;
import ghidra.program.model.listing.*;
import ghidra.program.model.mem.MemoryAccessException;
import ghidra.program.model.symbol.Reference;
import ghidra.program.model.symbol.ReferenceIterator;
import ghidra.program.model.symbol.ReferenceManager;
import ghidra.program.model.symbol.*;
import ghidra.util.Msg;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.TaskMonitor;
@ -61,13 +48,13 @@ public class FidProgramSeeker {
public final int MAX_NUM_PARENTS_FOR_SCORE = 500; // Limit number of (useless) parent (caller) functions
private final int CACHE_SIZE = 10000; // Maximum number of FidQuadHash cached
private final int CACHE_SIZE = 120000; // Maximum number of FidHashQuad entries cached
private final float scoreThreshold; // Code unit score a function must achieve to be considered a match
private final int mediumHashCodeUnitLengthLimit;
private final FidQueryService fidQueryService;
private final Program program;
private final FixedSizeMRUCachingFactory<Function, FidHashQuad> cacheFactory;
private final FIDFixedSizeMRUCachingFactory cacheFactory;
/**
* Creates a seek object.
@ -78,24 +65,24 @@ public class FidProgramSeeker {
* @param mediumHashCodeUnitLengthLimit the medium hash size
*/
public FidProgramSeeker(FidQueryService fidQueryService, Program program, FidHasher hasher,
byte shortHashCodeUnitLength, byte mediumHashCodeUnitLengthLimit, float scoreThreshold) {
byte shortHashCodeUnitLength, byte mediumHashCodeUnitLengthLimit,
float scoreThreshold) {
this.fidQueryService = fidQueryService;
this.program = program;
this.scoreThreshold = scoreThreshold;
this.mediumHashCodeUnitLengthLimit = mediumHashCodeUnitLengthLimit;
FidHasherFactory factory = new FidHasherFactory(hasher);
this.cacheFactory =
new FixedSizeMRUCachingFactory<Function, FidHashQuad>(factory, CACHE_SIZE);
this.cacheFactory = new FIDFixedSizeMRUCachingFactory(factory, CACHE_SIZE);
}
public static ArrayList<Function> getChildren(Function function,boolean followThunks) {
public static ArrayList<Function> getChildren(Function function, boolean followThunks) {
Program program = function.getProgram();
FunctionManager functionManager = program.getFunctionManager();
ReferenceManager referenceManager = program.getReferenceManager();
HashSet<Address> alreadyDone = new HashSet<Address>();
ArrayList<Function> funcList = new ArrayList<Function>();
AddressIterator referenceIterator =
referenceManager.getReferenceSourceIterator(function.getBody(), true);
referenceManager.getReferenceSourceIterator(function.getBody(), true);
for (Address address : referenceIterator) {
// monitor.checkCanceled();
Reference[] referencesFrom = referenceManager.getReferencesFrom(address);
@ -149,7 +136,7 @@ public class FidProgramSeeker {
}
}
public static ArrayList<Function> getParents(Function function,boolean followThunks) {
public static ArrayList<Function> getParents(Function function, boolean followThunks) {
Program program = function.getProgram();
FunctionManager functionManager = program.getFunctionManager();
ReferenceManager referenceManager = program.getReferenceManager();
@ -160,11 +147,12 @@ public class FidProgramSeeker {
Address[] thunkAddresses = null;
if (followThunks) {
thunkAddresses = function.getFunctionThunkAddresses();
if (thunkAddresses != null)
if (thunkAddresses != null) {
size = thunkAddresses.length;
}
}
int pos = -1;
for(;;) {
for (;;) {
ReferenceIterator referenceIterator = referenceManager.getReferencesTo(curAddr);
for (Reference reference : referenceIterator) {
// monitor.checkCanceled();
@ -181,7 +169,9 @@ public class FidProgramSeeker {
}
}
pos += 1;
if (pos >= size) break;
if (pos >= size) {
break;
}
curAddr = thunkAddresses[pos];
}
@ -216,8 +206,8 @@ public class FidProgramSeeker {
* @return the FidSearchResult describing any discovered matches
* @throws CancelledException if the user cancels
*/
private FidSearchResult processMatches(Function function, HashFamily family, TaskMonitor monitor)
throws CancelledException {
private FidSearchResult processMatches(Function function, HashFamily family,
TaskMonitor monitor) throws CancelledException {
List<HashMatch> hashMatches = lookupFamily(family, monitor);
FidSearchResult searchResult = null;
if (!hashMatches.isEmpty()) {
@ -264,10 +254,9 @@ public class FidProgramSeeker {
ArrayList<FidMatch> fidMatches = new ArrayList<FidMatch>();
for (FidMatchScore hashMatch : culledHashMatches) {
monitor.checkCanceled();
FidMatch match =
new FidMatchImpl(
fidQueryService.getLibraryForFunction(hashMatch.getFunctionRecord()),
function.getEntryPoint(), hashMatch);
FidMatch match = new FidMatchImpl(
fidQueryService.getLibraryForFunction(hashMatch.getFunctionRecord()),
function.getEntryPoint(), hashMatch);
fidMatches.add(match);
}
return new FidSearchResult(function, family.getHash(), fidMatches);
@ -365,8 +354,9 @@ public class FidProgramSeeker {
functionScore += 0.67 * specificCodeUnits; // Each specific constant count is worth 2/3 of a whole code unit
if (functionRecord.isForceRelation()) {
// If both auto-pass and force-relation are on, do not ding score if some children match
if (!functionRecord.autoPass() || (childCodeUnits == 0))
if (!functionRecord.autoPass() || (childCodeUnits == 0)) {
functionScore = 0;
}
}
float childScore = childCodeUnits;
float parentScore = parentCodeUnits;