/* ### * IP: GHIDRA * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // Calculate similarity/significance scores between executables by // combining their function scores. //@category BSim import java.io.IOException; import java.net.URL; import java.security.InvalidParameterException; import ghidra.app.script.GhidraScript; import ghidra.features.bsim.query.BSimClientFactory; import ghidra.features.bsim.query.FunctionDatabase; import ghidra.features.bsim.query.client.*; import ghidra.features.bsim.query.description.ExecutableRecord; import ghidra.features.bsim.query.protocol.QueryExeInfo; import ghidra.features.bsim.query.protocol.ResponseExe; /** * An example script using {@link ExecutableComparison} to compare executables within a BSim database. * The user provides the URL of the database and the name of an executable within the database that * will be compared against every other executable. * * Executables are considered similar if they share similar functions, as determined by the BSim similarity metric. * Functions that are too common (high hitcount) or are too small (low self signficance) are not included * in the score. A score of 1.0 means that all functions included in the score are shared between the two * executables and each have a (function) similarity of 1.0. * * The script also computes a "library" score, which achieves 1.0 if the functions in the smaller of the two * executables all have a perfect match in the bigger executable. The bigger executable may have many functions * with no match in the smaller executable. * * For larger databases, repeated runs of this script can be made more efficient by allowing it to cache executable * "self-scores" between runs. Uncomment one of two lines instantiating the {@link ScoreCaching} object below. The * cache can be stored in the local file system or in an additional table/column in the BSim database. */ public class CompareExecutablesScript extends GhidraScript { private ExecutableComparison exeCompare; @Override protected void run() throws Exception { String urlString = askString("Enter BSim database URL", "URL: "); String execName = askString("Enter name of executable to compare against database", "Name: "); URL url = BSimClientFactory.deriveBSimURL(urlString); try (FunctionDatabase database = BSimClientFactory.buildClient(url, true)) { QueryExeInfo exeInfo = new QueryExeInfo(); exeInfo.filterExeName = execName; ResponseExe exeResult = exeInfo.execute(database); if (exeResult == null) { String message = database.getLastError() != null ? database.getLastError().message : "Unrecoverable error"; throw new IOException(message); } else if (exeResult.recordCount == 0) { throw new InvalidParameterException( "Executable " + execName + " is not present in database"); } else if (exeResult.recordCount > 1) { println("Multiple executables with the name - " + execName); ExecutableRecord exeRecord = exeResult.records.get(0); print("Using "); println(exeRecord.printRaw()); } String baseMd5 = exeResult.records.get(0).getMd5(); ScoreCaching cache = null; // If null, self scores will not be cached // Scores can be cached in the local file system by using FileScoreCaching // cache = new FileScoreCaching("/tmp/test_scorecacher.txt"); // Scores can be cached in a dedicated table within the database by using TableScoreCaching // TableScoreCaching is currently only supported for the PostgreSQL back-end. // cache = new TableScoreCaching(database); exeCompare = new ExecutableComparison(database, 1000000, baseMd5, cache, monitor); // Its possible to specify the executables to compare with the base executable by // specifying their md5 hashes directly. // exeCompare.addExecutable("22222222222222222222222222222222"); // 32 hex-digit string // exeCompare.addExecutable("33333333333333333333333333333333"); // Otherwise specify that we should compare the base executable against all executables exeCompare.addAllExecutables(5000); ExecutableScorer scorer = exeCompare.getScorer(); if (!exeCompare.isConfigured()) { exeCompare.resetThresholds(0.7, 10.0); } exeCompare.fillinSelfScores(); // Prefetch self-scores, calculate any we are missing exeCompare.performScoring(); scorer.commitSelfScore(); // Commit the newly calculated self-score println("Maximum cluster size = " + Integer.toString(exeCompare.getMaxHitCount())); println("Hit count exceeded = " + Integer.toString(exeCompare.getExceedCount())); float scoreThresh = 0.01f; int numExe = scorer.numExecutables(); ExecutableRecord exeA = scorer.getSingularExecutable(); float selfScoreA = scorer.getSingularSelfScore(); for (int i = 1; i <= numExe; ++i) { ExecutableRecord exeB = scorer.getExecutable(i); float selfScoreB = scorer.getScore(i); if (selfScoreB == 0.0f) { // This is possible if the executable has no "rare" functions. continue; // as defined by the ExecutableComparison.hitCountThreshold } ExecutableRecord smallRecord = selfScoreA < selfScoreB ? exeA : exeB; ExecutableRecord bigRecord = selfScoreA < selfScoreB ? exeB : exeA; float libScore = scorer.getNormalizedScore(i, true); float totalScore = scorer.getNormalizedScore(i, false); if (libScore < scoreThresh) { continue; } println(smallRecord.getNameExec() + " " + bigRecord.getNameExec()); println(" " + Float.toString(libScore) + " library score"); println(" " + Float.toString(totalScore) + " total score"); } } } }