GT-3481 - Gnu Demangler - Checkpoint 2 - Most docs and tests done;

analyzer test remains
This commit is contained in:
dragonmacher 2020-02-12 18:21:10 -05:00
parent b774ecb2d6
commit b4ce3012d7
55 changed files with 854 additions and 1283 deletions

View file

@ -28,7 +28,7 @@ body { margin-bottom: 50px; margin-left: 10px; margin-right: 10px; margin-top: 1
li { font-family:times new roman; font-size:14pt; }
h1 { color:#000080; font-family:times new roman; font-size:36pt; font-style:italic; font-weight:bold; text-align:center; }
h2 { margin: 10px; margin-top: 20px; color:#984c4c; font-family:times new roman; font-size:18pt; font-weight:bold; }
h3 { margin-left: 10px; margin-top: 20px; color:#0000ff; font-family:times new roman; font-size:14pt; font-weight:bold; }
h3 { margin-left: 10px; margin-top: 20px; color:#0000ff; font-family:times new roman; font-size:14pt; font-weight:bold; }
h4 { margin-left: 10px; margin-top: 20px; font-family:times new roman; font-size:14pt; font-style:italic; }
/*
@ -55,4 +55,10 @@ table { margin-left: 20px; margin-top: 10px; width: 80%;}
td { font-family:times new roman; font-size:14pt; vertical-align: top; }
th { font-family:times new roman; font-size:14pt; font-weight:bold; background-color: #EDF3FE; }
code { color: black; font-family: courier new; font-size: 14pt; }
/*
Code-like formatting for things such as file system paths and proper names of classes,
methods, etc. To apply this to a file path, use this syntax:
<CODE CLASS="path">...</CODE>
*/
code { color: black; font-weight: bold; font-family: courier new, monospace; font-size: 14pt; white-space: nowrap; }
code.path { color: #4682B4; font-weight: bold; font-family: courier new, monospace; font-size: 14pt; white-space: nowrap; }

View file

@ -1,111 +0,0 @@
/* ###
* IP: GHIDRA
* REVIEWED: YES
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.analyzers;
import ghidra.program.model.listing.Function;
import ghidra.program.model.listing.Program;
import ghidra.util.exception.CancelledException;
import ghidra.xml.XmlPullParser;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.TreeSet;
/**
 * A database of function hash records, stored as a sorted set of {@link LibraryRecord}s.
 * <p>
 * The database can be built empty, built from a single program, merged with another
 * database, queried by function or by hash value, and read from / written to XML under
 * the <code>funcDB</code> tag.
 */
public class LibHashDB {

    /** The libraries that make up this database. */
    private final TreeSet<LibraryRecord> libraries;

    /**
     * Creates an empty database.
     */
    public LibHashDB() {
        this.libraries = new TreeSet<LibraryRecord>();
    }

    /**
     * Creates a database holding a single library record built from the given program.
     *
     * @param prgm the program handed to the {@link LibraryRecord} constructor
     * @throws CancelledException if building the library record is cancelled
     */
    public LibHashDB(Program prgm) throws CancelledException {
        this.libraries = new TreeSet<LibraryRecord>();
        this.libraries.add(new LibraryRecord(prgm));
    }

    /**
     * Merges every library of another database into this one.
     *
     * @param toMergeIn the database whose libraries are added to this database
     */
    public void mergeWith(LibHashDB toMergeIn) {
        this.libraries.addAll(toMergeIn.libraries);
    }

    /**
     * Adds a single library to the database.
     *
     * @param libRec the library record to add
     */
    public void addLibrary(LibraryRecord libRec) {
        this.libraries.add(libRec);
    }

    /**
     * Collects the function records of every library in this database.
     *
     * @return a new sorted set containing the records of all libraries
     */
    public TreeSet<FuncRecord> getRecords() {
        TreeSet<FuncRecord> results = new TreeSet<FuncRecord>();
        for (LibraryRecord lib : this.libraries) {
            results.addAll(lib.getRecords());
        }
        return results;
    }

    /**
     * Finds the database entries that correspond to the given function.
     * <p>
     * The function is hashed and looked up by hash value. If one of the hits refers to
     * the very same {@link Function} instance (identity comparison), only that hit is
     * returned; otherwise every record sharing the hash is returned.
     *
     * @param func the function to look up
     * @return a list with the single exact entry, or all entries sharing the hash
     * @throws CancelledException if hashing the function is cancelled
     */
    public ArrayList<FuncRecord> query(Function func) throws CancelledException {
        FuncRecord queryHash = new FuncRecord(func);
        ArrayList<FuncRecord> matches = this.query(queryHash.hashValue);
        for (FuncRecord entry : matches) {
            if (entry.func == func) {
                ArrayList<FuncRecord> exact = new ArrayList<FuncRecord>();
                exact.add(entry);
                return exact;
            }
        }
        return matches;
    }

    /**
     * Finds all entries of the database that have the given hash.
     *
     * @param hash the hash value to search for
     * @return every record, across all libraries, reported for that hash
     */
    public ArrayList<FuncRecord> query(Long hash) {
        ArrayList<FuncRecord> result = new ArrayList<FuncRecord>();
        for (LibraryRecord libRec : this.libraries) {
            result.addAll(libRec.query(hash));
        }
        return result;
    }

    /**
     * Restores the database from XML.
     * <p>
     * Expects a <code>funcDB</code> element; each nested start element is delegated to
     * {@link LibraryRecord#restoreXml(XmlPullParser)} and the resulting library is added
     * to this database.
     *
     * @param parser the parser positioned at the <code>funcDB</code> start tag
     */
    public void restoreXml(XmlPullParser parser) {
        parser.start("funcDB");
        while (parser.peek().isStart()) {
            LibraryRecord libRec = new LibraryRecord();
            libRec.restoreXml(parser);
            this.addLibrary(libRec);
        }
        parser.end();
    }

    /**
     * Writes the database as XML: an opening <code>funcDB</code> tag, each library in
     * turn, and the closing tag.
     *
     * @param fwrite the writer that receives the XML
     * @throws IOException if writing fails
     */
    public void saveXml(Writer fwrite) throws IOException {
        fwrite.append("<funcDB>\n");
        for (LibraryRecord libRec : this.libraries) {
            libRec.saveXml(fwrite);
        }
        fwrite.append("</funcDB>\n");
    }
}

View file

@ -1,271 +0,0 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.analyzers;
import java.io.*;
import java.util.*;
import org.xml.sax.*;
import ghidra.app.cmd.label.*;
import ghidra.app.services.*;
import ghidra.app.util.importer.MessageLog;
import ghidra.framework.Application;
import ghidra.framework.cmd.Command;
import ghidra.framework.options.Options;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSetView;
import ghidra.program.model.listing.*;
import ghidra.program.model.symbol.*;
import ghidra.util.Msg;
import ghidra.util.task.TaskMonitor;
import ghidra.xml.NonThreadedXmlPullParserImpl;
import ghidra.xml.XmlPullParser;
/**
 * Analyzer that tries to name statically linked library functions by matching function
 * hashes of the program against a function database loaded from <code>lib/db.xml</code>.
 * <p>
 * NOTE: {@link #canAnalyze(Program)} currently always returns <code>false</code>, so this
 * analyzer is never offered for any program.
 */
public class LibraryHashAnalyzer extends AbstractAnalyzer {
    private static final String NAME = "Library Hash Identification";
    private static final String DESCRIPTION =
        "Analyzes program for statically linked library functions (e.g., printf, scanf, etc.).";

    private final static String OPTION_NAME_MEM_SEARCH = "Analyze undefined bytes";
    private final static String OPTION_NAME_DISASSEMBLE = "Disassemble matches in undefined bytes";

    private static final String OPTION_DESCRIPTION_MEM_SEARCH =
        "Search for known library signatures in undefined bytes.";
    private static final String OPTION_DESCRIPTION_DISASSEMBLE =
        "Disassemble any library functions found while searching undefined bytes.";

    private final static boolean OPTION_DEFAULT_MEM_SEARCH = true;
    private final static boolean OPTION_DEFAULT_DISASSEMBLE = true;

    private boolean memSearchOption = OPTION_DEFAULT_MEM_SEARCH;
    private boolean disassembleOption = OPTION_DEFAULT_DISASSEMBLE;

    /**
     * Creates the analyzer as a prototype byte analyzer that supports one-time analysis
     * and is scheduled before data type propagation.
     */
    public LibraryHashAnalyzer() {
        super(NAME, DESCRIPTION, AnalyzerType.BYTE_ANALYZER);
        setPrototype();
        setPriority(AnalysisPriority.DATA_TYPE_PROPOGATION.before());
        setSupportsOneTimeAnalysis();
    }

    @Override
    public boolean canAnalyze(Program program) {
        // TODO: for now, this can't analyze anything!
        // WARNING: this will cause this analyzer not to show up for anything!
        return false;
    }

    @Override
    public boolean added(Program program, AddressSetView set, TaskMonitor monitor, MessageLog log) {
        this.identifyLibraryFunctions(set, program, monitor);
        return true;
    }

    @Override
    public void registerOptions(Options options, Program program) {
        options.registerOption(OPTION_NAME_MEM_SEARCH, memSearchOption, null,
            OPTION_DESCRIPTION_MEM_SEARCH);
        options.registerOption(OPTION_NAME_DISASSEMBLE, disassembleOption, null,
            OPTION_DESCRIPTION_DISASSEMBLE);
    }

    /**
     * @see ghidra.app.services.Analyzer#optionsChanged(ghidra.framework.options.Options, Program)
     */
    @Override
    public void optionsChanged(Options options, Program program) {
        memSearchOption = options.getBoolean(OPTION_NAME_MEM_SEARCH, memSearchOption);
        disassembleOption = options.getBoolean(OPTION_NAME_DISASSEMBLE, disassembleOption);
    }

    /**
     * Loads the library function database, pins program functions to library functions
     * whose hash is unique on both sides, and then propagates those matches to callees.
     * <p>
     * Any failure while reading the database or matching is logged and ends the analysis
     * without propagating the exception.
     *
     * @param set the address set passed to the analyzer (currently unused)
     * @param p the program being analyzed
     * @param monitor the task monitor (currently unused)
     */
    private void identifyLibraryFunctions(AddressSetView set, Program p, TaskMonitor monitor) {
        //Get the library from the xml database file.
        File libraryFile;
        try {
            libraryFile = Application.getModuleDataFile("lib/db.xml").getFile(true);
        }
        catch (FileNotFoundException e1) {
            Msg.error(this, "Cannot find db.xml file--not hashing functions", e1);
            return;
        }

        LibHashDB db = new LibHashDB();
        ErrorHandler handler = createStrictErrorHandler();

        try {
            InputStream hstream = new FileInputStream(libraryFile);
            try {
                //Create the parser.
                XmlPullParser parser = new NonThreadedXmlPullParserImpl(hstream,
                    "Function Database parser", handler, false);

                //Create the database.
                db.restoreXml(parser);
            }
            finally {
                // Close only once the database has been read, and also when parsing fails,
                // so the stream is never leaked or closed while still needed.
                hstream.close();
            }

            //Matching between query and library functions.
            HashMap<FuncRecord, FuncRecord> pinning = new HashMap<FuncRecord, FuncRecord>();
            LibHashDB qdb = new LibHashDB(p);
            FunctionIterator funcIter = p.getListing().getFunctions(true);

            //If a signature is unique in the libraries and in the query, we may as well match them.
            while (funcIter.hasNext()) {
                Function func = funcIter.next();

                ArrayList<FuncRecord> libResponse = db.query(func);
                if (libResponse.size() != 1) { //Check uniqueness in libraries.
                    continue;
                }
                FuncRecord libVal = libResponse.get(0);

                ArrayList<FuncRecord> queResponse = qdb.query(libVal.hashValue);
                if (queResponse.size() != 1) { //Check uniqueness in query.
                    continue;
                }
                FuncRecord queVal = queResponse.get(0);

                pinning.put(queVal, libVal);
            }

            propagateMatches(p, pinning);

            /*
            File outFile = new File(dataDir, "testy.txt");
            File outFile2 = new File(dataDir, "testy2.txt");
            FileWriter writer = new FileWriter(outFile);
            FileWriter writer2 = new FileWriter(outFile2);
            writer.write("Matched: " + pinning.size() + "\n");
            writer2.write("Unmatched:\n");
            for(FuncRecord key : qdb.getRecords()){
            if(pinning.containsKey(key)){
            writer.write(key.toString() + "\n");
            }
            else{
            writer2.write(key.toString() + "\n");
            }
            }
            writer.close();
            writer2.close();
            */
        }
        catch (Exception e) {
            // Report through the application log instead of dumping to stderr.
            Msg.error(this, "Unexpected error while identifying library functions", e);
        }
    }

    /**
     * Creates the SAX error handler used for the database parser; it rethrows every
     * warning, error and fatal error it receives.
     *
     * @return the strict error handler
     */
    private static ErrorHandler createStrictErrorHandler() {
        return new ErrorHandler() {
            @Override
            public void warning(SAXParseException exception) throws SAXException {
                throw exception;
            }

            @Override
            public void error(SAXParseException exception) throws SAXException {
                throw exception;
            }

            @Override
            public void fatalError(SAXParseException exception) throws SAXException {
                throw exception;
            }
        };
    }

    /**
     * Extends the initial matches by walking the children of already matched records in
     * parallel on the query and library side. A child pair is matched, labeled in the
     * program and queued for further propagation when the hashes agree, the query child
     * has not been processed yet, and both children have the same number of children.
     *
     * @param p the program that receives the new symbols
     * @param pinning the query-to-library matching; new matches are added to it
     */
    private void propagateMatches(Program p, HashMap<FuncRecord, FuncRecord> pinning) {
        PriorityQueue<FuncRecord> q = new PriorityQueue<FuncRecord>(pinning.keySet());
        HashSet<FuncRecord> seen = new HashSet<FuncRecord>();

        while (q.size() > 0) {
            FuncRecord current = q.remove(); //A query record which is already matched.
            seen.add(current);

            Iterator<FuncRecord> qit = current.children.iterator();
            FuncRecord partner = pinning.get(current);
            Iterator<FuncRecord> lit = partner.children.iterator();

            while (qit.hasNext()) {
                FuncRecord qKid = qit.next(); //Child on the query side.
                if (!lit.hasNext()) {
                    break;
                }
                FuncRecord lKid = lit.next(); //Child to match on the library side.

                //Should we add a second seen set for the lKids?
                // NOTE(review): if hashValue is a boxed Long rather than a primitive long,
                // this != compares references, not values -- confirm against FuncRecord.
                if (qKid.hashValue != lKid.hashValue || seen.contains(qKid)) {
                    continue;
                }

                //Match 'em and put 'em in the queue.
                //This little check is unnecessary, except that calls can be incorrectly disassembled.
                if (qKid.children.size() != lKid.children.size()) {
                    continue;
                }

                pinning.put(qKid, lKid);
                this.addSymbol(p, qKid.func.getEntryPoint(), lKid.funcName, false);
                q.add(qKid);
            }
        }
    }

    @Override
    public void analysisEnded(Program program) {
        // don't care
    }

    /**
     * Applies a library function name at the given address.
     * <p>
     * If no primary symbol exists, a label is added. If the existing primary symbol has a
     * different name, it is renamed when its source is DEFAULT (or ANALYSIS for a function
     * symbol); otherwise an additional label is added. When the command succeeds, the new
     * label is made primary and a demangle of the name is attempted.
     *
     * @param program the program to modify
     * @param addr the address to label
     * @param name the library function name to apply
     * @param localscope passed to the label command; when false the label is made primary
     *        with a null namespace
     */
    private void addSymbol(Program program, Address addr, String name, boolean localscope) {
        SymbolTable st = program.getSymbolTable();
        Symbol existSym = st.getPrimarySymbol(addr);

        Command cmd = null;
        if (existSym == null) { //Symbol didn't exist
            cmd = new AddLabelCmd(addr, name, localscope, SourceType.IMPORTED); //So we prepare to add it.
        }
        else if (!existSym.getName().equals(name)) { //There is a symbol there with the wrong name.
            if (existSym.getSource() == SourceType.DEFAULT || //It's got a non-smart name.
                (existSym.getSource() == SourceType.ANALYSIS &&
                    existSym.getSymbolType().equals(SymbolType.FUNCTION))) {
                cmd = new RenameLabelCmd(addr, existSym.getName(), name, //Prepare to rename it.
                    existSym.getParentNamespace(), SourceType.IMPORTED);
            }
            else {
                cmd = new AddLabelCmd(addr, name, localscope, SourceType.IMPORTED); //Our name is better?
            }
        }

        if (cmd != null && cmd.applyTo(program)) { //Apply the name, make sure it worked.
            Msg.debug(this, "Created symbol for library function " + name + " at address " + addr);

            Namespace space = st.getNamespace(addr);
            if (!localscope) {
                space = null;
            }
            cmd = new SetLabelPrimaryCmd(addr, name, space);
            cmd.applyTo(program);

            cmd = new DemanglerCmd(addr, name);
            if (cmd.applyTo(program)) {
                Msg.debug(this, "Demangled library function " + name);
            }
            //resolved.add(addr);
        }

        /*
        program.getBookmarkManager().setBookmark(addr, "Analysis",
        LibraryIdentificationConstants.LIB_BOOKMARK_CATEGORY, "Library function");
        if (disassembleOption) {
        PseudoDisassembler pdis = new PseudoDisassembler(program);
        // make sure it is a disassembly
        if (pdis.isValidSubroutine(addr, false)) {
        disassembleSet.addRange(addr, addr);
        }
        }
        */
    }
}