From 82f50a930539f9d43052adb0b29836e8e486410d Mon Sep 17 00:00:00 2001 From: Ryan Kurtz Date: Thu, 7 Aug 2025 09:01:30 -0400 Subject: [PATCH] GP-5873: New idaxml.py for IDA 9.x --- GhidraBuild/IDAPro/Python/9xx/README.html | 40 + .../IDAPro/Python/9xx/loaders/xml_loader.py | 108 + .../IDAPro/Python/9xx/plugins/xml_exporter.py | 97 + .../IDAPro/Python/9xx/plugins/xml_importer.py | 99 + .../IDAPro/Python/9xx/python/idaxml.py | 3778 +++++++++++++++++ GhidraBuild/IDAPro/README.html | 2 +- GhidraBuild/IDAPro/certification.manifest | 1 + 7 files changed, 4124 insertions(+), 1 deletion(-) create mode 100644 GhidraBuild/IDAPro/Python/9xx/README.html create mode 100644 GhidraBuild/IDAPro/Python/9xx/loaders/xml_loader.py create mode 100644 GhidraBuild/IDAPro/Python/9xx/plugins/xml_exporter.py create mode 100644 GhidraBuild/IDAPro/Python/9xx/plugins/xml_importer.py create mode 100644 GhidraBuild/IDAPro/Python/9xx/python/idaxml.py diff --git a/GhidraBuild/IDAPro/Python/9xx/README.html b/GhidraBuild/IDAPro/Python/9xx/README.html new file mode 100644 index 0000000000..5c5c7c0361 --- /dev/null +++ b/GhidraBuild/IDAPro/Python/9xx/README.html @@ -0,0 +1,40 @@ + + + XML Exporter for IDA version 9 + + + +

XML Exporter for IDA version 9

+

+ The 9XX versions of the XML Exporter, Importer, and Loader can only be used + with IDA version 9.0 and greater. +

+

+ xml_exporter.py is a plugin to export an IDA database as an XML file. + It must be placed in the IDA plugins folder. +

+

+ xml_loader.py is an IDA loader to build a new database using an XML file. + It loads the .bytes file and builds the IDA database using the contents of + the XML file. NOTE: Currently, the loader does not support importing memory + overlays or Harvard architectures (e.g., 8051). + It must be placed in the IDA loaders folder. +

+

+ xml_importer.py is a plugin to add data from an XML file to an existing + database. It will NOT load any binary data from the bytes file. It will add + symbols, comments, code, data, functions, etc. for addresses that currently + exist in the database. + It must be placed in the IDA plugins folder. +

+

+ The idaxml.py module is a required import for the xml_exporter, xml_importer, + and xml_loader. + It must be placed in the IDA python folder. +

+ + diff --git a/GhidraBuild/IDAPro/Python/9xx/loaders/xml_loader.py b/GhidraBuild/IDAPro/Python/9xx/loaders/xml_loader.py new file mode 100644 index 0000000000..956e2abb32 --- /dev/null +++ b/GhidraBuild/IDAPro/Python/9xx/loaders/xml_loader.py @@ -0,0 +1,108 @@ +## ### +# IP: GHIDRA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +## +#--------------------------------------------------------------------- +# xmlldr.py - IDA XML loader +#--------------------------------------------------------------------- +""" +Loader for IDA to import a XML PROGRAM file and create a new database (.idb). +This file must be placed in the IDA loaders directory. +The file idaxml.py must be placed in the IDA python directory. +""" + +from __future__ import print_function +import ida_idaapi +import ida_idp +import ida_kernwin +import ida_pro +import idaxml +import idc +import sys + +if sys.version_info.major >= 3: + from idaxml import _exc_info + sys.exc_value = lambda: _exc_info()[1] + sys.exc_type = lambda: _exc_info()[0] + +""" +Loader functions +""" +def accept_file(li, filename): + """ + Check if the file is of supported format + + @param li: a file-like object which can be used to access the input data + @param n : format number. 
The function will be called with incrementing + number until it returns zero + @return: 0 - no more supported formats + string "name" - format name to display in the chooser dialog + dictionary { 'format': "name", 'options': integer } + options: should be 1, possibly ORed with ACCEPT_FIRST (0x8000) + to indicate preferred format + """ + if not idaxml.is_ida_version_supported(): + return 0 + # read 16K bytes to allow for the DTD + data = li.read(0x4000) + # look for start of element + start = data.find(b"= 0: + s = data.find(b"") + proc = p[:e] + ida_kernwin.info("Processor specified in the XML file is:\n" + proc.decode() + + "\n\nYou must select and set the compatible " + + "IDA processor type.") + return { 'format': "XML PROGRAM file", 'options': 0x8001 } + return 0 + + +def load_file(li, neflags, format): + """ + Load the file into database + + @param li: a file-like object which can be used to access the input data + @param neflags: options selected by the user, see loader.hpp + @return: 0-failure, 1-ok + """ + global event, element + if ida_idp.get_idp_name() == None: + ida_idp.set_processor_type("metapc", ida_idp.SETPROC_LOADER) + status = 0 + st = idc.set_ida_state(idc.IDA_STATUS_WORK) + xml = idaxml.XmlImporter(idaxml.LOADER, 0) + try: + status = xml.import_xml() + except idaxml.Cancelled: + msg = "XML PROGRAM import cancelled!" + print("\n" + msg) + idc.warning(msg) + except idaxml.MultipleAddressSpacesNotSupported: + msg = "XML Import cancelled!" + msg += "\n\nXML Import does not currently support" + msg += "\nimporting multiple address spaces." + print("\n" + msg) + idc.warning(msg) + except: + print("\nHouston, we have a problem!") + msg = "***** Exception occurred: XML loader failed! 
*****" + print("\n" + msg + "\n", sys.exc_type, sys.exc_value) + print(event, element.tag, element.attrib) + idc.warning(msg) + finally: + idc.set_ida_state(st) + xml.cleanup() + return status diff --git a/GhidraBuild/IDAPro/Python/9xx/plugins/xml_exporter.py b/GhidraBuild/IDAPro/Python/9xx/plugins/xml_exporter.py new file mode 100644 index 0000000000..87d31ff163 --- /dev/null +++ b/GhidraBuild/IDAPro/Python/9xx/plugins/xml_exporter.py @@ -0,0 +1,97 @@ +## ### +# IP: GHIDRA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +## +#--------------------------------------------------------------------- +# xmlexp.py - IDA XML Exporter plugin +#--------------------------------------------------------------------- +""" +Plugin for IDA which exports a XML PROGRAM document file from a database. +This file must be placed in the IDA plugins directory. +The file idaxml.py must be placed in the IDA python directory. 
+""" + +from __future__ import print_function +import ida_auto +import ida_idaapi +import ida_kernwin +import idaxml +import idc +import sys + +if sys.version_info.major >= 3: + from idaxml import _exc_info + sys.exc_value = lambda: _exc_info()[1] + sys.exc_type = lambda: _exc_info()[0] + +class XmlExporterPlugin(ida_idaapi.plugin_t): + """ + XML Exporter plugin class + """ + flags = 0 + comment = "Export database as XML file" + help = "Export database as XML document" + wanted_name = "XML Exporter" + wanted_hotkey = "Ctrl-Shift-x" + + + def init(self): + """ + init function for XML Exporter plugin. + + Returns: + Constant PLUGIN_OK if this IDA version supports the plugin, + else returns PLUGIN_SKIP if this IDA is older than the supported + baseline version. + """ + if idaxml.is_ida_version_supported(): + return ida_idaapi.PLUGIN_OK + else: + return ida_idaapi.PLUGIN_SKIP + + + def run(self, arg): + """ + run function for XML Exporter plugin. + + Args: + arg: Integer, non-zero value enables auto-run feature for + IDA batch (no gui) processing mode. Default is 0. + """ + st = idc.set_ida_state(idc.IDA_STATUS_WORK) + xml = idaxml.XmlExporter(arg) + try: + try: + xml.export_xml() + except idaxml.Cancelled: + ida_kernwin.hide_wait_box() + msg = "XML Export cancelled!" + print("\n" + msg) + idc.warning(msg) + except: + ida_kernwin.hide_wait_box() + msg = "***** Exception occurred: XML Exporter failed! 
*****" + print("\n" + msg + "\n", sys.exc_type, sys.exc_value) + idc.warning(msg) + finally: + xml.cleanup() + ida_auto.set_ida_state(st) + + + def term(self): + pass + + +def PLUGIN_ENTRY(): + return XmlExporterPlugin() diff --git a/GhidraBuild/IDAPro/Python/9xx/plugins/xml_importer.py b/GhidraBuild/IDAPro/Python/9xx/plugins/xml_importer.py new file mode 100644 index 0000000000..75da024c84 --- /dev/null +++ b/GhidraBuild/IDAPro/Python/9xx/plugins/xml_importer.py @@ -0,0 +1,99 @@ +## ### +# IP: GHIDRA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +## +#--------------------------------------------------------------------- +# xmlimp.py - IDA XML Importer plugin +#--------------------------------------------------------------------- +""" +Plugin for IDA to import a XML PROGRAM file into an existing open database. +This file must be placed in the IDA plugins directory. +The file idaxml.py must be placed in the IDA python directory. 
+""" + +from __future__ import print_function +import ida_idaapi +import ida_pro +import idaxml +import idc +import sys + +if sys.version_info.major >= 3: + from idaxml import _exc_info + sys.exc_value = lambda: _exc_info()[1] + sys.exc_type = lambda: _exc_info()[0] + +class XmlImporterPlugin(ida_idaapi.plugin_t): + """ + XML Importer plugin class + """ + flags = 0 + comment = "Import XML PROGRAM file" + help = "Import XML document to database" + wanted_name = "XML Importer" + wanted_hotkey = "Ctrl-Alt-l" + + def init(self): + """ + init function for XML Importer plugin. + + Returns: + Constant PLUGIN_OK if this IDA version supports the plugin, + else returns PLUGIN_SKIP if this IDA is older than the supported + baseline version. + """ + if idaxml.is_ida_version_supported(): + return ida_idaapi.PLUGIN_OK + else: + return ida_idaapi.PLUGIN_SKIP + + + def run(self, arg): + """ + run function for XML Importer plugin. + + Args: + arg: Integer, a non-zero value enables auto-run feature for + IDA batch (no gui) processing mode. Default is 0. + """ + st = idc.set_ida_state(idc.IDA_STATUS_WORK) + xml = idaxml.XmlImporter(idaxml.PLUGIN, arg) + try: + try: + xml.import_xml() + except idaxml.Cancelled: + msg = "XML Import cancelled!" + print("\n" + msg) + idc.warning(msg) + except idaxml.MultipleAddressSpacesNotSupported: + msg = "XML Import cancelled!" + msg += "\n\nXML Import does not currently support" + msg += "\nimporting multiple address spaces." + print("\n" + msg) + idc.warning(msg) + except: + msg = "***** Exception occurred: XML Importer failed! 
*****" + print("\n" + msg + "\n", sys.exc_type, sys.exc_value) + idc.warning(msg) + finally: + xml.cleanup() + idc.set_ida_state(st) + + + def term(self): + pass + + +def PLUGIN_ENTRY(): + return XmlImporterPlugin() diff --git a/GhidraBuild/IDAPro/Python/9xx/python/idaxml.py b/GhidraBuild/IDAPro/Python/9xx/python/idaxml.py new file mode 100644 index 0000000000..d32fdba095 --- /dev/null +++ b/GhidraBuild/IDAPro/Python/9xx/python/idaxml.py @@ -0,0 +1,3778 @@ +## ### +# IP: GHIDRA +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+## +#--------------------------------------------------------------------- +# idaxml.py - IDA XML classes +#--------------------------------------------------------------------- +""" +""" + +from __future__ import print_function +from typing import Optional +import ida_auto +import ida_bytes +import ida_diskio +import ida_fpro +import ida_frame +import ida_funcs +import ida_ida +import ida_idaapi +import ida_idp +import ida_hexrays +import ida_kernwin +import ida_lines +import ida_loader +import ida_moves +import ida_nalt +import ida_name +import ida_netnode +import ida_pro +import ida_segment +import ida_segregs +import ida_typeinf +import ida_ua +import ida_xref +import idautils +import idc +import datetime +import os +import sys +import time +from xml.etree import cElementTree + +if sys.version_info.major >= 3: + import copy + setattr(time, 'clock', time.perf_counter) + _exc_info = copy.copy(sys.exc_info) + setattr(sys, 'exc_value', _exc_info()[1]) + setattr(sys, 'exc_type', _exc_info()[0]) + +DEBUG = 0 # print debug statements + +IDAXML_VERSION = "5.0.2" +BASELINE_IDA_VERSION = 900 +BASELINE_STR = '9.00' +IDA_SDK_VERSION = ida_pro.IDA_SDK_VERSION +BADADDR = idc.BADADDR +BADNODE = ida_netnode.BADNODE +PLUGIN = True +LOADER = not PLUGIN +AUTO_WAIT = True + + +def is_ida_version_supported(): + ''' + Determines if IDA version is supported by this idaxml module. + + Returns: + True if IDA version is supported, else False. 
+ ''' + supported = IDA_SDK_VERSION >= BASELINE_IDA_VERSION + if not supported: + idc.msg('\nThe IDA XML plugins and loader are not supported ' + + 'by this version of IDA.\n') + idc.msg('Please use IDA ' + BASELINE_STR + ' or greater ' + + 'with this version of XML.\n') + return supported + + +def get_struc(sid: int) -> Optional[ida_typeinf.tinfo_t]: + try: + tif = ida_typeinf.tinfo_t(tid=sid) + return tif if tif.is_udt() else None + except ValueError: + return None + +def get_member(sid: int, offset: int) -> Optional[ida_typeinf.udm_t]: + struc_tif = get_struc(sid) + if struc_tif is None: + return None + udm = ida_typeinf.udm_t() + udm.offset = offset + idx = struc_tif.find_udm(udm, ida_typeinf.STRMEM_AUTO) + return udm if idx != -1 else None + +def get_member_by_idx(sid: int, idx: int) -> Optional[ida_typeinf.udm_t]: + mid = idc.get_member_by_idx(sid, idx) + member = ida_typeinf.udm_t() + return member if mid != -1 and ida_typeinf.tinfo_t().get_udm_by_tid(member, mid) != -1 else None + +def get_frame(pfn) -> Optional[ida_typeinf.tinfo_t]: + sf = ida_typeinf.tinfo_t() + return sf if sf.get_func_frame(pfn) else None + +def get_sptr(udm: ida_typeinf.udm_t) -> Optional[ida_typeinf.tinfo_t]: + tif = udm.type + return tif if tif.is_udt() and tif.is_struct() else None + +def _iter_enum_ids(): + """Iterate the IDs of all enums in the IDB""" + limit = ida_typeinf.get_ordinal_limit() + for ordinal in range(1, limit): + tif = ida_typeinf.tinfo_t() + tif.get_numbered_type(None, ordinal) + if tif.is_enum(): + yield tif.get_tid() + +def get_struc_qty(): + count = 0 + limit = ida_typeinf.get_ordinal_limit() + for i in range(1, limit): + tif = ida_typeinf.tinfo_t() + if not tif.get_numbered_type(i, ida_typeinf.BTF_STRUCT): + continue + else: + count += 1 + return count + +def get_enum_member_tid(eid: int, i: int) -> int: + try: + tif = ida_typeinf.tinfo_t(tid=eid) + except ValueError: + return BADADDR + edm = ida_typeinf.edm_t() + return edm.get_tid() if tif.get_edm(edm, i) >= 
0 else BADADDR + +def find_enum_member_serial(enum_id: int, member_value: int, member_name: str): + """ + Return the serial index of the enum‐constant named `member_name` + (or holding `member_value`) inside `enum_id`. + Returns -1 on failure. + """ + try: + tif = ida_typeinf.tinfo_t(tid=enum_id) + except ValueError: + return -1 + ei = ida_typeinf.enum_type_data_t() + if not tif.get_enum_details(ei): + return -1 + + for i, m in enumerate(ei): + if m.name == member_name or m.value == member_value: + return ei.get_serial(i) + return -1 + + +class Cancelled(Exception): + pass + + +class FileError(Exception): + pass + + +class MultipleAddressSpacesNotSupported(Exception): + pass + + +class IdaXml: + def __init__(self, arg): + self.autorun = False if arg == 0 else True + self.debug = DEBUG + self.elements = {} + self.counters = [] + self.tags = [] + self.xmlfile = 0 + self.options = None + + + def cleanup(self): + """ + Frees memory and closes message box and XML file at termination. + """ + if self.options is not None: + self.options.Free() + ida_kernwin.hide_wait_box() + self.close_xmlfile() + + + def close_xmlfile(self): + """ + Closes the XML data file for the XML Exporter. + """ + if self.xmlfile != 0: + self.xmlfile.close() + self.xmlfile = 0 + + + def dbg(self, message): + """ + Outputs debug message if debug flag is enabled. + + Args: + message: String containing the debug message. + """ + if (self.debug): + idc.msg(message) + + + def display_summary(self, what): + """ + Displays summary in IDA output window. 
+ """ + summary = '' + total = 0 + for tag in self.tags: + count = self.counters[self.elements[tag]] + summary += "\n%-26s %8d" % (tag, count) + total += count + summary = "\n--------------------------------------" + summary + summary += "\n--------------------------------------" + summary += ("\n%-26s %8d" % ("Total XML Elements:",total)) + idc.msg(summary) + if not self.autorun: # and self.plugin: + frmt = "TITLE XML " + what + " Successful!\n" + frmt += "ICON INFO\n" + frmt += "AUTOHIDE NONE\n" + frmt += "HIDECANCEL\n" + fileline = '\n\nFile: %s' % self.filename + details = '\nSee output window for details...' + ida_kernwin.info("%s" % (frmt + fileline + details)) + + + def display_version(self, what): + """ + Displays XML version info in IDA output window. + + Args: + what: String indicating Exporter, Importer, or Loader + """ + if os.path.isfile(os.path.join(ida_diskio.idadir('python'), 'idaxml.py')): + f = os.path.join(ida_diskio.idadir('python'), 'idaxml.py') + elif os.path.isfile(os.path.join(ida_diskio.idadir('python'), '3', 'idaxml.py')): + f = os.path.join(ida_diskio.idadir('python'), '3', 'idaxml.py') + elif os.path.isfile(os.path.join(ida_diskio.idadir('python'), '2', 'idaxml.py')): + f = os.path.join(ida_diskio.idadir('python'), '2', 'idaxml.py') + elif os.path.isfile(os.path.join(ida_diskio.get_user_idadir(), 'python', 'idaxml.py')): + f = os.path.join(ida_diskio.get_user_idadir(), 'python', 'idaxml.py') + else: + msg = "Error opening file " + os.path.join(ida_diskio.idadir('python'), 'idaxml.py') + " !\n" + idc.msg(msg) + raise FileError + + ftime = time.localtime(os.path.getmtime(f)) + ts = time.strftime('%b %d %Y %H:%M:%S', ftime) + version = "\nXML " + what + " v" + IDAXML_VERSION + version += " : SDK " + str(IDA_SDK_VERSION) + version += " : Python : "+ ts + '\n' + idc.msg(version) + + + def open_file(self, filename, mode): + """ + Opens filename to specified mode. + + Args: + filename: String representing absolute filepath. 
+ mode: String representing mode for open. + + Returns + File handle. + + Exceptions: + Displays a warning and raises FileError exception + if open fails. + """ + try: + f = open(filename, mode) + return f + except Exception: + fmt = "TITLE ERROR!\n" + fmt += "ICON ERROR\n" + fmt += "AUTOHIDE NONE\n" + fmt += "HIDECANCEL\n" + fmt += "Error opening file" + filename + "!\n" + idc.warning(fmt) + raise FileError + + + def update_counter(self, tag): + """ + Updates the counter for the element tag. + + Args: + tag: String representing element tag. + """ + if tag in self.elements: + self.counters[self.elements[tag]] += 1 + else: + self.elements[tag] = len(self.elements) + self.counters.append(1) + self.tags.append(tag) + + + def update_status(self, tag): + """ + Displays the processing status in the IDA window. + + Args: + tag: String representing XML element tag + """ + status = 'Processing ' + tag + idc.msg('\n%-35s' % status) + ida_kernwin.hide_wait_box() + ida_kernwin.show_wait_box(status) + + +class XmlExporter(IdaXml): + """ + XML Exporter contains methods to export an IDA database as a + XML PROGRAM document. + """ + def __init__(self, arg): + """ + Initializes the XmlExporter attributes + + Args: + arg: Integer, non-zero value enables auto-run feature for + IDA batch (no gui) processing mode. Default is 0. + """ + IdaXml.__init__(self, arg) + self.indent_level = 0 + self.seg_addr = False + self.has_overlays = False + self.hexrays = False + + # initialize class variables from database + self.min_ea = ida_ida.inf_get_min_ea() + self.max_ea = ida_ida.inf_get_max_ea() + self.cbsize = (ida_idp.ph_get_cnbits()+7)//8 + self.processor = str.upper(ida_idp.get_idp_name()) + self.batch = ida_kernwin.cvar.batch + + + def export_xml(self): + """ + Exports the IDA database to a XML PROGRAM document file. 
+ """ + self.display_version('Exporter') + self.check_and_load_decompiler() + + self.get_options() + + if (self.autorun): + (self.filename, ext) = os.path.splitext(idc.get_idb_path()) + self.filename += ".xml" + else: + self.filename=ida_kernwin.ask_file(1, "*.xml", + "Enter name of export xml file:") + + if self.filename is None or len(self.filename) == 0: + raise Cancelled + self.xmlfile = self.open_file(self.filename, "w") + + ida_kernwin.show_wait_box("Exporting XML document ....") + idc.msg("\n------------------------------------------------" + + "-----------") + idc.msg("\nExporting XML document ....") + begin = time.clock() + + self.write_xml_declaration() + self.export_program() + + # export database items based on options + if (self.options.DataTypes.checked or + self.options.DataDefinitions.checked or + self.options.Functions.checked ): + self.export_datatypes() + if (self.options.MemorySections.checked or + self.options.MemoryContent.checked ): + self.export_memory_map() + if (self.options.RegisterValues.checked ): + self.export_register_values() + if (self.options.CodeBlocks.checked ): + self.export_code() + if (self.options.DataDefinitions.checked ): + self.export_data() + if (self.options.Comments.checked ): + self.export_comments() + self.export_bookmarks() + if (self.options.EntryPoints.checked ): + self.export_program_entry_points() + if (self.options.Symbols.checked ): + self.export_symbol_table() + if (self.options.Functions.checked ): + self.export_functions() + if (self.options.MemoryReferences.checked or + self.options.StackReferences.checked or + self.options.Manual.checked or + self.options.DataTypes.checked ): + self.export_markup() + self.end_element(PROGRAM) + + idc.msg('\n%35s' % 'Total ') + self.display_cpu_time(begin) + ida_kernwin.hide_wait_box() + self.display_summary('Export') + idc.msg('\nDatabase exported to: ' + self.filename + '\n') + + + # TODO: Test decompiler comments in batch and gui modes + def 
check_and_load_decompiler(self): + """ + Checks for the presence of a decompiler plugin for the database. + + Note: The decompiler must be loaded by the XML Exporter plugin + if it is running in batch mode. IDA will load the decompiler + plugin automatically if not in batch mode. + + Note: There was no support for decompiler plugins in IDAPython until + IDA 6.6, so skip if this is an older version. + + Note: Currently the 4 decompiler plugins for the x86, x64, + ARM32, and ARM64 are supported. + """ + if self.batch == 0: + self.hexrays = ida_hexrays.init_hexrays_plugin() + return + plugin = '' + if self.processor == 'PC': + if ida_ida.inf_is_64bit(): + plugin = "hexx64" + elif ida_ida.inf_is_32bit_exactly(): + plugin = 'hexrays' + elif self.processor == 'ARM': + if ida_ida.inf_is_64bit(): + plugin = "hexarm64" + elif ida_ida.inf_is_32bit_exactly(): + plugin = "hexarm" + if len(plugin) > 0: + try: + ida_loader.load_plugin(plugin) + self.hexrays = ida_hexrays.init_hexrays_plugin() + except Exception: + return + + + def check_char(self, ch: str) -> str: + """ + Replaces a special XML character with an entity string. + + Args: + ch: String containing the character to check. + + Returns: + String containing either the character or the entity + substition string. + """ + if isinstance(ch, int): + ch = chr(ch) + if ((ord(ch) < 0x20) and (ord(ch) != 0x09 and + ord(ch) != 0x0A and ord(ch) != 0x0D)): return '' + elif ch == '&' : return '&' + elif ch == '<' : return "<" + elif ch == '>' : return ">" + elif ch == '\'' : return "'" + elif ch == '"' : return """ + elif ch == '\x7F': return '' + elif ord(ch) > 0x7F: return '&#x' + format(ord(ch),"x") + ";" + return ch + + + def check_for_entities(self, text: str) -> str: + """ + Checks all characters in a string for special XML characters. + + Args: + text: String to check for special XML characters. + + Returns: + String containing original string with substitutions for + any special XML characters. 
+ """ + return ''.join(map(self.check_char, text)) + + + def check_if_seg_contents(self, seg): + """ + Determines if any address in a segment contains a value. + + Args: + seg: IDA segment object + + Returns: + True if any address in a segment contains a value. + False if no address in a segment contains a value. + """ + for addr in idautils.Heads(seg.start_ea, seg.end_ea): + if idc.has_value(idc.get_full_flags(addr)): + return True + return False + + + def check_stack_frame(self, sframe: ida_typeinf.tinfo_t) -> bool: + """ + Determines if stack frame contains any parameters or local variables. + + Args: + sframe: IDA stack frame for a function. + + Returns: + True if stack frame has parameters or local variables. + False if stack frame has no parameters or local variables. + """ + n = sframe.get_udt_nmembers() + for i in range(n): + member = get_member_by_idx(sframe.get_tid(), i) + if member is None: + continue + mname = member.name + if mname is not None and len(mname) > 0: + if mname != " s" and mname != " r": + return True + return False + + + def close_binfile(self): + """ + Closes the binary data file for the XML Exporter. + """ + if self.binfile != 0: + self.binfile.close() + self.binfile = 0 + + + def close_tag(self, has_contents=False): + """ + Closes the start tag for an XML element. + + Args: + has_contents: Boolean indicating if the element has + sub-elements or text. + """ + if has_contents: + self.write_to_xmlfile(">") + self.indent_level += 1 + else: + self.write_to_xmlfile(" />") + + + def display_cpu_time(self, start): + """ + Displays the elapsed CPU time since the start time. + + Args: + start: Floating-point value representing start time in seconds. + """ + idc.msg('CPU time: %6.4f' % (time.clock() - start)) + + + def end_element(self, tag, newline=True): + """ + Writes the element end tag to the XML file. + + Args: + tag: String containing the element name. + newline: Boolean indicating if end tag should go on new line. 
+ """ + self.indent_level -= 1 + if newline: + start = '\n' + (" " * self.indent_level) + else: + start = '' + self.write_to_xmlfile(start + "") + + + ''' + # BIT_MASK not currently supported for ENUM + def export_bitmask(self, eid, mask): + """ + Exports an enum bitmask member as BIT_MASK element. + + Args: + eid: Integer representing the IDA enum id + mask: Integer representing the IDA enum mask value + """ + name = idc.get_bmask_name(eid, mask) + if name is None: + return + self.start_element(BIT_MASK) + self.write_attribute(NAME, name) + self.write_numeric_attribute(VALUE, mask) + regcmt = idc.get_bmask_cmt(eid, mask, False) + rptcmt = idc.get_bmask_cmt(eid, mask, True) + has_comment = regcmt is not None or rptcmt is not None + self.close_tag(has_comment) + if regcmt is not None and len(regcmt) > 0: + self.export_regular_cmt(regcmt) + if rptcmt is not None and len(rptcmt) > 0: + self.export_repeatable_cmt(rptcmt) + if (has_comment): + self.end_element(BIT_MASK) + ''' + + + def export_bookmarks(self): + """ + Exports marked location descriptions as BOOKMARK elements. + """ + found = False + timer = time.clock() + for slot in range(0,1025): + address = idc.get_bookmark(slot) + description = idc.get_bookmark_desc(slot) + if address == BADADDR: + continue + if description is None: + continue + if not found: + found = True + self.update_status(BOOKMARKS) + self.start_element(BOOKMARKS, True) + self.start_element(BOOKMARK) + self.write_address_attribute(ADDRESS, address) + self.write_attribute(DESCRIPTION, description) + self.close_tag() + if found: + self.end_element(BOOKMARKS) + self.display_cpu_time(timer) + + + def export_c_comments(self): + """ + Exports block and end-of-line comments entered in the decompiler + interface. 
+ """ + if not self.hexrays: + return + functions = idautils.Functions() + if functions is None: + return + for addr in functions: + try: + if ida_segment.is_spec_ea(addr): + continue + ccmts = ida_hexrays.restore_user_cmts(addr) + if ccmts is None: + continue + p = ida_hexrays.user_cmts_begin(ccmts) + while p != ida_hexrays.user_cmts_end(ccmts): + cmk = ida_hexrays.user_cmts_first(p) + cmv = ida_hexrays.user_cmts_second(p) + if cmk.itp < (ida_hexrays.ITP_COLON+1): + self.export_comment(cmk.ea, "end-of-line", cmv.c_str()) + else: + self.export_comment(cmk.ea, "pre", cmv.c_str()) + p=ida_hexrays.user_cmts_next(p) + ida_hexrays.user_cmts_free(ccmts) + except Exception: + continue + + + def export_code(self): + """ + Exports the address ranges of code sequences as CODE_BLOCK(s) + with START and END address attributes. + """ + addr = self.min_ea + if not idc.is_code(idc.get_full_flags(addr)): + addr = ida_bytes.next_that(addr, self.max_ea, idc.is_code) + if (addr == BADADDR): + return + self.update_status(CODE) + timer = time.clock() + data = ida_bytes.next_that(addr, self.max_ea, idc.is_data) + unknown = ida_bytes.next_unknown(addr, self.max_ea) + self.start_element(CODE, True) + while (addr != BADADDR): + start = addr + end = min(data, unknown) + if (end == BADADDR): + if (ida_segment.getseg(start).end_ea < self.max_ea): + codeend = ida_segment.getseg(start).end_ea - 1 + addr = ida_segment.getseg(idc.next_addr(codeend)).start_ea + if not idc.is_code(idc.get_full_flags(addr)): + addr = ida_bytes.next_that(addr, self.max_ea, + idc.is_code) + else: + codeend = self.max_ea - 1 + addr = BADADDR + else: + if (ida_segment.getseg(start).end_ea < end): + codeend = ida_segment.getseg(start).end_ea - 1 + addr = ida_segment.getseg(idc.next_addr(codeend)).start_ea + if not idc.is_code(ida_bytes.get_full_flags(addr)): + addr = ida_bytes.next_that(addr, self.max_ea, + idc.is_code) + else: + codeend = idc.get_item_end(ida_bytes.prev_that(end, + start, idc.is_code)) - 1 + addr = 
ida_bytes.next_that(end, self.max_ea, idc.is_code) + if (data < addr): + data = ida_bytes.next_that(addr, self.max_ea, + idc.is_data) + if (unknown < addr): + unknown = ida_bytes.next_unknown(addr, self.max_ea) + self.start_element(CODE_BLOCK) + self.write_address_attribute(START, start) + self.write_address_attribute(END, codeend) + self.close_tag() + self.end_element(CODE) + self.display_cpu_time(timer) + + + def export_comment(self, addr, cmt_type, cmt): + """ + Exports a element with ADDRESS and TYPE attributes. + The comment is exported as the element text (parsed character data). + + Args: + addr: Integers representing address of comment. + cmt_type: String indicating the comment type. + cmt: String containing the comment. + """ + self.start_element(COMMENT) + self.write_address_attribute(ADDRESS, addr) + self.write_attribute(TYPE, cmt_type) + self.close_tag(True) + # tag_remove seems to be losing last character + # work around is to add a space + cmt_text = ida_lines.tag_remove(cmt + ' ') + if sys.version_info.major >= 3: + self.write_text(cmt_text) + else: + self.write_text(cmt_text.decode('utf-8')) + self.end_element(COMMENT, False) + + + def export_comments(self): + """ + Exports all comments in the IDA database as elements. 
+ """ + addr = self.min_ea + if not ida_bytes.has_cmt(idc.get_full_flags(addr)): + addr = ida_bytes.next_that(addr, self.max_ea, ida_bytes.has_cmt) + if (addr == BADADDR): + return + self.update_status(COMMENTS) + timer = time.clock() + self.start_element(COMMENTS, True) + while (addr != BADADDR): + cmt = idc.get_cmt(addr, False) + if (cmt is not None): + self.export_comment(addr, "end-of-line", cmt) + cmt = idc.get_cmt(addr, True) + if (cmt is not None): + self.export_comment(addr, "repeatable", cmt) + addr = ida_bytes.next_that(addr, self.max_ea, ida_bytes.has_cmt) + addr = self.min_ea + if not ida_bytes.has_extra_cmts(idc.get_full_flags(addr)): + addr = ida_bytes.next_that(addr, self.max_ea, ida_bytes.has_extra_cmts) + while (addr != BADADDR): + extra = idc.get_extra_cmt(addr, idc.E_PREV) + if (extra is not None): + self.export_extra_comment(addr, "pre", idc.E_PREV) + extra = idc.get_extra_cmt(addr, idc.E_NEXT) + if (extra is not None): + self.export_extra_comment(addr, "post", idc.E_NEXT) + addr = ida_bytes.next_that(addr, self.max_ea, ida_bytes.has_extra_cmts) + self.export_c_comments() + self.end_element(COMMENTS) + self.display_cpu_time(timer) + + + def export_data(self): + """ + Exports the data items in the database as elements. 
+ """ + addr = self.min_ea + if not idc.is_data(idc.get_full_flags(addr)): + addr = ida_bytes.next_that(addr, self.max_ea, idc.is_data) + if (addr == BADADDR): + return + timer = time.clock() + self.update_status(DATA) + self.start_element(DATA, True) + while (addr != BADADDR): + f = idc.get_full_flags(addr) + if ida_bytes.is_align(f): + addr = ida_bytes.next_that(addr, self.max_ea, idc.is_data) + continue + dtype = self.get_datatype(addr) + size = idc.get_item_size(addr) + ti = ida_nalt.opinfo_t() + msize = ida_bytes.get_data_elsize(addr, f, ti) + if ida_bytes.is_struct(f): + s = idc.get_struc_id(dtype) + msize = t if (t := idc.get_struc_size(s)) is not None else ida_typeinf.BADSIZE + if msize == 0: + msize = 1 + if not idc.is_strlit(f) and size != msize: + dtype = "%s[%d]" % (dtype, size//msize) + self.start_element(DEFINED_DATA) + self.write_address_attribute(ADDRESS, addr) + self.write_attribute(DATATYPE, dtype) + self.write_numeric_attribute(SIZE, size*self.cbsize) + #TODO consider using GetTrueNameEx and Demangle + demangled = ida_name.get_demangled_name(addr, + DEMANGLED_TYPEINFO, ida_ida.inf_get_demnames(), idc.GN_STRICT) + outbuf = '' + # TODO: How to handle print_type for data mangled names? + #outbuf = idaapi.print_type(addr, False) + if demangled == "'string'": + demangled = None + has_typeinfo = ((demangled is not None and len(demangled) > 0) or + (outbuf is not None and len(outbuf) > 0)) + #TODO export_data: add DISPLAY_SETTINGS + self.close_tag(has_typeinfo) + if has_typeinfo: + if demangled is not None and len(demangled) > 0: + self.export_typeinfo_cmt(demangled) + elif len(outbuf) > 0: + self.export_typeinfo_cmt(outbuf) + self.end_element(DEFINED_DATA) + addr = ida_bytes.next_that(addr, self.max_ea, idc.is_data) + self.end_element(DATA) + self.display_cpu_time(timer) + + + def export_datatypes(self): + """ + Exports the structures and enums in IDA database. 
def export_enum_member(self, cid: int, bf: bool, radix: int, signness) -> None:
    """
    Exports a member of an enum as an ENUM_ENTRY element.

    Members without a name are skipped. Regular and repeatable comments
    are written as child elements when they are non-empty.

    Args:
        cid: Integer representing id of enum member
        bf: Boolean indicates if a bitfield
        radix: Integer representing numeric display format
        signness: Boolean indicating if signed value
    """
    cname = idc.get_enum_member_name(cid)
    if not cname:
        return
    regcmt = idc.get_enum_member_cmt(cid, False)
    rptcmt = idc.get_enum_member_cmt(cid, True)
    has_regcmt = bool(regcmt)
    has_rptcmt = bool(rptcmt)
    # The element needs child content (and therefore an end tag) when
    # EITHER comment will be written. Looking only at the regular comment
    # left an entry with just a repeatable comment self-closed, with the
    # comment element emitted after it and no matching end tag.
    has_comment = has_regcmt or has_rptcmt
    self.start_element(ENUM_ENTRY)
    self.write_attribute(NAME, cname)
    value = idc.get_enum_member_value(cid)
    self.write_numeric_attribute(VALUE, value, radix, signness)
    # BIT_MASK attribute not currently supported for ENUM_ENTRY
    #if bf:
    #    self.write_numeric_attribute(BIT_MASK, mask)
    self.close_tag(has_comment)
    if has_regcmt:
        self.export_regular_cmt(regcmt)
    if has_rptcmt:
        self.export_repeatable_cmt(rptcmt)
    if has_comment:
        self.end_element(ENUM_ENTRY)
def export_enum_reference(self, addr: int, op: int) -> None:
    """
    Exports an EQUATE_REFERENCE element for an enum-typed operand.

    The operand value is read from the decoded instruction and looked up
    in the enum attached to the operand. Nothing is written when no enum
    member matches.

    Args:
        addr: Integer representing the instruction address.
        op: Integer representing the operand index (0-based)
    """
    eid, _serial = ida_bytes.get_enum_id(addr, op)
    insn = ida_ua.insn_t()
    ida_ua.decode_insn(insn, addr)
    operand_value = insn.ops[op].value
    last_mask = idc.get_last_bmask(eid)
    is_bitfield = idc.is_bf(eid)
    if is_bitfield:
        # Try each bitmask in turn until a member matches or the last
        # mask has been tried.
        mask = idc.get_first_bmask(eid)
        while True:
            member = idc.get_enum_member(eid, operand_value & mask, 0, mask)
            if member != BADNODE or mask == last_mask:
                break
            mask = idc.get_next_bmask(eid, mask)
    else:
        member = idc.get_enum_member(eid, operand_value, 0, last_mask)
    if member == BADNODE:
        return
    self.start_element(EQUATE_REFERENCE)
    self.write_address_attribute(ADDRESS, addr)
    self.write_numeric_attribute(OPERAND_INDEX, op, 10)
    self.write_numeric_attribute(VALUE, idc.get_enum_member_value(member))
    member_name = idc.get_enum_member_name(member)
    if member_name:
        self.write_attribute(NAME, member_name)
    if is_bitfield:
        # mask is the bitmask under which the member was found.
        self.write_numeric_attribute("BIT_MASK", mask)
    self.close_tag()
def export_enums(self):
    """
    Exports enumerations as ENUM elements.

    Each named enum is written with its NAME and (when available) SIZE,
    optional DISPLAY_SETTINGS, its regular/repeatable comments and its
    members. Enums without a name are skipped entirely.
    """
    for eid in _iter_enum_ids():
        ename = idc.get_enum_name(eid)
        if not ename:
            # Skip BEFORE opening the element: starting the ENUM tag and
            # then skipping would leave an unterminated tag in the output.
            continue
        self.start_element(ENUM)
        self.write_attribute(NAME, ename)
        ewidth = idc.get_enum_width(eid)
        if ewidth != 0 and ewidth <= 64:
            self.write_numeric_attribute(SIZE, ewidth, 10)
        eflags = idc.get_enum_flag(eid)
        bf = idc.is_bf(eid)
        # BIT_FIELD attribute not supported for ENUM export
        #if bf:
        #    self.write_attribute(BIT_FIELD, "yes")
        regcmt = idc.get_enum_cmt(eid)
        rptcmt = ida_typeinf.tinfo_t(tid=eid).get_type_rptcmt()
        # Display settings are only written when they differ from the
        # hex/unsigned case.
        non_hex = ida_bytes.get_radix(eflags, 0) != 16
        signed = self.is_signed_data(eflags)
        has_children = ((idc.get_enum_size(eid) > 0) or
                        (regcmt is not None) or (rptcmt is not None) or
                        non_hex or signed)
        self.close_tag(has_children)
        if non_hex or signed:
            self.start_element(DISPLAY_SETTINGS)
            if non_hex:
                self.write_attribute(FORMAT, self.get_format(eflags))
            if signed:
                self.write_attribute(SIGNED, "yes")
            self.close_tag()
        if regcmt is not None:
            self.export_regular_cmt(regcmt)
        if rptcmt is not None:
            self.export_repeatable_cmt(rptcmt)
        self.export_enum_members(eid, bf, eflags)
        if has_children:
            self.end_element(ENUM)
+ cmt_type: String indicating comment type + extra: Integer representing extra comment index + """ + cmt = '' + nextline = idc.get_extra_cmt(addr, extra) + while (nextline is not None): + # workaround for tag_remove bug is to add space + cmt += ida_lines.tag_remove(nextline + ' ') + extra += 1 + nextline = idc.get_extra_cmt(addr, extra) + if (nextline is not None): + cmt += '\n' + self.export_comment(addr, cmt_type, cmt) + + + def export_functions(self): + """ + Exports information about all functions. + """ + functions = idautils.Functions() + if functions is None: + return + self.update_status(FUNCTIONS) + timer = time.clock() + self.start_element(FUNCTIONS, True) + for addr in functions: + function = ida_funcs.get_func(addr) + if ida_segment.is_spec_ea(function.start_ea): + continue + self.start_element(FUNCTION) + self.write_address_attribute(ENTRY_POINT, function.start_ea) + if ida_bytes.has_user_name(idc.get_full_flags(addr)): + name = self.get_symbol_name(addr) + if name is not None and len(name) > 0: + self.write_attribute(NAME, name) + if function.flags & idc.FUNC_LIB != 0: + self.write_attribute(LIBRARY_FUNCTION, "y") + self.close_tag(True) + fchunks = idautils.Chunks(addr) + for (startEA, endEA) in fchunks: + self.start_element(ADDRESS_RANGE) + self.write_address_attribute(START, startEA) + self.write_address_attribute(END, endEA-1) + self.close_tag() + regcmt = ida_funcs.get_func_cmt(function, False) + if regcmt is not None: + self.export_regular_cmt(regcmt) + rptcmt = ida_funcs.get_func_cmt(function, True) + if rptcmt is not None: + self.export_repeatable_cmt(rptcmt) + demangled = ida_name.get_demangled_name(addr, + DEMANGLED_TYPEINFO, + ida_ida.inf_get_demnames(), True) + if demangled is not None and demangled == "'string'": + demangled = None + outbuf = '' + # TODO: How to handle print_type for function typeinfo cmts + #outbuf = idaapi.print_type(addr, False) + has_typeinfo = (demangled is not None or (outbuf is not None and + len(outbuf) > 0)) + if 
def export_manual_operand(self, addr):
    """
    Writes a MANUAL_OPERAND element for every forced operand at an address.

    Each operand index up to UA_MAXOP is checked; operands that are not
    forced, or whose forced text is empty, produce no output.

    Args:
        addr: Integer representing instruction address.
    """
    for index in range(ida_ida.UA_MAXOP):
        if not ida_bytes.is_forced_operand(addr, index):
            continue
        forced_text = idc.get_forced_operand(addr, index)
        if not forced_text:
            continue
        self.start_element(MANUAL_OPERAND)
        self.write_address_attribute(ADDRESS, addr)
        self.write_numeric_attribute(OPERAND_INDEX, index, 10)
        self.close_tag(True)
        # The user-entered operand text is the element body.
        self.write_text(forced_text)
        self.end_element(MANUAL_OPERAND, False)
+ """ + self.update_status(MARKUP) + timer = time.clock() + self.start_element(MARKUP, True) + addr = self.min_ea + while addr != BADADDR: + f = idc.get_full_flags(addr) + if self.options.MemoryReferences.checked: + if ida_bytes.has_xref(f): + self.export_user_memory_reference(addr) + if ida_bytes.is_off(f, ida_bytes.OPND_ALL): + self.export_memory_references(addr) + if (self.options.Functions.checked and + self.options.StackReferences.checked and + ida_bytes.is_stkvar(f, ida_bytes.OPND_ALL)): + self.export_stack_reference(addr) + if (self.options.DataTypes.checked and + ida_bytes.is_enum(f, ida_bytes.OPND_ALL)): + self.export_enum_references(addr) + if self.options.Manual.checked: + # TODO: Ask about OPND_ALL and retrieving additional manual operands + #if ida_bytes.is_forced_operand(addr, ida_bytes.OPND_ALL): + if (ida_bytes.is_forced_operand(addr, 0) or + ida_bytes.is_forced_operand(addr, 1)): + self.export_manual_operand(addr) + if ida_bytes.is_manual_insn(addr): + self.export_manual_instruction(addr) + addr = idc.next_head(addr, self.max_ea) + self.end_element(MARKUP) + self.display_cpu_time(timer) + + + def export_members(self, s: ida_typeinf.tinfo_t): + """ + Exports the members of a structure or union. 
+ + Args: + s: IDA structure/union instance + """ + nmembers = s.get_udt_nmembers() + for n in range(nmembers): + m = get_member_by_idx(s.get_tid(), n) + if m is None: + continue + offset = m.offset//8 + if s.is_union(): + offset = 0 + self.start_element(MEMBER) + self.write_numeric_attribute(OFFSET, offset) + mname = m.name + if len(mname) > 0: + self.write_attribute(NAME, mname) + dtype = self.get_member_type(m) + if m.type.is_varmember(): + msize = 0 + size = 0 + else: + mtibuf = ida_nalt.opinfo_t() + mti, size, _, mtibuf, _ = ida_typeinf.get_idainfo_by_type(m.type) + #if IDA_SDK_VERSION < 640: + # msize = idaapi.get_type_size0(None, dtype) + # if msize is None or msize == 0: + # msize = ida_struct.get_member_size(m) + #else: + #msize = idaapi.get_data_type_size(m.flag, mtibuf) + msize = m.type.get_array_element().get_size() if m.type.is_array() else size + msize = ida_typeinf.BADSIZE if msize is None else msize + if size < msize: size = msize + if (size != msize): + arraytype = self.get_member_type(m) + dtype = "%s[%d]" % (arraytype, size//msize) + self.write_attribute(DATATYPE, dtype) + self.write_numeric_attribute(SIZE, size*self.cbsize) + regcmt = m.cmt if m.is_regcmt() else None + rptcmt = m.cmt if not m.is_regcmt() else None + hascmt = regcmt is not None or rptcmt is not None + self.close_tag(hascmt) + if (hascmt): + if regcmt is not None: + self.export_regular_cmt(regcmt) + if rptcmt is not None: + self.export_repeatable_cmt(rptcmt) + self.end_element(MEMBER) + + + def export_memory_contents(self, binfilename, binfile, start, end): + """ + Exports the binary memory contents in the database. + + A MEMORY_CONTENTS element is generated for each contiguous address + range where each address in the range contains a value. + The binary values are store in a separate file (not the XML file), + and the MEMORY_CONTENTS element identifies the file and the + offset in the file where the address range is located. 
def export_memory_map(self):
    """
    Writes the MEMORY_MAP element describing every segment.

    One MEMORY_SECTION is produced per segment through
    export_memory_section. When the MemoryContent option is checked, a
    ".bytes" file named after the export file is opened for the segment
    contents before the sections are written and closed afterwards.
    """
    segment_count = ida_segment.get_segm_qty()
    if segment_count == 0:
        return
    self.update_status(MEMORY_MAP)
    # NOTE(review): time.clock no longer exists in the standard library
    # as of Python 3.8; this presumably relies on an alias installed
    # elsewhere in the module - confirm.
    timer = time.clock()
    binfilename = ''
    if self.options.MemoryContent.checked:
        # Same path as the export file, with the extension swapped.
        root, _ext = os.path.splitext(self.filename)
        binfilename = root + ".bytes"
        self.binfile = ida_fpro.qfile_t()
        self.binfile.open(binfilename,'wb')
    self.start_element(MEMORY_MAP, True)
    for index in range(segment_count):
        segment = ida_segment.getnseg(index)
        self.export_memory_section(segment, binfilename)
    self.end_element(MEMORY_MAP)
    if self.options.MemoryContent.checked:
        self.close_binfile()
    self.display_cpu_time(timer)
def export_memory_references(self, addr):
    """
    Writes memory references for the offset operands at an address.

    An operand is exported when it is flagged as an offset and the item
    is either data, or code whose operand passes is_imm_op.

    Args:
        addr: Integer representing the instruction address.
    """
    flags = idc.get_full_flags(addr)
    for index in range(ida_ida.UA_MAXOP):
        if not ida_bytes.is_off(flags, index):
            continue
        if idc.is_data(flags) or (idc.is_code(flags) and
                                  self.is_imm_op(addr, index)):
            self.export_memory_reference(addr, index)
+ """ + segname = ida_segment.get_segm_name(seg) + self.start_element(MEMORY_SECTION) + self.write_attribute(NAME, segname) + self.write_address_attribute(START_ADDR, seg.start_ea) + length = (seg.end_ea - seg.start_ea)*self.cbsize + self.write_numeric_attribute(LENGTH, length) + perms = "" + if (seg.perm != 0): + if (seg.perm & ida_segment.SEGPERM_READ != 0): + perms += 'r' + if (seg.perm & ida_segment.SEGPERM_WRITE != 0): + perms += 'w' + if (seg.perm & ida_segment.SEGPERM_EXEC != 0): + perms += 'x' + if (len(perms) > 0): + self.write_attribute(PERMISSIONS, perms) + has_contents = (self.options.MemoryContent.checked and + self.check_if_seg_contents(seg)) + self.close_tag(has_contents) + if (has_contents): + self.export_memory_contents(os.path.basename(binfilename), + self.binfile, seg.start_ea, seg.end_ea) + self.end_element(MEMORY_SECTION) + + + def export_program(self): + """ + Exports basic information about the program as the PROGRAM, + INFO_SOURCE, PROCESSOR, and COMPILER elements. 
+ """ + # output the PROGRAM element + self.update_status(PROGRAM) + timer = time.clock() + self.start_element(PROGRAM) + self.write_attribute(NAME, idc.get_root_filename()) + self.write_attribute(EXE_PATH, idc.get_input_file_path()) + etype = ida_loader.get_file_type_name() + if (len(etype) > 0): + self.write_attribute(EXE_FORMAT, etype) + # check for presence of INPUT_MD5 netnode + md5 = ida_netnode.netnode(INPUT_MD5) + if md5 == BADNODE: + input_md5 = idc.retrieve_input_file_md5() + else: + input_md5 = md5.supval(ida_nalt.RIDX_MD5) + if input_md5 is not None: + self.write_attribute(INPUT_MD5,input_md5) + self.close_tag(True) + + # output the INFO_SOURCE element + self.start_element(INFO_SOURCE) + tool = 'IDA-Pro ' + ida_kernwin.get_kernel_version() + tool += ' XML plugin v' + IDAXML_VERSION + ' (Python) SDK ' + str(IDA_SDK_VERSION) + self.write_attribute(TOOL, tool) + user = os.getenv("USERNAME", "UNKNOWN") + if (user == "UNKNOWN"): + user = os.getenv("USER", "UNKNOWN") + self.write_attribute(USER, user) + self.write_attribute(FILE, idc.get_idb_path()) + ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M") + self.write_attribute(TIMESTAMP, ts) + self.close_tag() + + # output the PROCESSOR element + self.start_element(PROCESSOR) + self.write_attribute(NAME, ida_ida.inf_get_procname()) + if ida_ida.inf_is_be(): + byte_order = "big" + else: + byte_order = "little" + self.write_attribute(ENDIAN, byte_order) + self.seg_addr = False + bitness = 1 + model_warning = False + nsegs = ida_segment.get_segm_qty() + if (nsegs > 0): + bitness = ida_segment.getnseg(0).bitness + for i in range(1,nsegs): + seg = ida_segment.getnseg(i) + if (seg.bitness != bitness): + model_warning = True + if (seg.bitness > bitness): + bitness = seg.bitness + addr_model = "32-bit" + if (bitness == 0): + addr_model = "16-bit" + elif (bitness == 2): + addr_model = "64-bit" + self.write_attribute(ADDRESS_MODEL, addr_model) + self.close_tag() + if (model_warning): + idc.msg("WARNING: Segments do 
def export_program_entry_points(self):
    """
    Writes the PROGRAM_ENTRY_POINTS element.

    One PROGRAM_ENTRY_POINT child with an ADDRESS attribute is written
    per entry point. Nothing is written when there are no entry points.
    """
    entry_count = idc.get_entry_qty()
    if not entry_count:
        return
    self.update_status(PROGRAM_ENTRY_POINTS)
    # NOTE(review): time.clock no longer exists in the standard library
    # as of Python 3.8; this presumably relies on an alias installed
    # elsewhere in the module - confirm.
    timer = time.clock()
    self.start_element(PROGRAM_ENTRY_POINTS, True)
    for index in range(entry_count):
        self.start_element(PROGRAM_ENTRY_POINT)
        # Entry points are looked up by ordinal, not by index.
        ordinal = idc.get_entry_ordinal(index)
        self.write_address_attribute(ADDRESS, idc.get_entry(ordinal))
        self.close_tag()
    self.end_element(PROGRAM_ENTRY_POINTS)
    self.display_cpu_time(timer)
def export_regular_cmt(self, cmt: str) -> None:
    """
    Exports the regular comment for an item.

    Delegates to write_comment_element with the REGULAR_CMT element name.

    Args:
        cmt: String containing the regular comment.
    """
    self.write_comment_element(REGULAR_CMT, cmt)
def export_stack_reference(self, addr):
    """
    Exports references to stack variables at the address.

    A STACK_REFERENCE element is written for every operand of a code item
    that is flagged as a stack variable and resolves through get_stkvar.
    Nothing is written for non-code items, or when no function contains
    the address.

    Args:
        addr: Integer containing instruction address.
    """
    f = idc.get_full_flags(addr)
    if not idc.is_code(f):
        return
    insn = None
    function = None
    for op in range(ida_ida.UA_MAXOP):
        if not ida_bytes.is_stkvar(f, op):
            continue
        if insn is None:
            # Decode once; the instruction is the same for every operand.
            insn = ida_ua.insn_t()
            ida_ua.decode_insn(insn, addr)
        opnd = insn.ops[op]
        # TODO:How to handle opnd.type for stack references
        if opnd.type == idc.o_void:
            continue
        # TODO:How to handle op_t_get_addr for stack references
        svidx = ida_typeinf.tinfo_t().get_stkvar(insn, opnd, opnd.value)
        if svidx == -1:
            continue
        if function is None:
            function = ida_funcs.get_func(addr)
            if function is None:
                # No enclosing function: the frame offsets cannot be
                # computed, so stop before opening an element that could
                # not be completed.
                return
        self.start_element(STACK_REFERENCE)
        self.write_address_attribute(ADDRESS, addr)
        self.write_numeric_attribute(OPERAND_INDEX, op, 10)
        offset = opnd.addr
        spoff = offset - function.frregs
        # Values above 0x7FFFFFFF are reinterpreted as negative 32-bit
        # offsets.
        if offset > 0x7FFFFFFF:
            offset -= 0x100000000
        if spoff > 0x7FFFFFFF:
            spoff -= 0x100000000
        self.write_numeric_attribute(STACK_PTR_OFFSET, spoff,
                                     16, True)
        if (function.flags & idc.FUNC_FRAME) != 0:
            self.write_numeric_attribute(FRAME_PTR_OFFSET,
                                         offset, 16, True)
        self.close_tag()
frame. + + Args: + function: IDA function instance. + sframe: IDA stack frame instance. + """ + for i in range(sframe.get_udt_nmembers()): + member = get_member_by_idx(sframe.get_tid(), i) + if member is None: + continue + mname = member.name + if mname is None or len(mname) < 0: + continue + if mname == " s" or mname == " r": + continue + spoff = member.offset//8 - function.frsize - function.frregs + froff = member.offset//8 - function.frsize + self.start_element(STACK_VAR) + self.write_numeric_attribute(STACK_PTR_OFFSET, spoff, 16, True) + if function.flags & idc.FUNC_FRAME != 0: + self.write_numeric_attribute(FRAME_PTR_OFFSET, froff, 16, True) + pre = mname[0:4] + if pre != "var_" and pre != "arg_": + self.write_attribute(NAME, mname) + _, size, f, _, _ = ida_typeinf.get_idainfo_by_type(member.type) + mtype = self.get_member_type(member) + msize = size + if idc.is_struct(f): + msize = idc.get_struc_id(mtype) + elif not idc.is_strlit(f): + mtibuf = ida_nalt.opinfo_t() + mti, _, _, mtibuf, _ = ida_typeinf.get_idainfo_by_type(member.type) + #msize = idaapi.get_data_type_size(f, mtibuf) + msize = member.type.get_array_element().get_size() if member.type.is_array() else size + msize = ida_typeinf.BADSIZE if msize is None else msize + if size < msize: size = msize + if (not idc.is_strlit(f) and not ida_bytes.is_align(f) + and size != msize): + mtype = "%s[%d]" % (mtype, size//msize) + self.write_attribute(DATATYPE, mtype) + self.write_numeric_attribute(SIZE, size*self.cbsize) + regcmt = member.cmt if member.is_regcmt() else None + rptcmt = member.cmt if not member.is_regcmt() else None + if regcmt is not None: + regcmt = ida_lines.tag_remove(regcmt + " ") + if rptcmt is not None: + rptcmt = ida_lines.tag_remove(rptcmt + " ") + has_regcmt = regcmt is not None and len(regcmt) > 0 + has_rptcmt = rptcmt is not None and len(rptcmt) > 0 + has_content = has_regcmt or has_rptcmt + self.close_tag(has_content) + if has_content: + if has_regcmt: + self.export_regular_cmt(regcmt) 
def export_structures(self):
    """
    Writes a STRUCTURE or UNION element for every named structure type.

    Each element carries NAME and SIZE (plus VARIABLE_LENGTH for variable
    sized structures) and, when present, the type comments and members.
    """
    for _ordinal, sid, sname in idautils.Structs():
        if sname is None:
            # Skip unnamed structs. Alternatively the exporter could
            # generate a unique temporary name.
            continue
        tinfo = get_struc(sid)
        if tinfo is None:
            continue
        if tinfo.is_union():
            element = UNION
        else:
            element = STRUCTURE
        self.start_element(element)
        self.write_attribute(NAME, sname)
        self.write_numeric_attribute(SIZE, idc.get_struc_size(sid)*self.cbsize)
        if tinfo.is_varstruct():
            self.write_attribute(VARIABLE_LENGTH, "y")
        regcmt = tinfo.get_type_cmt()
        rptcmt = tinfo.get_type_rptcmt()
        has_contents = (regcmt is not None or rptcmt is not None or
                        tinfo.get_udt_nmembers() > 0)
        self.close_tag(has_contents)
        if not has_contents:
            continue
        if regcmt is not None:
            self.export_regular_cmt(regcmt)
        if rptcmt is not None:
            self.export_repeatable_cmt(rptcmt)
        if tinfo.get_udt_nmembers() > 0:
            self.export_members(tinfo)
        self.end_element(element)
def export_typeinfo_cmt(self, cmt):
    """
    Exports comment containing type information for data and functions.

    Trailing newline characters are removed before the comment is
    written as a TYPEINFO_CMT element.

    Args:
        cmt: String containing type info.
    """
    # older versions of IDAPython returned a '\n' at end of cmt.
    # rstrip also copes with a comment made up solely of newlines, where
    # peeling characters off one at a time indexed into an empty string.
    self.write_comment_element(TYPEINFO_CMT, cmt.rstrip('\n'))
This is rare, but may occur, for + example when a processor has a small logical address space + (i.e. a 16-bit address is limited to 64K) and multiple physical + segments are mapped into the same logical segment. + """ + self.overlay = dict() + self.has_overlays = False + nsegs = ida_segment.get_segm_qty() + if nsegs == 0: + return + s = ida_segment.getnseg(0) + start = self.translate_address(s.start_ea) + self.overlay[start] = False + for i in range(1, nsegs): + s = ida_segment.getnseg(i) + space = self.get_space_name(s.start_ea) + saddr = self.translate_address(s.start_ea) + eaddr = self.translate_address(s.end_ea-1) + is_overlay = False + for j in range(i): + s2 = ida_segment.getnseg(j) + space2 = self.get_space_name(s2.start_ea) + if space == space2: + start = self.translate_address(s2.start_ea) + end = self.translate_address(s2.end_ea - 1) + if ((saddr >= start and saddr <= end) or + (eaddr >= start and eaddr <= end)): + is_overlay = True + self.has_overlays = True + break + self.overlay[saddr] = is_overlay + + + def get_address_string(self, addr: int) -> str: + """ + Returns a string representing the address. + + The representation is typically a hex string of the address, + but may include a segment or space name prefixe based on the + processor or architecture. + + Args: + addr: Integer representing a program address. 
def get_datatype(self, addr: int) -> str:
    """
    Returns the datatype at an address.

    The type could be a basic type (byte, word, dword, etc.),
    a structure, an array, a pointer, or a string type.

    Args:
        addr: Integer representing a program address.

    Returns:
        String naming the datatype: the structure name for structure
        items, a string type name for string literals, "pointer" when
        operand 0 is an offset, otherwise the result of get_type().
    """
    f = idc.get_full_flags(addr)
    if ida_bytes.is_struct(f):
        opndbuf = ida_nalt.opinfo_t()
        opnd = ida_bytes.get_opinfo(opndbuf, addr, 0, f)
        return idc.get_struc_name(opnd.tid)
    if idc.is_strlit(f):
        str_type = idc.get_str_type(addr)
        # "unicode2" used to be guarded by a second STRTYPE_C_16 test
        # and could never be returned. STRTYPE_LEN2_16 mirrors the
        # "string2" <-> STRTYPE_LEN2 pairing above it and is the
        # constant get_string_type() uses for 2-byte-length Unicode.
        # NOTE(review): by the same pairing "unicode4" may have been
        # meant for STRTYPE_LEN4_16; STRTYPE_C_32 is kept as found.
        string_names = (
            (ida_nalt.STRTYPE_TERMCHR, "string"),
            (ida_nalt.STRTYPE_PASCAL, "string1"),
            (ida_nalt.STRTYPE_LEN2, "string2"),
            (ida_nalt.STRTYPE_LEN4, "string4"),
            (ida_nalt.STRTYPE_C_16, "unicode"),
            (ida_nalt.STRTYPE_LEN2_16, "unicode2"),
            (ida_nalt.STRTYPE_C_32, "unicode4"),
        )
        for constant, label in string_names:
            if str_type == constant:
                return label
        # Unrecognized string types are reported as plain strings.
        return "string"
    if ida_bytes.is_off0(f):
        return "pointer"
    return self.get_type(f)
+ """ + fmt = "HELP\n" + fmt += "XML plugin (Python)\n" + fmt += "IDA SDK: "+ str(IDA_SDK_VERSION) + "\n" + fmt += "\n" + fmt += "The XML interface provides a dump of the IDA-Pro database as " + fmt += "a XML \"PROGRAM\" document. The XML PROGRAM document contains " + fmt += "information from the idb file in a readable text format, and " + fmt += "can be viewed with a text editor or web browser.\n\n" + fmt += "ENDHELP\n" + fmt += "Export as XML PROGRAM document...." + fmt += "\n <##Options##Memory Sections:{MemorySections}>" + fmt += "\n " + fmt += "\n " + fmt += "\n " + fmt += "\n " + fmt += "\n " + fmt += "\n " + fmt += "\n " + fmt += "\n " + fmt += "\n " + fmt += "\n " + fmt += "\n " + fmt += "\n {cGroup1}>" + fmt += "\n\n" + + Opts = { 'cGroup1': ida_kernwin.Form.ChkGroupControl (( + "MemorySections", + "MemoryContent", + "RegisterValues", + "DataTypes", + "CodeBlocks", + "DataDefinitions", + "Comments", + "EntryPoints", + "Symbols", + "Functions", + "MemoryReferences", + "StackReferences", + "Manual" + ))} + + self.options = ida_kernwin.Form(fmt, Opts) + self.options.Compile() + + self.options.MemorySections.checked = True + self.options.MemoryContent.checked = True + self.options.DataTypes.checked = True + self.options.RegisterValues.checked = True + self.options.CodeBlocks.checked = True + self.options.DataDefinitions.checked = True + self.options.Symbols.checked = True + self.options.EntryPoints.checked = True + self.options.Functions.checked = True + self.options.Comments.checked = True + self.options.MemoryReferences.checked = True + self.options.StackReferences.checked = False + self.options.Manual.checked = True + + if (not self.autorun): + ok = self.options.Execute() + if (ok == 0): + raise Cancelled + + + def get_space_name(self, addr): + """ + Returns the memory space name associated with an address. + + Args: + addr: Integer representing a program address. + + Returns: + String containg the memory space name. 
def get_symbol_name(self, ea):
    """
    Returns the symbol name for the address.

    The demangled name is preferred. When it is missing, empty, or the
    placeholder text 'string' in single quotes, the name returned by
    idc.get_name() is used instead. Spaces (' ') are replaced with '_'.

    Args:
        ea: Integer representing the symbol address.

    Returns:
        String containing the symbol name, or None when the address
        has no name.
    """
    symbol = ida_name.get_demangled_name(
        ea, DEMANGLED_FORM, ida_ida.inf_get_demnames(), idc.GN_STRICT)
    if not symbol or symbol == "'string'":
        symbol = idc.get_name(ea)
    if symbol is None:
        return None
    return symbol.replace(" ", "_")
def is_overlay(self, addr):
    """
    Checks if memory block (segment) is an overlay.

    Args:
        addr: Integer representing a program address.

    Returns:
        True if the segment containing addr is recorded as an overlay
        in self.overlay. False otherwise, including for the C166
        processor and for addresses that belong to no segment.
    """
    if ida_idp.ph_get_id() == ida_idp.PLFM_C166:
        return False
    seg = ida_segment.getseg(addr)
    if seg is None:
        return False
    # The segment start is exposed as start_ea, the spelling used by
    # find_overlay_memory(); the old startEA alias is not available.
    # NOTE(review): find_overlay_memory() keys self.overlay by
    # translate_address(start_ea); confirm that the raw start_ea is
    # the intended lookup key here.
    return self.overlay.get(seg.start_ea, False)
def write_attribute(self, name, value):
    """
    Appends one name="value" attribute to the element being written.

    Nothing is written when the name or the value is None or empty.
    The value is passed through check_for_entities() before quoting.

    Args:
        name: String representing attribute name.
        value: String representing attribute value.
    """
    missing = name is None or value is None
    if missing or not len(name) or not len(value):
        return
    text = " " + name + '="'
    text += self.check_for_entities(value)
    self.write_to_xmlfile(text + '"')
def write_to_xmlfile(self, buf):
    """
    Sends a buffer to the XML output file and to the debug hook.

    Args:
        buf: String containing data to write to XML file.
    """
    out = self.xmlfile
    out.write(buf)
    # The same text is handed to dbg() after it has been written.
    self.dbg(buf)
+ """ + def __init__(self, as_plugin, arg=0): + """ + Initializes the XmlImporter attributes + + Args: + as_plugin: + debug: + """ + IdaXml.__init__(self, arg) + self.plugin = as_plugin + self.timers = dict() + self.addr_mode = 1 + self.create = True + self.dataseg = None + self.deferred = [] + self.callbacks = { + 'start' : { + BOOKMARKS : self.update_import, + CODE : self.update_import, + COMMENTS : self.update_import, + COMPILER : self.import_compiler, + DATA : self.update_import, + DATATYPES : self.update_import, + EQUATES : self.update_import, + FUNCTIONS : self.update_import, + INFO_SOURCE : self.import_info_source, + MARKUP : self.update_import, + MEMORY_MAP : self.import_memory_map, + PROCESSOR : self.import_processor, + PROGRAM : self.import_program, + PROGRAM_ENTRY_POINTS: self.update_import, + REGISTER_VALUES : self.update_import, + SYMBOL_TABLE : self.update_import }, + 'end' : { + BOOKMARK : self.import_bookmark, + CODE_BLOCK : self.import_codeblock, + COMMENT : self.import_comment, + DEFINED_DATA : self.import_defined_data, + DESCRIPTION : self.import_description, + ENUM : self.import_enum, + EQUATE_GROUP : self.import_equate_group, + EQUATE_REFERENCE : self.import_equate_reference, + FUNCTION : self.import_function, + FUNCTION_DEF : self.import_function_def, + MANUAL_INSTRUCTION : self.import_manual_instruction, + MANUAL_OPERAND : self.import_manual_operand, + MEMORY_REFERENCE : self.import_memory_reference, + MEMORY_SECTION : self.import_memory_section, + PROGRAM_ENTRY_POINT : self.import_program_entry_point, + REGISTER_VALUE_RANGE: self.import_register_value_range, + STACK_REFERENCE : self.import_stack_reference, + STRUCTURE : self.import_structure, + SYMBOL : self.import_symbol, + TYPE_DEF : self.import_typedef, + UNION : self.import_union, + # end element for elapse time + BOOKMARKS : self.display_timer, + CODE : self.display_timer, + COMMENTS : self.display_timer, + DATA : self.display_timer, + DATATYPES : self.process_deferred, + EQUATES : 
def import_xml(self):
    """
    Imports the XML PROGRAM file into the database.

    When running as a plugin the file name is requested from the user,
    otherwise the input file path of the database is used. The document
    is parsed incrementally; each start/end event is dispatched to the
    handler registered for the element tag in self.callbacks, and
    elements are cleared once their end handler has run.

    Returns:
        1 when the document was processed, None when no file name was
        available.

    Raises:
        Cancelled: If the user cancels while the document is parsed.
    """
    # The loop variables are published as module globals, as before.
    global event, element
    # time.clock() does not exist on Python 3.8+. Use it when present
    # (older interpreter, or patched in elsewhere) so timestamps stay
    # comparable with other readers of self.timers; otherwise fall back
    # to time.perf_counter().
    clock = getattr(time, 'clock', time.perf_counter)
    self.display_version('Importer' if self.plugin else 'Loader')
    displayMenu = not self.autorun
    self.get_options(displayMenu)
    if self.plugin:
        self.filename = ida_kernwin.ask_file(0, "*.xml",
                                             "Enter name of xml file:")
    else:
        self.filename = idc.get_input_file_path()
    if self.filename is None or len(self.filename) == 0:
        return
    idc.msg('\nImporting from: ' + self.filename + '\n')
    if not self.plugin:
        ida_kernwin.hide_wait_box()
    ida_kernwin.show_wait_box("Importing XML PROGRAM document....")
    n = 0
    for event, element in cElementTree.iterparse(self.filename,
                                                 events=("start", "end")):
        if ida_kernwin.user_cancelled():
            raise Cancelled

        if self.debug and event == 'start':
            msg = ''
            if element.tag is not None:
                msg += str(element.tag) + ' '
            if element.attrib is not None:
                msg += str(element.attrib) + ' '
            if element.text is not None:
                msg += str(element.text)
            if len(msg) > 0:
                idc.msg('\n' + msg)

        if event in self.callbacks:
            if element.tag in self.callbacks[event]:
                if event == 'start':
                    # Remember when this section started so the end
                    # handler can report the elapsed time.
                    self.timers[element.tag] = clock()
                self.callbacks[event][element.tag](element)
                if event == 'end':
                    # Release the subtree once it has been handled.
                    element.clear()
        if event == 'end':
            n += 1
    ida_kernwin.hide_wait_box()
    self.display_summary('Import' if self.plugin else "Load")
    idc.msg('\nXML Elements parsed: ' + str(n) + '\n\n')
    return 1
def display_timer(self, element):
    """
    Displays the elapsed processing time for XML elements.

    Nothing is displayed for the MEMORY_MAP element when running as a
    plugin, or for tags that have no start time in self.timers.

    Args:
        element: XML element object value containing the element tag.
    """
    if element.tag == MEMORY_MAP and self.plugin:
        return
    if element.tag not in self.timers:
        return
    # time.clock() does not exist on Python 3.8+. Use it when present
    # so the reading matches whatever stored the start time; otherwise
    # fall back to time.perf_counter().
    clock = getattr(time, 'clock', time.perf_counter)
    idc.msg('elapsed time: %.4f' % (clock() - self.timers[element.tag]))
def get_attribute_value(self, element, attr):
    """
    Returns the numeric attribute value.

    Values starting with 0x or -0x (any letter case) are parsed as
    hexadecimal, everything else as decimal.

    Args:
        element: XML element object.
        attr: String containing the attribute name.

    Returns:
        Numeric value representing the attribute value, or 0 when the
        attribute is missing or cannot be decoded (a message is written
        with idc.msg in that case).
    """
    val = element.get(attr)
    try:
        if val.upper().startswith(('0X', '-0X')):
            return int(val, 16)
        return int(val)
    except (AttributeError, ValueError):
        # AttributeError: attribute absent, so val is None.
        # ValueError: text is not a number.
        # str() keeps the report from failing with a TypeError when
        # val is None.
        idc.msg('\nUnable to decode string as value: ' + str(val))
        return 0
def get_string_type(self, datatype: str) -> int:
    """
    Returns the IDA string type constant for a string datatype name.

    The name is matched case-insensitively.

    Args:
        datatype: String representing the datatype.

    Returns:
        STRTYPE_C_16 for 'mbcstring' and for Unicode names,
        STRTYPE_LEN2_16 for names containing both 'unicode' and
        'pascal', STRTYPE_PASCAL for other names containing 'pascal',
        and STRTYPE_TERMCHR for everything else.
    """
    kind = datatype.lower()
    if kind == 'mbcstring':
        return ida_nalt.STRTYPE_C_16
    is_pascal = 'pascal' in kind
    if 'unicode' in kind:
        if is_pascal:
            return ida_nalt.STRTYPE_LEN2_16
        return ida_nalt.STRTYPE_C_16
    if is_pascal:
        # This branch used to return STRTYPE_C_16, the same constant as
        # the plain Unicode branch above, so a non-Unicode Pascal
        # string was created as a UTF-16 C string.
        # NOTE(review): STRTYPE_PASCAL assumes a one-byte length
        # prefix; confirm against the Pascal string types the XML
        # producer emits.
        return ida_nalt.STRTYPE_PASCAL
    return ida_nalt.STRTYPE_TERMCHR
def is_int(self, s) -> bool:
    """
    Returns true if the value can be parsed as a base-16 integer.

    Args:
        s: Value to test, normally a string.

    Returns:
        True if int(s, 16) succeeds, otherwise False.
    """
    try:
        int(s, 16)
    except (TypeError, ValueError):
        # TypeError: s is not a string-like value (e.g. None).
        # ValueError: s contains characters that are not hex digits.
        return False
    return True
def import_bookmark(self, bookmark):
    """
    Processes a BOOKMARK element.

    The bookmark is stored in the first slot for which
    idc.get_bookmark() returns BADADDR, using the DESCRIPTION attribute
    as its text. Any exception raised while processing is reported with
    print and not propagated.

    Args:
        bookmark: XML element object containing bookmark data.
    """
    if not self.options.Bookmarks.checked:
        return
    try:
        addr = self.get_address(bookmark, ADDRESS)
        description = ''
        if self.has_attribute(bookmark, DESCRIPTION):
            description = self.get_attribute(bookmark, DESCRIPTION)
        if not idc.is_mapped(addr):
            msg = ("import_bookmark: address %X not enabled in database"
                   % addr)
            print(msg)
            return
        self.update_counter(BOOKMARK)
        for slot in range(ida_moves.MAX_MARK_SLOT):
            if idc.get_bookmark(slot) == BADADDR:
                # First free slot found: store the bookmark and stop.
                idc.put_bookmark(addr, 0, 0, 0, slot, description)
                break
    except Exception as exc:
        # sys.exc_type / sys.exc_value do not exist in Python 3, so
        # reading them here would raise from inside the handler.
        # Report the caught exception object instead.
        msg = "** Exception occurred in import_bookmark **"
        print("\n" + msg + "\n", type(exc), exc)
def import_codeblock(self, code_block):
    """
    Processes a CODE_BLOCK element by disassembling the address range.

    Existing items in the range are deleted first, then instructions
    are created one after another from the start address up to and
    including the end address.

    Args:
        code_block: XML element containing codeblock start and end
                    addresses.
    """
    if self.options.CodeBlocks.checked:
        first = self.get_address(code_block, START)
        last = self.get_address(code_block, END)
        # presumably 3 == DELIT_EXPAND | DELIT_DELNAMES - TODO confirm
        ida_bytes.del_items(first, 3, last - first + 1)
        cursor = first
        while cursor <= last:
            ida_ua.create_insn(cursor)
            # Advance by the size of whatever item now sits at cursor.
            cursor += ida_bytes.get_item_size(cursor) * self.get_cbsize()
        self.update_counter(CODE_BLOCK)
def import_defined_data(self, defined_data):
    """
    Processes a DEFINED_DATA element by creating a data item at the
    specified address.

    String datatypes become string literals, structure datatypes become
    structure instances, and everything else is created with the flags
    for its datatype. Pointer datatypes are created as offset items.

    Args:
        defined_data: XML element containing the address and
                      datatype information for the data item
    """
    if not self.options.DataDefinitions.checked:
        return
    addr = self.get_address(defined_data, ADDRESS)
    datatype = self.get_attribute(defined_data, DATATYPE)
    size = self.get_attribute_value(defined_data, SIZE)
    self.update_counter(DEFINED_DATA)
    if self.is_pointer_type(datatype):
        # Choose the item width from the declared size so 8-byte and
        # 2-byte pointers are not created with the dword flag. Any
        # other size keeps the previous dword default.
        if size == 8:
            width_flag = ida_bytes.qword_flag()
        elif size == 2:
            width_flag = ida_bytes.word_flag()
        else:
            width_flag = ida_bytes.dword_flag()
        flag = width_flag | idc.FF_0OFF
    else:
        flag = self.get_datatype_flags(datatype, size)
    if flag == ida_bytes.strlit_flag():
        ida_bytes.create_strlit(addr, size, self.get_string_type(datatype))
    elif flag == ida_bytes.stru_flag():
        idc.create_struct(addr, size, datatype)
    else:
        idc.create_data(addr, flag, size, BADNODE)
    typecmt = defined_data.find(TYPEINFO_CMT)
    if typecmt is not None:
        # The type comment is only counted; its text is not applied.
        self.update_counter(DEFINED_DATA + ':' + TYPEINFO_CMT)
def import_enum(self, enum):
    """
    Processes an ENUM element by creating the enumeration.

    The enumeration is created with hex display and dword width, its
    regular and repeatable comments are applied, and every ENUM_ENTRY
    child is handed to import_enum_entry().

    Args:
        enum: XML element containing the enumeration name and
              member data.
    """
    if not self.options.DataTypes.checked:
        return
    enum_name = self.get_attribute(enum, NAME)
    # NAMESPACE and SIZE are read but play no part in creating the enum.
    if self.has_attribute(enum, NAMESPACE):
        self.get_attribute(enum, NAMESPACE)
    if self.has_attribute(enum, SIZE):
        self.get_attribute_value(enum, SIZE)
    enum_flags = ida_bytes.hex_flag() | ida_bytes.dword_flag()
    eid = idc.add_enum(BADNODE, enum_name, enum_flags)
    self.update_counter(ENUM)
    for cmt_tag, repeatable in ((REGULAR_CMT, False),
                                (REPEATABLE_CMT, True)):
        cmt_node = enum.find(cmt_tag)
        if cmt_node is not None:
            idc.set_enum_cmt(eid, cmt_node.text, repeatable)
            self.update_counter(ENUM + ':' + cmt_tag)
    if enum.find(DISPLAY_SETTINGS) is not None:
        # Display settings are counted only.
        self.update_counter(ENUM + ':' + DISPLAY_SETTINGS)
    for entry in enum.findall(ENUM_ENTRY):
        self.import_enum_entry(entry, eid)
def import_equate(self, equate, eid):
    """
    Processes EQUATE element as member of an enumeration.

    The member is added with the bit mask from the BIT_MASK attribute,
    or -1 when that attribute is absent, and its regular and repeatable
    comments are applied.

    Args:
        equate: XML element containing the equate name and value.
        eid: Integer representing the id for the enumeration.
    """
    member_name = self.get_attribute(equate, NAME)
    member_value = self.get_attribute_value(equate, VALUE)
    if self.has_attribute(equate, BIT_MASK):
        mask = self.get_attribute_value(equate, BIT_MASK)
    else:
        mask = -1
    idc.add_enum_member(eid, member_name, member_value, mask)
    cid = idc.get_enum_member_by_name(member_name)
    self.update_counter(EQUATE)
    for cmt_tag, repeatable in ((REGULAR_CMT, False),
                                (REPEATABLE_CMT, True)):
        cmt_node = equate.find(cmt_tag)
        if cmt_node is None:
            continue
        idc.set_enum_member_cmt(cid, cmt_node.text, repeatable)
        self.update_counter(EQUATE + ':' + cmt_tag)
def import_function(self, function):
    """
    Creates a function using the FUNCTION attributes.

    The function is created at ENTRY_POINT, optionally flagged as a
    library function, extended with one tail per ADDRESS_RANGE child,
    and then given its comments, stack frame and register variables.
    Any exception raised along the way is reported with print() and
    not re-raised.

    Args:
        function: XML element containing the function address and
            attributes.
    """
    if not self.options.Functions.checked:
        return
    try:
        entry_point = self.get_address(function, ENTRY_POINT)
        libfunc = 'n'
        if self.has_attribute(function, LIBRARY_FUNCTION):
            libfunc = self.get_attribute(function, LIBRARY_FUNCTION)
        if not idc.is_mapped(entry_point):
            msg = ("import_function: address %X not enabled in database"
                   % entry_point)
            print(msg)
            return
        idc.add_func(entry_point, BADADDR)
        self.update_counter(FUNCTION)
        func = ida_funcs.get_func(entry_point)
        if func is None:
            # add_func() did not produce a function at this address, so
            # there is nothing to attach tails, comments or frames to.
            print("import_function: unable to create function at %X"
                  % entry_point)
            return
        if libfunc == 'y':
            # NOTE(review): the flag is only set on the func_t object
            # here - confirm whether it must be written back explicitly.
            func.flags |= idc.FUNC_LIB
        for addr_range in function.findall(ADDRESS_RANGE):
            (start, end) = self.import_address_range(addr_range)
            ida_funcs.append_func_tail(func, start, end)
        # TODO: auto_wait is probably not needed...
        if AUTO_WAIT:
            ida_auto.auto_wait()
        regcmt = function.find(REGULAR_CMT)
        if regcmt is not None:
            self.update_counter(FUNCTION + ':' + REGULAR_CMT)
            ida_funcs.set_func_cmt(func, regcmt.text, False)
        rptcmt = function.find(REPEATABLE_CMT)
        if rptcmt is not None:
            self.update_counter(FUNCTION + ':' + REPEATABLE_CMT)
            ida_funcs.set_func_cmt(func, rptcmt.text, True)
        typecmt = function.find(TYPEINFO_CMT)
        if typecmt is not None:
            # TODO: TYPECMTs are counted but not applied
            self.update_counter(FUNCTION + ':' + TYPEINFO_CMT)
        sf = function.find(STACK_FRAME)
        if sf is not None:
            self.import_stack_frame(sf, func)
        for register_var in function.findall(REGISTER_VAR):
            self.import_register_var(register_var, func)
    except Exception as exc:
        # sys.exc_type / sys.exc_value do not exist in Python 3; using
        # them here raised AttributeError from inside the handler and
        # hid the original error.  Report the caught exception directly.
        msg = "** Exception occurred in import_function **"
        print("\n" + msg + "\n", type(exc), exc)
def import_member(self, member, sptr, defer=True):
    """
    Adds one MEMBER element to a structure.

    Pointer datatypes become dword offsets; every other datatype is
    mapped through get_datatype_flags().  A member whose flags come
    back as 0 is queued on self.deferred when `defer` is true.

    Args:
        member: MEMBER XML element.
        sptr: structure handle passed to idc.add_struc_member() and
            get_member().
        defer: boolean indicating if processing a member should be
            deferred when the type is unknown. A member should
            only be deferred on the first pass, not when processing
            the deferred list.
    """
    offset = self.get_attribute_value(member, OFFSET)
    datatype = self.get_attribute(member, DATATYPE)
    if self.has_attribute(member, DATATYPE_NAMESPACE):
        # read when present, but not used below
        self.get_attribute(member, DATATYPE_NAMESPACE)
    if self.has_attribute(member, NAME):
        name = self.get_attribute(member, NAME)
    else:
        name = ''
    if self.has_attribute(member, SIZE):
        size = self.get_attribute_value(member, SIZE)
    else:
        size = 0
    opinfo = ida_nalt.opinfo_t()
    if not self.is_pointer_type(datatype):
        flag = self.get_datatype_flags(datatype, size)
    else:
        flag = ida_bytes.dword_flag() | idc.FF_0OFF
        refinfo = ida_nalt.refinfo_t()
        refinfo.init(ida_nalt.get_reftype_by_size(4) |
                     ida_nalt.REFINFO_NOBASE)
        opinfo.ri = refinfo
    if flag == 0 and defer:
        # unknown type on the first pass: retry from process_deferred()
        self.deferred.append((member, sptr))
        return
    if flag == ida_bytes.enum_flag():
        enum_id = idc.get_enum(datatype)
        opinfo.ec.tid = enum_id
        # NOTE(review): `member` is an XML element here - confirm that
        # it really exposes .value and .name before relying on this.
        opinfo.ec.serial = find_enum_member_serial(enum_id, member.value,
                                                   member.name)
    if flag == ida_bytes.stru_flag():
        opinfo.tid = idc.get_struc_id(datatype)
    # the result of add_struc_member() is not checked
    idc.add_struc_member(sptr, name, offset, flag, opinfo, size)
    mbr = get_member(sptr, offset)
    self.import_member_cmts(member, mbr)
    self.update_counter(MEMBER)
def import_memory_contents(self, memory_contents, start, size):
    """
    Processes MEMORY_CONTENTS to load data for a memory block.

    The bytes are read from the file named by FILE_NAME, which is
    looked up in the same directory as the XML file, starting at
    FILE_OFFSET.

    Args:
        memory_contents: MEMORY_CONTENTS XML element.
        start: load address used when the element has no START_ADDR.
        size: byte count used when the element has no LENGTH.
    """
    if memory_contents.get(START_ADDR) is None:
        saddr = start
    else:
        saddr = self.get_address(memory_contents, START_ADDR)
    fname = self.get_attribute(memory_contents, FILE_NAME)
    offset = self.get_attribute_value(memory_contents, FILE_OFFSET)
    if memory_contents.get(LENGTH) is None:
        length = size
    else:
        length = self.get_attribute_value(memory_contents, LENGTH)
    # os.path.join() keeps the path relative when self.filename has no
    # directory part; plain concatenation with "/" turned that case
    # into "/<fname>", i.e. a file in the filesystem root.
    binfilename = os.path.join(os.path.dirname(self.filename), fname)
    binfile = ida_idaapi.loader_input_t()
    if not binfile.open(binfilename):
        print("import_memory_contents: unable to open %s" % binfilename)
        return
    try:
        binfile.file2base(offset, saddr, saddr + length, False)
    finally:
        # always release the input, even if file2base() raises
        binfile.close()
    self.update_counter(MEMORY_CONTENTS)
def import_memory_section(self, memory_section):
    """
    Creates a memory segment in the database.

    START_ADDR may be a plain address, "segment:offset", or an overlay
    address containing "::".  Overlays are skipped with a warning; a
    non-numeric segment part raises MultipleAddressSpacesNotSupported.

    Args:
        memory_section: MEMORY_SECTION XML element.

    MEMORY_SECTION is only processed by the IDA loader. It is ignored
    when run as an IDA plugin.
    """
    # TODO: import_memory_section - handle overlays?
    # import memory sections only when run as loader
    if self.plugin:
        return
    name = self.get_attribute(memory_section, NAME)
    length = self.get_attribute_value(memory_section, LENGTH)

    seg = ida_segment.segment_t()
    addrstr = self.get_attribute(memory_section, START_ADDR)
    if '::' in addrstr:
        # overlay - skip for now
        print(' ** Overlayed memory block %s skipped ** ' % name)
        idc.warning('Overlayed memory block %s skipped!' % name
                    + "\n\nXML Import does not currently support"
                    + "\noverlayed memory blocks.")
        return
    if ':' in addrstr:
        seg_str, offset_str = addrstr.split(':')
        # the offset is parsed but its value is not used
        int(offset_str, 16)
        if not self.is_int(seg_str):
            raise MultipleAddressSpacesNotSupported
        sel = ida_segment.setup_selector(int(seg_str, 16))
    else:
        sel = ida_segment.allocate_selector(0)
    start = self.get_address(memory_section, START_ADDR)

    seg.sel = sel
    seg.start_ea = start
    seg.end_ea = start + length
    seg.bitness = self.addr_mode

    if self.has_attribute(memory_section, PERMISSIONS):
        perms = self.get_attribute(memory_section, PERMISSIONS)
    else:
        perms = ''
    seg.perm = 0
    for letter, bit in (('r', ida_segment.SEGPERM_READ),
                        ('w', ida_segment.SEGPERM_WRITE),
                        ('x', ida_segment.SEGPERM_EXEC)):
        if letter in perms:
            seg.perm |= bit
    # the result of add_segm_ex() is not checked
    ida_segment.add_segm_ex(seg, name, "",
                            idc.ADDSEG_OR_DIE | idc.ADDSEG_QUIET)
    self.update_counter(MEMORY_SECTION)
    for memory_contents in memory_section.findall(MEMORY_CONTENTS):
        self.import_memory_contents(memory_contents, start, length)
def import_program(self, program):
    """
    Applies the PROGRAM element's attributes to the database.

    The status and counter are always updated.  When not running as a
    plugin, the root file name, file format name, image base and the
    original input MD5 are stored as well.

    Args:
        program: PROGRAM XML element.
    """
    self.update_status(PROGRAM)
    self.update_counter(PROGRAM)
    if self.plugin:
        return
    # EXE_PATH, when present, is used instead of NAME
    root_filename = self.get_attribute(program, NAME)
    if self.has_attribute(program, EXE_PATH):
        root_filename = self.get_attribute(program, EXE_PATH)
    idc.set_root_filename(root_filename)
    if self.has_attribute(program, EXE_FORMAT):
        exe_format = self.get_attribute(program, EXE_FORMAT)
        root_node = ida_netnode.netnode('Root Node')
        root_node.supset(ida_nalt.RIDX_FILE_FORMAT_NAME, exe_format)
    if self.has_attribute(program, IMAGE_BASE):
        ida_nalt.set_imagebase(
            self.get_attribute_value(program, IMAGE_BASE))
    if self.has_attribute(program, INPUT_MD5):
        original_md5 = self.get_attribute(program, INPUT_MD5)
        # store original md5 in a special netnode
        md5_node = ida_netnode.netnode(INPUT_MD5, len(INPUT_MD5), True)
        md5_node.supset(ida_nalt.RIDX_MD5, original_md5)
def import_register_var(self, register_var, func):
    """
    Defines a register variable for a function.

    The variable is defined over the whole function range.  The
    DATATYPE and DATATYPE_NAMESPACE attributes are not applied.

    Args:
        register_var: REGISTER_VAR XML element.
            Contains register, variable name, and datatype.
        func: IDA function object
    """
    name = self.get_attribute(register_var, NAME)
    reg = self.get_attribute(register_var, REGISTER)
    # func_t exposes its bounds as start_ea/end_ea; the startEA/endEA
    # spellings are the pre-7.0 names and are not available here.
    idc.define_local_var(func.start_ea, func.end_ea, reg, name)
    self.update_counter(REGISTER_VAR)
def import_stack_reference(self, stack_reference):
    """
    Counts a STACK_REFERENCE element.

    Importing stack references is NOT IMPLEMENTED; the element is only
    added to the counters.

    Args:
        stack_reference: STACK_REFERENCE XML element (not examined).
    """
    self.update_counter(STACK_REFERENCE)
def _unique_type_name(self, element):
    """
    Picks a name for a STRUCTURE or UNION that is not already taken.

    Args:
        element: STRUCTURE or UNION XML element.

    Returns:
        The NAME attribute when no structure with that name exists.
        Otherwise the name prefixed with NAMESPACE and '__', with '/'
        and '.' replaced by '_'.  None when the element has no
        NAMESPACE or the qualified name is taken as well.
    """
    name = self.get_attribute(element, NAME)
    if idc.get_struc_id(name) == BADNODE:
        return name
    # duplicate name, try adding name space
    if not self.has_attribute(element, NAMESPACE):
        return None
    namespace = self.get_attribute(element, NAMESPACE)
    # str.replace() returns a new string; the result has to be kept,
    # otherwise the '/' and '.' characters stay in the name.
    name = (namespace + '__' + name).replace('/', '_').replace('.', '_')
    # skip if still duplicate (could add sequence #)
    if idc.get_struc_id(name) != BADNODE:
        return None
    return name


def import_structure(self, structure):
    """
    Adds a structure.

    The structure is skipped when its name (and namespaced name) is
    already in use.  The VARIABLE_LENGTH attribute is not applied.

    Args:
        structure: STRUCTURE XML element.
            Contains the STRUCTURE attributes and child elements.
    """
    if not self.options.DataTypes.checked:
        return
    name = self._unique_type_name(structure)
    if name is None:
        return
    size = 0
    if self.has_attribute(structure, SIZE):
        size = self.get_attribute_value(structure, SIZE)
    sid = idc.add_struc(-1, name, 0)
    sptr = get_struc(sid)
    self.update_counter(STRUCTURE)
    self.import_cmts(structure, sid, STRUCTURE)
    self.import_members(structure, sptr)
    current_size = idc.get_struc_size(sid)
    if current_size is not None and current_size < size:
        # the members ended short of SIZE: add an unnamed byte at the
        # last offset
        idc.add_struc_member(sid, "", size - 1, ida_bytes.byte_flag(),
                             ida_nalt.opinfo_t(), 1)


def import_symbol(self, symbol):
    """
    Adds a symbol name at the specified address.

    The MANGLED attribute, when present, is used instead of NAME, and
    a TYPE of 'local' makes the name local.

    Args:
        symbol: SYMBOL XML element.
            Contains symbol name and address. Optionally includes
            type and mangled symbol.
    """
    if not self.options.Symbols.checked:
        return
    addr = self.get_address(symbol, ADDRESS)
    name = self.get_attribute(symbol, NAME)
    if self.has_attribute(symbol, MANGLED):
        name = self.get_attribute(symbol, MANGLED)
    flag = idc.SN_NOWARN
    if self.has_attribute(symbol, TYPE):
        typ = self.get_attribute(symbol, TYPE)
        if typ == 'local':
            flag |= idc.SN_LOCAL
    idc.set_name(addr, name, flag)
    self.update_counter(SYMBOL)


def import_typedef(self, type_def):
    """
    Counts a TYPE_DEF element.

    Importing typedefs is NOT IMPLEMENTED; the element is only added
    to the counters when the DataTypes option is checked.

    Args:
        type_def: TYPE_DEF XML element (not examined).
    """
    if not self.options.DataTypes.checked:
        return
    self.update_counter(TYPE_DEF)


def import_union(self, union):
    """
    Adds a union datatype.

    The union is skipped when its name (and namespaced name) is
    already in use.

    Args:
        union: UNION XML element.
            Contains UNION attributes and child elements.
    """
    if not self.options.DataTypes.checked:
        return
    name = self._unique_type_name(union)
    if name is None:
        return
    size = 0
    if self.has_attribute(union, SIZE):
        size = self.get_attribute_value(union, SIZE)
    sid = idc.add_struc(BADADDR, name, True)
    sptr = get_struc(sid)
    self.update_counter(UNION)
    self.import_cmts(union, sid, UNION)
    self.import_members(union, sptr)
    current_size = idc.get_struc_size(sid)
    if current_size is not None and current_size < size:
        # the members ended short of SIZE: add an unnamed byte at the
        # last offset
        idc.add_struc_member(sid, "", size - 1, ida_bytes.byte_flag(),
                             ida_nalt.opinfo_t(), 1)
# Global constants
# mangled name inhibit flags are not currently exposed in python api
# inhibit flags for symbol names
# DEMANGLED_FORM (MNG_SHORT_FORM | MNG_NOBASEDT | MNG_NOCALLC | MNG_NOCSVOL)
DEMANGLED_FORM = 0x0ea3ffe7
# inhibit flags for typeinfo cmts
# DEMANGLED_TYPEINFO (MNG_LONG_FORM)
DEMANGLED_TYPEINFO = 0x06400007


# Global XML string constants for elements and attributes.
# Each constant's value is identical to its own name.
ADDRESS = 'ADDRESS'
ADDRESS_MODEL = 'ADDRESS_MODEL'
ADDRESS_RANGE = 'ADDRESS_RANGE'
BASE_ADDRESS = 'BASE_ADDRESS'
BIT_FIELD = 'BIT_FIELD'
BIT_MAPPED = 'BIT_MAPPED'
BIT_MASK = 'BIT_MASK'
BOOKMARK = 'BOOKMARK'
BOOKMARKS = 'BOOKMARKS'
BYTES = 'BYTES'
BYTES_PURGED = 'BYTES_PURGED'
CATEGORY = 'CATEGORY'
CODE = 'CODE'
CODE_BLOCK = 'CODE_BLOCK'
COMMENT = 'COMMENT'
COMMENTS = 'COMMENTS'
COMPILER = 'COMPILER'
DATA = 'DATA'
DATATYPE = 'DATATYPE'
DATATYPES = 'DATATYPES'
DATATYPE_NAMESPACE = 'DATATYPE_NAMESPACE'
DEFINED_DATA = 'DEFINED_DATA'
DESCRIPTION = 'DESCRIPTION'
DISPLAY_SETTINGS = 'DISPLAY_SETTINGS'
END = 'END'
ENDIAN = 'ENDIAN'
ENTRY_POINT = 'ENTRY_POINT'
ENUM = 'ENUM'
ENUM_ENTRY = 'ENUM_ENTRY'
EQUATE = 'EQUATE'
EQUATES = 'EQUATES'
EQUATE_GROUP = 'EQUATE_GROUP'
EQUATE_REFERENCE = 'EQUATE_REFERENCE'
EXE_FORMAT = 'EXE_FORMAT'
EXE_PATH = 'EXE_PATH'
EXT_LIBRARY = 'EXT_LIBRARY'
EXT_LIBRARY_REFERENCE = 'EXT_LIBRARY_REFERENCE'
EXT_LIBRARY_TABLE = 'EXT_LIBRARY_TABLE'
FAMILY = 'FAMILY'
FILE = 'FILE'
FILE_NAME = 'FILE_NAME'
FILE_OFFSET = 'FILE_OFFSET'
FOLDER = 'FOLDER'
FORMAT = 'FORMAT'
FRAGMENT = 'FRAGMENT'
FRAME_PTR_OFFSET = 'FRAME_PTR_OFFSET'
FUNCTION = 'FUNCTION'
FUNCTIONS = 'FUNCTIONS'
FUNCTION_DEF = 'FUNCTION_DEF'
IMAGE_BASE = 'IMAGE_BASE'
INPUT_MD5 = 'INPUT_MD5'
INFO_SOURCE = 'INFO_SOURCE'
LANGUAGE_PROVIDER = 'LANGUAGE_PROVIDER'
LENGTH = 'LENGTH'
LIB_ADDR = 'LIB_ADDR'
LIB_LABEL = 'LIB_LABEL'
LIB_ORDINAL = 'LIB_ORDINAL'
LIB_PROG_NAME = 'LIB_PROG_NAME'
LIBRARY_FUNCTION = 'LIBRARY_FUNCTION'
LOCAL_VAR_SIZE = 'LOCAL_VAR_SIZE'
MANGLED = 'MANGLED'
MANUAL_INSTRUCTION = 'MANUAL_INSTRUCTION'
MANUAL_OPERAND = 'MANUAL_OPERAND'
MARKUP = 'MARKUP'
MEMBER = 'MEMBER'
MEMORY_CONTENTS = 'MEMORY_CONTENTS'
MEMORY_MAP = 'MEMORY_MAP'
MEMORY_REFERENCE = 'MEMORY_REFERENCE'
MEMORY_SECTION = 'MEMORY_SECTION'
NAME = 'NAME'
NAMESPACE = 'NAMESPACE'
OFFSET = 'OFFSET'
OPERAND_INDEX = 'OPERAND_INDEX'
PARAM_OFFSET = 'PARAM_OFFSET'
PATH = 'PATH'
PERMISSIONS = 'PERMISSIONS'
PRIMARY = 'PRIMARY'
PROCESSOR = 'PROCESSOR'
PROGRAM = 'PROGRAM'
PROGRAM_ENTRY_POINT = 'PROGRAM_ENTRY_POINT'
PROGRAM_ENTRY_POINTS = 'PROGRAM_ENTRY_POINTS'
PROGRAM_TREES = 'PROGRAM_TREES'
PROPERTIES = 'PROPERTIES'
PROPERTY = 'PROPERTY'
REGISTER = 'REGISTER'
REGISTER_SAVE_SIZE = 'REGISTER_SAVE_SIZE'
REGISTER_VALUES = 'REGISTER_VALUES'
REGISTER_VALUE_RANGE = 'REGISTER_VALUE_RANGE'
REGISTER_VAR = 'REGISTER_VAR'
REGULAR_CMT = 'REGULAR_CMT'
RELOCATION = 'RELOCATION'
RELOCATION_TABLE = 'RELOCATION_TABLE'
REPEATABLE_CMT = 'REPEATABLE_CMT'
RETURN_ADDR_SIZE = 'RETURN_ADDR_SIZE'
RETURN_TYPE = 'RETURN_TYPE'
SHOW_TERMINATOR = 'SHOW_TERMINATOR'
SIGNED = 'SIGNED'
SIZE = 'SIZE'
SOURCE_ADDRESS = 'SOURCE_ADDRESS'
SOURCE_TYPE = 'SOURCE_TYPE'
STACK_FRAME = 'STACK_FRAME'
STACK_PTR_OFFSET = 'STACK_PTR_OFFSET'
STACK_REFERENCE = 'STACK_REFERENCE'
STACK_VAR = 'STACK_VAR'
START = 'START'
START_ADDR = 'START_ADDR'
START_ADDRESS = 'START_ADDRESS'
STRUCTURE = 'STRUCTURE'
SYMBOL = 'SYMBOL'
SYMBOL_TABLE = 'SYMBOL_TABLE'
TIMESTAMP = 'TIMESTAMP'
TOOL = 'TOOL'
TO_ADDRESS = 'TO_ADDRESS'
TREE = 'TREE'
TYPE = 'TYPE'
TYPEINFO_CMT = 'TYPEINFO_CMT'
TYPE_DEF = 'TYPE_DEF'
UNION = 'UNION'
USER = 'USER'
USER_DEFINED = 'USER_DEFINED'
VALUE = 'VALUE'
VARIABLE_LENGTH = 'VARIABLE_LENGTH'
ZERO_PAD = 'ZERO_PAD'
a/GhidraBuild/IDAPro/README.html +++ b/GhidraBuild/IDAPro/README.html @@ -10,7 +10,7 @@ The plugins also facilitate transfer from Ghidra to IDA.

- The plugin is provided in Python for IDA versions 6 and 7. + The plugin is provided in Python for IDA versions 6, 7, and 9. See the README file within each for further instruction.

diff --git a/GhidraBuild/IDAPro/certification.manifest b/GhidraBuild/IDAPro/certification.manifest index d04b9197b3..12ed1a902a 100644 --- a/GhidraBuild/IDAPro/certification.manifest +++ b/GhidraBuild/IDAPro/certification.manifest @@ -1,4 +1,5 @@ ##VERSION: 2.0 Python/6xx/README.html||GHIDRA||||END| Python/7xx/README.html||GHIDRA||||END| +Python/9xx/README.html||GHIDRA||||END| README.html||GHIDRA||||END|