ghidra/GhidraBuild/IDAPro/Python/9xx/python/idaxml.py
2025-08-20 12:10:48 +08:00

3783 lines
138 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

## ###
# IP: GHIDRA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##
#---------------------------------------------------------------------
# idaxml.py - IDA XML classes
#---------------------------------------------------------------------
"""
"""
from __future__ import print_function
from typing import Optional
import ida_auto
import ida_bytes
import ida_diskio
import ida_fpro
import ida_frame
import ida_funcs
import ida_ida
import ida_idaapi
import ida_idp
import ida_hexrays
import ida_kernwin
import ida_lines
import ida_loader
import ida_moves
import ida_nalt
import ida_name
import ida_netnode
import ida_pro
import ida_segment
import ida_segregs
import ida_typeinf
import ida_ua
import ida_xref
import idautils
import idc
import datetime
import os
import sys
import time
from xml.etree import cElementTree
# Python 3 compatibility shims for names still used by this module.
if sys.version_info.major >= 3:
    import copy
    # Methods in this module call time.clock(); route it to perf_counter.
    setattr(time, 'clock', time.perf_counter)
    _exc_info = copy.copy(sys.exc_info)
    # These attributes hold a snapshot of sys.exc_info() taken once, when
    # this module is imported; they are not refreshed afterwards.
    setattr(sys, 'exc_value', _exc_info()[1])
    setattr(sys, 'exc_type', _exc_info()[0])
DEBUG = 0 # print debug statements
# Version of this module, printed by IdaXml.display_version().
IDAXML_VERSION = "5.0.2"
# Minimum IDA SDK version accepted by is_ida_version_supported(), and the
# human readable form of it used in the "not supported" message.
BASELINE_IDA_VERSION = 900
BASELINE_STR = '9.00'
# Short aliases for values provided by the IDA modules.
IDA_SDK_VERSION = ida_pro.IDA_SDK_VERSION
BADADDR = idc.BADADDR
BADNODE = ida_netnode.BADNODE
# NOTE(review): PLUGIN / LOADER / AUTO_WAIT are not read in the visible part
# of this file; presumably they are mode flags used by callers - confirm.
PLUGIN = True
LOADER = not PLUGIN
AUTO_WAIT = True
def is_ida_version_supported():
    '''
    Checks the running IDA SDK version against the baseline version
    required by this idaxml module.

    A two-part notice is printed to the IDA output window when the
    version is too old.

    Returns:
        True if IDA version is supported, else False.
    '''
    if IDA_SDK_VERSION >= BASELINE_IDA_VERSION:
        return True
    notice = ('\nThe IDA XML plugins and loader are not supported ' +
              'by this version of IDA.\n')
    advice = ('Please use IDA ' + BASELINE_STR + ' or greater ' +
              'with this version of XML.\n')
    idc.msg(notice)
    idc.msg(advice)
    return False
def get_struc(sid: int) -> Optional[ida_typeinf.tinfo_t]:
    """
    Looks up the type with the given type id.

    Args:
        sid: Integer type id passed to tinfo_t.get_type_by_tid().
    Returns:
        The tinfo_t when it reports is_udt(), otherwise None. None is
        also returned when the lookup raises ValueError.
    """
    try:
        candidate = ida_typeinf.tinfo_t()
        candidate.get_type_by_tid(tid=sid)
        if candidate.is_udt():
            return candidate
        return None
    except ValueError:
        return None
def get_member(sid: int, offset: int) -> Optional[ida_typeinf.udm_t]:
    """
    Finds the member of a structure/union located at an offset.

    Args:
        sid: Integer type id of the structure/union.
        offset: Value stored unchanged in udm_t.offset for the search
            (NOTE(review): presumably expressed in bits - confirm).
    Returns:
        The filled-in udm_t, or None when the type is not a
        structure/union or no member was found.
    """
    owner = get_struc(sid)
    if owner is not None:
        probe = ida_typeinf.udm_t()
        probe.offset = offset
        if owner.find_udm(probe, ida_typeinf.STRMEM_AUTO) != -1:
            return probe
    return None
def get_member_by_idx(sid: int, idx: int) -> Optional[ida_typeinf.udm_t]:
    """
    Fetches a structure/union member by its index.

    Args:
        sid: Integer type id of the structure/union.
        idx: Integer index of the member.
    Returns:
        The udm_t describing the member, or None when either the member
        id lookup or the udm lookup reports -1.
    """
    member_tid = idc.get_member_by_idx(sid, idx)
    found = ida_typeinf.udm_t()
    if member_tid == -1:
        return None
    if ida_typeinf.tinfo_t().get_udm_by_tid(found, member_tid) == -1:
        return None
    return found
def get_frame(pfn) -> Optional[ida_typeinf.tinfo_t]:
    """
    Returns the stack frame type of a function.

    Args:
        pfn: Function object passed to tinfo_t.get_func_frame().
    Returns:
        The frame tinfo_t, or None when get_func_frame() fails.
    """
    frame = ida_typeinf.tinfo_t()
    if frame.get_func_frame(pfn):
        return frame
    return None
def get_sptr(udm: ida_typeinf.udm_t) -> Optional[ida_typeinf.tinfo_t]:
    """
    Returns the type of a member when that type is a structure.

    Args:
        udm: Member whose type is inspected.
    Returns:
        udm.type when it is both a udt and a struct, otherwise None.
    """
    member_type = udm.type
    if member_type.is_udt() and member_type.is_struct():
        return member_type
    return None
def _iter_enum_ids():
    """Yield the type id of every numbered type that is an enum."""
    for ordinal in range(1, ida_typeinf.get_ordinal_limit()):
        candidate = ida_typeinf.tinfo_t()
        candidate.get_numbered_type(None, ordinal)
        if not candidate.is_enum():
            continue
        yield candidate.get_tid()
def get_struc_qty():
    """
    Counts the numbered types that can be fetched as structures.

    Returns:
        Number of ordinals in [1, ordinal limit) for which
        get_numbered_type(ordinal, BTF_STRUCT) succeeds.
    """
    limit = ida_typeinf.get_ordinal_limit()
    return sum(
        1
        for ordinal in range(1, limit)
        if ida_typeinf.tinfo_t().get_numbered_type(ordinal,
                                                   ida_typeinf.BTF_STRUCT)
    )
def get_enum_member_tid(eid: int, i: int) -> int:
    """
    Returns the type id of the i-th member of an enum.

    Args:
        eid: Integer type id of the enum.
        i: Integer index of the member.
    Returns:
        The member's tid, or BADADDR when the enum lookup raises
        ValueError or get_edm() returns a negative value.
    """
    try:
        enum_tif = ida_typeinf.tinfo_t()
        enum_tif.get_type_by_tid(tid=eid)
    except ValueError:
        return BADADDR
    member = ida_typeinf.edm_t()
    if enum_tif.get_edm(member, i) < 0:
        return BADADDR
    return member.get_tid()
def find_enum_member_serial(enum_id: int, member_value: int, member_name: str):
    """
    Return the serial of the first enum constant in `enum_id` whose name
    equals `member_name` or whose value equals `member_value`.
    Returns -1 when the enum cannot be loaded or nothing matches.
    """
    try:
        enum_tif = ida_typeinf.tinfo_t()
        enum_tif.get_type_by_tid(tid=enum_id)
    except ValueError:
        return -1
    details = ida_typeinf.enum_type_data_t()
    if not enum_tif.get_enum_details(details):
        return -1
    position = next(
        (pos for pos, member in enumerate(details)
         if member.name == member_name or member.value == member_value),
        None)
    if position is None:
        return -1
    return details.get_serial(position)
class Cancelled(Exception):
    """Raised when an operation is abandoned, e.g. no export file name."""
class FileError(Exception):
    """Raised when a required file cannot be located or opened."""
class MultipleAddressSpacesNotSupported(Exception):
    """
    Error type for input that uses more than one address space.
    NOTE(review): the raising code is outside the visible part of this
    file - confirm the exact trigger.
    """
class IdaXml:
    """
    Common state and helpers shared by the IDA XML tools.

    Keeps a per-tag count of processed XML elements, owns the XML file
    handle and the options object, and reports progress in the IDA
    output window.
    """
    def __init__(self, arg):
        """
        Initializes the common attributes.
        Args:
            arg: Integer, non-zero value enables auto-run mode.
        """
        self.autorun = arg != 0
        self.debug = DEBUG
        # tag -> index into self.counters; self.tags keeps first-seen order
        self.elements = {}
        self.counters = []
        self.tags = []
        # 0 means "no file open"
        self.xmlfile = 0
        self.options = None

    def cleanup(self):
        """
        Frees memory and closes message box and XML file at termination.
        """
        if self.options is not None:
            self.options.Free()
        ida_kernwin.hide_wait_box()
        self.close_xmlfile()

    def close_xmlfile(self):
        """
        Closes the XML data file for the XML Exporter.
        Does nothing when no file is open.
        """
        if self.xmlfile != 0:
            self.xmlfile.close()
            self.xmlfile = 0

    def dbg(self, message):
        """
        Outputs debug message if debug flag is enabled.
        Args:
            message: String containing the debug message.
        """
        if self.debug:
            idc.msg(message)

    def display_summary(self, what):
        """
        Displays summary in IDA output window.
        When not in auto-run mode an information dialog naming
        self.filename is shown as well.
        Args:
            what: String placed in the dialog title (e.g. 'Export').
        """
        rule = "\n--------------------------------------"
        rows = []
        total = 0
        for tag in self.tags:
            count = self.counters[self.elements[tag]]
            rows.append("\n%-26s %8d" % (tag, count))
            total += count
        summary = rule + ''.join(rows) + rule
        summary += ("\n%-26s %8d" % ("Total XML Elements:", total))
        idc.msg(summary)
        if not self.autorun:  # and self.plugin:
            frmt = "TITLE XML " + what + " Successful!\n"
            frmt += "ICON INFO\n"
            frmt += "AUTOHIDE NONE\n"
            frmt += "HIDECANCEL\n"
            fileline = '\n\nFile: %s' % self.filename
            details = '\nSee output window for details...'
            ida_kernwin.info("%s" % (frmt + fileline + details))

    def display_version(self, what):
        """
        Displays XML version info in IDA output window.
        The timestamp shown is the modification time of the first
        idaxml.py found in the IDA python directories.
        Args:
            what: String indicating Exporter, Importer, or Loader
        Exceptions:
            Raises FileError if idaxml.py is found in none of the
            searched directories.
        """
        python_dir = ida_diskio.idadir('python')
        # searched in this order; first existing file wins
        candidates = (
            os.path.join(python_dir, 'idaxml.py'),
            os.path.join(python_dir, '3', 'idaxml.py'),
            os.path.join(python_dir, '2', 'idaxml.py'),
            os.path.join(ida_diskio.get_user_idadir(), 'python', 'idaxml.py'),
        )
        f = next((path for path in candidates if os.path.isfile(path)), None)
        if f is None:
            msg = "Error opening file " + candidates[0] + " !\n"
            idc.msg(msg)
            raise FileError
        ftime = time.localtime(os.path.getmtime(f))
        ts = time.strftime('%b %d %Y %H:%M:%S', ftime)
        version = "\nXML " + what + " v" + IDAXML_VERSION
        version += " : SDK " + str(IDA_SDK_VERSION)
        version += " : Python : " + ts + '\n'
        idc.msg(version)

    def open_file(self, filename, mode):
        """
        Opens filename to specified mode.
        Args:
            filename: String representing absolute filepath.
            mode: String representing mode for open.
        Returns
            File handle.
        Exceptions:
            Displays a warning and raises FileError exception
            if open fails.
        """
        try:
            return open(filename, mode)
        except Exception as err:
            fmt = "TITLE ERROR!\n"
            fmt += "ICON ERROR\n"
            fmt += "AUTOHIDE NONE\n"
            fmt += "HIDECANCEL\n"
            # %s keeps the message buildable even for a non-string filename
            fmt += "Error opening file %s!\n" % (filename,)
            idc.warning(fmt)
            raise FileError from err

    def update_counter(self, tag):
        """
        Updates the counter for the element tag.
        A tag seen for the first time is registered with a count of 1.
        Args:
            tag: String representing element tag.
        """
        if tag in self.elements:
            self.counters[self.elements[tag]] += 1
        else:
            self.elements[tag] = len(self.elements)
            self.counters.append(1)
            self.tags.append(tag)

    def update_status(self, tag):
        """
        Displays the processing status in the IDA window.
        Args:
            tag: String representing XML element tag
        """
        status = 'Processing ' + tag
        idc.msg('\n%-35s' % status)
        # replace whatever wait box is currently shown
        ida_kernwin.hide_wait_box()
        ida_kernwin.show_wait_box(status)
class XmlExporter(IdaXml):
"""
XML Exporter contains methods to export an IDA database as an
XML PROGRAM document.
"""
def __init__(self, arg):
    """
    Sets up the exporter state on top of the common IdaXml attributes.
    Args:
        arg: Integer, non-zero value enables auto-run feature for
            IDA batch (no gui) processing mode.
    """
    IdaXml.__init__(self, arg)
    # exporter bookkeeping, all start in their "off" state
    self.hexrays = False
    self.has_overlays = False
    self.seg_addr = False
    self.indent_level = 0
    # values read once from the database / processor module
    self.min_ea = ida_ida.inf_get_min_ea()
    self.max_ea = ida_ida.inf_get_max_ea()
    bits_per_unit = ida_idp.ph_get_cnbits()
    self.cbsize = (bits_per_unit + 7) // 8
    self.processor = ida_idp.get_idp_name().upper()
    self.batch = ida_kernwin.cvar.batch
def export_xml(self):
    """
    Exports the IDA database to a XML PROGRAM document file.

    The output name is the IDB path with an ".xml" extension in auto-run
    mode, otherwise the user is asked for it. Which sections are written
    depends on the checked options.

    Exceptions:
        Raises Cancelled when no file name is available.
    """
    self.display_version('Exporter')
    self.check_and_load_decompiler()
    self.get_options()
    if self.autorun:
        stem, _ext = os.path.splitext(idc.get_idb_path())
        self.filename = stem + ".xml"
    else:
        self.filename = ida_kernwin.ask_file(
            1, "*.xml", "Enter name of export xml file:")
    if not self.filename:
        raise Cancelled
    self.xmlfile = self.open_file(self.filename, "w")
    ida_kernwin.show_wait_box("Exporting XML <PROGRAM> document ....")
    idc.msg("\n------------------------------------------------" +
            "-----------")
    idc.msg("\nExporting XML <PROGRAM> document ....")
    begin = time.clock()
    self.write_xml_declaration()
    self.export_program()
    # export database items based on options
    opts = self.options
    if (opts.DataTypes.checked or opts.DataDefinitions.checked or
            opts.Functions.checked):
        self.export_datatypes()
    if opts.MemorySections.checked or opts.MemoryContent.checked:
        self.export_memory_map()
    if opts.RegisterValues.checked:
        self.export_register_values()
    if opts.CodeBlocks.checked:
        self.export_code()
    if opts.DataDefinitions.checked:
        self.export_data()
    if opts.Comments.checked:
        # bookmarks travel with the comments option
        self.export_comments()
        self.export_bookmarks()
    if opts.EntryPoints.checked:
        self.export_program_entry_points()
    if opts.Symbols.checked:
        self.export_symbol_table()
    if opts.Functions.checked:
        self.export_functions()
    if (opts.MemoryReferences.checked or opts.StackReferences.checked or
            opts.Manual.checked or opts.DataTypes.checked):
        self.export_markup()
    self.end_element(PROGRAM)
    idc.msg('\n%35s' % 'Total ')
    self.display_cpu_time(begin)
    ida_kernwin.hide_wait_box()
    self.display_summary('Export')
    idc.msg('\nDatabase exported to: ' + self.filename + '\n')
# TODO: Test decompiler comments in batch and gui modes
def check_and_load_decompiler(self):
    """
    Records in self.hexrays whether a decompiler is available.

    Outside batch mode only init_hexrays_plugin() is called. In batch
    mode the decompiler plugin matching the processor (PC or ARM) and
    bitness is loaded first; any exception while loading is ignored and
    self.hexrays is left unchanged.
    """
    if self.batch == 0:
        self.hexrays = ida_hexrays.init_hexrays_plugin()
        return
    # processor -> (64-bit plugin, 32-bit plugin)
    plugins_by_processor = {
        'PC': ("hexx64", 'hexrays'),
        'ARM': ("hexarm64", "hexarm"),
    }
    plugin = ''
    pair = plugins_by_processor.get(self.processor)
    if pair is not None:
        if ida_ida.inf_is_64bit():
            plugin = pair[0]
        elif ida_ida.inf_is_32bit_exactly():
            plugin = pair[1]
    if not plugin:
        return
    try:
        ida_loader.load_plugin(plugin)
        self.hexrays = ida_hexrays.init_hexrays_plugin()
    except Exception:
        return
def check_char(self, ch: str) -> str:
    """
    Replaces a special XML character with an entity string.

    Characters that may not appear in an XML 1.0 document, even as a
    numeric character reference, are dropped: control characters other
    than TAB/LF/CR, DEL, surrogate code points and U+FFFE/U+FFFF.

    Args:
        ch: String containing the character to check. An integer code
            point is accepted as well.
    Returns:
        String containing either the character, the entity
        substitution string, or '' when the character is dropped.
    """
    if isinstance(ch, int):
        ch = chr(ch)
    code = ord(ch)
    if code < 0x20 and code not in (0x09, 0x0A, 0x0D):
        return ''
    if ch == '&':
        return '&amp;'
    if ch == '<':
        return "&lt;"
    if ch == '>':
        return "&gt;"
    if ch == '\'':
        return "&apos;"
    if ch == '"':
        return "&quot;"
    if code == 0x7F:
        return ''
    if code > 0x7F:
        # a reference to a non-character makes the document ill-formed
        if 0xD800 <= code <= 0xDFFF or code in (0xFFFE, 0xFFFF):
            return ''
        return '&#x' + format(code, "x") + ";"
    return ch
def check_for_entities(self, text: str) -> str:
"""
Checks all characters in a string for special XML characters.
Args:
text: String to check for special XML characters.
Returns:
String containing original string with substitutions for
any special XML characters.
"""
return ''.join(map(self.check_char, text))
def check_if_seg_contents(self, seg):
    """
    Reports whether any item head in a segment holds a value.
    Args:
        seg: IDA segment object
    Returns:
        True if at least one head between seg.start_ea and seg.end_ea
        has a value, False otherwise.
    """
    return any(
        idc.has_value(idc.get_full_flags(head))
        for head in idautils.Heads(seg.start_ea, seg.end_ea)
    )
def check_stack_frame(self, sframe: ida_typeinf.tinfo_t) -> bool:
    """
    Determines if stack frame contains any parameters or local variables.
    Members named " s" and " r" and members without a name do not count.
    Args:
        sframe: IDA stack frame for a function.
    Returns:
        True if stack frame has parameters or local variables.
        False if stack frame has no parameters or local variables.
    """
    special_names = (" s", " r")
    for index in range(sframe.get_udt_nmembers()):
        member = get_member_by_idx(sframe.get_tid(), index)
        if member is None:
            continue
        mname = member.name
        if not mname:
            continue
        if mname not in special_names:
            return True
    return False
def close_binfile(self):
"""
Closes the binary data file for the XML Exporter.
"""
if self.binfile != 0:
self.binfile.close()
self.binfile = 0
def close_tag(self, has_contents=False):
    """
    Closes the start tag for an XML element.
    An element with contents is left open (">") and the indent level is
    increased; otherwise the element is self-closed (" />").
    Args:
        has_contents: Boolean indicating if the element has
            sub-elements or text.
    """
    if not has_contents:
        self.write_to_xmlfile(" />")
        return
    self.write_to_xmlfile(">")
    self.indent_level += 1
def display_cpu_time(self, start):
    """
    Prints the time elapsed since the start time to the output window.
    Args:
        start: Floating-point value representing start time in seconds,
            as returned by time.clock().
    """
    elapsed = time.clock() - start
    idc.msg('CPU time: %6.4f' % elapsed)
def end_element(self, tag, newline=True):
    """
    Writes the element end tag to the XML file.
    The indent level is decreased before the tag is written.
    Args:
        tag: String containing the element name.
        newline: Boolean indicating if end tag should go on new line.
    """
    self.indent_level -= 1
    prefix = ('\n' + (" " * self.indent_level)) if newline else ''
    self.write_to_xmlfile(prefix + "</" + tag + ">")
'''
# BIT_MASK not currently supported for ENUM
def export_bitmask(self, eid, mask):
"""
Exports an enum bitmask member as BIT_MASK element.
Args:
eid: Integer representing the IDA enum id
mask: Integer representing the IDA enum mask value
"""
name = idc.get_bmask_name(eid, mask)
if name is None:
return
self.start_element(BIT_MASK)
self.write_attribute(NAME, name)
self.write_numeric_attribute(VALUE, mask)
regcmt = idc.get_bmask_cmt(eid, mask, False)
rptcmt = idc.get_bmask_cmt(eid, mask, True)
has_comment = regcmt is not None or rptcmt is not None
self.close_tag(has_comment)
if regcmt is not None and len(regcmt) > 0:
self.export_regular_cmt(regcmt)
if rptcmt is not None and len(rptcmt) > 0:
self.export_repeatable_cmt(rptcmt)
if (has_comment):
self.end_element(BIT_MASK)
'''
def export_bookmarks(self):
    """
    Exports marked location descriptions as BOOKMARK elements.
    Slots 0..1024 are probed; the enclosing BOOKMARKS element is only
    written when at least one slot holds a bookmark.
    """
    wrote_any = False
    timer = time.clock()
    for slot in range(1025):
        address = idc.get_bookmark(slot)
        description = idc.get_bookmark_desc(slot)
        if address == BADADDR or description is None:
            continue
        if not wrote_any:
            # open the container lazily, on the first real bookmark
            wrote_any = True
            self.update_status(BOOKMARKS)
            self.start_element(BOOKMARKS, True)
        self.start_element(BOOKMARK)
        self.write_address_attribute(ADDRESS, address)
        self.write_attribute(DESCRIPTION, description)
        self.close_tag()
    if not wrote_any:
        return
    self.end_element(BOOKMARKS)
    self.display_cpu_time(timer)
def export_c_comments(self):
"""
Exports block and end-of-line comments entered in the decompiler
interface.
"""
if not self.hexrays:
return
functions = idautils.Functions()
if functions is None:
return
for addr in functions:
try:
if ida_segment.is_spec_ea(addr):
continue
ccmts = ida_hexrays.restore_user_cmts(addr)
if ccmts is None:
continue
p = ida_hexrays.user_cmts_begin(ccmts)
while p != ida_hexrays.user_cmts_end(ccmts):
cmk = ida_hexrays.user_cmts_first(p)
cmv = ida_hexrays.user_cmts_second(p)
if cmk.itp < (ida_hexrays.ITP_COLON+1):
self.export_comment(cmk.ea, "end-of-line", cmv.c_str())
else:
self.export_comment(cmk.ea, "pre", cmv.c_str())
p=ida_hexrays.user_cmts_next(p)
ida_hexrays.user_cmts_free(ccmts)
except Exception:
continue
def export_code(self):
    """
    Exports the address ranges of code sequences as CODE_BLOCK(s)
    with START and END address attributes.

    Walks from the first code address to self.max_ea. A block ends at
    the next data or unknown address, or at the end of the segment that
    holds its start, whichever comes first. Nothing is written when the
    database contains no code.
    """
    addr = self.min_ea
    if not idc.is_code(idc.get_full_flags(addr)):
        addr = ida_bytes.next_that(addr, self.max_ea, idc.is_code)
    if (addr == BADADDR):
        return
    self.update_status(CODE)
    timer = time.clock()
    # next data / unknown address after the current code address
    data = ida_bytes.next_that(addr, self.max_ea, idc.is_data)
    unknown = ida_bytes.next_unknown(addr, self.max_ea)
    self.start_element(CODE, True)
    while (addr != BADADDR):
        start = addr
        end = min(data, unknown)
        if (end == BADADDR):
            # no data or unknown bytes ahead
            if (ida_segment.getseg(start).end_ea < self.max_ea):
                # block stops at the segment end; continue in the
                # segment that follows
                codeend = ida_segment.getseg(start).end_ea - 1
                addr = ida_segment.getseg(idc.next_addr(codeend)).start_ea
                if not idc.is_code(idc.get_full_flags(addr)):
                    addr = ida_bytes.next_that(addr, self.max_ea,
                                               idc.is_code)
            else:
                # last block: runs to the end of the database
                codeend = self.max_ea - 1
                addr = BADADDR
        else:
            if (ida_segment.getseg(start).end_ea < end):
                # segment ends before the next non-code item
                codeend = ida_segment.getseg(start).end_ea - 1
                addr = ida_segment.getseg(idc.next_addr(codeend)).start_ea
                if not idc.is_code(ida_bytes.get_full_flags(addr)):
                    addr = ida_bytes.next_that(addr, self.max_ea,
                                               idc.is_code)
            else:
                # block ends with the last code item before 'end'
                codeend = idc.get_item_end(ida_bytes.prev_that(end,
                                           start, idc.is_code)) - 1
                addr = ida_bytes.next_that(end, self.max_ea, idc.is_code)
            # move the data / unknown markers past the new code address
            if (data < addr):
                data = ida_bytes.next_that(addr, self.max_ea,
                                           idc.is_data)
            if (unknown < addr):
                unknown = ida_bytes.next_unknown(addr, self.max_ea)
        self.start_element(CODE_BLOCK)
        self.write_address_attribute(START, start)
        self.write_address_attribute(END, codeend)
        self.close_tag()
    self.end_element(CODE)
    self.display_cpu_time(timer)
def export_comment(self, addr, cmt_type, cmt):
    """
    Writes one <COMMENT> element with ADDRESS and TYPE attributes.
    The comment text, with IDA tags removed, becomes the element text
    (parsed character data).
    Args:
        addr: Integers representing address of comment.
        cmt_type: String indicating the comment type.
        cmt: String containing the comment.
    """
    self.start_element(COMMENT)
    self.write_address_attribute(ADDRESS, addr)
    self.write_attribute(TYPE, cmt_type)
    self.close_tag(True)
    # tag_remove seems to be losing last character
    # work around is to add a space
    cleaned = ida_lines.tag_remove(cmt + ' ')
    if sys.version_info.major < 3:
        cleaned = cleaned.decode('utf-8')
    self.write_text(cleaned)
    self.end_element(COMMENT, False)
def export_comments(self):
    """
    Exports all comments in the IDA database as <COMMENT> elements.

    Three passes are made: regular/repeatable comments, then pre/post
    (extra) comments, then decompiler comments.
    NOTE(review): the method returns early when no address carries a
    regular or repeatable comment, so the later passes are skipped too.
    """
    first = self.min_ea
    if not ida_bytes.has_cmt(idc.get_full_flags(first)):
        first = ida_bytes.next_that(first, self.max_ea, ida_bytes.has_cmt)
    if first == BADADDR:
        return
    self.update_status(COMMENTS)
    timer = time.clock()
    self.start_element(COMMENTS, True)
    # pass 1: end-of-line and repeatable comments
    addr = first
    while addr != BADADDR:
        for repeatable, label in ((False, "end-of-line"),
                                  (True, "repeatable")):
            text = idc.get_cmt(addr, repeatable)
            if text is not None:
                self.export_comment(addr, label, text)
        addr = ida_bytes.next_that(addr, self.max_ea, ida_bytes.has_cmt)
    # pass 2: pre and post comments
    addr = self.min_ea
    if not ida_bytes.has_extra_cmts(idc.get_full_flags(addr)):
        addr = ida_bytes.next_that(addr, self.max_ea,
                                   ida_bytes.has_extra_cmts)
    while addr != BADADDR:
        if idc.get_extra_cmt(addr, idc.E_PREV) is not None:
            self.export_extra_comment(addr, "pre", idc.E_PREV)
        if idc.get_extra_cmt(addr, idc.E_NEXT) is not None:
            self.export_extra_comment(addr, "post", idc.E_NEXT)
        addr = ida_bytes.next_that(addr, self.max_ea,
                                   ida_bytes.has_extra_cmts)
    # pass 3: decompiler comments
    self.export_c_comments()
    self.end_element(COMMENTS)
    self.display_cpu_time(timer)
def export_data(self):
    """
    Exports the data items in the database as <DEFINED_DATA> elements.

    Alignment items are skipped. An item larger than one element of its
    type is exported as an array type ("type[count]"), except for
    string literals. A demangled name, when available, is attached as a
    type-info comment.
    """
    addr = self.min_ea
    if not idc.is_data(idc.get_full_flags(addr)):
        addr = ida_bytes.next_that(addr, self.max_ea, idc.is_data)
    if addr == BADADDR:
        return
    timer = time.clock()
    self.update_status(DATA)
    self.start_element(DATA, True)
    while addr != BADADDR:
        f = idc.get_full_flags(addr)
        if ida_bytes.is_align(f):
            addr = ida_bytes.next_that(addr, self.max_ea, idc.is_data)
            continue
        dtype = self.get_datatype(addr)
        size = idc.get_item_size(addr)
        ti = ida_nalt.opinfo_t()
        msize = ida_bytes.get_data_elsize(addr, f, ti)
        if ida_bytes.is_struct(f):
            s = idc.get_struc_id(dtype)
            msize = t if (t := idc.get_struc_size(s)) is not None else ida_typeinf.BADSIZE
        # never divide by a zero element size, struct or not
        if msize == 0:
            msize = 1
        if not idc.is_strlit(f) and size != msize:
            dtype = "%s[%d]" % (dtype, size//msize)
        self.start_element(DEFINED_DATA)
        self.write_address_attribute(ADDRESS, addr)
        self.write_attribute(DATATYPE, dtype)
        self.write_numeric_attribute(SIZE, size*self.cbsize)
        #TODO consider using GetTrueNameEx and Demangle
        demangled = ida_name.get_demangled_name(addr,
                        DEMANGLED_TYPEINFO, ida_ida.inf_get_demnames(),
                        idc.GN_STRICT)
        # TODO: How to handle print_type for data mangled names?
        #outbuf = idaapi.print_type(addr, False)
        outbuf = ''
        if demangled == "'string'":
            demangled = None
        has_typeinfo = bool(demangled) or bool(outbuf)
        #TODO export_data: add DISPLAY_SETTINGS
        self.close_tag(has_typeinfo)
        if has_typeinfo:
            # the demangled name takes precedence over outbuf
            self.export_typeinfo_cmt(demangled if demangled else outbuf)
            self.end_element(DEFINED_DATA)
        addr = ida_bytes.next_that(addr, self.max_ea, idc.is_data)
    self.end_element(DATA)
    self.display_cpu_time(timer)
def export_datatypes(self):
    """
    Exports the structures and enums in IDA database inside a single
    DATATYPES element.
    NOTE(review): nothing is written, enums included, when
    get_struc_qty() reports zero - confirm this is intended.
    """
    struct_count = get_struc_qty()
    if struct_count == 0:
        # skip if no structures/unions to export
        return
    self.update_status(DATATYPES)
    started = time.clock()
    self.start_element(DATATYPES, True)
    self.export_structures()
    self.export_enums()
    self.end_element(DATATYPES)
    self.display_cpu_time(started)
def export_enum_member(self, cid: int, bf: bool, radix: int, signness) -> None:
    """
    Exports a member of an enum as an ENUM_ENTRY element.

    Members without a name are skipped. The element is given contents
    only when the member has a non-empty regular or repeatable comment,
    so the comment elements are always nested inside ENUM_ENTRY.

    Args:
        cid: Integer representing id of enum member
        bf: Boolean indicates if a bitfield (currently unused)
        radix: Integer representing numeric display format
        signness: Boolean indicating if signed value
    """
    cname = idc.get_enum_member_name(cid)
    if cname is None or len(cname) == 0:
        return
    regcmt = idc.get_enum_member_cmt(cid, False)
    rptcmt = idc.get_enum_member_cmt(cid, True)
    has_regcmt = regcmt is not None and len(regcmt) > 0
    has_rptcmt = rptcmt is not None and len(rptcmt) > 0
    # either kind of comment requires an open element
    has_comment = has_regcmt or has_rptcmt
    self.start_element(ENUM_ENTRY)
    self.write_attribute(NAME, cname)
    value = idc.get_enum_member_value(cid)
    self.write_numeric_attribute(VALUE, value, radix, signness)
    # BIT_MASK attribute not currently supported for ENUM_ENTRY
    #if bf:
    #    self.write_numeric_attribute(BIT_MASK, mask)
    self.close_tag(has_comment)
    if has_regcmt:
        self.export_regular_cmt(regcmt)
    if has_rptcmt:
        self.export_repeatable_cmt(rptcmt)
    if has_comment:
        self.end_element(ENUM_ENTRY)
def export_enum_members(self, eid: int, bf: bool, eflags: int) -> None:
    """
    Exports the members of an enum.
    Members are visited by index and each one is handed to
    export_enum_member() with the radix and signedness derived from
    the enum flags.
    Args:
        eid: Integer representing id of enum
        bf: Boolean indicates if a bitfield
        eflags: Integer representing the enum flags
    """
    mask = idc.get_first_bmask(eid) if bf else 0xFFFFFFFF
    # NOTE(review): this loop only steps through the bitmasks; the export
    # call it once drove is commented out. Nesting relative to the member
    # loop below is assumed - confirm against the upstream file.
    while mask != -1:
        # ENUM BIT_MASK exporting not currently supported
        #self.export_bitmask(eid, mask)
        mask = idc.get_next_bmask(eid, mask)
    for idx in range(idc.get_enum_size(eid)):
        cid = get_enum_member_tid(eid, idx)
        # stop at the first index that has no member
        if cid == BADADDR:
            break
        self.export_enum_member(cid, bf,
                                ida_bytes.get_radix(eflags, 0),
                                self.is_signed_data(eflags))
def export_enum_reference(self, addr: int, op: int) -> None:
    """
    Exports the enum reference for an operand at an address as an
    EQUATE_REFERENCE element.

    The operand value is matched against the enum members; for a
    bitfield enum each bitmask is tried in turn until a member matches
    or the last mask has been tried. Nothing is written when no member
    matches.

    Args:
        addr: Integer representing the instruction address.
        op: Integer representing the operand index (0-based)
    """
    eid, _serial = ida_bytes.get_enum_id(addr, op)
    insn = ida_ua.insn_t()
    ida_ua.decode_insn(insn, addr)
    value = insn.ops[op].value
    last = idc.get_last_bmask(eid)
    cid = BADNODE
    if idc.is_bf(eid):
        last = idc.get_last_bmask(eid)
        mask = idc.get_first_bmask(eid)
        while True:
            cid = idc.get_enum_member(eid, (value & mask), 0, mask)
            if cid != BADNODE or mask == last:
                break
            mask = idc.get_next_bmask(eid, mask)
    else:
        cid = idc.get_enum_member(eid, value, 0, last)
    if cid == BADNODE:
        return
    self.start_element(EQUATE_REFERENCE)
    self.write_address_attribute(ADDRESS, addr)
    self.write_numeric_attribute(OPERAND_INDEX, op, 10)
    self.write_numeric_attribute(VALUE, idc.get_enum_member_value(cid))
    cname = idc.get_enum_member_name(cid)
    if cname:
        self.write_attribute(NAME, cname)
    if idc.is_bf(eid):
        # mask is the bitmask under which the member was found
        self.write_numeric_attribute("BIT_MASK", mask)
    self.close_tag()
def export_enum_references(self, addr):
    """
    Finds and exports enum references at an address.
    Only operands 0 and 1 are examined.
    Args:
        addr: Integer representing the instruction address.
    """
    flags = idc.get_full_flags(addr)
    for operand in (0, 1):
        if not ida_bytes.is_enum(flags, operand):
            continue
        self.export_enum_reference(addr, operand)
def export_enums(self):
    """
    Exports enumerations as ENUM elements.

    Enums without a name are skipped before anything is written for
    them. An ENUM gets a DISPLAY_SETTINGS child when its radix is not
    16 or it is signed, followed by its comments and members.
    """
    for eid in _iter_enum_ids():
        ename = idc.get_enum_name(eid)
        # check the name first so a skipped enum leaves no open tag
        if ename is None or len(ename) == 0:
            continue
        self.start_element(ENUM)
        self.write_attribute(NAME, ename)
        ewidth = idc.get_enum_width(eid)
        if ewidth != 0 and ewidth <= 64:
            self.write_numeric_attribute(SIZE, ewidth, 10)
        eflags = idc.get_enum_flag(eid)
        bf = idc.is_bf(eid)
        # BIT_FIELD attribute not supported for ENUM export
        #if bf:
        #    self.write_attribute(BIT_FIELD, "yes")
        regcmt = idc.get_enum_cmt(eid)
        tif = ida_typeinf.tinfo_t()
        tif.get_type_by_tid(tid=eid)
        rptcmt = tif.get_type_rptcmt()
        non_hex = ida_bytes.get_radix(eflags, 0) != 16
        signed = self.is_signed_data(eflags)
        has_children = ((idc.get_enum_size(eid) > 0) or
                        (regcmt is not None) or (rptcmt is not None) or
                        non_hex or signed)
        self.close_tag(has_children)
        if non_hex or signed:
            self.start_element(DISPLAY_SETTINGS)
            if non_hex:
                self.write_attribute(FORMAT, self.get_format(eflags))
            if signed:
                self.write_attribute(SIGNED, "yes")
            self.close_tag()
        if regcmt is not None:
            self.export_regular_cmt(regcmt)
        if rptcmt is not None:
            self.export_repeatable_cmt(rptcmt)
        self.export_enum_members(eid, bf, eflags)
        if has_children:
            self.end_element(ENUM)
def export_extra_comment(self, addr, cmt_type, extra):
    """
    Exports pre- and post- comments for an address.
    Consecutive extra comment lines, starting at index `extra`, are
    joined with newlines and exported as one comment.
    Args:
        addr: Integer representing the instruction address.
        cmt_type: String indicating comment type
        extra: Integer representing extra comment index
    """
    lines = []
    current = idc.get_extra_cmt(addr, extra)
    while current is not None:
        # workaround for tag_remove bug is to add space
        lines.append(ida_lines.tag_remove(current + ' '))
        extra += 1
        current = idc.get_extra_cmt(addr, extra)
    self.export_comment(addr, cmt_type, '\n'.join(lines))
def export_functions(self):
    """
    Exports information about all functions.

    Each FUNCTION element carries the entry point, the user-defined
    name (if any), the library flag, one ADDRESS_RANGE per chunk, the
    function comments, a type-info comment built from the demangled
    name, and the stack frame. Functions located at special addresses
    are skipped.
    """
    functions = idautils.Functions()
    if functions is None:
        return
    self.update_status(FUNCTIONS)
    timer = time.clock()
    self.start_element(FUNCTIONS, True)
    for addr in functions:
        function = ida_funcs.get_func(addr)
        if ida_segment.is_spec_ea(function.start_ea):
            continue
        self.start_element(FUNCTION)
        self.write_address_attribute(ENTRY_POINT, function.start_ea)
        if ida_bytes.has_user_name(idc.get_full_flags(addr)):
            name = self.get_symbol_name(addr)
            if name:
                self.write_attribute(NAME, name)
        if function.flags & idc.FUNC_LIB != 0:
            self.write_attribute(LIBRARY_FUNCTION, "y")
        self.close_tag(True)
        for chunk_start, chunk_end in idautils.Chunks(addr):
            self.start_element(ADDRESS_RANGE)
            self.write_address_attribute(START, chunk_start)
            # chunk_end is exclusive, END is inclusive
            self.write_address_attribute(END, chunk_end - 1)
            self.close_tag()
        regcmt = ida_funcs.get_func_cmt(function, False)
        if regcmt is not None:
            self.export_regular_cmt(regcmt)
        rptcmt = ida_funcs.get_func_cmt(function, True)
        if rptcmt is not None:
            self.export_repeatable_cmt(rptcmt)
        demangled = ida_name.get_demangled_name(
            addr, DEMANGLED_TYPEINFO, ida_ida.inf_get_demnames(), True)
        if demangled == "'string'":
            demangled = None
        # TODO: How to handle print_type for function typeinfo cmts
        #outbuf = idaapi.print_type(addr, False)
        outbuf = ''
        if demangled is not None:
            self.export_typeinfo_cmt(demangled)
        elif outbuf is not None and len(outbuf) > 0:
            self.export_typeinfo_cmt(outbuf[:-1])
        self.export_stack_frame(function)
        self.end_element(FUNCTION)
    self.end_element(FUNCTIONS)
    self.display_cpu_time(timer)
def export_manual_instruction(self, addr):
    """
    Exports user-entered "manual instruction" at an address as a
    MANUAL_INSTRUCTION element; nothing is written when there is none.
    Args:
        addr: Integer representing instruction address.
    """
    manual_text = idc.get_manual_insn(addr)
    if not manual_text:
        return
    self.start_element(MANUAL_INSTRUCTION)
    self.write_address_attribute(ADDRESS, addr)
    self.close_tag(True)
    self.write_text(manual_text)
    self.end_element(MANUAL_INSTRUCTION, False)
def export_manual_operand(self, addr):
    """
    Exports user-entered "manual operands" at an address.
    Every operand index below UA_MAXOP is checked; one MANUAL_OPERAND
    element is written per forced operand that has text.
    Args:
        addr: Integer representing instruction address.
    """
    for index in range(ida_ida.UA_MAXOP):
        if not ida_bytes.is_forced_operand(addr, index):
            continue
        forced_text = idc.get_forced_operand(addr, index)
        if not forced_text:
            continue
        self.start_element(MANUAL_OPERAND)
        self.write_address_attribute(ADDRESS, addr)
        self.write_numeric_attribute(OPERAND_INDEX, index, 10)
        self.close_tag(True)
        self.write_text(forced_text)
        self.end_element(MANUAL_OPERAND, False)
def export_markup(self):
    """
    Exports markup for instructions and data items including references
    and manual instructions and operands.

    Every item head from self.min_ea up to self.max_ea is visited and
    the kinds of markup enabled in the options are written for it.
    """
    self.update_status(MARKUP)
    timer = time.clock()
    self.start_element(MARKUP, True)
    all_operands = ida_bytes.OPND_ALL
    head = self.min_ea
    while head != BADADDR:
        flags = idc.get_full_flags(head)
        if self.options.MemoryReferences.checked:
            if ida_bytes.has_xref(flags):
                self.export_user_memory_reference(head)
            if ida_bytes.is_off(flags, all_operands):
                self.export_memory_references(head)
        # stack references are only exported together with functions
        if (self.options.Functions.checked and
                self.options.StackReferences.checked and
                ida_bytes.is_stkvar(flags, all_operands)):
            self.export_stack_reference(head)
        if (self.options.DataTypes.checked and
                ida_bytes.is_enum(flags, all_operands)):
            self.export_enum_references(head)
        if self.options.Manual.checked:
            # TODO: Ask about OPND_ALL and retrieving additional manual operands
            #if ida_bytes.is_forced_operand(addr, ida_bytes.OPND_ALL):
            if (ida_bytes.is_forced_operand(head, 0) or
                    ida_bytes.is_forced_operand(head, 1)):
                self.export_manual_operand(head)
            if ida_bytes.is_manual_insn(head):
                self.export_manual_instruction(head)
        head = idc.next_head(head, self.max_ea)
    self.end_element(MARKUP)
    self.display_cpu_time(timer)
def export_members(self, s: ida_typeinf.tinfo_t):
    """
    Exports the members of a structure or union as MEMBER elements.

    Union members are always written with offset 0. A member larger
    than one element of its type is written with an array datatype
    ("type[count]"). A MEMBER element is given contents only when the
    member has a non-empty comment, exported as regular or repeatable
    according to the member's is_regcmt().

    Args:
        s: IDA structure/union instance
    """
    nmembers = s.get_udt_nmembers()
    for n in range(nmembers):
        m = get_member_by_idx(s.get_tid(), n)
        if m is None:
            continue
        # m.offset is divided by 8 (presumably bits -> bytes; confirm)
        offset = m.offset//8
        if s.is_union():
            offset = 0
        self.start_element(MEMBER)
        self.write_numeric_attribute(OFFSET, offset)
        mname = m.name
        if mname:
            self.write_attribute(NAME, mname)
        dtype = self.get_member_type(m)
        if m.type.is_varmember():
            msize = 0
            size = 0
        else:
            _, size, _, _, _ = ida_typeinf.get_idainfo_by_type(m.type)
            # element size for arrays, whole size otherwise
            msize = m.type.get_array_element().get_size() if m.type.is_array() else size
            msize = ida_typeinf.BADSIZE if msize is None else msize
            if size < msize:
                size = msize
        if size != msize:
            arraytype = self.get_member_type(m)
            dtype = "%s[%d]" % (arraytype, size//msize)
        self.write_attribute(DATATYPE, dtype)
        self.write_numeric_attribute(SIZE, size*self.cbsize)
        cmt = m.cmt
        # an empty comment must not produce an empty comment element
        hascmt = cmt is not None and len(cmt) > 0
        self.close_tag(hascmt)
        if hascmt:
            if m.is_regcmt():
                self.export_regular_cmt(cmt)
            else:
                self.export_repeatable_cmt(cmt)
            self.end_element(MEMBER)
def export_memory_contents(self, binfilename, binfile, start, end):
    """
    Exports the binary memory contents in the database.

    A MEMORY_CONTENTS element is written for each contiguous run of
    addresses that hold a value.  The bytes themselves are stored in a
    separate binary file; the element records that file's name and the
    offset at which the run was written.

    Args:
        binfilename: String containing the absolute filepath
        binfile: IDA file instance for binary file
        start: Integer representing the starting address
        end: Integer representing the ending address
    """
    run_len = 0
    run_start = start
    for ea in range(start, end):
        # a new run begins whenever nothing has been accumulated yet
        if run_len == 0:
            run_start = ea
        valued = ida_bytes.has_value(idc.get_full_flags(ea))
        if valued:
            run_len += self.cbsize
        following = idc.next_addr(ea)
        # the run keeps growing while this address has a value and the
        # next address is adjacent and still inside the range
        if valued and following == ea + 1 and following != end:
            continue
        if run_len > 0:
            file_pos = binfile.tell()
            ida_loader.base2file(binfile.get_fp(), file_pos, run_start,
                                 run_start + run_len)
            self.start_element(MEMORY_CONTENTS)
            self.write_address_attribute(START_ADDR, run_start)
            self.write_attribute(FILE_NAME, binfilename)
            self.write_numeric_attribute(FILE_OFFSET, file_pos)
            self.write_numeric_attribute(LENGTH, run_len)
            self.close_tag(False)
            run_len = 0
def export_memory_map(self):
    """
    Exports information about all memory blocks in the database.

    One MEMORY_SECTION is written per segment.  When the Memory Content
    option is checked, a ".bytes" file named after the XML file is
    opened for the segment contents and closed once all sections have
    been written.
    """
    seg_count = ida_segment.get_segm_qty()
    if seg_count == 0:
        return
    self.update_status(MEMORY_MAP)
    started = time.clock()
    with_contents = self.options.MemoryContent.checked
    bytes_path = ''
    if with_contents:
        bytes_path = os.path.splitext(self.filename)[0] + ".bytes"
        self.binfile = ida_fpro.qfile_t()
        self.binfile.open(bytes_path, 'wb')
    self.start_element(MEMORY_MAP, True)
    for index in range(seg_count):
        self.export_memory_section(ida_segment.getnseg(index), bytes_path)
    self.end_element(MEMORY_MAP)
    if with_contents:
        self.close_binfile()
    self.display_cpu_time(started)
def export_memory_reference(self, addr, op):
    """
    Exports the memory reference for operand at the address.

    Nothing is written when no reference info exists for the operand,
    when the item is neither code nor data, or when the computed target
    is not a mapped address.

    Args:
        addr: Integer representing the instruction address.
        op: Integer representing the operand index (0-based)
    """
    flags = idc.get_full_flags(addr)
    refinfo = ida_nalt.refinfo_t()
    if ida_nalt.get_refinfo(refinfo, addr, op) != 1:
        return
    mask64 = (1 << 64) - 1
    if refinfo.target != BADADDR:
        dest = refinfo.target
    elif idc.is_code(flags):
        insn = ida_ua.insn_t()
        ida_ua.decode_insn(insn, addr)
        dest = (insn.ops[op].value - refinfo.tdelta + refinfo.base) & mask64
    elif idc.is_data(flags):
        dest = (self.get_data_value(addr) - refinfo.tdelta + refinfo.base) & mask64
    else:
        return
    if not ida_bytes.is_mapped(dest):
        return
    self.start_element(MEMORY_REFERENCE)
    self.write_address_attribute(ADDRESS, addr)
    self.write_numeric_attribute(OPERAND_INDEX, op, 10)
    self.write_address_attribute(TO_ADDRESS, dest)
    self.write_attribute(PRIMARY, "y")
    self.close_tag()
def export_memory_references(self, addr):
    """
    Exports the memory references for any operands at the address.

    An operand qualifies when it is flagged as an offset and the item is
    either data, or code whose operand is an immediate value.

    Args:
        addr: Integer representing the instruction address.
    """
    flags = idc.get_full_flags(addr)
    for op in range(ida_ida.UA_MAXOP):
        if not ida_bytes.is_off(flags, op):
            continue
        if idc.is_data(flags) or (idc.is_code(flags) and
                                  self.is_imm_op(addr, op)):
            self.export_memory_reference(addr, op)
def export_memory_section(self, seg, binfilename):
    """
    Exports segment information as a MEMORY_SECTION element.

    Args:
        seg: IDA segment instance
        binfilename: String containing absolute filepath for binary file.
    """
    self.start_element(MEMORY_SECTION)
    self.write_attribute(NAME, ida_segment.get_segm_name(seg))
    self.write_address_attribute(START_ADDR, seg.start_ea)
    self.write_numeric_attribute(LENGTH,
                                 (seg.end_ea - seg.start_ea) * self.cbsize)
    # build the "rwx" permission string from the segment permission bits
    perm_letters = (
        (ida_segment.SEGPERM_READ, 'r'),
        (ida_segment.SEGPERM_WRITE, 'w'),
        (ida_segment.SEGPERM_EXEC, 'x'),
    )
    perms = "".join(letter for bit, letter in perm_letters
                    if seg.perm & bit != 0)
    if len(perms) > 0:
        self.write_attribute(PERMISSIONS, perms)
    has_contents = (self.options.MemoryContent.checked and
                    self.check_if_seg_contents(seg))
    self.close_tag(has_contents)
    if has_contents:
        self.export_memory_contents(os.path.basename(binfilename),
                                    self.binfile, seg.start_ea, seg.end_ea)
        self.end_element(MEMORY_SECTION)
def export_program(self):
    """
    Exports basic information about the program as the PROGRAM,
    INFO_SOURCE, PROCESSOR, and COMPILER elements.

    Also sets self.seg_addr and runs the overlay detection before any
    addressable items are exported.
    """
    self.update_status(PROGRAM)
    started = time.clock()

    # PROGRAM element
    self.start_element(PROGRAM)
    self.write_attribute(NAME, idc.get_root_filename())
    self.write_attribute(EXE_PATH, idc.get_input_file_path())
    exe_format = ida_loader.get_file_type_name()
    if len(exe_format) > 0:
        self.write_attribute(EXE_FORMAT, exe_format)
    # check for presence of INPUT_MD5 netnode
    md5_node = ida_netnode.netnode(INPUT_MD5)
    if md5_node == BADNODE:
        digest = idc.retrieve_input_file_md5()
    else:
        digest = md5_node.supval(ida_nalt.RIDX_MD5)
    if digest is not None:
        self.write_attribute(INPUT_MD5, digest)
    self.close_tag(True)

    # INFO_SOURCE element
    self.start_element(INFO_SOURCE)
    tool = ('IDA-Pro ' + ida_kernwin.get_kernel_version() +
            ' XML plugin v' + IDAXML_VERSION +
            ' (Python) SDK ' + str(IDA_SDK_VERSION))
    self.write_attribute(TOOL, tool)
    # USERNAME is tried first, then USER
    user = "UNKNOWN"
    for variable in ("USERNAME", "USER"):
        user = os.getenv(variable, "UNKNOWN")
        if user != "UNKNOWN":
            break
    self.write_attribute(USER, user)
    self.write_attribute(FILE, idc.get_idb_path())
    self.write_attribute(TIMESTAMP,
                         datetime.datetime.now().strftime("%Y-%m-%d %H:%M"))
    self.close_tag()

    # PROCESSOR element
    self.start_element(PROCESSOR)
    self.write_attribute(NAME, ida_ida.inf_get_procname())
    self.write_attribute(ENDIAN, "big" if ida_ida.inf_is_be() else "little")
    self.seg_addr = False
    # the address model is taken from the largest segment bitness found
    bitness = 1
    mixed_models = False
    seg_count = ida_segment.get_segm_qty()
    if seg_count > 0:
        bitness = ida_segment.getnseg(0).bitness
        for index in range(1, seg_count):
            seg_bits = ida_segment.getnseg(index).bitness
            if seg_bits != bitness:
                mixed_models = True
            if seg_bits > bitness:
                bitness = seg_bits
    addr_model = {0: "16-bit", 2: "64-bit"}.get(bitness, "32-bit")
    self.write_attribute(ADDRESS_MODEL, addr_model)
    self.close_tag()
    if mixed_models:
        idc.msg("WARNING: Segments do not have same addressing model!\n")
    if ida_idp.ph.id == ida_idp.PLFM_386 and bitness == 0:
        self.seg_addr = True
    # find any overlayed memory before processing addressable items
    self.find_overlay_memory()

    # COMPILER element
    self.start_element(COMPILER)
    self.write_attribute(NAME, ida_typeinf.get_compiler_name(ida_ida.inf_get_cc_id()))
    self.close_tag()
    self.display_cpu_time(started)
def export_program_entry_points(self):
    """
    Exports entry points for the program.

    Nothing is written when the database has no entry points.
    """
    entry_count = idc.get_entry_qty()
    if entry_count == 0:
        return
    self.update_status(PROGRAM_ENTRY_POINTS)
    started = time.clock()
    self.start_element(PROGRAM_ENTRY_POINTS, True)
    for index in range(entry_count):
        self.start_element(PROGRAM_ENTRY_POINT)
        ordinal = idc.get_entry_ordinal(index)
        self.write_address_attribute(ADDRESS, idc.get_entry(ordinal))
        self.close_tag()
    self.end_element(PROGRAM_ENTRY_POINTS)
    self.display_cpu_time(started)
def export_register_values(self):
    """
    Exports segment register value ranges.

    Nothing is written when no segment register has any range.  Ranges
    with a BADSEL value, unnamed registers, the "cs" register, and "ds"
    on the TMS processor are skipped.
    """
    first = ida_idp.ph_get_reg_first_sreg()
    last = ida_idp.ph_get_reg_last_sreg() + 1
    if not any(ida_segregs.get_sreg_ranges_qty(reg) != 0
               for reg in range(first, last)):
        return
    self.update_status(REGISTER_VALUES)
    started = time.clock()
    self.start_element(REGISTER_VALUES, True)
    sreg_range = ida_segregs.sreg_range_t()
    for reg in range(first, last):
        for index in range(ida_segregs.get_sreg_ranges_qty(reg)):
            if not ida_segregs.getn_sreg_range(sreg_range, reg, index):
                continue
            value = sreg_range.val
            if value == idc.BADSEL:
                continue
            regname = ida_idp.ph.regnames[reg]
            if regname is None:
                continue
            lowered = regname.lower()
            if lowered == "cs":
                continue
            if ida_idp.ph.id == ida_idp.PLFM_TMS and lowered == "ds":
                continue
            self.start_element(REGISTER_VALUE_RANGE)
            self.write_attribute(REGISTER, regname)
            self.write_numeric_attribute(VALUE, value)
            self.write_address_attribute(START_ADDRESS, sreg_range.start_ea)
            span = (sreg_range.end_ea - sreg_range.start_ea) * self.cbsize
            self.write_numeric_attribute(LENGTH, span)
            self.close_tag()
    self.end_element(REGISTER_VALUES)
    self.display_cpu_time(started)
def export_regular_cmt(self, cmt: str) -> None:
    """
    Writes a REGULAR_CMT element holding the given comment text.

    Args:
        cmt: String containing the regular comment.
    """
    tag = REGULAR_CMT
    self.write_comment_element(tag, cmt)
def export_repeatable_cmt(self, cmt: str) -> None:
    """
    Writes a REPEATABLE_CMT element holding the given comment text.

    Args:
        cmt: String containing the repeatable comment.
    """
    tag = REPEATABLE_CMT
    self.write_comment_element(tag, cmt)
def export_stack_frame(self, function: ida_funcs.func_t) -> None:
    """
    Exports a function's stack frame as a STACK_FRAME element,
    including the variables allocated on the stack.

    Nothing is written when the function has no frame or the frame has
    no members.

    Args:
        function: IDA function instance
    """
    frame = get_struc(function.frame)
    if frame is None or frame.get_udt_nmembers() <= 0:
        return
    self.start_element(STACK_FRAME)
    self.write_numeric_attribute(LOCAL_VAR_SIZE, function.frsize)
    self.write_numeric_attribute(REGISTER_SAVE_SIZE, function.frregs)
    self.write_numeric_attribute(RETURN_ADDR_SIZE,
                                 ida_frame.get_frame_retsize(function))
    self.write_numeric_attribute(BYTES_PURGED, function.argsize)
    has_vars = self.check_stack_frame(frame)
    self.close_tag(has_vars)
    if not has_vars:
        return
    self.export_stack_vars(function, frame)
    self.end_element(STACK_FRAME)
def export_stack_reference(self, addr):
    """
    Exports references to stack variables at the address.

    One STACK_REFERENCE element is written for every operand of the
    instruction that is flagged as a stack variable.

    Args:
        addr: Integer containing instruction address.
    """
    f = idc.get_full_flags(addr)
    for op in range(ida_ida.UA_MAXOP):
        if idc.is_code(f) and ida_bytes.is_stkvar(f, op):
            insn = ida_ua.insn_t()
            ida_ua.decode_insn(insn, addr)
            opnd = insn.ops[op]
            # TODO:How to handle opnd.type for stack references
            optype = opnd.type
            # skip operand slots that are not in use
            if optype == idc.o_void:
                continue
            # TODO:How to handle op_t_get_addr for stack references
            svidx = ida_typeinf.tinfo_t().get_stkvar(insn, opnd, opnd.value)
            # -1 means no stack variable was resolved for this operand
            if svidx == -1:
                continue
            # NOTE(review): assumes addr lies inside a function; get_func
            # returning None here would fail below - confirm with callers
            function = ida_funcs.get_func(addr)
            self.start_element(STACK_REFERENCE)
            self.write_address_attribute(ADDRESS, addr)
            self.write_numeric_attribute(OPERAND_INDEX, op, 10)
            offset = opnd.addr
            spoff = offset - function.frregs
            # values above 0x7FFFFFFF are reinterpreted as negative
            # 32-bit offsets
            if offset > 0x7FFFFFFF:
                offset -= 0x100000000
            if spoff > 0x7FFFFFFF:
                spoff -= 0x100000000
            self.write_numeric_attribute(STACK_PTR_OFFSET, spoff,
                                         16, True)
            # the frame pointer offset is only written for functions
            # flagged with FUNC_FRAME
            if (function.flags & idc.FUNC_FRAME) != 0:
                self.write_numeric_attribute(FRAME_PTR_OFFSET,
                                             offset, 16, True)
            self.close_tag()
def export_stack_vars(self, function: ida_funcs.func_t, sframe: ida_typeinf.tinfo_t):
    """
    Exports the stack variables (parameters and locals) in a stack frame.

    Each frame member other than the special " s" and " r" members is
    written as a STACK_VAR element with its stack/frame pointer offsets,
    name (unless it is a default "var_"/"arg_" name), datatype, size and
    comments.

    Args:
        function: IDA function instance.
        sframe: IDA stack frame instance.
    """
    for i in range(sframe.get_udt_nmembers()):
        member = get_member_by_idx(sframe.get_tid(), i)
        if member is None:
            continue
        mname = member.name
        # (the former `len(mname) < 0` test could never be true)
        if mname is None:
            continue
        if mname == " s" or mname == " r":
            continue
        # member offsets are divided by 8 before the frame sizes are
        # subtracted
        byte_offset = member.offset // 8
        spoff = byte_offset - function.frsize - function.frregs
        froff = byte_offset - function.frsize
        self.start_element(STACK_VAR)
        self.write_numeric_attribute(STACK_PTR_OFFSET, spoff, 16, True)
        if function.flags & idc.FUNC_FRAME != 0:
            self.write_numeric_attribute(FRAME_PTR_OFFSET, froff, 16, True)
        pre = mname[0:4]
        if pre != "var_" and pre != "arg_":
            self.write_attribute(NAME, mname)
        _, size, f, _, _ = ida_typeinf.get_idainfo_by_type(member.type)
        mtype = self.get_member_type(member)
        msize = size
        if idc.is_struct(f):
            # element size is the size of the named structure, not its
            # type id (the id was previously used as the size)
            msize = idc.get_struc_size(idc.get_struc_id(mtype))
        elif not idc.is_strlit(f):
            if member.type.is_array():
                msize = member.type.get_array_element().get_size()
            else:
                msize = size
            msize = ida_typeinf.BADSIZE if msize is None else msize
            if size < msize:
                size = msize
        # a total size different from the element size means an array;
        # skip the count when the element size is unknown (zero)
        if (not idc.is_strlit(f) and not ida_bytes.is_align(f)
                and size != msize and msize > 0):
            mtype = "%s[%d]" % (mtype, size // msize)
        self.write_attribute(DATATYPE, mtype)
        self.write_numeric_attribute(SIZE, size * self.cbsize)
        regcmt = member.cmt if member.is_regcmt() else None
        rptcmt = member.cmt if not member.is_regcmt() else None
        if regcmt is not None:
            regcmt = ida_lines.tag_remove(regcmt + " ")
        if rptcmt is not None:
            rptcmt = ida_lines.tag_remove(rptcmt + " ")
        has_regcmt = regcmt is not None and len(regcmt) > 0
        has_rptcmt = rptcmt is not None and len(rptcmt) > 0
        has_content = has_regcmt or has_rptcmt
        self.close_tag(has_content)
        if has_content:
            if has_regcmt:
                self.export_regular_cmt(regcmt)
            if has_rptcmt:
                self.export_repeatable_cmt(rptcmt)
            self.end_element(STACK_VAR)
def export_structures(self):
    """
    Exports information about all structures and unions.

    Unnamed structures and structures that cannot be resolved are
    skipped.  Each remaining one becomes a STRUCTURE or UNION element
    with its comments and members.
    """
    for _ordinal, sid, sname in idautils.Structs():
        if sname is None:
            # Skip unnamed structs. Alternatively the exporter could
            # generate a unique temporary name.
            continue
        s = get_struc(sid)
        if s is None:
            continue
        if s.is_union():
            tag = UNION
        else:
            tag = STRUCTURE
        self.start_element(tag)
        self.write_attribute(NAME, sname)
        self.write_numeric_attribute(SIZE,
                                     idc.get_struc_size(sid) * self.cbsize)
        if s.is_varstruct():
            self.write_attribute(VARIABLE_LENGTH, "y")
        regcmt = s.get_type_cmt()
        rptcmt = s.get_type_rptcmt()
        has_contents = (regcmt is not None or rptcmt is not None or
                        s.get_udt_nmembers() > 0)
        self.close_tag(has_contents)
        if not has_contents:
            continue
        if regcmt is not None:
            self.export_regular_cmt(regcmt)
        if rptcmt is not None:
            self.export_repeatable_cmt(rptcmt)
        if s.get_udt_nmembers() > 0:
            self.export_members(s)
        self.end_element(tag)
def export_symbol(self, addr: int, name: str, stype: str="") -> None:
    """
    Exports name for an address as a SYMBOL element.

    When the name stored at the address differs from the name being
    exported, the stored name is added as the MANGLED attribute.

    Args:
        addr: Integer representing the symbol address.
        name: String containing the symbol name.
        stype: String indicating symbol type (global or local)
    """
    self.start_element(SYMBOL)
    for attribute, value in ((NAME, name), (TYPE, stype)):
        if attribute is NAME:
            # the address attribute precedes the name
            self.write_address_attribute(ADDRESS, addr)
        self.write_attribute(attribute, value)
    stored_name = idc.get_name(addr, idc.GN_STRICT)
    differs = name is not None and stored_name != name
    if differs:
        self.write_attribute("MANGLED", stored_name)
    self.close_tag()
def export_symbol_table(self):
    """
    Exports user-defined and non-default names as SYMBOL elements.

    Nothing is written when no address in the database has a name.
    """
    ea = self.min_ea
    if not ida_bytes.has_any_name(idc.get_full_flags(ea)):
        ea = ida_bytes.next_that(ea, self.max_ea, ida_bytes.has_any_name)
    if ea == BADADDR:
        return
    self.update_status(SYMBOL_TABLE)
    self.start_element(SYMBOL_TABLE, True)
    started = time.clock()
    while ea != BADADDR:
        flags = idc.get_full_flags(ea)
        # only export meaningful names (user and auto)
        if ida_bytes.has_user_name(flags) or ida_bytes.has_auto_name(flags):
            # global name
            global_name = self.get_symbol_name(ea)
            if global_name is not None and len(global_name) > 0:
                self.export_symbol(ea, global_name)
            # local name
            if ida_nalt.has_lname(ea):
                local_name = idc.get_name(ea, idc.GN_LOCAL)
                if local_name is not None and len(local_name) > 0:
                    self.export_symbol(ea, local_name, 'local')
        # advance to the next address with any name
        ea = ida_bytes.next_that(ea, self.max_ea, ida_bytes.has_any_name)
    self.end_element(SYMBOL_TABLE)
    self.display_cpu_time(started)
def export_typeinfo_cmt(self, cmt):
    """
    Exports comment containing type information for data and functions.

    Trailing newline characters are removed before the comment is
    written.  A comment consisting only of newlines is written as an
    empty string (the previous character-by-character loop raised
    IndexError in that case).

    Args:
        cmt: String containing type info.
    """
    # older versions of IDAPython returned a '\n' at end of cmt
    cmt = cmt.rstrip('\n')
    self.write_comment_element(TYPEINFO_CMT, cmt)
def export_user_memory_reference(self, addr):
    """
    Exports user-specified memory references to the address.

    A MEMORY_REFERENCE element marked USER_DEFINED is written for each
    cross reference to the address whose `user` field is 1.

    Args:
        addr: Integer representing the instruction address.
    """
    user_xrefs = (x for x in idautils.XrefsTo(addr, ida_xref.XREF_FAR)
                  if x.user == 1)
    for xref in user_xrefs:
        self.start_element(MEMORY_REFERENCE)
        self.write_address_attribute(ADDRESS, xref.frm)
        self.write_address_attribute(TO_ADDRESS, xref.to)
        self.write_attribute(USER_DEFINED, "y")
        self.close_tag()
def find_overlay_memory(self) -> None:
    """
    Determines if any memory blocks (segments) are overlays.

    A segment is an overlay if it translates to the same logical
    address as an earlier segment in the same memory space.  This is
    rare, but may occur, for example when a processor has a small
    logical address space (i.e. a 16-bit address is limited to 64K) and
    multiple physical segments are mapped into the same logical segment.

    The result is stored in self.overlay (translated start address ->
    bool) and self.has_overlays.
    """
    self.overlay = dict()
    self.has_overlays = False
    seg_total = ida_segment.get_segm_qty()
    if seg_total == 0:
        return
    # the first segment can never overlay anything
    first_seg = ida_segment.getnseg(0)
    self.overlay[self.translate_address(first_seg.start_ea)] = False
    for cur in range(1, seg_total):
        seg = ida_segment.getnseg(cur)
        space = self.get_space_name(seg.start_ea)
        lo = self.translate_address(seg.start_ea)
        hi = self.translate_address(seg.end_ea - 1)
        overlaps = False
        for prev in range(cur):
            other = ida_segment.getnseg(prev)
            if self.get_space_name(other.start_ea) != space:
                continue
            other_lo = self.translate_address(other.start_ea)
            other_hi = self.translate_address(other.end_ea - 1)
            # either end of this segment falls inside the earlier one
            if other_lo <= lo <= other_hi or other_lo <= hi <= other_hi:
                overlaps = True
                self.has_overlays = True
                break
        self.overlay[lo] = overlaps
def get_address_string(self, addr: int) -> str:
    """
    Returns a string representing the address.

    The representation is typically a hex string of the address, but
    may include a segment or space name prefix based on the processor
    or architecture, and an overlay segment name prefix.

    Args:
        addr: Integer representing a program address.
    """
    seg = ida_segment.getseg(addr)
    seg_offset = addr - ida_segment.get_segm_base(seg)
    text = "0x%X" % seg_offset
    space = self.get_space_name(addr)
    if space is not None:
        text = "%s:%04X" % (space, seg_offset)
    elif (ida_idp.ph_get_id() == ida_idp.PLFM_386 and
            seg.bitness == 0):
        # 16-bit x86: paragraph:offset form
        para = ida_segment.get_segm_para(seg)
        text = "%04X:%04X" % (para, addr - (para << 4))
    if ida_idp.ph_get_id() == ida_idp.PLFM_C166:
        text = "0x%X" % addr
    if self.has_overlays and self.is_overlay(addr):
        overlay_name = ida_segment.get_segm_name(seg)
        if len(overlay_name) > 0:
            text = overlay_name + "::" + text
    return text
def get_data_value(self, addr) -> int:
    """
    Returns the data item value at an address based on its size.

    Items of 1, 2, 4 or 8 bytes are read; any other size yields 0.

    Args:
        addr: Integer representing a program address.
    """
    nbytes = idc.get_item_size(addr) * self.cbsize
    if nbytes == 1:
        reader = ida_bytes.get_byte
    elif nbytes == 2:
        reader = ida_bytes.get_16bit
    elif nbytes == 4:
        reader = ida_bytes.get_32bit
    elif nbytes == 8:
        reader = ida_bytes.get_64bit
    else:
        return 0
    return reader(addr)
def get_datatype(self, addr: int) -> str:
    """
    Returns the datatype at an address.

    The type could be a basic type (byte, word, dword, etc.),
    a structure, an array, a pointer, or a string type.

    Args:
        addr: Integer representing a program address.

    Returns:
        String naming the datatype.
    """
    f = idc.get_full_flags(addr)
    t = self.get_type(f)
    if ida_bytes.is_struct(f):
        opndbuf = ida_nalt.opinfo_t()
        opnd = ida_bytes.get_opinfo(opndbuf, addr, 0, f)
        if opnd is None:
            # no operand info available: fall back to the generic type
            return t
        return idc.get_struc_name(opnd.tid)
    if idc.is_strlit(f):
        str_type = idc.get_str_type(addr)
        # Ordered (string type, exported name) pairs.  STRTYPE_C_16 used
        # to be tested twice, which made "unicode2" unreachable; the
        # length-prefixed UTF-16 types follow the string2/string4
        # pattern.  STRTYPE_C_32 keeps its existing "unicode4" mapping.
        string_names = (
            (ida_nalt.STRTYPE_TERMCHR, "string"),
            (ida_nalt.STRTYPE_PASCAL, "string1"),
            (ida_nalt.STRTYPE_LEN2, "string2"),
            (ida_nalt.STRTYPE_LEN4, "string4"),
            (ida_nalt.STRTYPE_C_16, "unicode"),
            (ida_nalt.STRTYPE_LEN2_16, "unicode2"),
            (ida_nalt.STRTYPE_LEN4_16, "unicode4"),
            (ida_nalt.STRTYPE_C_32, "unicode4"),
        )
        for candidate, exported in string_names:
            if str_type == candidate:
                return exported
        return "string"
    if ida_bytes.is_off0(f):
        return "pointer"
    return t
def get_format(self, flags: int) -> str:
    """
    Returns the display format of a data item based on its flags.

    Args:
        flags: Integer representing IDA item flags

    Returns:
        "char" when the first operand is shown as a character, otherwise
        "binary", "octal" or "decimal" for radix 2, 8 or 10, and "hex"
        for anything else.
    """
    if ida_bytes.is_char0(flags):
        return "char"
    radix_names = {2: "binary", 8: "octal", 10: "decimal"}
    return radix_names.get(ida_bytes.get_radix(flags, 0), "hex")
def get_member_type(self, m: ida_typeinf.udm_t) -> str:
    """
    Returns the datatype of a structure member.

    For structure-typed members the structure's name is returned when
    it can be resolved; otherwise the basic type name (or "pointer" for
    offset members) is returned.

    Args:
        m: IDA member instance.

    Returns:
        String representing member datatype.
    """
    _, _, flags, _, _ = ida_typeinf.get_idainfo_by_type(m.type)
    basic = self.get_type(flags)
    if ida_bytes.is_off0(flags):
        basic = "pointer"
    if ida_bytes.is_struct(flags):
        sptr = get_sptr(m)
        if sptr is not None:
            struct_name = idc.get_struc_name(sptr.get_tid())
            if struct_name is not None:
                return struct_name
    return basic
def get_options(self):
    """
    Builds the export options form, applies the default selections and,
    unless running automatically, displays it to the user.

    Raises:
        Cancelled: if the user dismisses the form.
    """
    form_text = "".join([
        "HELP\n",
        "XML plugin (Python)\n",
        "IDA SDK: " + str(IDA_SDK_VERSION) + "\n",
        "\n",
        "The XML interface provides a dump of the IDA-Pro database as ",
        "a XML \"PROGRAM\" document. The XML PROGRAM document contains ",
        "information from the idb file in a readable text format, and ",
        "can be viewed with a text editor or web browser.\n\n",
        "ENDHELP\n",
        "Export as XML PROGRAM document....",
        "\n <##Options##Memory Sections:{MemorySections}>",
        "\n <Memory Content:{MemoryContent}>",
        "\n <Segment Register Value Ranges:{RegisterValues}>",
        "\n <Data Types:{DataTypes}>",
        "\n <Code Blocks:{CodeBlocks}>",
        "\n <Data Definitions:{DataDefinitions}>",
        "\n <Comments:{Comments}>",
        "\n <Entry Points:{EntryPoints}>",
        "\n <Symbols:{Symbols}>",
        "\n <Functions:{Functions}>",
        "\n <Memory References:{MemoryReferences}>",
        "\n <Stack References:{StackReferences}>",
        "\n <Manual Instructions/Operands:{Manual}>{cGroup1}>",
        "\n\n",
    ])
    checkbox_names = (
        "MemorySections",
        "MemoryContent",
        "RegisterValues",
        "DataTypes",
        "CodeBlocks",
        "DataDefinitions",
        "Comments",
        "EntryPoints",
        "Symbols",
        "Functions",
        "MemoryReferences",
        "StackReferences",
        "Manual"
    )
    controls = {'cGroup1': ida_kernwin.Form.ChkGroupControl(checkbox_names)}
    self.options = ida_kernwin.Form(form_text, controls)
    self.options.Compile()
    # default selections: everything except stack references
    defaults = (
        ("MemorySections", True),
        ("MemoryContent", True),
        ("DataTypes", True),
        ("RegisterValues", True),
        ("CodeBlocks", True),
        ("DataDefinitions", True),
        ("Symbols", True),
        ("EntryPoints", True),
        ("Functions", True),
        ("Comments", True),
        ("MemoryReferences", True),
        ("StackReferences", False),
        ("Manual", True),
    )
    for control_name, state in defaults:
        getattr(self.options, control_name).checked = state
    if self.autorun:
        return
    if self.options.Execute() == 0:
        raise Cancelled
def get_space_name(self, addr):
    """
    Returns the memory space name associated with an address.

    Used for Harvard architectures (Intel 8051 and TMS, add others
    as needed).

    Args:
        addr: Integer representing a program address.

    Returns:
        String containing the memory space name.
        None if single address space architecture.
    """
    cpu = ida_idp.ph_get_id()
    seg_kind = ida_segment.segtype(addr)
    if cpu == ida_idp.PLFM_8051:
        if seg_kind == idc.SEG_CODE:
            return "CODE"
        if seg_kind != idc.SEG_IMEM:
            return "EXTMEM"
        # internal memory: offsets below 0x80 are INTMEM, the rest SFR
        seg_offset = addr - ida_segment.get_segm_base(ida_segment.getseg(addr))
        return "INTMEM" if seg_offset < 0x80 else "SFR"
    if cpu == ida_idp.PLFM_TMS:
        return "CODE" if seg_kind == idc.SEG_CODE else "DATA"
    return None
def get_symbol_name(self, ea):
    """
    Returns the symbol name for the address.

    Args:
        ea: Integer representing the symbol address.

    Returns:
        String containing the symbol name.
        The demangled name is returned if it exists, otherwise the
        displayed name is returned. Spaces (' ') are replaced with '_'.
    """
    symbol = ida_name.get_demangled_name(ea, DEMANGLED_FORM,
                                         ida_ida.inf_get_demnames(),
                                         idc.GN_STRICT)
    # fall back to the plain name when demangling gave nothing useful
    if not symbol or symbol == "'string'":
        symbol = idc.get_name(ea)
    if symbol is None:
        return symbol
    return symbol.replace(" ", "_")
def get_type(self, flags: int) -> str:
    """
    Returns a datatype string based on the item flags.

    When self.cbsize is 2, byte and word items are reported one size
    larger ("word" and "dword").

    Args:
        flags: IDA item flags.

    Returns:
        String representing item datatype, "unknown" if none matches.
    """
    if self.cbsize == 2:
        if ida_bytes.is_byte(flags):
            return "word"
        if ida_bytes.is_word(flags):
            return "dword"
    # checked in order; the first matching predicate wins
    type_checks = (
        (ida_bytes.is_byte, "byte"),
        (ida_bytes.is_word, "word"),
        (ida_bytes.is_dword, "dword"),
        (ida_bytes.is_qword, "qword"),
        (ida_bytes.is_oword, "oword"),
        (ida_bytes.is_tbyte, "tbyte"),
        (ida_bytes.is_float, "float"),
        (ida_bytes.is_double, "double"),
        (ida_bytes.is_pack_real, "packed"),
        (idc.is_strlit, "ascii"),
        (ida_bytes.is_struct, "structure"),
        (ida_bytes.is_align, "align"),
    )
    for matches, type_name in type_checks:
        if matches(flags):
            return type_name
    return "unknown"
def is_imm_op(self, addr, op):
    """
    Tells whether an instruction operand is an immediate value.

    Args:
        addr: Integer representing instruction address.
        op: Integer representing operand index (0-based).

    Returns:
        True if the decoded operand's type is o_imm, False otherwise.
    """
    decoded = ida_ua.insn_t()
    ida_ua.decode_insn(decoded, addr)
    operand = decoded.ops[op]
    return operand.type == idc.o_imm
def is_overlay(self, addr):
    """
    Checks if memory block (segment) is an overlay.

    Args:
        addr: Integer representing a program address.

    Returns:
        True if memory block (segment) is an overlay, False otherwise
        (including when the address is not inside any segment or the
        processor is C166).
    """
    if ida_idp.ph_get_id() == ida_idp.PLFM_C166:
        return False
    s = ida_segment.getseg(addr)
    if s is None:
        return False
    # segment_t exposes start_ea; the pre-7.0 name startEA is not
    # available in current IDAPython.
    # NOTE(review): find_overlay_memory keys self.overlay by translated
    # address while this lookup uses the raw start address - confirm
    # which is intended.
    return self.overlay.get(s.start_ea, False)
def is_signed_data(self, flags: int) -> bool:
    """
    Tells whether the FF_SIGN bit is set in the item flags.

    Args:
        flags: Integer representing IDA item flags.

    Returns:
        True if the FF_SIGN bit is set, False otherwise.
    """
    sign_bits = flags & ida_bytes.FF_SIGN
    return sign_bits != 0
def start_element(self, tag, close=False):
    """
    Outputs the start of a new element on a new indented line and
    updates the counter for the tag.

    Args:
        tag: String representing the element tag
        close: Boolean indicating if the tag should be closed.

    Raises:
        Cancelled: if the user has cancelled the operation.
    """
    if ida_kernwin.user_cancelled():
        raise Cancelled
    indent = " " * self.indent_level
    self.write_to_xmlfile("\n" + indent + "<" + tag)
    if close:
        self.close_tag(True)
    self.update_counter(tag)
def translate_address(self, addr):
    """
    Returns the translated logical address.

    Without segmented addressing (self.seg_addr is False) the segment
    base is subtracted from the address.  Otherwise the segment
    paragraph is placed in the upper bits (shifted left by 16) and the
    offset within the segment in the lower bits.

    Args:
        addr: Integer representing a program address.

    Returns:
        Integer representing the logical address.
    """
    seg = ida_segment.getseg(addr)
    if self.seg_addr:
        para = ida_segment.get_segm_para(seg)
        return (para << 16) + (addr - (para << 4))
    return addr - ida_segment.get_segm_base(seg)
def write_address_attribute(self, name, addr):
    """
    Outputs an address attribute for an element.

    The address is converted to its string form before being written.

    Args:
        name: String representing attribute name.
        addr: Integer representing a program address.
    """
    address_text = self.get_address_string(addr)
    self.write_attribute(name, address_text)
def write_attribute(self, name, value):
    """
    Outputs an attribute (name and value) for an element.

    Nothing is written when either the name or the value is None or
    empty.  The value is passed through check_for_entities first.

    Args:
        name: String representing attribute name.
        value: String representing attribute value.
    """
    if name is None or value is None:
        return
    if len(name) == 0 or len(value) == 0:
        return
    escaped = self.check_for_entities(value)
    self.write_to_xmlfile(" " + name + '="' + escaped + '"')
def write_comment_element(self, name, cmt):
    """
    Outputs the tag and text for a comment element.

    Comment elements can be REGULAR_CMT, REPEATABLE_CMT, or TYPEINFO_CMT.
    The start tag is closed immediately, the comment text follows, and
    the end tag is written with False as its second argument.

    Args:
        name: String representing the comment element name.
        cmt: String containing the comment.
    """
    tag = name
    self.start_element(tag, True)
    self.write_text(cmt)
    self.end_element(tag, False)
def write_numeric_attribute(self, name, value, base=16, signedhex=False):
    """
    Outputs a numeric value attribute (name and value) for an element.

    Base 10 writes a plain decimal number; any other base writes a
    "0x"-prefixed upper-case hex number.  With signedhex, negative
    values are written as "-0x..." using their absolute value.

    Args:
        name: String representing the attribute name.
        value: Integer representing the attribute value.
        base: Integer representing numeric base to use for value.
        signedhex: Boolean indicating if hex representation of
            value is signed.
    """
    if base == 10:
        text = "%d" % value
    elif signedhex and value < 0:
        text = "-0x%X" % abs(value)
    else:
        text = "0x%X" % value
    self.write_attribute(name, text)
def write_text(self, text):
    """
    Outputs the parsed character text for an element.

    The text is passed through check_for_entities before it is written.

    Args:
        text: String representing the element text.
    """
    escaped = self.check_for_entities(text)
    self.write_to_xmlfile(escaped)
def write_to_xmlfile(self, buf):
    """
    Writes the buffer to the XML file and passes it to the debug output.

    Args:
        buf: String containing data to write to XML file.
    """
    out = self.xmlfile
    out.write(buf)
    self.dbg(buf)
def write_xml_declaration(self):
    """
    Writes the XML declaration and the program_dtd processing
    instruction at the start of the XML file.
    """
    self.dbg("\n")
    declaration_lines = (
        "<?xml version=\"1.0\" standalone=\"yes\"?>",
        "\n<?program_dtd version=\"1\"?>\n",
    )
    self.write_to_xmlfile("".join(declaration_lines))
class XmlImporter(IdaXml):
"""
XmlImporter class contains methods to import an XML PROGRAM
document into IDA.
"""
def __init__(self, as_plugin, arg=0):
    """
    Initializes the XmlImporter attributes and the tables that map XML
    element tags to their 'start' and 'end' event handlers.

    Args:
        as_plugin: Stored as self.plugin; import_xml uses it to choose
            between asking for a file and using the input file path.
        arg: Value passed through to IdaXml.__init__.
    """
    IdaXml.__init__(self, arg)
    self.plugin = as_plugin
    self.timers = dict()
    self.addr_mode = 1
    self.create = True
    self.dataseg = None
    self.deferred = []

    # handlers run when an element's start tag is seen
    start_handlers = {
        COMPILER    : self.import_compiler,
        INFO_SOURCE : self.import_info_source,
        MEMORY_MAP  : self.import_memory_map,
        PROCESSOR   : self.import_processor,
        PROGRAM     : self.import_program,
    }
    status_only_tags = (
        BOOKMARKS, CODE, COMMENTS, DATA, DATATYPES, EQUATES, FUNCTIONS,
        MARKUP, PROGRAM_ENTRY_POINTS, REGISTER_VALUES, SYMBOL_TABLE,
    )
    for tag in status_only_tags:
        start_handlers[tag] = self.update_import

    # handlers run when an element's end tag is seen
    end_handlers = {
        BOOKMARK            : self.import_bookmark,
        CODE_BLOCK          : self.import_codeblock,
        COMMENT             : self.import_comment,
        DEFINED_DATA        : self.import_defined_data,
        DESCRIPTION         : self.import_description,
        ENUM                : self.import_enum,
        EQUATE_GROUP        : self.import_equate_group,
        EQUATE_REFERENCE    : self.import_equate_reference,
        FUNCTION            : self.import_function,
        FUNCTION_DEF        : self.import_function_def,
        MANUAL_INSTRUCTION  : self.import_manual_instruction,
        MANUAL_OPERAND      : self.import_manual_operand,
        MEMORY_REFERENCE    : self.import_memory_reference,
        MEMORY_SECTION      : self.import_memory_section,
        PROGRAM_ENTRY_POINT : self.import_program_entry_point,
        REGISTER_VALUE_RANGE: self.import_register_value_range,
        STACK_REFERENCE     : self.import_stack_reference,
        STRUCTURE           : self.import_structure,
        SYMBOL              : self.import_symbol,
        TYPE_DEF            : self.import_typedef,
        UNION               : self.import_union,
    }
    # end element for elapse time
    timed_tags = (
        BOOKMARKS, CODE, COMMENTS, DATA, EQUATES, FUNCTIONS, MARKUP,
        MEMORY_MAP, PROGRAM_ENTRY_POINTS, REGISTER_VALUES, SYMBOL_TABLE,
    )
    for tag in timed_tags:
        end_handlers[tag] = self.display_timer
    end_handlers[DATATYPES] = self.process_deferred
    end_handlers[PROGRAM] = self.display_total_time

    self.callbacks = {
        'start' : start_handlers,
        'end'   : end_handlers,
    }
def import_xml(self):
    """
    Imports the XML PROGRAM file into the database.

    The document is streamed with iterparse; every 'start' and 'end'
    event whose tag is registered in self.callbacks is dispatched to
    the matching handler.

    Returns:
        1 once the whole document has been parsed, None when no input
        file name is available.

    Raises:
        Cancelled: the user cancelled the import from the wait box.
    """
    # event/element are deliberately kept as module globals
    global event, element
    self.display_version('Importer' if self.plugin else 'Loader')
    self.get_options(not self.autorun)
    if self.plugin:
        self.filename = ida_kernwin.ask_file(0, "*.xml",
                                             "Enter name of xml file:")
    else:
        self.filename = idc.get_input_file_path()
    if not self.filename:
        return
    idc.msg('\nImporting from: ' + self.filename + '\n')
    if not self.plugin:
        ida_kernwin.hide_wait_box()
    ida_kernwin.show_wait_box("Importing XML PROGRAM document....")
    n = 0
    for event, element in cElementTree.iterparse(self.filename,
                                                 events=("start", "end")):
        if ida_kernwin.user_cancelled():
            raise Cancelled
        if self.debug and event == 'start':
            msg = ''
            if element.tag is not None:
                msg += str(element.tag) + ' '
            if element.attrib is not None:
                msg += str(element.attrib) + ' '
            if element.text is not None:
                msg += str(element.text)
            if msg:
                idc.msg('\n' + msg)
        handlers = self.callbacks.get(event)
        if handlers is not None and element.tag in handlers:
            if event == 'start':
                self.timers[element.tag] = time.clock()
            handlers[element.tag](element)
            # only elements consumed by a handler are released; child
            # elements must survive until their parent's handler ran
            if event == 'end':
                element.clear()
        if event == 'end':
            n += 1
    ida_kernwin.hide_wait_box()
    self.display_summary('Import' if self.plugin else "Load")
    idc.msg('\nXML Elements parsed: ' + str(n) + '\n\n')
    return 1
def get_options(self, display):
    """
    Builds the import options form and optionally shows it.

    Every checkbox is pre-selected, so a non-interactive run imports
    everything.

    Args:
        display: True to show the dialog to the user, False to only
            create the form with its defaults.

    Raises:
        Cancelled: the dialog was shown and Execute() returned 0.
    """
    checkbox_names = (
        "CodeBlocks",
        "EntryPoints",
        "RegisterValues",
        "DataTypes",
        "DataDefinitions",
        "Symbols",
        "Comments",
        "Bookmarks",
        "Functions",
        "MemoryReferences",
        "EquateReferences",
        "Manual",
    )
    fmt = "".join([
        "HELP\n",
        "XML PROGRAM loader/importer plugin (Python)\n",
        "IDA SDK: " + str(IDA_SDK_VERSION) + "\n\n",
        "The XML PROGRAM loader loads elements from a ",
        "XML <PROGRAM> document to create an idb database.\n\n",
        "ENDHELP\n",
        "Import from XML PROGRAM document....",
        "\n <##Options##Code Blocks:{CodeBlocks}>",
        "\n <Entry Points:{EntryPoints}>",
        "\n <Segment Register Value Ranges:{RegisterValues}>",
        "\n <Data Types:{DataTypes}>",
        "\n <Data Definitions:{DataDefinitions}>",
        "\n <Symbols:{Symbols}>",
        "\n <Comments:{Comments}>",
        "\n <Bookmarks:{Bookmarks}>",
        "\n <Functions:{Functions}>",
        "\n <Memory References:{MemoryReferences}>",
        "\n <Equate/Enum References:{EquateReferences}>",
        "\n <Manual Instructions/Operands:{Manual}>{cGroup1}>",
        "\n\n",
    ])
    controls = {'cGroup1': ida_kernwin.Form.ChkGroupControl(checkbox_names)}
    self.options = ida_kernwin.Form(fmt, controls)
    self.options.Compile()
    # default: everything is imported
    for checkbox in checkbox_names:
        getattr(self.options, checkbox).checked = True
    if display and self.options.Execute() == 0:
        raise Cancelled
def display_timer(self, element):
    """
    Prints the time elapsed since the start event of an element.

    Nothing is printed for MEMORY_MAP when running as a plugin, or when
    no start time was recorded for the tag.

    Args:
        element: XML element object whose tag selects the timer.
    """
    tag = element.tag
    if tag == MEMORY_MAP and self.plugin:
        return
    if tag not in self.timers:
        return
    elapsed = time.clock() - self.timers[tag]
    idc.msg('elapsed time: %.4f' % elapsed)
def display_total_time(self, element):
    """
    Prints the time elapsed since the PROGRAM start event.

    Args:
        element: XML element object value (not used).
    """
    label = 'Total '
    elapsed = time.clock() - self.timers[PROGRAM]
    idc.msg('\n%35selapsed time: %.4f' % (label, elapsed))
def get_address(self, element, attr):
    """
    Converts an address attribute into a numeric address.

    Three textual forms are recognised:
      * 'space::...'  overlay address, not supported -> BADADDR
      * 'seg:offset'  hex segment and hex offset -> (seg << 4) + offset;
                      a non-hex segment part (named address space) is
                      not supported -> BADADDR
      * 'hex'         plain hexadecimal value

    Args:
        element: XML element object.
        attr: String containing the address attribute name.

    Returns:
        Numeric value representing the address, or BADADDR.
    """
    text = element.get(attr)
    if '::' in text:
        # overlayed addresses not currently handled
        return BADADDR
    if ':' not in text:
        return int(text, 16)
    segment_text, offset_text = text.split(':')
    offset = int(offset_text, 16)
    if not self.is_int(segment_text):
        # multiple address spaces not currently implemented
        return BADADDR
    return (int(segment_text, 16) << 4) + offset
def get_attribute(self, element, attr):
    """
    Looks up an attribute on an XML element.

    Args:
        element: XML element object.
        attr: String containing the attribute name.

    Returns:
        The attribute value string, or None when the element does not
        carry the attribute.
    """
    value = element.get(attr)
    return value
def get_attribute_value(self, element, attr):
    """
    Returns the numeric attribute value.

    Values starting with '0x' or '-0x' (any case) are parsed as
    hexadecimal, everything else as decimal.

    Args:
        element: XML element object.
        attr: String containing the attribute name.

    Returns:
        Numeric value representing the attribute value, or 0 when the
        attribute is missing or cannot be decoded (a message is written
        to the IDA output window in that case).
    """
    val = element.get(attr)
    try:
        if val.upper().startswith(('0X', '-0X')):
            return int(val, 16)
        return int(val)
    except (AttributeError, TypeError, ValueError):
        # val is None when the attribute is absent; str() keeps the
        # error report itself from raising on 'str + None'
        idc.msg('\nUnable to decode string as value: ' + str(val))
        return 0
def get_cbsize(self):
    """
    Computes the size of one addressable code byte for the processor.

    Returns:
        Integer number of 8-bit bytes needed to hold the value reported
        by ida_idp.ph_get_cnbits(), rounded up.
    """
    code_bits = ida_idp.ph_get_cnbits()
    return (code_bits + 7) // 8
def get_datatype_flags(self, datatype: str, size):
    """
    Maps a datatype name to the IDA flags bitmask for that type.

    Built-in types are matched case-insensitively by name prefix, in
    the order listed below; string, enumeration and structure types are
    checked afterwards.

    Args:
        datatype: String representing the datatype.
        size: Integer representing the datatype size (not used).

    Returns:
        Integer representing the bitmask, 0 when the type is unknown.
    """
    # (name prefix, name of the ida_bytes flag function)
    builtin_prefixes = (
        ("byte", "byte_flag"),
        ("word", "word_flag"),
        ("dword", "dword_flag"),
        ("qword", "qword_flag"),
        ("oword", "oword_flag"),
        ("tbyte", "tbyte_flag"),
        ("float", "float_flag"),
        ("double", "double_flag"),
        ("packed", "packreal_flag"),
    )
    lowered = datatype.lower()
    for prefix, flag_function in builtin_prefixes:
        if lowered.startswith(prefix):
            return getattr(ida_bytes, flag_function)()
    if self.is_string_type(datatype):
        return ida_bytes.strlit_flag()
    if self.is_enumeration(datatype):
        return ida_bytes.enum_flag()
    if self.is_structure(datatype):
        return ida_bytes.stru_flag()
    return 0
def get_string_type(self, datatype: str) -> int:
    """
    Selects the IDA string type constant for a string datatype name.

    The name is compared case-insensitively.

    Args:
        datatype: String representing the datatype.

    Returns:
        One of the ida_nalt.STRTYPE_* constants.
    """
    lowered = datatype.lower()
    if lowered == 'mbcstring':
        return ida_nalt.STRTYPE_C_16
    is_pascal = 'pascal' in lowered
    if 'unicode' in lowered:
        if is_pascal:
            return ida_nalt.STRTYPE_LEN2_16
        return ida_nalt.STRTYPE_C_16
    if is_pascal:
        # NOTE(review): a non-unicode pascal string maps to the 16-bit
        # C string type here; looks unintended - confirm
        return ida_nalt.STRTYPE_C_16
    return ida_nalt.STRTYPE_TERMCHR
def has_attribute(self, element, attr):
    """
    Tests whether an XML element carries the named attribute.

    Args:
        element: XML element object
        attr: String containing name of the attribute

    Returns:
        True if the element contains the named attribute, otherwise False.
    """
    attributes = element.attrib
    return attr in attributes
def is_enumeration(self, datatype: str) -> bool:
    """
    Tests whether the datatype names an enumeration in the database.

    Args:
        datatype: String representing the datatype.

    Returns:
        True if idc.get_enum() finds an enumeration with that name,
        otherwise False.
    """
    enum_id = idc.get_enum(datatype)
    return enum_id != BADNODE
def is_int(self, s) -> bool:
    """
    Tests whether a value can be parsed as a hexadecimal integer.

    Args:
        s: Value to test, normally a string.

    Returns:
        True if int(s, 16) succeeds, otherwise False.
    """
    try:
        int(s, 16)
    except Exception:
        return False
    else:
        return True
def is_pointer_type(self, dtype) -> bool:
    """
    Tests whether the datatype name denotes a pointer.

    A name is treated as a pointer when it starts with 'pointer'
    (any case) or ends with '*'.

    Args:
        dtype: String representing the datatype.

    Returns:
        True if the datatype represents a pointer, otherwise False.
    """
    if dtype.lower().startswith("pointer"):
        return True
    return dtype.endswith('*')
def is_string_type(self, datatype) -> bool:
    """
    Tests whether the datatype name denotes a string type.

    A name is treated as a string type when it starts with 'unicode'
    or 'string' (any case).

    Args:
        datatype: String representing the datatype.

    Returns:
        True if the datatype represents a string, otherwise False.
    """
    lowered = datatype.lower()
    return lowered.startswith(("unicode", "string"))
def is_structure(self, datatype) -> bool:
    """
    Tests whether the datatype names a structure in the database.

    Args:
        datatype: String representing the datatype.

    Returns:
        True if idc.get_struc_id() finds a structure with that name,
        otherwise False.
    """
    struct_id = idc.get_struc_id(datatype)
    return struct_id != BADNODE
def import_address_range(self, address_range):
    """
    Processes ADDRESS_RANGE element.

    Args:
        address_range: XML element object containing start and end
            address attributes for the address range.

    Returns:
        Tuple containing two integers, the start and end address values.
    """
    bounds = (self.get_address(address_range, START),
              self.get_address(address_range, END))
    self.update_counter(ADDRESS_RANGE)
    return bounds
def import_bit_mask(self, bitmask, eid):
    """
    Processes a BIT_MASK element as an enum bitmask member.

    Names the bitmask, then attaches the optional regular and
    repeatable comments to the enum member of the same name.

    Args:
        bitmask: XML element object representing the IDA enum bitmask.
        eid: Integer representing the IDA enum id
    """
    mask_name = self.get_attribute(bitmask, NAME)
    mask_value = self.get_attribute_value(bitmask, VALUE)
    idc.set_bmask_name(eid, mask_value, mask_name)
    member_id = idc.get_enum_member_by_name(mask_name)
    self.update_counter(BIT_MASK)
    for cmt_tag, repeatable in ((REGULAR_CMT, False), (REPEATABLE_CMT, True)):
        cmt = bitmask.find(cmt_tag)
        if cmt is None:
            continue
        idc.set_enum_member_cmt(member_id, cmt.text, repeatable)
        self.update_counter(BIT_MASK + ':' + cmt_tag)
def import_bookmark(self, bookmark):
    """
    Processes a BOOKMARK element.

    The bookmark is stored in the first slot for which
    idc.get_bookmark() returns BADADDR. Any exception is reported on
    the console and swallowed.

    Args:
        bookmark: XML element object containing bookmark data.
    """
    if not self.options.Bookmarks.checked:
        return
    try:
        addr = self.get_address(bookmark, ADDRESS)
        # TYPE and CATEGORY are read but not stored anywhere yet
        if self.has_attribute(bookmark, TYPE):
            bookmark_type = self.get_attribute(bookmark, TYPE)
        bookmark_category = ''
        if self.has_attribute(bookmark, CATEGORY):
            bookmark_category = self.get_attribute(bookmark, CATEGORY)
        description = ''
        if self.has_attribute(bookmark, DESCRIPTION):
            description = self.get_attribute(bookmark, DESCRIPTION)
        if not idc.is_mapped(addr):
            print("import_bookmark: address %X not enabled in database"
                  % addr)
            return
        self.update_counter(BOOKMARK)
        for slot in range(ida_moves.MAX_MARK_SLOT):
            if idc.get_bookmark(slot) != BADADDR:
                continue
            idc.put_bookmark(addr, 0, 0, 0, slot, description)
            break
    except Exception as e:
        banner = "** Exception occurred in import_bookmark **"
        print(f"\n{banner}\n{type(e).__name__}: {e}")
def import_cmts(self, element, sid, typ):
    """
    Processes REGULAR_CMT and REPEATABLE_CMT elements for structures.

    Args:
        element: XML element object that may contain a REGULAR_CMT
            and/or REPEATABLE_CMT child element
        sid: Integer representing the structure id
        typ: String indicating structure type (STRUCTURE or UNION),
            used as prefix of the counter name
    """
    for cmt_tag, repeatable in ((REGULAR_CMT, False), (REPEATABLE_CMT, True)):
        cmt = element.find(cmt_tag)
        if cmt is None:
            continue
        idc.set_struc_cmt(sid, cmt.text, repeatable)
        self.update_counter(typ + ':' + cmt_tag)
def import_codeblock(self, code_block):
    """
    Processes a CODE_BLOCK element by disassembling the address range.

    Existing items in [START, END] are deleted first, then instructions
    are created one after another until the cursor passes END.

    Args:
        code_block: XML element containing codeblock start and end
            addresses (END is treated as inclusive).
    """
    if not self.options.CodeBlocks.checked:
        return
    first = self.get_address(code_block, START)
    last = self.get_address(code_block, END)
    # flag value 3 is passed through unchanged
    # (presumably DELIT_EXPAND | DELIT_DELNAMES - confirm)
    ida_bytes.del_items(first, 3, last - first + 1)
    cursor = first
    while cursor <= last:
        ida_ua.create_insn(cursor)
        cursor += ida_bytes.get_item_size(cursor) * self.get_cbsize()
    self.update_counter(CODE_BLOCK)
def import_comment(self, comment):
    """
    Processes a COMMENT element by creating the comment at the address.

    Handled comment types: 'pre', 'post' (extra comment lines),
    'end-of-line' (regular comment) and 'repeatable'. Other types are
    only counted.

    Args:
        comment: XML element containing the comment address, type,
            and text.
    """
    if not self.options.Comments.checked:
        return
    addr = self.get_address(comment, ADDRESS)
    ctype = self.get_attribute(comment, TYPE)
    text = comment.text
    writers = {
        'pre': lambda: ida_lines.add_extra_cmt(addr, True, text),
        'end-of-line': lambda: idc.set_cmt(addr, text, False),
        'repeatable': lambda: idc.set_cmt(addr, text, True),
        'post': lambda: ida_lines.add_extra_cmt(addr, False, text),
    }
    write = writers.get(ctype)
    if write is not None:
        write()
    self.update_counter(COMMENT+':' + ctype)
def import_compiler(self, compiler):
    """
    Processes the COMPILER element containing the compiler name.

    The compiler id is only applied when running as a loader; unknown
    names select idc.COMP_UNK.

    Args:
        compiler: XML element containing the compiler name.
    """
    name = self.get_attribute(compiler, NAME)
    self.update_counter(COMPILER)
    if self.plugin:
        return
    # compiler name -> name of the ida_typeinf constant
    known_compilers = {
        "Visual C++": "COMP_MS",
        "Borland C++": "COMP_BC",
        "Watcom C++": "COMP_WATCOM",
        "GNU C++": "COMP_GNU",
        "Visual Age C++": "COMP_VISAGE",
        "Delphi": "COMP_BP",
    }
    comp = idc.COMP_UNK
    constant = known_compilers.get(name)
    if constant is not None:
        comp = getattr(ida_typeinf, constant)
    ida_typeinf.set_compiler_id(comp)
def import_defined_data(self, defined_data):
    """
    Processes a DEFINED_DATA element by creating a data item at the
    specified address.

    Strings and structures use their dedicated creation calls; every
    other type goes through idc.create_data(). A TYPEINFO_CMT child is
    counted but not applied.

    Args:
        defined_data: XML element containing the address and
            datatype information for the data item
    """
    if not self.options.DataDefinitions.checked:
        return
    addr = self.get_address(defined_data, ADDRESS)
    datatype = self.get_attribute(defined_data, DATATYPE)
    size = self.get_attribute_value(defined_data, SIZE)
    self.update_counter(DEFINED_DATA)
    # operand info object is created but not applied to the item
    ti = ida_nalt.opinfo_t()
    if self.is_pointer_type(datatype):
        # pointers become dword offsets
        flag = ida_bytes.dword_flag() | idc.FF_0OFF
    else:
        flag = self.get_datatype_flags(datatype, size)
    if flag == ida_bytes.strlit_flag():
        string_type = self.get_string_type(datatype)
        ida_bytes.create_strlit(addr, size, string_type)
    elif flag == ida_bytes.stru_flag():
        idc.create_struct(addr, size, datatype)
    else:
        idc.create_data(addr, flag, size, BADNODE)
    if defined_data.find(TYPEINFO_CMT) is not None:
        self.update_counter(DEFINED_DATA + ':' + TYPEINFO_CMT)
def import_description(self, description):
    """
    Processes the DESCRIPTION element.

    The element is only counted; its text is not stored.

    Args:
        description: DESCRIPTION XML element.
    """
    # TODO: import_description: decide what to do with DESCRIPTION
    self.update_counter(DESCRIPTION)
def import_enum(self, enum):
    """
    Processes an ENUM element by creating the enumeration.

    The enumeration is created with hex display and dword flags, its
    comments are applied and every ENUM_ENTRY child becomes a member.

    Args:
        enum: XML element containing the enumeration name and
            member data.
    """
    if not self.options.DataTypes.checked:
        return
    name = self.get_attribute(enum, NAME)
    # NAMESPACE and SIZE are read but not used yet
    if self.has_attribute(enum, NAMESPACE):
        namespace = self.get_attribute(enum, NAMESPACE)
    if self.has_attribute(enum, SIZE):
        size = self.get_attribute_value(enum, SIZE)
    enum_flags = ida_bytes.hex_flag() | ida_bytes.dword_flag()
    eid = idc.add_enum(BADNODE, name, enum_flags)
    self.update_counter(ENUM)
    for cmt_tag, repeatable in ((REGULAR_CMT, False), (REPEATABLE_CMT, True)):
        cmt = enum.find(cmt_tag)
        if cmt is not None:
            idc.set_enum_cmt(eid, cmt.text, repeatable)
            self.update_counter(ENUM + ':' + cmt_tag)
    if enum.find(DISPLAY_SETTINGS) is not None:
        self.update_counter(ENUM + ':' + DISPLAY_SETTINGS)
    for entry in enum.findall(ENUM_ENTRY):
        self.import_enum_entry(entry, eid)
def import_enum_entry(self, enum_entry, eid: int):
    """
    Processes an ENUM_ENTRY by creating a member in the enumeration.

    Args:
        enum_entry: XML element containing the member name and value.
        eid: Integer representing the id of the enumeration.
    """
    member_name = self.get_attribute(enum_entry, NAME)
    member_value = self.get_attribute_value(enum_entry, VALUE)
    idc.add_enum_member(eid, member_name, member_value)
    member_id = idc.get_enum_member_by_name(member_name)
    self.update_counter(ENUM_ENTRY)
    for cmt_tag, repeatable in ((REGULAR_CMT, False), (REPEATABLE_CMT, True)):
        cmt = enum_entry.find(cmt_tag)
        if cmt is None:
            continue
        idc.set_enum_member_cmt(member_id, cmt.text, repeatable)
        self.update_counter(ENUM_ENTRY + ':' + cmt_tag)
def import_equate(self, equate, eid):
    """
    Processes EQUATE element as member of an enumeration.

    Args:
        equate: XML element containing the equate name, value and an
            optional BIT_MASK attribute (-1 is used when absent).
        eid: Integer representing the id for the enumeration.
    """
    member_name = self.get_attribute(equate, NAME)
    member_value = self.get_attribute_value(equate, VALUE)
    if self.has_attribute(equate, BIT_MASK):
        mask = self.get_attribute_value(equate, BIT_MASK)
    else:
        mask = -1
    idc.add_enum_member(eid, member_name, member_value, mask)
    member_id = idc.get_enum_member_by_name(member_name)
    self.update_counter(EQUATE)
    for cmt_tag, repeatable in ((REGULAR_CMT, False), (REPEATABLE_CMT, True)):
        cmt = equate.find(cmt_tag)
        if cmt is None:
            continue
        idc.set_enum_member_cmt(member_id, cmt.text, repeatable)
        self.update_counter(EQUATE + ':' + cmt_tag)
def import_equate_group(self, equate_group):
    """
    Processes EQUATE_GROUP as IDA enumeration type.

    Creates the enumeration, marks it as a bitfield when BIT_FIELD is
    'yes', applies the comments, then imports the EQUATE and BIT_MASK
    children.

    Args:
        equate_group: XML element containing the group name and
            equate definitions.
    """
    if not self.options.DataTypes.checked:
        return
    group_name = ''
    if self.has_attribute(equate_group, NAME):
        group_name = self.get_attribute(equate_group, NAME)
    bit_field = ''
    if self.has_attribute(equate_group, BIT_FIELD):
        bit_field = self.get_attribute(equate_group, BIT_FIELD)
    eid = idc.add_enum(BADADDR, group_name, ida_bytes.hex_flag())
    idc.set_enum_bf(eid, (bit_field == 'yes'))
    self.update_counter(EQUATE_GROUP)
    for cmt_tag, repeatable in ((REGULAR_CMT, False), (REPEATABLE_CMT, True)):
        cmt = equate_group.find(cmt_tag)
        if cmt is not None:
            idc.set_enum_cmt(eid, cmt.text, repeatable)
            self.update_counter(EQUATE_GROUP + ':' + cmt_tag)
    for equate in equate_group.findall(EQUATE):
        self.import_equate(equate, eid)
    for bit_mask in equate_group.findall(BIT_MASK):
        self.import_bit_mask(bit_mask, eid)
def import_equate_reference(self, equate_reference):
    """
    Processes an EQUATE_REFERENCE by displaying an operand as an enum.

    The enum is found through the enum member named by the NAME
    attribute; references without a name or with an unknown member are
    counted but otherwise ignored.

    Args:
        equate_reference: XML element containing the address, the
            member name and optionally the operand index and value.
    """
    options = self.options
    if not (options.DataTypes.checked and options.EquateReferences.checked):
        return
    self.update_counter(EQUATE_REFERENCE)
    addr = self.get_address(equate_reference, ADDRESS)
    member_name = ''
    if self.has_attribute(equate_reference, NAME):
        member_name = self.get_attribute(equate_reference, NAME)
    if member_name == '':
        return
    operand = 0
    if self.has_attribute(equate_reference, OPERAND_INDEX):
        operand = self.get_attribute_value(equate_reference, OPERAND_INDEX)
    # VALUE is read but not used yet
    value = None
    if self.has_attribute(equate_reference, VALUE):
        value = self.get_attribute_value(equate_reference, VALUE)
    member_id = idc.get_enum_member_by_name(member_name)
    if member_id == BADNODE:
        return
    enum_id = idc.get_enum_member_enum(member_id)
    if enum_id == BADNODE:
        return
    idc.op_enum(addr, operand, enum_id, 0)
def import_function(self, function):
    """
    Creates a function using the FUNCTION attributes.

    After the function is created, its address ranges are appended as
    function tails, comments are applied, and the STACK_FRAME and
    REGISTER_VAR children are imported. Any exception is reported on
    the console and swallowed.

    Args:
        function: XML element containing the function address and
            attributes.
    """
    if not self.options.Functions.checked:
        return
    try:
        entry_point = self.get_address(function, ENTRY_POINT)
        libfunc = 'n'
        if self.has_attribute(function, LIBRARY_FUNCTION):
            libfunc = self.get_attribute(function, LIBRARY_FUNCTION)
        if not idc.is_mapped(entry_point):
            msg = ("import_function: address %X not enabled in database"
                   % entry_point)
            print(msg)
            return
        idc.add_func(entry_point, BADADDR)
        self.update_counter(FUNCTION)
        func = ida_funcs.get_func(entry_point)
        if func is None:
            # add_func failed and no function covers the entry point;
            # nothing below can be applied
            print("import_function: unable to create function at %X"
                  % entry_point)
            return
        if libfunc == 'y':
            func.flags |= idc.FUNC_LIB
            # write the modified func_t back to the database
            ida_funcs.update_func(func)
        for addr_range in function.findall(ADDRESS_RANGE):
            (start, end) = self.import_address_range(addr_range)
            ida_funcs.append_func_tail(func, start, end)
        # TODO: auto_wait is probably not needed...
        if AUTO_WAIT:
            ida_auto.auto_wait()
        regcmt = function.find(REGULAR_CMT)
        if regcmt is not None:
            self.update_counter(FUNCTION + ':' + REGULAR_CMT)
            ida_funcs.set_func_cmt(func, regcmt.text, False)
        rptcmt = function.find(REPEATABLE_CMT)
        if rptcmt is not None:
            self.update_counter(FUNCTION + ':' + REPEATABLE_CMT)
            ida_funcs.set_func_cmt(func, rptcmt.text, True)
        typecmt = function.find(TYPEINFO_CMT)
        if typecmt is not None:
            # TODO: TYPECMTs are counted but not applied
            self.update_counter(FUNCTION + ':' + TYPEINFO_CMT)
        sf = function.find(STACK_FRAME)
        if sf is not None:
            self.import_stack_frame(sf, func)
        for register_var in function.findall(REGISTER_VAR):
            self.import_register_var(register_var, func)
    except Exception as e:
        msg = "** Exception occurred in import_function **"
        print(f"\n{msg}\n{type(e).__name__}: {e}")
def import_function_def(self, function_def):
    """
    Counts a FUNCTION_DEF element (import is NOT IMPLEMENTED).

    Args:
        function_def: FUNCTION_DEF XML element (not used).
    """
    if self.options.DataTypes.checked:
        self.update_counter(FUNCTION_DEF)
def import_info_source(self, info_source):
    """
    Processes INFO_SOURCE containing information about the
    source of the XML PROGRAM file.

    The TOOL, USER, FILE and TIMESTAMP attributes are read when present
    but not stored; the element is only counted.

    Args:
        info_source: XML element containing attributes that identify
            the source of the PROGRAM data.
    """
    source_info = {}
    for attr in (TOOL, USER, FILE, TIMESTAMP):
        if self.has_attribute(info_source, attr):
            source_info[attr] = self.get_attribute(info_source, attr)
    self.update_counter(INFO_SOURCE)
def import_manual_instruction(self, manual_instruction):
    """
    Creates a manual instruction.

    The element text replaces the instruction text at the element's
    address.

    Args:
        manual_instruction: XML element containing MANUAL_INSTRUCTION.
    """
    if not self.options.Manual.checked:
        return
    target = self.get_address(manual_instruction, ADDRESS)
    replacement = manual_instruction.text
    idc.set_manual_insn(target, replacement)
    self.update_counter(MANUAL_INSTRUCTION)
def import_manual_operand(self, manual_operand):
    """
    Creates a manual operand at an address.

    The operand is only forced, and the element only counted, when the
    address is mapped in the database.

    Args:
        manual_operand: MANUAL_OPERAND XML element.
    """
    if not self.options.Manual.checked:
        return
    target = self.get_address(manual_operand, ADDRESS)
    operand_index = self.get_attribute_value(manual_operand, OPERAND_INDEX)
    if not idc.is_mapped(target):
        return
    ida_bytes.set_forced_operand(target, operand_index, manual_operand.text)
    self.update_counter(MANUAL_OPERAND)
def process_deferred(self, element):
    """
    Processes the list of deferred structure members when the
    DATATYPES end element is encountered, then reports the elapsed
    time for DATATYPES.

    Args:
        element: XML end element for DATATYPES
    """
    for pending in self.deferred:
        member, sptr = pending
        # second pass: do not defer again
        self.import_member(member, sptr, False)
    self.display_timer(element)
def import_member(self, member, sptr, defer=True):
    """
    Creates a member for a structure.

    Args:
        member: MEMBER XML element.
        sptr: structure handle, passed through to
            idc.add_struc_member() and get_member().
        defer: boolean indicating if processing a member should be
            deferred when the type is unknown. A member should
            only be deferred on the first pass, not when processing
            the deferred list.
    """
    offset = self.get_attribute_value(member, OFFSET)
    datatype = self.get_attribute(member, DATATYPE)
    name = ''
    if self.has_attribute(member, NAME):
        name = self.get_attribute(member, NAME)
    size = 0
    if self.has_attribute(member, SIZE):
        size = self.get_attribute_value(member, SIZE)
    ti = ida_nalt.opinfo_t()
    if self.is_pointer_type(datatype):
        # pointers become dword offsets without a base
        flag = ida_bytes.dword_flag() | idc.FF_0OFF
        r = ida_nalt.refinfo_t()
        r.init(ida_nalt.get_reftype_by_size(4) | ida_nalt.REFINFO_NOBASE)
        ti.ri = r
    else:
        flag = self.get_datatype_flags(datatype, size)
    if flag == 0 and defer:
        # type not known yet; retried when DATATYPES ends
        self.deferred.append((member, sptr))
        return
    if flag == ida_bytes.enum_flag():
        ti.ec.tid = idc.get_enum(datatype)
        # 'member' is an XML element and carries no enum member
        # value/name, so no particular serial can be looked up;
        # the first serial is used
        ti.ec.serial = 0
    if flag == ida_bytes.stru_flag():
        ti.tid = idc.get_struc_id(datatype)
    idc.add_struc_member(sptr, name, offset, flag, ti, size)
    mbr = get_member(sptr, offset)
    self.import_member_cmts(member, mbr)
    self.update_counter(MEMBER)
def import_member_cmts(self, member, mbr: ida_typeinf.udm_t):
    """
    Processes REGULAR_CMT and REPEATABLE_CMT elements for members.

    Args:
        member: XML element object that may contain a REGULAR_CMT
            and/or REPEATABLE_CMT child element
        mbr: the structure member the comments are attached to
    """
    # NOTE(review): the id passed to set_member_cmt is the tid of the
    # member's own type and the offset is taken from the udm_t as-is;
    # confirm both are what idc.set_member_cmt expects
    for cmt_tag, repeatable in ((REGULAR_CMT, False), (REPEATABLE_CMT, True)):
        cmt = member.find(cmt_tag)
        if cmt is None:
            continue
        idc.set_member_cmt(mbr.type.get_tid(), mbr.offset, cmt.text, repeatable)
        self.update_counter(MEMBER + ':' + cmt_tag)
def import_members(self, element, sptr):
    """
    Add data members to a structure.

    Args:
        element: STRUCTURE XML element containing MEMBER sub-elements.
        sptr: structure handle handed to import_member() unchanged.
    """
    for child in element.findall(MEMBER):
        self.import_member(child, sptr)
def import_memory_contents(self, memory_contents, start, size):
    """
    Processes MEMORY_CONTENTS to load data for a memory block.

    The bytes are read from the file named by FILE_NAME, which is
    looked up in the directory of the XML file being imported.

    Args:
        memory_contents: MEMORY_CONTENTS XML element.
        start: default load address, used when START_ADDR is absent.
        size: default byte count, used when LENGTH is absent.
    """
    if memory_contents.get(START_ADDR) is None:
        saddr = start
    else:
        saddr = self.get_address(memory_contents, START_ADDR)
    fname = self.get_attribute(memory_contents, FILE_NAME)
    offset = self.get_attribute_value(memory_contents, FILE_OFFSET)
    if memory_contents.get(LENGTH) is None:
        length = size
    else:
        length = self.get_attribute_value(memory_contents, LENGTH)
    # join() keeps the path relative when the XML file name has no
    # directory part (plain concatenation produced '/<fname>')
    binfilename = os.path.join(os.path.dirname(self.filename), fname)
    binfile = ida_idaapi.loader_input_t()
    if binfile.open(binfilename):
        try:
            binfile.file2base(offset, saddr, saddr + length, False)
        finally:
            binfile.close()
    else:
        idc.msg('\nUnable to open memory contents file: ' + binfilename)
    self.update_counter(MEMORY_CONTENTS)
def import_memory_map(self, memory_map):
    """
    Processes the MEMORY_MAP element.

    MEMORY_MAP is only processed by the IDA loader. It is ignored when
    run as an IDA plugin.

    Args:
        memory_map: MEMORY_MAP XML element.
    """
    # import memory sections only when run as loader
    if not self.plugin:
        self.update_import(memory_map)
def import_memory_reference(self, memory_reference):
    """
    Processes the MEMORY_REFERENCE element.

    Currently nothing is done with MEMORY_REFERENCEs: the attributes
    are parsed and the element is counted, but no cross reference is
    created.

    Args:
        memory_reference: MEMORY_REFERENCE XML element.
    """
    if not self.options.MemoryReferences.checked:
        return
    # optional attributes default to None
    user = None
    op = None
    primary = None
    base_addr = None
    addr = self.get_address(memory_reference, ADDRESS)
    if self.has_attribute(memory_reference, OPERAND_INDEX):
        op = self.get_attribute_value(memory_reference, OPERAND_INDEX)
    if self.has_attribute(memory_reference, USER_DEFINED):
        user = self.get_attribute(memory_reference, USER_DEFINED)
    to_addr = self.get_address(memory_reference, TO_ADDRESS)
    if self.has_attribute(memory_reference, BASE_ADDRESS):
        base_addr = self.get_address(memory_reference, BASE_ADDRESS)
    if self.has_attribute(memory_reference, PRIMARY):
        primary = self.get_attribute(memory_reference, PRIMARY)
    self.update_counter(MEMORY_REFERENCE)
    # TODO: import_memory_reference: store refs? maybe only user-defined
    # ones (user == 'y')?
def import_memory_section(self, memory_section):
    """
    Creates a memory segment in the database.

    MEMORY_SECTION is only processed by the IDA loader. It is ignored
    when run as an IDA plugin.

    Args:
        memory_section: MEMORY_SECTION XML element.

    Raises:
        MultipleAddressSpacesNotSupported: START_ADDR has the form
            'name:offset' with a non-hexadecimal name.
    """
    # TODO: import_memory_section - handle overlays?
    # import memory sections only when run as loader
    if self.plugin:
        return
    name = self.get_attribute(memory_section, NAME)
    length = self.get_attribute_value(memory_section, LENGTH)
    s = ida_segment.segment_t()
    addrstr = self.get_attribute(memory_section, START_ADDR)
    if '::' in addrstr:
        # overlay - skip for now
        print(' ** Overlayed memory block %s skipped ** ' % name)
        msg = 'Overlayed memory block %s skipped!' % name
        msg += "\n\nXML Import does not currently support"
        msg += "\noverlayed memory blocks."
        idc.warning(msg)
        return
    elif ':' in addrstr:
        [seg_str, offset_str] = str.split(addrstr, ':')
        # parsed only to reject a malformed offset early
        int(offset_str, 16)
        if not self.is_int(seg_str):
            raise MultipleAddressSpacesNotSupported
        sel = ida_segment.setup_selector(int(seg_str, 16))
        start = self.get_address(memory_section, START_ADDR)
    else:
        sel = ida_segment.allocate_selector(0)
        start = self.get_address(memory_section, START_ADDR)
    s.sel = sel
    s.start_ea = start
    s.end_ea = start + length
    s.bitness = self.addr_mode
    perms = ''
    if self.has_attribute(memory_section, PERMISSIONS):
        perms = self.get_attribute(memory_section, PERMISSIONS)
    s.perm = 0
    if 'r' in perms:
        s.perm |= ida_segment.SEGPERM_READ
    if 'w' in perms:
        s.perm |= ida_segment.SEGPERM_WRITE
    if 'x' in perms:
        s.perm |= ida_segment.SEGPERM_EXEC
    ida_segment.add_segm_ex(s, name, "",
                            idc.ADDSEG_OR_DIE | idc.ADDSEG_QUIET)
    self.update_counter(MEMORY_SECTION)
    for memory_contents in memory_section.findall(MEMORY_CONTENTS):
        self.import_memory_contents(memory_contents, start, length)
def import_processor(self, processor):
    """
    Processes the PROCESSOR element.

    When running as a loader, the ADDRESS_MODEL attribute ('16-bit',
    '32-bit' or '64-bit', any case) selects self.addr_mode and the
    flat / 64-bit loader flags. Other models leave everything as is.

    Args:
        processor: PROCESSOR XML element.
    """
    name = self.get_attribute(processor, NAME)
    self.update_counter(PROCESSOR)
    if self.plugin:
        return
    address_model = self.get_attribute(processor, ADDRESS_MODEL)
    if address_model is None:
        return
    # model -> (addr_mode, LFLG_PC_FLAT value, LFLG_64BIT value)
    models = {
        '16-bit': (0, 0, 0),
        '32-bit': (1, 1, 0),
        '64-bit': (2, 1, 1),
    }
    settings = models.get(address_model.lower())
    if settings is None:
        return
    self.addr_mode, flat, is_64bit = settings
    idc.set_flag(idc.INF_LFLAGS, ida_ida.LFLG_PC_FLAT, flat)
    idc.set_flag(idc.INF_LFLAGS, ida_ida.LFLG_64BIT, is_64bit)
def import_program(self, program):
    """
    Processes the PROGRAM element.

    When running as a loader, this sets the root file name (EXE_PATH
    if present, otherwise NAME), the file format name, the image base
    and stores the original input MD5 in a dedicated netnode.

    Args:
        program: PROGRAM XML element.
    """
    self.update_status(PROGRAM)
    self.update_counter(PROGRAM)
    if self.plugin:
        return
    root_name = self.get_attribute(program, NAME)
    if self.has_attribute(program, EXE_PATH):
        root_name = self.get_attribute(program, EXE_PATH)
    idc.set_root_filename(root_name)
    if self.has_attribute(program, EXE_FORMAT):
        exe_format = self.get_attribute(program, EXE_FORMAT)
        root_node = ida_netnode.netnode('Root Node')
        root_node.supset(ida_nalt.RIDX_FILE_FORMAT_NAME, exe_format)
    if self.has_attribute(program, IMAGE_BASE):
        ida_nalt.set_imagebase(self.get_attribute_value(program, IMAGE_BASE))
    if self.has_attribute(program, INPUT_MD5):
        input_md5 = self.get_attribute(program, INPUT_MD5)
        # store original md5 in a special netnode
        md5_node = ida_netnode.netnode(INPUT_MD5, len(INPUT_MD5), True)
        md5_node.supset(ida_nalt.RIDX_MD5, input_md5)
def import_program_entry_point(self, program_entry_point):
    """
    Defines a program entry point.

    The address doubles as the entry ordinal and no name is given.

    Args:
        program_entry_point: PROGRAM_ENTRY_POINT XML element.
            Contains the entry point address.
    """
    if not self.options.EntryPoints.checked:
        return
    entry = self.get_address(program_entry_point, ADDRESS)
    idc.add_entry(entry, entry, "", True)
    self.update_counter(PROGRAM_ENTRY_POINT)
def import_register_value_range(self, register_value_range):
    """
    Defines the address range for a register value.

    Ranges for 'cs' are skipped, and only registers whose number lies
    within the processor's segment register range are applied.

    Args:
        register_value_range: REGISTER_VALUE_RANGE XML element.
            Contains the register, value, start address and range length.
    """
    if not self.options.RegisterValues.checked:
        return
    self.update_counter(REGISTER_VALUE_RANGE)
    reg = self.get_attribute(register_value_range, REGISTER)
    if reg == 'cs':
        return
    value = self.get_attribute_value(register_value_range, VALUE)
    addr = self.get_address(register_value_range, START_ADDRESS)
    # LENGTH is read but not used
    length = self.get_attribute_value(register_value_range, LENGTH)
    regnum = ida_idp.str2reg(reg)
    first_sreg = ida_idp.ph_get_reg_first_sreg()
    if regnum >= first_sreg and regnum <= ida_idp.ph_get_reg_last_sreg():
        ida_segregs.split_sreg_range(addr, regnum, value, idc.SR_user, True)
def import_register_var(self, register_var, func):
    """
    Defines a register variable for a function.

    The variable covers the whole function range. The DATATYPE and
    DATATYPE_NAMESPACE attributes are not applied.

    Args:
        register_var: REGISTER_VAR XML element.
            Contains register, variable name, and datatype.
        func: IDA function object
    """
    name = self.get_attribute(register_var, NAME)
    reg = self.get_attribute(register_var, REGISTER)
    # func_t exposes its bounds as start_ea/end_ea; the startEA/endEA
    # spellings are pre-7.0 names
    idc.define_local_var(func.start_ea, func.end_ea, reg, name)
    self.update_counter(REGISTER_VAR)
def import_stack_frame(self, stack_frame, func):
    """
    Processes a function's STACK_FRAME element.

    The frame layout attributes are parsed when present but are not
    applied; only the STACK_VAR child elements are imported.

    Args:
        stack_frame: STACK_FRAME element with STACK_VAR child elements.
        func: IDA function object, handed on to import_stack_var.
    """
    layout_attrs = (LOCAL_VAR_SIZE, PARAM_OFFSET, REGISTER_SAVE_SIZE,
                    RETURN_ADDR_SIZE, BYTES_PURGED)
    for attr in layout_attrs:
        if self.has_attribute(stack_frame, attr):
            # value is read and discarded
            self.get_attribute_value(stack_frame, attr)
    self.update_counter(STACK_FRAME)
    for child in stack_frame.findall(STACK_VAR):
        self.import_stack_var(child, func)
def import_stack_reference(self, stack_reference):
    """
    Counts a STACK_REFERENCE element.

    Importing stack references is NOT IMPLEMENTED; the element is only
    tallied.

    Args:
        stack_reference: STACK_REFERENCE XML element (unused).
    """
    self.update_counter(STACK_REFERENCE)
def import_stack_var(self, stack_var, func):
    """
    Processes STACK_VAR element.

    Stack variables are created by IDA's function analysis.
    Only the STACK_VAR NAME attribute is used to set the name for
    a stack variable at the specified stack/frame offset. The other
    attributes are parsed but not applied.

    Args:
        stack_var: STACK_VAR XML element.
        func: IDA function object owning the frame.
    """
    sp_relative = self.get_attribute_value(stack_var, STACK_PTR_OFFSET)
    self.get_attribute(stack_var, DATATYPE)  # read, not applied
    member_offset = sp_relative + func.frsize + func.frregs
    if self.has_attribute(stack_var, FRAME_PTR_OFFSET):
        # a frame-pointer-relative offset, when given, takes precedence
        member_offset = (
            self.get_attribute_value(stack_var, FRAME_PTR_OFFSET)
            + func.frsize)
    if self.has_attribute(stack_var, NAME):
        var_name = self.get_attribute(stack_var, NAME)
    else:
        var_name = ''
    if self.has_attribute(stack_var, DATATYPE_NAMESPACE):
        self.get_attribute(stack_var, DATATYPE_NAMESPACE)  # read, not applied
    if self.has_attribute(stack_var, SIZE):
        self.get_attribute_value(stack_var, SIZE)  # read, not applied
    self.update_counter(STACK_VAR)
    frame = get_frame(func)
    if frame is None or var_name == '':
        return
    idc.set_member_name(frame.get_tid(), member_offset, var_name)
def import_structure(self, structure):
    """
    Adds a structure.

    Nothing is done unless the DataTypes import option is checked.
    If a structure with the same name already exists, the name is
    qualified as '<NAMESPACE>__<NAME>' with every '/' and '.' replaced
    by '_'; the structure is skipped when it has no NAMESPACE attribute
    or when the qualified name is also taken. The VARIABLE_LENGTH
    attribute is not consulted.

    Args:
        structure: STRUCTURE XML element.
            Contains the STRUCTURE attributes and child elements.
    """
    if not self.options.DataTypes.checked:
        return
    name = self.get_attribute(structure, NAME)
    if idc.get_struc_id(name) != BADNODE:
        # duplicate name, try adding name space
        if not self.has_attribute(structure, NAMESPACE):
            return
        namespace = self.get_attribute(structure, NAMESPACE)
        # str.replace returns a new string, so the result must be kept
        name = (namespace + '__' + name).replace('/', '_').replace('.', '_')
        # skip if still duplicate (could add sequence #)
        if idc.get_struc_id(name) != BADNODE:
            return
    size = 0
    if self.has_attribute(structure, SIZE):
        size = self.get_attribute_value(structure, SIZE)
    sid = idc.add_struc(-1, name, 0)
    sptr = get_struc(sid)
    self.update_counter(STRUCTURE)
    self.import_cmts(structure, sid, STRUCTURE)
    self.import_members(structure, sptr)
    current_size = idc.get_struc_size(sid)
    if current_size is not None and current_size < size:
        # imported members fall short of the declared SIZE: add an
        # unnamed one-byte member at the last declared offset
        idc.add_struc_member(sid, "", size - 1, ida_bytes.byte_flag(),
                             ida_nalt.opinfo_t(), 1)
def import_symbol(self, symbol):
    """
    Names the address described by a SYMBOL element.

    Nothing is done unless the Symbols import option is checked.
    When a MANGLED attribute is present it is used in place of NAME.
    A TYPE of 'local' adds the SN_LOCAL flag to the naming call.

    Args:
        symbol: SYMBOL XML element.
            Contains symbol name and address. Optionally includes
            type and mangled symbol.
    """
    if not self.options.Symbols.checked:
        return
    ea = self.get_address(symbol, ADDRESS)
    label = self.get_attribute(symbol, NAME)
    if self.has_attribute(symbol, MANGLED):
        label = self.get_attribute(symbol, MANGLED)
    name_flags = idc.SN_NOWARN
    if self.has_attribute(symbol, TYPE):
        if self.get_attribute(symbol, TYPE) == 'local':
            name_flags |= idc.SN_LOCAL
    idc.set_name(ea, label, name_flags)
    self.update_counter(SYMBOL)
def import_typedef(self, type_def):
    """
    Counts a TYPE_DEF element.

    Importing typedefs is NOT IMPLEMENTED; the element is only tallied,
    and only when the DataTypes import option is checked.

    Args:
        type_def: TYPE_DEF XML element (unused).
    """
    if self.options.DataTypes.checked:
        self.update_counter(TYPE_DEF)
def import_union(self, union):
    """
    Adds a union datatype.

    Nothing is done unless the DataTypes import option is checked.
    If a type with the same name already exists, the name is qualified
    as '<NAMESPACE>__<NAME>' with every '/' and '.' replaced by '_';
    the union is skipped when it has no NAMESPACE attribute or when the
    qualified name is also taken.

    Args:
        union: UNION XML element.
            Contains UNION attributes and child elements.
    """
    if not self.options.DataTypes.checked:
        return
    name = self.get_attribute(union, NAME)
    if idc.get_struc_id(name) != BADNODE:
        # duplicate name, try adding name space
        if not self.has_attribute(union, NAMESPACE):
            return
        namespace = self.get_attribute(union, NAMESPACE)
        # str.replace returns a new string, so the result must be kept
        name = (namespace + '__' + name).replace('/', '_').replace('.', '_')
        # skip if still duplicate (could add sequence #)
        if idc.get_struc_id(name) != BADNODE:
            return
    size = 0
    if self.has_attribute(union, SIZE):
        size = self.get_attribute_value(union, SIZE)
    sid = idc.add_struc(BADADDR, name, True)
    sptr = get_struc(sid)
    self.update_counter(UNION)
    self.import_cmts(union, sid, UNION)
    self.import_members(union, sptr)
    current_size = idc.get_struc_size(sid)
    if current_size is not None and current_size < size:
        # imported members fall short of the declared SIZE: request an
        # unnamed one-byte member at offset size-1
        idc.add_struc_member(sid, "", size - 1, ida_bytes.byte_flag(),
                             ida_nalt.opinfo_t(), 1)
def update_import(self, element):
    """
    Records that a grouping element has been reached.

    Used for the high-level elements (such as COMMENTS, CODE_BLOCKS,
    SYMBOL_TABLE, FUNCTIONS, etc.) that only group sub-elements: the
    element's tag is counted and then reported as the current status.

    Args:
        element: XML element
    """
    tag = element.tag
    self.update_counter(tag)
    self.update_status(tag)
# Global constants
# The mangled name inhibit flags are not currently exposed in the Python
# API, so the combined flag values are hard-coded here.
# Inhibit flags for symbol names:
# DEMANGLED_FORM (MNG_SHORT_FORM | MNG_NOBASEDT | MNG_NOCALLC | MNG_NOCSVOL)
DEMANGLED_FORM = 0x0ea3ffe7
# Inhibit flags for typeinfo cmts:
# DEMANGLED_TYPEINFO (MNG_LONG_FORM)
DEMANGLED_TYPEINFO = 0x06400007
# Global XML string constants for elements and attributes.
# Each constant's value is identical to its own name. The import code
# passes them as XML element tags (e.g. findall(STACK_VAR)) and as
# attribute names (e.g. get_attribute(symbol, NAME)).
ADDRESS = 'ADDRESS'
ADDRESS_MODEL = 'ADDRESS_MODEL'
ADDRESS_RANGE = 'ADDRESS_RANGE'
BASE_ADDRESS = 'BASE_ADDRESS'
BIT_FIELD = 'BIT_FIELD'
BIT_MAPPED = 'BIT_MAPPED'
BIT_MASK = 'BIT_MASK'
BOOKMARK = 'BOOKMARK'
BOOKMARKS = 'BOOKMARKS'
BYTES = 'BYTES'
BYTES_PURGED = 'BYTES_PURGED'
CATEGORY = 'CATEGORY'
CODE = 'CODE'
CODE_BLOCK = 'CODE_BLOCK'
COMMENT = 'COMMENT'
COMMENTS = 'COMMENTS'
COMPILER = 'COMPILER'
DATA = 'DATA'
DATATYPE = 'DATATYPE'
DATATYPES = 'DATATYPES'
DATATYPE_NAMESPACE = 'DATATYPE_NAMESPACE'
DEFINED_DATA = 'DEFINED_DATA'
DESCRIPTION = 'DESCRIPTION'
DISPLAY_SETTINGS = 'DISPLAY_SETTINGS'
END = 'END'
ENDIAN = 'ENDIAN'
ENTRY_POINT = 'ENTRY_POINT'
ENUM = 'ENUM'
ENUM_ENTRY = 'ENUM_ENTRY'
EQUATE = 'EQUATE'
EQUATES = 'EQUATES'
EQUATE_GROUP = 'EQUATE_GROUP'
EQUATE_REFERENCE = 'EQUATE_REFERENCE'
EXE_FORMAT = 'EXE_FORMAT'
EXE_PATH = 'EXE_PATH'
EXT_LIBRARY = 'EXT_LIBRARY'
EXT_LIBRARY_REFERENCE = 'EXT_LIBRARY_REFERENCE'
EXT_LIBRARY_TABLE = 'EXT_LIBRARY_TABLE'
FAMILY = 'FAMILY'
FILE = 'FILE'
FILE_NAME = 'FILE_NAME'
FILE_OFFSET = 'FILE_OFFSET'
FOLDER = 'FOLDER'
FORMAT = 'FORMAT'
FRAGMENT = 'FRAGMENT'
FRAME_PTR_OFFSET = 'FRAME_PTR_OFFSET'
FUNCTION = 'FUNCTION'
FUNCTIONS = 'FUNCTIONS'
FUNCTION_DEF = 'FUNCTION_DEF'
IMAGE_BASE = 'IMAGE_BASE'
INPUT_MD5 = 'INPUT_MD5'
INFO_SOURCE = 'INFO_SOURCE'
LANGUAGE_PROVIDER = 'LANGUAGE_PROVIDER'
LENGTH = 'LENGTH'
LIB_ADDR = 'LIB_ADDR'
LIB_LABEL = 'LIB_LABEL'
LIB_ORDINAL = 'LIB_ORDINAL'
LIB_PROG_NAME = 'LIB_PROG_NAME'
LIBRARY_FUNCTION = 'LIBRARY_FUNCTION'
LOCAL_VAR_SIZE = 'LOCAL_VAR_SIZE'
MANGLED = 'MANGLED'
MANUAL_INSTRUCTION = 'MANUAL_INSTRUCTION'
MANUAL_OPERAND = 'MANUAL_OPERAND'
MARKUP = 'MARKUP'
MEMBER = 'MEMBER'
MEMORY_CONTENTS = 'MEMORY_CONTENTS'
MEMORY_MAP = 'MEMORY_MAP'
MEMORY_REFERENCE = 'MEMORY_REFERENCE'
MEMORY_SECTION = 'MEMORY_SECTION'
NAME = 'NAME'
NAMESPACE = 'NAMESPACE'
OFFSET = 'OFFSET'
OPERAND_INDEX = 'OPERAND_INDEX'
PARAM_OFFSET = 'PARAM_OFFSET'
PATH = 'PATH'
PERMISSIONS = 'PERMISSIONS'
PRIMARY = 'PRIMARY'
PROCESSOR = 'PROCESSOR'
PROGRAM = 'PROGRAM'
PROGRAM_ENTRY_POINT = 'PROGRAM_ENTRY_POINT'
PROGRAM_ENTRY_POINTS = 'PROGRAM_ENTRY_POINTS'
PROGRAM_TREES = 'PROGRAM_TREES'
PROPERTIES = 'PROPERTIES'
PROPERTY = 'PROPERTY'
REGISTER = 'REGISTER'
REGISTER_SAVE_SIZE = 'REGISTER_SAVE_SIZE'
REGISTER_VALUES = 'REGISTER_VALUES'
REGISTER_VALUE_RANGE = 'REGISTER_VALUE_RANGE'
REGISTER_VAR = 'REGISTER_VAR'
REGULAR_CMT = 'REGULAR_CMT'
RELOCATION = 'RELOCATION'
RELOCATION_TABLE = 'RELOCATION_TABLE'
REPEATABLE_CMT = 'REPEATABLE_CMT'
RETURN_ADDR_SIZE = 'RETURN_ADDR_SIZE'
RETURN_TYPE = 'RETURN_TYPE'
SHOW_TERMINATOR = 'SHOW_TERMINATOR'
SIGNED = 'SIGNED'
SIZE = 'SIZE'
SOURCE_ADDRESS = 'SOURCE_ADDRESS'
SOURCE_TYPE = 'SOURCE_TYPE'
STACK_FRAME = 'STACK_FRAME'
STACK_PTR_OFFSET = 'STACK_PTR_OFFSET'
STACK_REFERENCE = 'STACK_REFERENCE'
STACK_VAR = 'STACK_VAR'
START = 'START'
START_ADDR = 'START_ADDR'
START_ADDRESS = 'START_ADDRESS'
STRUCTURE = 'STRUCTURE'
SYMBOL = 'SYMBOL'
SYMBOL_TABLE = 'SYMBOL_TABLE'
TIMESTAMP = 'TIMESTAMP'
TOOL = 'TOOL'
TO_ADDRESS = 'TO_ADDRESS'
TREE = 'TREE'
TYPE = 'TYPE'
TYPEINFO_CMT = 'TYPEINFO_CMT'
TYPE_DEF = 'TYPE_DEF'
UNION = 'UNION'
USER = 'USER'
USER_DEFINED = 'USER_DEFINED'
VALUE = 'VALUE'
VARIABLE_LENGTH = 'VARIABLE_LENGTH'
ZERO_PAD = 'ZERO_PAD'