ghidra/Ghidra/Features/Decompiler/src/decompile/cpp/space.hh
caheckman 612c0d6f3e name to address space map
shortcut to address space map
more adjustments to shortcuts
allow null AddrSpace pointer in raw baselist
holes in the space indices
almost working
GT-2873 decompiler, other, and overlays
GT-2873 added OTHER space to java sleigh compiler, fixed decompiler
exception
isOtherSpace method
isOtherSpace java, addressing code review comments
GT-2873 added null check in decompiler reset
GT-2873 code review changes
Read and write space_other tag in SLA files
Version number for .sla file
GT-2873 fixups after merge
GT-2873 renamed Sparc registers: OTHER->OTHERWIN, WINWSTATE->WSTATE
GT-2873 added option in AddressInput to control OTHER space visibility
GT-2873 OTHER space now global
GT-2873 fixing comments refering to decompiler code in BasicCompilerSpec
2019-08-22 12:30:18 -04:00

493 lines
23 KiB
C++

/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// \file space.hh
/// \brief Classes for describing address spaces
#ifndef __CPUI_SPACE__
#define __CPUI_SPACE__
#include "error.hh"
#include "xml.hh"
/// \brief Fundamental address space types
///
/// Every address space must be one of the following core types.
/// The numeric values are assigned explicitly.
enum spacetype {
IPTR_CONSTANT = 0, ///< Special space to represent constants
IPTR_PROCESSOR = 1, ///< Normal spaces modelled by processor
IPTR_SPACEBASE = 2, ///< addresses = offsets off of base register
IPTR_INTERNAL = 3, ///< Internally managed temporary space
IPTR_FSPEC = 4, ///< Special internal FuncCallSpecs reference
IPTR_IOP = 5, ///< Special internal PcodeOp reference
IPTR_JOIN = 6 ///< Special virtual space to represent split variables
};
class AddrSpace;
class AddrSpaceManager;
class VarnodeData;
class Translate;
/// \brief A region where processor data is stored
///
/// An AddrSpace (Address Space) is an arbitrary sequence of
/// bytes where a processor can store data. As is usual with
/// most processors' concept of RAM, an integer offset
/// paired with an AddrSpace forms the address (See Address)
/// of a byte. The \e size of an AddrSpace indicates the number
/// of bytes that can be separately addressed and is usually
/// described by the number of bytes needed to encode the biggest
/// offset. I.e. a \e 4-byte address space means that there are
/// offsets ranging from 0x00000000 to 0xffffffff within the space
/// for a total of 2^32 addressable bytes within the space.
/// There can be multiple address spaces, and it is typical to have spaces
/// - \b ram Modelling the main processor address bus
/// - \b register Modelling a processor's registers
///
/// The processor specification can set up any address spaces it
/// needs in an arbitrary manner, but \e all data manipulated by
/// the processor, which the specification hopes to model, must
/// be contained in some address space, including RAM, ROM,
/// general registers, special registers, i/o ports, etc.
///
/// The analysis engine also uses additional address spaces to
/// model special concepts. These include
/// - \b const There is a \e constant address space for
/// modelling constant values in pcode expressions
/// (See ConstantSpace)
/// - \b unique There is always a \e unique address space used
/// as a pool for temporary registers. (See UniqueSpace)
///
class AddrSpace {
friend class AddrSpaceManager; // Space container
public:
/// \brief Attribute bits that are combined in the \b flags field
enum {
big_endian = 1, ///< Space is big endian if set, little endian otherwise
heritaged = 2, ///< This space is heritaged
does_deadcode = 4, ///< Dead-code analysis is done on this space
programspecific = 8, ///< Space is specific to a particular loadimage
reverse_justification = 16, ///< Justification within aligned word is opposite of endianness
overlay = 32, ///< This space is an overlay of another space
overlaybase = 64, ///< This is the base space for overlay space(s)
truncated = 128, ///< Space is truncated from its original size, expect pointers larger than this size
hasphysical = 256, ///< Has physical memory associated with it
is_otherspace = 512 ///< Quick check for the OtherSpace derived class
};
/// \brief Space indices that are reserved for special spaces
enum {
constant_space_index = 0, ///< Reserved index for the constant space
other_space_index = 1 ///< Reserved index for the other space
};
private:
spacetype type; ///< Type of space (PROCESSOR, CONSTANT, INTERNAL, ...)
AddrSpaceManager *manage; ///< Manager for processor using this space
const Translate *trans; ///< Processor translator (for register names etc) for this space
int4 refcount; ///< Number of managers using this space
uint4 flags; ///< Attributes of the space (a combination of the attribute bits above)
uintb highest; ///< Highest (byte) offset into this space
char shortcut; ///< Shortcut character for printing
protected:
string name; ///< Name of this space
uint4 addressSize; ///< Size of an address into this space in bytes
uint4 wordsize; ///< Size of unit being addressed (1=byte)
int4 index; ///< An integer identifier for the space
int4 delay; ///< Delay in heritaging this space
int4 deadcodedelay; ///< Delay before deadcode removal is allowed on this space
void calcScaleMask(void); ///< Calculate scale and mask
void setFlags(uint4 fl); ///< Set a cached attribute
void clearFlags(uint4 fl); ///< Clear a cached attribute
void saveBasicAttributes(ostream &s) const; ///< Write the XML attributes of this space
void truncateSpace(uint4 newsize); ///< Truncate this space to \e newsize (NOTE(review): unit presumably bytes, defined outside this header -- confirm)
public:
AddrSpace(AddrSpaceManager *m,const Translate *t,spacetype tp,const string &nm,uint4 size,uint4 ws,int4 ind,uint4 fl,int4 dl); ///< Constructor taking the attributes of the space explicitly
AddrSpace(AddrSpaceManager *m,const Translate *t,spacetype tp); ///< For use with restoreXml
virtual ~AddrSpace(void) {} ///< The address space destructor
const string &getName(void) const; ///< Get the name
AddrSpaceManager *getManager(void) const; ///< Get the space manager
const Translate *getTrans(void) const; ///< Get the processor translator
spacetype getType(void) const; ///< Get the type of space
int4 getDelay(void) const; ///< Get number of heritage passes being delayed
int4 getDeadcodeDelay(void) const; ///< Get number of passes before deadcode removal is allowed
int4 getIndex(void) const; ///< Get the integer identifier
uint4 getWordSize(void) const; ///< Get the addressable unit size
uint4 getAddrSize(void) const; ///< Get the size of the space
uintb getHighest(void) const; ///< Get the highest byte-scaled address
uintb wrapOffset(uintb off) const; ///< Wrap -off- to the offset that fits into this space
char getShortcut(void) const; ///< Get the shortcut character
bool isHeritaged(void) const; ///< Return \b true if dataflow has been traced
bool doesDeadcode(void) const; ///< Return \b true if dead code analysis should be done on this space
bool hasPhysical(void) const; ///< Return \b true if data is physically stored in this space
bool isBigEndian(void) const; ///< Return \b true if values in this space are big endian
bool isReverseJustified(void) const; ///< Return \b true if alignment justification does not match endianness
bool isOverlay(void) const; ///< Return \b true if this is an overlay space
bool isOverlayBase(void) const; ///< Return \b true if other spaces overlay this space
bool isOtherSpace(void) const; ///< Return \b true if \b this is the \e other address space
bool isTruncated(void) const; ///< Return \b true if this space is truncated from its original size
void printOffset(ostream &s,uintb offset) const; ///< Write an address offset to a stream
virtual int4 numSpacebase(void) const; ///< Number of base registers associated with this space
virtual const VarnodeData &getSpacebase(int4 i) const; ///< Get a base register that creates this virtual space
virtual const VarnodeData &getSpacebaseFull(int4 i) const; ///< Return original spacebase register before truncation
virtual bool stackGrowsNegative(void) const; ///< Return \b true if a stack in this space grows negative
virtual AddrSpace *getContain(void) const; ///< Return this space's containing space (if any)
virtual void saveXmlAttributes(ostream &s,uintb offset) const; ///< Save an address as XML
virtual void saveXmlAttributes(ostream &s,uintb offset,int4 size) const; ///< Save an address and size as XML
virtual uintb restoreXmlAttributes(const Element *el,uint4 &size) const; ///< Recover an offset and size
virtual void printRaw(ostream &s,uintb offset) const; ///< Write an address in this space to a stream
virtual uintb read(const string &s,int4 &size) const; ///< Read in an address (and possible size) from a string
virtual void saveXml(ostream &s) const; ///< Write the details of this space as XML
virtual void restoreXml(const Element *el); ///< Recover the details of this space from XML
static uintb addressToByte(uintb val,uint4 ws); ///< Scale from addressable units to byte units
static uintb byteToAddress(uintb val,uint4 ws); ///< Scale from byte units to addressable units
static int4 addressToByteInt(int4 val,uint4 ws); ///< Scale int4 from addressable units to byte units
static int4 byteToAddressInt(int4 val,uint4 ws); ///< Scale int4 from byte units to addressable units
};
/// \brief Special AddrSpace for representing constants during analysis.
///
/// The underlying RTL (See PcodeOp) represents all data in terms of
/// an Address, which is made up of an AddrSpace and offset pair.
/// In order to represent constants in the semantics of the RTL,
/// there is a special \e constant address space. An \e offset
/// within the address space encodes the actual constant represented
/// by the pair. I.e. the pair (\b const,4) represents the constant
/// \b 4 within the RTL. The \e size of the ConstantSpace has
/// no meaning, as we always want to be able to represent an arbitrarily
/// large constant. In practice, the size of a constant is limited
/// by the offset field of an Address.
class ConstantSpace : public AddrSpace {
public:
ConstantSpace(AddrSpaceManager *m,const Translate *t,const string &nm,int4 ind); ///< Only constructor
virtual void printRaw(ostream &s,uintb offset) const; ///< Overrides AddrSpace::printRaw
virtual void saveXml(ostream &s) const; ///< Overrides AddrSpace::saveXml
virtual void restoreXml(const Element *el); ///< Overrides AddrSpace::restoreXml
};
/// \brief Special AddrSpace for special/user-defined address spaces
class OtherSpace : public AddrSpace {
public:
OtherSpace(AddrSpaceManager *m, const Translate *t, const string &nm, int4 ind); ///< Constructor
OtherSpace(AddrSpaceManager *m, const Translate *t); ///< For use with restoreXml
virtual void printRaw(ostream &s, uintb offset) const; ///< Overrides AddrSpace::printRaw
virtual void saveXml(ostream &s) const; ///< Overrides AddrSpace::saveXml
};
/// \brief The pool of temporary storage registers
///
/// It is convenient both for modelling processor instructions
/// in an RTL and for later transforming of the RTL to have a pool
/// of temporary registers that can hold data but that aren't a
/// formal part of the state of the processor. The UniqueSpace
/// provides a specific location for this pool. The analysis
/// engine always creates exactly one of these spaces named
/// \b unique.
class UniqueSpace : public AddrSpace {
public:
UniqueSpace(AddrSpaceManager *m,const Translate *t,const string &nm,int4 ind,uint4 fl); ///< Constructor
UniqueSpace(AddrSpaceManager *m,const Translate *t); ///< For use with restoreXml
virtual void saveXml(ostream &s) const; ///< Overrides AddrSpace::saveXml
};
/// \brief The pool of logically joined variables
///
/// Some logical variables are split across non-contiguous regions of memory. This space
/// creates a virtual place for these logical variables to exist. Any memory location within this
/// space is backed by 2 or more memory locations in other spaces that physically hold the pieces
/// of the logical value. The database controlling symbols is responsible for keeping track of
/// mapping the logical address in this space to its physical pieces. Offsets into this space do not
/// have an absolute meaning, the database may vary what offset is assigned to what set of pieces.
class JoinSpace : public AddrSpace {
public:
JoinSpace(AddrSpaceManager *m,const Translate *t,const string &nm,int4 ind); ///< Constructor
virtual void saveXmlAttributes(ostream &s,uintb offset) const; ///< Overrides AddrSpace::saveXmlAttributes (offset only)
virtual void saveXmlAttributes(ostream &s,uintb offset,int4 size) const; ///< Overrides AddrSpace::saveXmlAttributes (offset and size)
virtual uintb restoreXmlAttributes(const Element *el,uint4 &size) const; ///< Overrides AddrSpace::restoreXmlAttributes
virtual void printRaw(ostream &s,uintb offset) const; ///< Overrides AddrSpace::printRaw
virtual uintb read(const string &s,int4 &size) const; ///< Overrides AddrSpace::read
virtual void saveXml(ostream &s) const; ///< Overrides AddrSpace::saveXml
virtual void restoreXml(const Element *el); ///< Overrides AddrSpace::restoreXml
};
/// \brief An overlay space.
///
/// A different code and data layout that occupies the same memory as another address space.
/// Some compilers use this concept to increase the logical size of a program without increasing
/// its physical memory requirements. An overlay space allows the same physical location to contain
/// different code and be labeled with different symbols, depending on context.
/// From the point of view of reverse engineering, the different code and symbols are viewed
/// as a logically distinct space.
class OverlaySpace : public AddrSpace {
AddrSpace *baseSpace; ///< Space being overlaid
public:
OverlaySpace(AddrSpaceManager *m,const Translate *t); ///< Constructor
AddrSpace *getBaseSpace(void) const; ///< Get the address space being overlaid
virtual void saveXml(ostream &s) const; ///< Overrides AddrSpace::saveXml
virtual void restoreXml(const Element *el); ///< Overrides AddrSpace::restoreXml
};
/// Turn on one or more cached attribute bits of this space.
/// Bits that are already set stay set.
/// \param fl is the mask of attribute bits to set
inline void AddrSpace::setFlags(uint4 fl) {
  flags = flags | fl;
}
/// Turn off one or more cached attribute bits of this space.
/// Bits outside the mask are left unchanged.
/// \param fl is the mask of attribute bits to clear
inline void AddrSpace::clearFlags(uint4 fl) {
  flags = flags & ~fl;
}
/// Each address space carries a (unique) name; configuration
/// files refer to the space by this name via XML.
/// \return a reference to the name of this space
inline const string &AddrSpace::getName(void) const {
  return this->name;
}
/// Recover the manager object that this space is associated with.
/// Every address space belongs to a manager of (all possible) spaces.
/// \return a pointer to the address space manager
inline AddrSpaceManager *AddrSpace::getManager(void) const {
  return this->manage;
}
/// Recover the processor translator for this space. The processor
/// associated with a space may have additional objects, such as
/// registers, which are reached through the translator.
/// \return a pointer to the Translate object
inline const Translate *AddrSpace::getTrans(void) const {
  return this->trans;
}
/// Report the core type that defines this address space, one of
///   - IPTR_CONSTANT for the constant space
///   - IPTR_PROCESSOR for a normal space
///   - IPTR_SPACEBASE for a space of offsets off of a base register
///   - IPTR_INTERNAL for the temporary register space
///   - IPTR_FSPEC for special FuncCallSpecs references
///   - IPTR_IOP for special PcodeOp references
///   - IPTR_JOIN for the virtual space of split variables
/// \return the basic type of this space
inline spacetype AddrSpace::getType(void) const {
  return this->type;
}
/// Heritage algorithms that trace dataflow within this space may
/// postpone doing so, giving indirect references into the space
/// a chance to resolve first. This is the number of rounds of
/// dataflow analysis to skip for this space.
/// \return the number of rounds to skip heritage
inline int4 AddrSpace::getDelay(void) const {
  return this->delay;
}
/// Deadcode removal on varnodes of a space can be held back for
/// some number of heritage passes, in case the varnodes are not
/// all created within a single pass. This is the number of rounds
/// to skip before deadcode elimination begins.
/// \return the number of rounds to skip deadcode removal
inline int4 AddrSpace::getDeadcodeDelay(void) const {
  return this->deadcodedelay;
}
/// The index associated with a space serves as an integer
/// encoding of that space.
/// \return the unique index
inline int4 AddrSpace::getIndex(void) const {
  return this->index;
}
/// Report how many bytes make up one \e addressable \e unit of
/// this space. The value is almost always 1, but any other small
/// integer is possible.
/// \return the number of bytes in a unit
inline uint4 AddrSpace::getWordSize(void) const {
  return this->wordsize;
}
/// Report the number of bytes required to hold an offset into
/// this space. For instance, a space of 2^32 bytes has an
/// address size of 4.
/// \return the size of an address
inline uint4 AddrSpace::getAddrSize(void) const {
  return this->addressSize;
}
/// Report the largest (byte) offset that is valid in this space.
/// \return the highest offset
inline uintb AddrSpace::getHighest(void) const {
  return this->highest;
}
/// Reduce \e off modulo the size of this address space, producing
/// the "equivalent" offset that fits properly into the space.
/// An offset that is already in range is returned unchanged.
/// Otherwise the offset is interpreted as a signed value, so an
/// offset with the top bit set wraps the way a negative number would.
/// \param off is the offset requested
/// \return the wrapped offset
inline uintb AddrSpace::wrapOffset(uintb off) const {
  if (off > highest) {			// Unsigned comparison: offset lies outside the space
    const intb modulus = (intb)(highest+1);
    intb remainder = (intb)off % modulus;	// Signed remainder, may be negative
    if (remainder < 0)
      remainder += modulus;		// Shift a negative remainder back into the space
    return (uintb)remainder;
  }
  return off;
}
/// Return a unique short cut character that is associated
/// with this space. The shortcut character can be used by
/// the read method to quickly specify the space of an address.
/// \return the shortcut character
inline char AddrSpace::getShortcut(void) const {
return shortcut;
}
/// During analysis, memory locations in most spaces need to
/// have their data-flow traced. This method returns \b true
/// for these spaces. For some of the special spaces, like
/// the \e constant space, tracing data flow makes no sense,
/// and this routine will return \b false.
/// \return \b true if this space's data-flow is analyzed
inline bool AddrSpace::isHeritaged(void) const {
return ((flags & heritaged)!=0);
}
/// Most memory locations should have dead-code analysis performed,
/// and this routine will return \b true.
/// For certain special spaces like the \e constant space, dead-code
/// analysis doesn't make sense, and this routine returns \b false.
inline bool AddrSpace::doesDeadcode(void) const {
return ((flags & does_deadcode)!=0);
}
/// This routine returns \b true, if, like most spaces, the space
/// has actual read/writeable bytes associated with it.
/// Some spaces, like the \e constant space, do not.
/// \return \b true if the space has physical data in it.
inline bool AddrSpace::hasPhysical(void) const {
return ((flags & hasphysical) !=0);
}
/// If integer values stored in this space are encoded in this
/// space using the big endian format, then return \b true.
/// \return \b true if the space is big endian
inline bool AddrSpace::isBigEndian(void) const {
return ((flags&big_endian)!=0);
}
/// Certain architectures or compilers specify an alignment for accessing words within the space
/// The space required for a variable must be rounded up to the alignment. For variables smaller
/// than the alignment, there is the issue of how the variable is "justified" within the aligned
/// word. Usually the justification depends on the endianness of the space, for certain weird
/// cases the justification may be the opposite of the endianness.
inline bool AddrSpace::isReverseJustified(void) const {
return ((flags&reverse_justification)!=0);
}
inline bool AddrSpace::isOverlay(void) const {
return ((flags&overlay)!=0);
}
inline bool AddrSpace::isOverlayBase(void) const {
return ((flags&overlaybase)!=0);
}
inline bool AddrSpace::isOtherSpace(void) const {
return ((flags&is_otherspace)!=0);
}
/// If this method returns \b true, the logical form of this space is truncated from its actual size
/// Pointers may refer to this original size put the most significant bytes are ignored
inline bool AddrSpace::isTruncated(void) const {
return ((flags&truncated)!=0);
}
/// Certain spaces, like the stack spaces, are "virtual": their addresses are really
/// relative to a base pointer held in a register, like the stackpointer. A non-zero
/// result means \b this space is virtual and has 1 (or more) associated pointer registers.
/// This base implementation always reports zero.
/// \return the number of base registers associated with this space
inline int4 AddrSpace::numSpacebase(void) const {
  const int4 baseRegisterCount = 0;
  return baseRegisterCount;
}
/// For a virtual space, like the stack space, this routine provides the location
/// information of one base register of the space. An exception is thrown if the
/// register does not exist. This base implementation always throws.
/// \param i is the index of the base register
/// \return the VarnodeData that describes the register
inline const VarnodeData &AddrSpace::getSpacebase(int4 i) const {
  (void)i;			// The index is not used by this implementation
  throw LowlevelError(name + " space is not virtual and has no associated base register");
}
/// When a stack pointer is truncated to fit the stack space, the extent of
/// the original register may still be needed. This base implementation
/// always throws an exception.
/// \param i is the index of the base register
/// \return the original register before truncation
inline const VarnodeData &AddrSpace::getSpacebaseFull(int4 i) const {
  (void)i;			// The index is not used by this implementation
  throw LowlevelError(name + " has no truncated registers");
}
/// For a stack (or other spacebase) space, a \b true result means the space can be
/// viewed as a stack where a \b push operation decreases the spacebase pointer
/// (the stack grows negative). This base implementation always returns \b true.
/// \return \b true if stacks grow in negative direction.
inline bool AddrSpace::stackGrowsNegative(void) const {
  const bool growsNegative = true;
  return growsNegative;
}
/// A virtual space reports the address space that contains it;
/// any other space reports NULL. This base implementation
/// always returns NULL.
/// \return a pointer to the containing space or NULL
inline AddrSpace *AddrSpace::getContain(void) const {
  AddrSpace *containing = (AddrSpace *)0;
  return containing;
}
/// Convert an offset expressed in addressable units (wordsize) of a
/// space into a byte relative offset, by multiplying by the word size.
/// \param val is the offset to convert
/// \param ws is the number of bytes in the addressable word
/// \return the scaled offset
inline uintb AddrSpace::addressToByte(uintb val,uint4 ws) {
  const uintb byteOffset = val * ws;
  return byteOffset;
}
/// Convert a byte based offset in a space into an offset relative to
/// the addressable unit of the space (wordsize), by dividing by the word size.
/// \param val is the offset to convert
/// \param ws is the number of bytes in the addressable word
/// \return the scaled offset
inline uintb AddrSpace::byteToAddress(uintb val,uint4 ws) {
  const uintb unitOffset = val / ws;
  return unitOffset;
}
/// Convert an int4 offset expressed in addressable units (wordsize) of a
/// space into a byte relative offset, by multiplying by the word size.
/// \param val is the offset to convert
/// \param ws is the number of bytes in the addressable word
/// \return the scaled offset
inline int4 AddrSpace::addressToByteInt(int4 val,uint4 ws) {
  const int4 byteOffset = val * ws;
  return byteOffset;
}
/// Given an int4 offset in an address space based on bytes, convert it
/// into an offset relative to the addressable unit of the space (wordsize).
///
/// The division is carried out on signed values. Dividing the int4 offset
/// directly by the unsigned word size would convert the offset to unsigned
/// first, so a negative offset would produce a huge positive quotient
/// instead of a negative one.
/// \param val is the (possibly negative) offset to convert
/// \param ws is the number of bytes in the addressable word
/// \return the scaled offset, truncated toward zero
inline int4 AddrSpace::byteToAddressInt(int4 val,uint4 ws) {
  return val / (int4)ws;	// Cast keeps the division signed
}
#endif