Candidate release of source code.

This commit is contained in:
Dan 2019-03-26 13:45:32 -04:00
parent db81e6b3b0
commit 79d8f164f8
12449 changed files with 2800756 additions and 16 deletions

View file

@ -0,0 +1,477 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// \file space.hh
/// \brief Classes for describing address spaces
#ifndef __CPUI_SPACE__
#define __CPUI_SPACE__
#include "error.hh"
#include "xml.hh"
/// \brief Fundamental address space types
///
/// Every address space must be one of the following core types.
/// The numeric values are assigned explicitly and should be treated as fixed.
enum spacetype {
IPTR_CONSTANT = 0, ///< Special space to represent constants
IPTR_PROCESSOR = 1, ///< Normal spaces modelled by processor
IPTR_SPACEBASE = 2, ///< Addresses are offsets off of a base register
IPTR_INTERNAL = 3, ///< Internally managed temporary space
IPTR_FSPEC = 4, ///< Special internal FuncCallSpecs reference
IPTR_IOP = 5, ///< Special internal PcodeOp reference
IPTR_JOIN = 6 ///< Special virtual space to represent split variables
};
class AddrSpace;
class AddrSpaceManager;
class VarnodeData;
class Translate;
/// \brief A region where processor data is stored
///
/// An AddrSpace (Address Space) is an arbitrary sequence of
/// bytes where a processor can store data. As is usual with
/// most processors' concept of RAM, an integer offset
/// paired with an AddrSpace forms the address (See Address)
/// of a byte. The \e size of an AddrSpace indicates the number
/// of bytes that can be separately addressed and is usually
/// described by the number of bytes needed to encode the biggest
/// offset. I.e. a \e 4-byte address space means that there are
/// offsets ranging from 0x00000000 to 0xffffffff within the space
/// for a total of 2^32 addressable bytes within the space.
/// There can be multiple address spaces, and it is typical to have spaces
/// - \b ram Modelling the main processor address bus
/// - \b register Modelling a processor's registers
///
/// The processor specification can set up any address spaces it
/// needs in an arbitrary manner, but \e all data manipulated by
/// the processor, which the specification hopes to model, must
/// be contained in some address space, including RAM, ROM,
/// general registers, special registers, i/o ports, etc.
///
/// The analysis engine also uses additional address spaces to
/// model special concepts. These include
/// - \b const There is a \e constant address space for
/// modelling constant values in pcode expressions
/// (See ConstantSpace)
/// - \b unique There is always a \e unique address space used
/// as a pool for temporary registers. (See UniqueSpace)
///
class AddrSpace {
friend class AddrSpaceManager; // Space container
public:
/// \brief Attribute bits stored in the \b flags field; each value is a distinct bit so they can be OR'ed together
enum {
big_endian = 1, ///< Space is big endian if set, little endian otherwise
heritaged = 2, ///< This space is heritaged
does_deadcode = 4, ///< Dead-code analysis is done on this space
programspecific = 8, ///< Space is specific to a particular loadimage
reverse_justification = 16, ///< Justification within aligned word is opposite of endianness
overlay = 32, ///< This space is an overlay of another space
overlaybase = 64, ///< This is the base space for overlay space(s)
truncated = 128, ///< Space is truncated from its original size, expect pointers larger than this size
hasphysical = 256 ///< Has physical memory associated with it
};
private:
spacetype type; ///< Type of space (PROCESSOR, CONSTANT, INTERNAL, ...)
AddrSpaceManager *manage; ///< Manager for processor using this space
const Translate *trans; ///< Processor translator (for register names etc) for this space
int4 refcount; ///< Number of managers using this space
uint4 flags; ///< Attributes of the space (a combination of the bits in the enum above)
uintb highest; ///< Highest (byte) offset into this space
char shortcut; ///< Shortcut character for printing
protected:
string name; ///< Name of this space
uint4 addressSize; ///< Size of an address into this space in bytes
uint4 wordsize; ///< Size of unit being addressed (1=byte)
int4 index; ///< An integer identifier for the space
int4 delay; ///< Delay in heritaging this space
int4 deadcodedelay; ///< Delay before deadcode removal is allowed on this space
void calcScaleMask(void); ///< Calculate scale and mask
void assignShortcut(void); ///< Assign a shortcut character to the space
void setFlags(uint4 fl); ///< Set a cached attribute
void clearFlags(uint4 fl); ///< Clear a cached attribute
void saveBasicAttributes(ostream &s) const; ///< Write the XML attributes of this space
void truncateSpace(uint4 newsize); ///< NOTE(review): presumably shrinks the space to \e newsize (see the \b truncated flag); defined elsewhere - confirm
public:
AddrSpace(AddrSpaceManager *m,const Translate *t,spacetype tp,const string &nm,uint4 size,uint4 ws,int4 ind,uint4 fl,int4 dl); ///< Construct a space from explicitly supplied attributes
AddrSpace(AddrSpaceManager *m,const Translate *t,spacetype tp); ///< For use with restoreXml
virtual ~AddrSpace(void) {} ///< The address space destructor
const string &getName(void) const; ///< Get the name
AddrSpaceManager *getManager(void) const; ///< Get the space manager
const Translate *getTrans(void) const; ///< Get the processor translator
spacetype getType(void) const; ///< Get the type of space
int4 getDelay(void) const; ///< Get number of heritage passes being delayed
int4 getDeadcodeDelay(void) const; ///< Get number of passes before deadcode removal is allowed
int4 getIndex(void) const; ///< Get the integer identifier
uint4 getWordSize(void) const; ///< Get the addressable unit size
uint4 getAddrSize(void) const; ///< Get the size of the space
uintb getHighest(void) const; ///< Get the highest byte-scaled address
uintb wrapOffset(uintb off) const; ///< Wrap -off- to the offset that fits into this space
char getShortcut(void) const; ///< Get the shortcut character
bool contain(AddrSpace *id2) const; ///< Determine if this space contains another
bool isHeritaged(void) const; ///< Return \b true if dataflow has been traced
bool doesDeadcode(void) const; ///< Return \b true if dead code analysis should be done on this space
bool hasPhysical(void) const; ///< Return \b true if data is physically stored in this space
bool isBigEndian(void) const; ///< Return \b true if values in this space are big endian
bool isReverseJustified(void) const; ///< Return \b true if alignment justification does not match endianness
bool isOverlay(void) const; ///< Return \b true if this is an overlay space
bool isOverlayBase(void) const; ///< Return \b true if other spaces overlay this space
bool isTruncated(void) const; ///< Return \b true if this space is truncated from its original size
uintm data2Uintm(const uint1 *ptr,int4 size) const; ///< Convert a sequence of bytes into an integer value
void printOffset(ostream &s,uintb offset) const; ///< Write an address offset to a stream
virtual int4 numSpacebase(void) const; ///< Number of base registers associated with this space
virtual const VarnodeData &getSpacebase(int4 i) const; ///< Get a base register that creates this virtual space
virtual const VarnodeData &getSpacebaseFull(int4 i) const; ///< Return original spacebase register before truncation
virtual bool stackGrowsNegative(void) const; ///< Return \b true if a stack in this space grows negative
virtual AddrSpace *getContain(void) const; ///< Return this space's containing space (if any)
virtual void saveXmlAttributes(ostream &s,uintb offset) const; ///< Save an address as XML
virtual void saveXmlAttributes(ostream &s,uintb offset,int4 size) const; ///< Save an address and size as XML
virtual uintb restoreXmlAttributes(const Element *el,uint4 &size) const; ///< Recover an offset and size
virtual void printRaw(ostream &s,uintb offset) const; ///< Write an address in this space to a stream
virtual uintb read(const string &s,int4 &size) const; ///< Read in an address (and possible size) from a string
virtual void saveXml(ostream &s) const; ///< Write the details of this space as XML
virtual void restoreXml(const Element *el); ///< Recover the details of this space from XML
static uintb addressToByte(uintb val,uint4 ws); ///< Scale from addressable units to byte units
static uintb byteToAddress(uintb val,uint4 ws); ///< Scale from byte units to addressable units
static int4 addressToByteInt(int4 val,uint4 ws); ///< Scale int4 from addressable units to byte units
static int4 byteToAddressInt(int4 val,uint4 ws); ///< Scale int4 from byte units to addressable units
};
/// \brief Special AddrSpace for representing constants during analysis.
///
/// The underlying RTL (See PcodeOp) represents all data in terms of
/// an Address, which is made up of an AddrSpace and offset pair.
/// In order to represent constants in the semantics of the RTL,
/// there is a special \e constant address space. An \e offset
/// within the address space encodes the actual constant represented
/// by the pair. I.e. the pair (\b const,4) represents the constant
/// \b 4 within the RTL. The \e size of the ConstantSpace has
/// no meaning, as we always want to be able to represent an arbitrarily
/// large constant. In practice, the size of a constant is limited
/// by the offset field of an Address.
class ConstantSpace : public AddrSpace {
public:
ConstantSpace(AddrSpaceManager *m,const Translate *t,const string &nm,int4 ind); ///< Only constructor
virtual void printRaw(ostream &s,uintb offset) const; ///< Overrides AddrSpace::printRaw
virtual void saveXml(ostream &s) const; ///< Overrides AddrSpace::saveXml
virtual void restoreXml(const Element *el); ///< Overrides AddrSpace::restoreXml
};
/// \brief The pool of temporary storage registers
///
/// It is convenient both for modelling processor instructions
/// in an RTL and for later transforming of the RTL to have a pool
/// of temporary registers that can hold data but that aren't a
/// formal part of the state of the processor. The UniqueSpace
/// provides a specific location for this pool. The analysis
/// engine always creates exactly one of these spaces named
/// \b unique.
class UniqueSpace : public AddrSpace {
public:
UniqueSpace(AddrSpaceManager *m,const Translate *t,const string &nm,int4 ind,uint4 fl); ///< Construct with an explicit name, index and flags
UniqueSpace(AddrSpaceManager *m,const Translate *t); ///< For use with restoreXml
virtual void saveXml(ostream &s) const; ///< Overrides AddrSpace::saveXml
};
/// \brief The pool of logically joined variables
///
/// Some logical variables are split across non-contiguous regions of memory. This space
/// creates a virtual place for these logical variables to exist. Any memory location within this
/// space is backed by 2 or more memory locations in other spaces that physically hold the pieces
/// of the logical value. The database controlling symbols is responsible for keeping track of
/// mapping the logical address in this space to its physical pieces. Offsets into this space do not
/// have an absolute meaning; the database may vary what offset is assigned to what set of pieces.
class JoinSpace : public AddrSpace {
public:
JoinSpace(AddrSpaceManager *m,const Translate *t,const string &nm,int4 ind); ///< Construct with an explicit name and index
virtual void saveXmlAttributes(ostream &s,uintb offset) const; ///< Overrides AddrSpace::saveXmlAttributes (offset only)
virtual void saveXmlAttributes(ostream &s,uintb offset,int4 size) const; ///< Overrides AddrSpace::saveXmlAttributes (offset and size)
virtual uintb restoreXmlAttributes(const Element *el,uint4 &size) const; ///< Overrides AddrSpace::restoreXmlAttributes
virtual void printRaw(ostream &s,uintb offset) const; ///< Overrides AddrSpace::printRaw
virtual uintb read(const string &s,int4 &size) const; ///< Overrides AddrSpace::read
virtual void saveXml(ostream &s) const; ///< Overrides AddrSpace::saveXml
virtual void restoreXml(const Element *el); ///< Overrides AddrSpace::restoreXml
};
/// \brief An overlay space.
///
/// A different code and data layout that occupies the same memory as another address space.
/// Some compilers use this concept to increase the logical size of a program without increasing
/// its physical memory requirements. An overlay space allows the same physical location to contain
/// different code and be labeled with different symbols, depending on context.
/// From the point of view of reverse engineering, the different code and symbols are viewed
/// as a logically distinct space.
class OverlaySpace : public AddrSpace {
AddrSpace *baseSpace; ///< Space being overlaid
public:
OverlaySpace(AddrSpaceManager *m,const Translate *t); ///< Constructor
AddrSpace *getBaseSpace(void) const; ///< Get the address space being overlaid
virtual void saveXml(ostream &s) const; ///< Overrides AddrSpace::saveXml
virtual void restoreXml(const Element *el); ///< Overrides AddrSpace::restoreXml
};
/// Turn on one or more attribute bits of this space. Bits that are
/// already set are left untouched. Intended for use by derived classes.
/// \param fl is the mask of attribute bits to switch on
inline void AddrSpace::setFlags(uint4 fl) {
  flags = flags | fl;
}
/// Turn off one or more attribute bits of this space. Bits outside the
/// mask keep their current value. Intended for use by derived classes.
/// \param fl is the mask of attribute bits to switch off
inline void AddrSpace::clearFlags(uint4 fl) {
  flags = flags & ~fl;
}
/// Each address space carries a (unique) name; configuration files
/// refer to the space by this name, notably in XML.
/// \return a reference to the name of this space
inline const string &AddrSpace::getName(void) const {
  return this->name;
}
/// An address space belongs to a manager that knows about all possible
/// spaces. This accessor hands back that manager.
/// \return a pointer to the address space manager
inline AddrSpaceManager *AddrSpace::getManager(void) const {
  return this->manage;
}
/// An address space is tied to a processor, which may own further objects
/// (registers etc.). This accessor returns the translator for that processor.
/// \return a pointer to the Translate object
inline const Translate *AddrSpace::getTrans(void) const {
  return this->trans;
}
/// Report the core type that defines this address space. This is one of
///   - IPTR_CONSTANT for the constant space
///   - IPTR_PROCESSOR for a normal space
///   - IPTR_SPACEBASE for a space whose addresses are offsets off of a base register
///   - IPTR_INTERNAL for the temporary register space
///   - IPTR_FSPEC for special FuncCallSpecs references
///   - IPTR_IOP for special PcodeOp references
///   - IPTR_JOIN for the virtual space representing split variables
///
/// \return the basic type of this space
inline spacetype AddrSpace::getType(void) const {
  return this->type;
}
/// When the heritage algorithms trace dataflow within this space, they
/// may postpone doing so for some number of rounds so that indirect
/// references into the space have a chance to resolve first. This
/// accessor gives the number of dataflow analysis rounds to skip.
/// \return the number of rounds to skip heritage
inline int4 AddrSpace::getDelay(void) const {
  return this->delay;
}
/// Dead-code removal on the varnodes of a space can be held back for a
/// number of heritage passes, to cover the case where not every varnode
/// is created in a single pass. This accessor gives the number of rounds
/// to skip before dead-code elimination starts.
/// \return the number of rounds to skip deadcode removal
inline int4 AddrSpace::getDeadcodeDelay(void) const {
  return this->deadcodedelay;
}
/// An integer index is associated with every address space and can
/// serve as an integer encoding of the space.
/// \return the unique index
inline int4 AddrSpace::getIndex(void) const {
  return this->index;
}
/// Report how many bytes make up one \e addressable \e unit of this
/// space. The value is almost always 1, but may be another small integer.
/// \return the number of bytes in a unit
inline uint4 AddrSpace::getWordSize(void) const {
  return this->wordsize;
}
/// Report how many bytes it takes to represent an offset into this
/// space. For instance, a space holding 2^32 bytes has an address size of 4.
/// \return the size of an address
inline uint4 AddrSpace::getAddrSize(void) const {
  return this->addressSize;
}
/// Report the largest (byte) offset that is valid within this space.
/// \return the highest offset
inline uintb AddrSpace::getHighest(void) const {
  return this->highest;
}
/// Reduce \e off modulo the size of this address space, producing the
/// "equivalent" offset that fits properly into the space. Offsets already
/// within range are returned unchanged. Out-of-range offsets are
/// reinterpreted as signed values before the remainder is taken.
/// \param off is the offset requested
/// \return the wrapped offset
inline uintb AddrSpace::wrapOffset(uintb off) const {
  if (off > highest) {                  // Unsigned comparison: only out-of-range offsets need wrapping
    const intb modulus = static_cast<intb>(highest+1);
    intb wrapped = static_cast<intb>(off) % modulus;  // Signed remainder, may come out negative
    if (wrapped < 0)
      wrapped += modulus;               // Shift a negative remainder back into (0,modulus)
    off = static_cast<uintb>(wrapped);
  }
  return off;
}
/// Return a unique short cut character that is associated
/// with this space. The shortcut character can be used by
/// the read method to quickly specify the space of an address.
/// \return the shortcut character
inline char AddrSpace::getShortcut(void) const {
return shortcut;
}
/// During analysis, memory locations in most spaces need to
/// have their data-flow traced. This method returns \b true
/// for these spaces. For some of the special spaces, like
/// the \e constant space, tracing data flow makes no sense,
/// and this routine will return \b false.
/// \return \b true if this space's data-flow is analyzed
inline bool AddrSpace::isHeritaged(void) const {
return ((flags & heritaged)!=0);
}
/// Most memory locations should have dead-code analysis performed,
/// and this routine will return \b true.
/// For certain special spaces like the \e constant space, dead-code
/// analysis doesn't make sense, and this routine returns \b false.
inline bool AddrSpace::doesDeadcode(void) const {
return ((flags & does_deadcode)!=0);
}
/// This routine returns \b true, if, like most spaces, the space
/// has actual read/writeable bytes associated with it.
/// Some spaces, like the \e constant space, do not.
/// \return \b true if the space has physical data in it.
inline bool AddrSpace::hasPhysical(void) const {
return ((flags & hasphysical) !=0);
}
/// If integer values stored in this space are encoded in this
/// space using the big endian format, then return \b true.
/// \return \b true if the space is big endian
inline bool AddrSpace::isBigEndian(void) const {
return ((flags&big_endian)!=0);
}
/// Certain architectures or compilers specify an alignment for accessing words within the space
/// The space required for a variable must be rounded up to the alignment. For variables smaller
/// than the alignment, there is the issue of how the variable is "justified" within the aligned
/// word. Usually the justification depends on the endianness of the space, for certain weird
/// cases the justification may be the opposite of the endianness.
inline bool AddrSpace::isReverseJustified(void) const {
return ((flags&reverse_justification)!=0);
}
inline bool AddrSpace::isOverlay(void) const {
return ((flags&overlay)!=0);
}
inline bool AddrSpace::isOverlayBase(void) const {
return ((flags&overlaybase)!=0);
}
/// If this method returns \b true, the logical form of this space is truncated from its actual size
/// Pointers may refer to this original size put the most significant bytes are ignored
inline bool AddrSpace::isTruncated(void) const {
return ((flags&truncated)!=0);
}
/// Certain spaces are "virtual", such as stack spaces, where an address is really
/// relative to a base pointer held in a register (e.g. the stackpointer). A non-zero
/// result means \b this space is virtual and has 1 (or more) associated pointer registers.
/// This base-class implementation always reports zero.
/// \return the number of base registers associated with this space
inline int4 AddrSpace::numSpacebase(void) const {
  const int4 noBaseRegisters = 0;
  return noBaseRegisters;
}
/// For a virtual space, such as the stack space, this routine provides the location
/// information of one of the space's base registers. An exception is thrown if the
/// register does not exist. This base-class implementation always throws.
/// \param i is the index of the base register
/// \return the VarnodeData that describes the register
inline const VarnodeData &AddrSpace::getSpacebase(int4 i) const {
  const string message = name+" space is not virtual and has no associated base register";
  throw LowlevelError(message);
}
/// When a stack pointer has been truncated to fit the stack space, the extent of the
/// original register may still be needed. This base-class implementation always throws.
/// \param i is the index of the base register
/// \return the original register before truncation
inline const VarnodeData &AddrSpace::getSpacebaseFull(int4 i) const {
  const string message = name+" has no truncated registers";
  throw LowlevelError(message);
}
/// For a stack (or other spacebase) space, report whether the space can be viewed as a
/// stack on which a \b push operation decreases the spacebase pointer (grows negative).
/// This base-class implementation always answers \b true.
/// \return \b true if stacks grow in negative direction.
inline bool AddrSpace::stackGrowsNegative(void) const {
  const bool growsDown = true;
  return growsDown;
}
/// A virtual space reports its containing address space through this routine;
/// any other space reports NULL. This base-class implementation always returns NULL.
/// \return a pointer to the containing space or NULL
inline AddrSpace *AddrSpace::getContain(void) const {
  AddrSpace *const noContainer = 0;
  return noContainer;
}
/// Convert an offset expressed in addressable units (words of size \e ws)
/// into a byte relative offset by multiplying by the word size.
/// \param val is the offset to convert
/// \param ws is the number of bytes in the addressable word
/// \return the scaled offset
inline uintb AddrSpace::addressToByte(uintb val,uint4 ws) {
  uintb scaled = val;
  scaled *= ws;
  return scaled;
}
/// Convert a byte based offset into an offset relative to the addressable
/// unit of the space (wordsize) by dividing by the word size. Any remainder
/// of the integer division is discarded.
/// \param val is the offset to convert
/// \param ws is the number of bytes in the addressable word
/// \return the scaled offset
inline uintb AddrSpace::byteToAddress(uintb val,uint4 ws) {
  uintb scaled = val;
  scaled /= ws;
  return scaled;
}
/// Convert an int4 offset expressed in addressable units (words of size \e ws)
/// into a byte relative offset by multiplying by the word size.
/// \param val is the offset to convert
/// \param ws is the number of bytes in the addressable word
/// \return the scaled offset
inline int4 AddrSpace::addressToByteInt(int4 val,uint4 ws) {
  const int4 scaled = val*ws;
  return scaled;
}
/// Given an int4 offset in an address space based on bytes, convert it
/// into an offset relative to the addressable unit of the space (wordsize).
///
/// The division is carried out in signed arithmetic. Dividing \e val directly
/// by the unsigned \e ws would first convert \e val to an unsigned value, so a
/// negative byte offset would yield a huge positive result instead of a
/// negative unit offset. The quotient is truncated toward zero.
/// \param val is the offset to convert (may be negative)
/// \param ws is the number of bytes in the addressable word (must be non-zero)
/// \return the scaled offset
inline int4 AddrSpace::byteToAddressInt(int4 val,uint4 ws) {
  return val / static_cast<int4>(ws);  // Keep the division signed so negative offsets scale correctly
}
#endif