GP-4300 Full extended division in optimized division rules

2025-10-04 02:09:44 +02:00 · 2024-03-27 22:55:19 +00:00 · 2024-03-27 22:55:19 +00:00 · 4bbbba3e5a
commit 4bbbba3e5a
parent c63be6d2c3
17 changed files with 821 additions and 228 deletions
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/Makefile
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/Makefile
@ -80,7 +80,7 @@ CORE=	xml marshal space float address pcoderaw translate opcodes globalcontext
 # Additional core files for any projects that decompile
 DECCORE=capability architecture options graph cover block cast typeop database cpool \
 	comment stringmanage modelrules fspec action loadimage grammar varnode op \
-	type variable varmap jumptable emulate emulateutil flow userop \
+	type variable varmap jumptable emulate emulateutil flow userop multiprecision \
 	funcdata funcdata_block funcdata_op funcdata_varnode unionresolve pcodeinject \
 	heritage prefersplit rangeutil ruleaction subflow blockaction merge double \
 	transform coreaction condexe override dynamic crc32 prettyprint \
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/address.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/address.cc
@ -831,133 +831,4 @@ int4 bit_transitions(uintb val,int4 sz)
  return res;
 }

-/// \brief Multiply 2 unsigned 64-bit values, producing a 128-bit value
-///
-/// TODO: Remove once we import a full multiprecision library.
-/// \param res points to the result array (2 uint8 pieces)
-/// \param x is the first 64-bit value
-/// \param y is the second 64-bit value
-void mult64to128(uint8 *res,uint8 x,uint8 y)
-
-{
-  uint8 f = x & 0xffffffff;
-  uint8 e = x >> 32;
-  uint8 d = y & 0xffffffff;
-  uint8 c = y >> 32;
-  uint8 fd = f * d;
-  uint8 fc = f * c;
-  uint8 ed = e * d;
-  uint8 ec = e * c;
-  uint8 tmp = (fd >> 32) + (fc & 0xffffffff) + (ed & 0xffffffff);
-  res[1] = (tmp>>32) + (fc>>32) + (ed>>32) + ec;
-  res[0] = (tmp<<32) + (fd & 0xffffffff);
-}
-
-/// \brief Subtract (in-place) a 128-bit value from a base 128-value
-///
-/// The base value is altered in place.
-/// TODO: Remove once we import a full multiprecision library.
-/// \param a is the base 128-bit value being subtracted from in-place
-/// \param b is the other 128-bit value being subtracted
-void unsignedSubtract128(uint8 *a,uint8 *b)
-
-{
-  bool borrow = (a[0] < b[0]);
-  a[0] -= b[0];
-  a[1] -= b[1];
-  if (borrow)
-    a[1] -= 1;
-}
-
-/// \brief Compare two unsigned 128-bit values
-///
-/// TODO: Remove once we import a full multiprecision library.
-/// Given a first and second value, return -1, 0, or 1 depending on whether the first value
-/// is \e less, \e equal, or \e greater than the second value.
-/// \param a is the first 128-bit value (as an array of 2 uint8 elements)
-/// \param b is the second 128-bit value
-/// \return the comparison code
-int4 unsignedCompare128(uint8 *a,uint8 *b)
-
-{
-  if (a[1] != b[1])
-    return (a[1] < b[1]) ? -1 : 1;
-  if (a[0] != b[0])
-    return (a[0] < b[0]) ? -1 : 1;
-  return 0;
-}
-
-/// \brief Unsigned division of a power of 2 (upto 2^127) by a 64-bit divisor
-///
-/// The result must be less than 2^64. The remainder is calculated.
-/// \param n is the power of 2 for the numerand
-/// \param divisor is the 64-bit divisor
-/// \param q is the passed back 64-bit quotient
-/// \param r is the passed back 64-bit remainder
-/// \return 0 if successful, 1 if result is too big, 2 if divide by 0
-int4 power2Divide(int4 n,uint8 divisor,uint8 &q,uint8 &r)
-
-{
-  if (divisor == 0) return 2;
-  uint8 power = 1;
-  if (n < 64) {
-    power <<= n;
-    q = power / divisor;
-    r = power % divisor;
-    return 0;
-  }
-  // Divide numerand and divisor by 2^(n-63) to get approximation of result
-  uint8 y = divisor >> (n-64);	// Most of the way on divisor
-  if (y == 0) return 1;		// Check if result will be too big
-  y >>= 1;			// Divide divisor by final bit
-  power <<= 63;
-  uint8 max;
-  if (y == 0) {
-    max = 0;
-    max -= 1;			// Could be maximal
-    // Check if divisor is a power of 2
-    if ((((uint8)1) << (n-64)) == divisor)
-      return 1;
-  }
-  else
-    max = power / y + 1;
-  uint8 min = power / (y+1);
-  if (min != 0)
-    min -= 1;
-  uint8 fullpower[2];
-  fullpower[1] = ((uint8)1)<<(n-64);
-  fullpower[0] = 0;
-  uint8 mult[2];
-  mult[0] = 0;
-  mult[1] = 0;
-  uint8 tmpq = 0;
-  while(max > min+1) {
-    tmpq = max + min;
-    if (tmpq < min) {
-      tmpq = (tmpq>>1) + 0x8000000000000000L;
-    }
-    else
-      tmpq >>= 1;
-    mult64to128(mult,divisor,tmpq);
-    if (unsignedCompare128(fullpower,mult) < 0)
-      max = tmpq-1;
-    else
-      min = tmpq;
-  }
-  // min is now our putative quotient
-  if (tmpq != min)
-    mult64to128(mult,divisor,min);
-  unsignedSubtract128(fullpower,mult); // Calculate remainder
-  // min might be 1 too small
-  if (fullpower[1] != 0 || fullpower[0] >= divisor) {
-    q = min + 1;
-    r = fullpower[0] - divisor;
-  }
-  else {
-    q = min;
-    r = fullpower[0];
-  }
-  return 0;
-}
-
 } // End namespace ghidra
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/address.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/address.hh
@ -577,10 +577,5 @@ extern int4 count_leading_zeros(uintb val);		///< Return the number of leading z
 extern uintb coveringmask(uintb val);			///< Return a mask that \e covers the given value
 extern int4 bit_transitions(uintb val,int4 sz);		///< Calculate the number of bit transitions in the sized value

-extern void mult64to128(uint8 *res,uint8 x,uint8 y);
-extern void unsignedSubtract128(uint8 *a,uint8 *b);
-extern int4 unsignedCompare128(uint8 *a,uint8 *b);
-extern int4 power2Divide(int4 n,uint8 divisor,uint8 &q,uint8 &r);
-
 } // End namespace ghidra
 #endif
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/fspec.cc
@ -1600,6 +1600,15 @@ void ParamListStandardOut::initialize(void)
  }
 }

+/// \brief Find the return value storage using the older \e fallback method
+///
+/// Given the active set of trial locations that might hold (pieces of) the return value, calculate
+/// the best matching ParamEntry from \b this ParamList and mark all the trials that are contained
+/// in the ParamEntry as \e used.  If \b firstOnly is \b true, the ParamList is assumed to contain
+/// partial storage locations that might get used for return values split storage.  In this case,
+/// only the first ParamEntry in a storage class is allowed to match.
+/// \param active is the set of active trials
+/// \param firstOnly is \b true if only the first entry in a storage class can match
 void ParamListStandardOut::fillinMapFallback(ParamActive *active,bool firstOnly) const

 {
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh
@ -286,6 +286,7 @@ public:
  Varnode *newUnique(int4 s,Datatype *ct=(Datatype *)0);	///< Create a new \e temporary Varnode
  Varnode *newCodeRef(const Address &m);			///< Create a code address \e annotation Varnode
  Varnode *setInputVarnode(Varnode *vn);			///< Mark a Varnode as an input to the function
+  Varnode *newExtendedConstant(int4 s,uint8 *val,PcodeOp *op);	///< Create extended precision constant
  void adjustInputVarnodes(const Address &addr,int4 sz);
  void deleteVarnode(Varnode *vn) { vbank.destroy(vn); }	///< Delete the given varnode

--- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc
@ -372,6 +372,36 @@ Varnode *Funcdata::setInputVarnode(Varnode *vn)
  return vn;
 }

+/// Construct a constant Varnode up to 128 bits, using INT_ZEXT and PIECE if necessary.
+/// This method is temporary until we have full extended precision constants.
+/// \param s is the size of the Varnode in bytes
+/// \param val is the 128-bit value in 2 64-bit chunks
+/// \param op is the point before which any new PcodeOp should get inserted
+/// \return the new effective constant Varnode
+Varnode *Funcdata::newExtendedConstant(int4 s,uint8 *val,PcodeOp *op)
+
+{
+  if (s <= 8)
+    return newConstant(s, val[0]);
+  Varnode *newConstVn;
+  if (val[1] == 0) {
+    PcodeOp *extOp = newOp(1,op->getAddr());
+    opSetOpcode(extOp,CPUI_INT_ZEXT);
+    newConstVn = newUniqueOut(s,extOp);
+    opSetInput(extOp,newConstant(8,val[0]),0);
+    opInsertBefore(extOp,op);
+  }
+  else {
+    PcodeOp *pieceOp = newOp(2,op->getAddr());
+    opSetOpcode(pieceOp,CPUI_PIECE);
+    newConstVn = newUniqueOut(s,pieceOp);
+    opSetInput(pieceOp,newConstant(8,val[1]),0);	// Most significant piece
+    opSetInput(pieceOp,newConstant(8,val[0]),1);	// Least significant piece
+    opInsertBefore(pieceOp,op);
+  }
+  return newConstVn;
+}
+
 /// \brief Adjust input Varnodes contained in the given range
 ///
 /// After this call, a single \e input Varnode will exist that fills the given range.
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/modelrules.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/modelrules.hh
@ -54,6 +54,7 @@ class PrimitiveExtractor {
    union_invalid = 16			///< Unions are treated as an illegal element
  };
 public:
+  /// \brief A primitive data-type and its offset within the containing data-type
  class Primitive {
  public:
    Datatype *dt;		///< Primitive data-type
@ -71,7 +72,7 @@ public:
  PrimitiveExtractor(Datatype *dt,bool unionIllegal,int4 offset,int4 max);	///< Constructor
  int4 size(void) const { return primitives.size(); }	///< Return the number of primitives extracted
  const Primitive &get(int4 i) const { return primitives[i]; }	///< Get a particular primitive
-  bool isValid(void) const { return (flags & invalid) == 0; }
+  bool isValid(void) const { return (flags & invalid) == 0; }	///< Return \b true if primitives were successfully extracted
  bool containsUnknown(void) const { return (flags & unknown_element)!=0; }	///< Are there \b unknown elements
  bool isAligned(void) const { return (flags & unaligned)==0; }		///< Are all elements aligned
  bool containsHoles(void) const { return (flags & extra_space)!=0; }	///< Is there empty space that is not padding
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/multiprecision.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/multiprecision.cc
@ -0,0 +1,334 @@
+/* ###
+ * IP: GHIDRA
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "multiprecision.hh"
+
+namespace ghidra {
+
+extern int4 count_leading_zeros(uintb val);		///< Return the number of leading zero bits in the given value
+
+/// \brief Multi-precision logical left shift by a constant amount
+///
+/// \b in and \b out arrays are specified and can point to the same storage.
+/// \param num is the number of 64-bit words in the extended precision integers
+/// \param in is the multi-precision value to shift
+/// \param out is the container for the shifted result
+/// \param sa is the number of bits to shift
+static void leftshift(int4 num,uint8 *in,uint8 *out,int4 sa)
+
+{
+  int4 inIndex = num - 1 - sa / 64;
+  sa = sa % 64;
+  int4 outIndex = num - 1;
+  if (sa == 0) {
+    for(;inIndex>=0;--inIndex) {
+      out[outIndex--] = in[inIndex];
+    }
+    for(;outIndex>=0;--outIndex) {
+      out[outIndex] = 0;
+    }
+  }
+  else {
+    for(;inIndex>0;--inIndex) {
+      out[outIndex--] = (in[inIndex] << sa) | (in[inIndex-1] >> (64-sa));
+    }
+    out[outIndex--] = in[0] << sa;
+    for(;outIndex>=0;--outIndex) {
+      out[outIndex] = 0;
+    }
+  }
+}
+
+/// \param in is the 128-bit input (as 2 64-bit words)
+/// \param out will hold the 128-bit result
+/// \param sa is the number of bits to shift
+void leftshift128(uint8 *in,uint8 *out,int4 sa)
+
+{
+  leftshift(2,in,out,sa);
+}
+
+/// \brief Compare two multi-precision unsigned integers
+///
+/// -1, 0, or 1 is returned depending on if the first integer is less than, equal to, or greater than
+/// the second integer.
+/// \param num is the number of 64-bit words in the extended precision integers
+/// \param in1 is the first integer to compare
+/// \param in2 is the second integer to compare
+/// \return -1, 0, or 1
+static inline int4 ucompare(int4 num,uint8 *in1,uint8 *in2)
+
+{
+  for(int4 i=num-1;i>=0;--i) {
+    if (in1[i] != in2[i])
+      return (in1[i] < in2[i]) ? -1 : 1;
+  }
+  return 0;
+}
+
+/// \param in1 is the first 128-bit value (as 2 64-bit words) to compare
+/// \param in2 is the second 128-bit value
+/// \return \b true if the first value is less than the second value
+bool uless128(uint8 *in1,uint8 *in2)
+
+{
+  return ucompare(2,in1,in2) < 0;
+}
+
+/// \param in1 is the first 128-bit value (as 2 64-bit words) to compare
+/// \param in2 is the second 128-bit value
+/// \return \b true if the first value is less than or equal to the second value
+bool ulessequal128(uint8 *in1,uint8 *in2)
+
+{
+  return ucompare(2,in1,in2) <= 0;
+}
+
+/// \brief Multi-precision add operation
+///
+/// \param num is the number of 64-bit words in the extended precision integers
+/// \param in1 is the first integer
+/// \param in2 is the integer added to the first
+/// \param out is where the add result is stored
+static inline void add(int4 num,uint8 *in1,uint8 *in2,uint8 *out)
+
+{
+  uint8 carry = 0;
+  for(int4 i=0;i<num;++i) {
+    uint8 tmp = in2[i] + carry;
+    uint8 tmp2 = in1[i] + tmp;
+    out[i] = tmp2;
+    carry = (tmp < in2[i] || tmp2 < tmp) ? 1 : 0;
+  }
+}
+
+/// \param in1 is the first 128-bit value (as 2 64-bit words) to add
+/// \param in2 is the second 128-bit value to add
+/// \param out will hold the 128-bit result
+void add128(uint8 *in1,uint8 *in2,uint8 *out)
+
+{
+  add(2,in1,in2,out);
+}
+
+/// \brief Multi-precision subtract operation
+///
+/// \param num is the number of 64-bit words in the extended precision integers
+/// \param in1 is the first integer
+/// \param in2 is the integer subtracted from the first
+/// \param out is where the subtraction result is stored
+static inline void subtract(int4 num,uint8 *in1,uint8 *in2,uint8 *out)
+
+{
+  uint8 borrow = 0;
+  for(int4 i=0;i<num;++i) {
+    uint8 tmp = in2[i] + borrow;
+    borrow = (tmp < in2[i] || in1[i] < tmp) ? 1: 0;
+    out[i] = in1[i] - tmp;
+  }
+}
+
+/// \param in1 is the first 128-bit value (as 2 64-bit words)
+/// \param in2 is the second 128-bit value to subtract
+/// \param out will hold the 128-bit result
+void subtract128(uint8 *in1,uint8 *in2,uint8 *out)
+
+{
+  subtract(2,in1,in2,out);
+}
+
+/// \brief Split an array of 64-bit words into an array of 32-bit words
+///
+/// The arrays must already be allocated.  The least significant half of each 64-bit word is put
+/// into the 32-bit word array first.  The number of 32-bit words up to and including the most
+/// significant non-zero word is returned as the \e effective size of the resulting array (0 if all are zero).
+/// \param num is the number of 64-bit words to split
+/// \param val is the array of 64-bit words
+/// \param res is the array that will hold the 32-bit words
+/// \return the effective size of the 32-bit word array
+static int4 split64_32(int4 num,uint8 *val,uint4 *res)
+
+{
+  int4 m = 0;
+  for(int4 i=0;i<num;++i) {
+    uint4 hi = val[i] >> 32;
+    uint4 lo = val[i] & 0xffffffff;
+    if (hi != 0)
+      m = i*2 + 2;
+    else if (lo != 0)
+      m = i*2 + 1;
+    res[i*2] = lo;
+    res[i*2+1] = hi;
+  }
+  return m;
+}
+
+/// \brief Pack an array of 32-bit words into an array of 64-bit words
+///
+/// The arrays must already be allocated.  The 64-bit word array is padded out with zeroes if
+/// the specified size exceeds the provided number of 32-bit words.
+/// \param num is the number of 64-bit words in the resulting array
+/// \param max is the number of 32-bit words to pack
+/// \param out is the array of 64-bit words
+/// \param in is the array of 32-bit words
+static void pack32_64(int4 num,int4 max,uint8 *out,uint4 *in)
+
+{
+  int4 j = num * 2 - 1;
+  for(int4 i=num-1;i>=0;--i) {
+    uint8 val;
+    val = (j<max) ? in[j] : 0;
+    val <<= 32;
+    j -= 1;
+    if (j < max)
+      val |= in[j];
+    j -= 1;
+    out[i] = val;
+  }
+}
+
+/// \brief Logical shift left for an extended integer in 32-bit word arrays
+///
+/// \param arr is the array of 32-bit words
+/// \param size is the number of words in the array
+/// \param sa is the number of bits to shift
+static void shift_left(uint4 *arr,int4 size,int4 sa)
+
+{
+  if (sa == 0) return;
+  for (int4 i = size - 1; i > 0; --i)
+    arr[i] = (arr[i] << sa) | (arr[i-1] >> (32-sa));
+  arr[0] = arr[0] << sa;
+}
+
+/// \brief Logical shift right for an extended integer in 32-bit word arrays
+///
+/// \param arr is the array of 32-bit words
+/// \param size is the number of words in the array
+/// \param sa is the number of bits to shift
+static void shift_right(uint4 *arr,int4 size,int4 sa)
+
+{
+  if (sa == 0) return;
+  for(int4 i=0;i<size-1;++i)
+    arr[i] = (arr[i] >> sa) | (arr[i+1] << (32-sa));
+  arr[size-1] = arr[size -1] >> sa;
+}
+
+/// \brief Knuth's algorithm d, for integer division
+///
+/// The numerator and denominator, expressed in 32-bit \e digits, are provided.
+/// The algorithm calculates the quotient and the remainder is left in the array originally
+/// containing the numerator.
+/// \param m is the number of 32-bit digits in the numerator
+/// \param n is the number of 32-bit digits in the denominator
+/// \param u is the numerator and will hold the remainder
+/// \param v is the denominator
+/// \param q will hold the final quotient
+static void knuth_algorithm_d(int4 m,int4 n,uint4 *u,uint4 *v,uint4 *q)
+
+{
+  int4 s = count_leading_zeros(v[n-1]) - 8*(sizeof(uintb)-sizeof(uint4));
+  shift_left(v,n,s);
+  shift_left(u,m,s);
+
+  for(int4 j=m-n-1;j>=0;--j) {
+    uint8 tmp = ((uint8)u[n+j] << 32) + u[n-1+j];
+    uint8 qhat = tmp / v[n-1];
+    uint8 rhat = tmp % v[n-1];
+    do {
+      if (qhat <= 0xffffffff && qhat * v[n-2] <= (rhat << 32) + u[n-2+j])
+	break;
+      qhat -= 1;
+      rhat += v[n-1];
+    } while(rhat <= 0xffffffff);
+
+    uint8 carry = 0;
+    int8 t;
+    for (int4 i=0;i<n;++i) {
+      tmp = qhat*v[i];
+      t = u[i+j] - carry - (tmp & 0xffffffff);
+      u[i+j] = t;
+      carry = (tmp >> 32) - (t >> 32);
+    }
+    t = u[j+n] - carry;
+    u[j+n] = t;
+
+    q[j] = qhat;
+    if (t < 0) {
+      q[j] -= 1;
+      carry = 0;
+      for(int4 i=0;i<n;++i) {
+	tmp = u[i+j] + (v[i] + carry);
+	u[i+j] = tmp;
+	carry = tmp >> 32;
+      }
+      u[j+n] += carry;
+    }
+  }
+  shift_right(u,m,s);
+}
+
+/// A LowlevelError is thrown if the denominator is 0.
+/// \param numer holds the 2 64-bit words of the numerator
+/// \param denom holds the 2 words of the denominator
+/// \param quotient_res will hold the 2 words of the quotient
+/// \param remainder_res will hold the 2 words of the remainder
+void udiv128(uint8 *numer,uint8 *denom,uint8 *quotient_res,uint8 *remainder_res)
+
+{
+  if (denom[0] == 0 && denom[1] == 0)	// Must precede the 64-bit fast path, which divides natively
+    throw LowlevelError("divide by 0");
+  if (numer[1] == 0 && denom[1] == 0) {
+    quotient_res[0] = numer[0] / denom[0];
+    quotient_res[1] = 0;
+    remainder_res[0] = numer[0] % denom[0];
+    remainder_res[1] = 0;
+    return;
+  }
+  uint4 v[4];
+  uint4 u[5];	// Array needs one more entry for normalization overflow
+  uint4 q[5];	// Single digit division also emits a (zero) digit for the extended entry of u
+  int4 n = split64_32(2,denom,v);	// n >= 1 because the denominator is non-zero
+  int4 m = split64_32(2,numer,u);
+  if ( m < n || ( (n==m) && u[n-1] < v[n-1])) {
+				// numerator is smaller than the denominator, quotient is 0
+    quotient_res[0] = 0;
+    quotient_res[1] = 0;
+    remainder_res[0] = numer[0];
+    remainder_res[1] = numer[1];
+    return;
+  }
+  u[m] = 0;
+  m += 1;			// Extend u array by 1 to account for normalization
+  if (n == 1) {
+    uint4 d = v[0];
+    uint4 rem = 0;
+    for(int4 i=m-1;i>=0;--i) {
+      uint8 tmp = ((uint8)rem << 32) + u[i];
+      q[i] = tmp / d;
+      u[i] = 0;
+      rem = tmp % d;
+    }
+    u[0] = rem;			// Last carry is final remainder
+  }
+  else {
+    knuth_algorithm_d(m,n,u,v,q);
+  }
+  pack32_64(2,m-n,quotient_res,q);
+  pack32_64(2,m-1,remainder_res,u);
+}
+
+} // End namespace ghidra
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/multiprecision.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/multiprecision.hh
@ -0,0 +1,42 @@
+/* ###
+ * IP: GHIDRA
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/// \file multiprecision.hh
+/// \brief Multi-precision integers
+#ifndef __CPUI_MULTIPRECISION__
+#define __CPUI_MULTIPRECISION__
+
+#include "error.hh"
+
+namespace ghidra {
+
+extern void leftshift128(uint8 *in,uint8 *out,int4 sa);		///< 128-bit INT_LEFT operation with constant shift amount
+extern bool uless128(uint8 *in1,uint8 *in2);			///< 128-bit INT_LESS operation
+extern bool ulessequal128(uint8 *in1,uint8 *in2);		///< 128-bit INT_LESSEQUAL operation
+extern void udiv128(uint8 *numer,uint8 *denom,uint8 *quotient_res,uint8 *remainder_res);	///< 128-bit INT_DIV
+extern void add128(uint8 *in1,uint8 *in2,uint8 *out);		///< 128-bit INT_ADD operation
+extern void subtract128(uint8 *in1,uint8 *in2,uint8 *out);	///< 128-bit INT_SUB operation
+
+/// \brief Set a 128-bit value (2 64-bit words) from a 64-bit value
+///
+/// \param res will hold the 128-bit value
+/// \param val is the 64-bit value to set from
+inline void set_u128(uint8 *res,uint8 val) {
+  res[0] = val;
+  res[1] = 0;
+}
+
+} // End namespace ghidra
+#endif
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc
@ -17,6 +17,7 @@
 #include "coreaction.hh"
 #include "subflow.hh"
 #include "rangeutil.hh"
+#include "multiprecision.hh"

 namespace ghidra {

@ -1884,8 +1885,12 @@ int4 RuleDoubleShift::applyOp(PcodeOp *op,Funcdata &data)
  }
  else if (sa1 == sa2 && size <= sizeof(uintb)) {	// FIXME:  precision
    mask = calc_mask(size);
-    if (opc1 == CPUI_INT_LEFT)
+    if (opc1 == CPUI_INT_LEFT) {
+      // The INT_LEFT is highly likely to be a multiply, so don't collapse to an INT_AND if there
+      // are other uses of the intermediate value.
+      if (secvn->loneDescend() == (PcodeOp *)0) return 0;
      mask = (mask<<sa1) & mask;
+    }
    else
      mask = (mask>>sa1) & mask;
    newvn = data.newConstant(size,mask);
@ -7359,16 +7364,15 @@ int4 RuleDivTermAdd::applyOp(PcodeOp *op,Funcdata &data)
  OpCode shiftopc;
  PcodeOp *subop = findSubshift(op,n,shiftopc);
  if (subop == (PcodeOp *)0) return 0;
-  // TODO: Cannot currently support 128-bit arithmetic, except in special case of 2^64
-  if (n > 64) return 0;
+  if (n > 127) return 0;	// Up to 128-bits
  
  Varnode *multvn = subop->getIn(0);
  if (!multvn->isWritten()) return 0;
  PcodeOp *multop = multvn->getDef();
  if (multop->code() != CPUI_INT_MULT) return 0;
-  uintb multConst;
-  int4 constExtType = multop->getIn(1)->isConstantExtended(multConst);
-  if (constExtType < 0) return 0;
+  uint8 multConst[2];
+  if (!multop->getIn(1)->isConstantExtended(multConst))
+    return 0;
  
  Varnode *extvn = multop->getIn(0);
  if (!extvn->isWritten()) return 0;
@ -7381,20 +7385,10 @@ int4 RuleDivTermAdd::applyOp(PcodeOp *op,Funcdata &data)
    if (op->code()==CPUI_INT_RIGHT) return 0;
  }

-  uintb newc;
-  if (n < 64 || (extvn->getSize() <= 8)) {
-    uintb pow = 1;
-    pow <<= n;			// Calculate 2^n
-    newc = multConst + pow;
-  }
-  else {
-    if (constExtType != 2) return 0; // TODO: Can't currently represent
-    if (!signbit_negative(multConst,8)) return 0;
-    // Adding 2^64 to a sign-extended 64-bit value with its sign set, causes all the
-    // set extension bits to be cancelled out, converting it into a
-    // zero-extended 64-bit value.
-    constExtType = 1;		// Set extension of constant to INT_ZEXT
-  }
+  uint8 power[2];
+  set_u128(power, 1);
+  leftshift128(power,power,n);		// power = 2^n
+  add128(multConst,power,multConst);	// multConst += 2^n
  Varnode *x = extop->getIn(0);

  list<PcodeOp *>::const_iterator iter;
@ -7405,17 +7399,7 @@ int4 RuleDivTermAdd::applyOp(PcodeOp *op,Funcdata &data)
      continue;

    // Construct the new constant
-    Varnode *newConstVn;
-    if (constExtType == 0)
-      newConstVn = data.newConstant(extvn->getSize(),newc);
-    else {
-      // Create new extension of the constant
-      PcodeOp *newExtOp = data.newOp(1,op->getAddr());
-      data.opSetOpcode(newExtOp,(constExtType==1) ? CPUI_INT_ZEXT : CPUI_INT_SEXT);
-      newConstVn = data.newUniqueOut(extvn->getSize(),newExtOp);
-      data.opSetInput(newExtOp,data.newConstant(8,multConst),0);
-      data.opInsertBefore(newExtOp,op);
-    }
+    Varnode *newConstVn = data.newExtendedConstant(extvn->getSize(), multConst, op);

    // Construct the new multiply
    PcodeOp *newmultop = data.newOp(2,op->getAddr());
@ -7532,7 +7516,8 @@ int4 RuleDivTermAdd2::applyOp(PcodeOp *op,Funcdata &data)
  if (!multvn->isWritten()) return 0;
  PcodeOp *multop = multvn->getDef();
  if (multop->code() != CPUI_INT_MULT) return 0;
-  if (!multop->getIn(1)->isConstant()) return 0;
+  uint8 multConst[2];
+  if (!multop->getIn(1)->isConstantExtended(multConst)) return 0;
  Varnode *zextvn = multop->getIn(0);
  if (!zextvn->isWritten()) return 0;
  PcodeOp *zextop = zextvn->getDef();
@ -7545,14 +7530,16 @@ int4 RuleDivTermAdd2::applyOp(PcodeOp *op,Funcdata &data)
    if (addop->code() != CPUI_INT_ADD) continue;
    if ((addop->getIn(0)!=z)&&(addop->getIn(1)!=z)) continue;

-    uintb pow = 1;
-    pow <<= n;			// Calculate 2^n
-    uintb newc = multop->getIn(1)->getOffset() + pow;
+    uint8 pow[2];
+    set_u128(pow, 1);
+    leftshift128(pow,pow,n);		// Calculate 2^n
+    add128(multConst, pow, multConst);	// multConst = multConst + 2^n
    PcodeOp *newmultop = data.newOp(2,op->getAddr());
    data.opSetOpcode(newmultop,CPUI_INT_MULT);
    Varnode *newmultvn = data.newUniqueOut(zextvn->getSize(),newmultop);
    data.opSetInput(newmultop,zextvn,0);
-    data.opSetInput(newmultop,data.newConstant(zextvn->getSize(),newc),1);
+    Varnode *newConstVn = data.newExtendedConstant(zextvn->getSize(), multConst, op);
+    data.opSetInput(newmultop,newConstVn,1);
    data.opInsertBefore(newmultop,op);

    PcodeOp *newshiftop = data.newOp(2,op->getAddr());
@ -7591,7 +7578,7 @@ int4 RuleDivTermAdd2::applyOp(PcodeOp *op,Funcdata &data)
 /// \param xsize will hold the number of (non-zero) bits in the numerand
 /// \param extopc holds whether the extension is INT_ZEXT or INT_SEXT
 /// \return the extended numerand if possible, or the unextended numerand, or NULL
-Varnode *RuleDivOpt::findForm(PcodeOp *op,int4 &n,uintb &y,int4 &xsize,OpCode &extopc)
+Varnode *RuleDivOpt::findForm(PcodeOp *op,int4 &n,uint8 *y,int4 &xsize,OpCode &extopc)

 {
  PcodeOp *curOp = op;
@ -7621,18 +7608,22 @@ Varnode *RuleDivOpt::findForm(PcodeOp *op,int4 &n,uintb &y,int4 &xsize,OpCode &e
  if (curOp->code() != CPUI_INT_MULT) return (Varnode *)0;	// There MUST be an INT_MULT
  Varnode *inVn = curOp->getIn(0);
  if (!inVn->isWritten()) return (Varnode *)0;
-  if (inVn->isConstantExtended(y) >= 0) {
+  if (inVn->isConstantExtended(y)) {
    inVn = curOp->getIn(1);
    if (!inVn->isWritten()) return (Varnode *)0;
  }
-  else if (curOp->getIn(1)->isConstantExtended(y) < 0)
+  else if (!curOp->getIn(1)->isConstantExtended(y))
    return (Varnode *)0;	// There MUST be a constant

  Varnode *resVn;
  PcodeOp *extOp = inVn->getDef();
  extopc = extOp->code();
  if (extopc != CPUI_INT_SEXT) {
-    uintb nzMask = inVn->getNZMask();
+    uintb nzMask;
+    if (extopc == CPUI_INT_ZEXT)
+      nzMask = extOp->getIn(0)->getNZMask();
+    else
+      nzMask = inVn->getNZMask();
    xsize = 8*sizeof(uintb) - count_leading_zeros(nzMask);
    if (xsize == 0) return (Varnode *)0;
    if (xsize > 4*inVn->getSize()) return (Varnode *)0;
@ -7672,44 +7663,50 @@ Varnode *RuleDivOpt::findForm(PcodeOp *op,int4 &n,uintb &y,int4 &xsize,OpCode &e
 /// Do some additional checks on the parameters as an optimized encoding
 /// of a divisor.
 /// \param n is the power of 2
-/// \param y is the multiplicative coefficient
+/// \param y is the (up to 128-bit) multiplicative coefficient
 /// \param xsize is the maximum power of 2
 /// \return the divisor or 0 if the checks fail
-uintb RuleDivOpt::calcDivisor(uintb n,uint8 y,int4 xsize)
+uintb RuleDivOpt::calcDivisor(uintb n,uint8 *y,int4 xsize)

 {
-  if (n > 127) return 0;		// Not enough precision
-  if (y <= 1) return 0;		// Boundary cases are wrong form
+  if (n > 127 || xsize > 64) return 0;		// Not enough precision
+  uint8 power[2];
+  uint8 q[2];
+  uint8 r[2];
+  set_u128(power, 1);
+  if (ulessequal128(y, power))		// Boundary cases, y <= 1, are wrong form
+    return 0;

-  uint8 d,r;
-  uint8 power;
-  if (n < 64) {
-    power = ((uint8)1) << n;
-    d = power / (y-1);
-    r = power % (y-1);
+  subtract128(y, power, y);			// y = y - 1
+  leftshift128(power, power, n);		// power = 2^n
+
+  udiv128(power, y, q, r);
+  if (0 != q[1])
+    return 0;			// Result is bigger than 64-bits
+  if (uless128(y,q)) return 0;	// if y < q
+  uint8 diff = 0;
+  if (!uless128(r,q)) {		// if r >= q
+    // It's possible y is 1 too big giving us a q that is smaller by 1 than the correct value
+    q[0] += 1;			// Adjust to bigger q
+    subtract128(r,y,r);		// and remainder for the smaller y
+    add128(r, q, r);
+    if (!uless128(r,q)) return 0;
+    diff = q[0];		// Using y that is off by one adds extra error, affecting allowable maxx
  }
-  else {
-    if (0 != power2Divide(n,y-1,d,r))
-      return 0;			// Result is bigger than 64-bits
-  }
-  if (d>=y) return 0;
-  if (r >= d) return 0;
  // The optimization of division to multiplication
  // by the reciprocal holds true, if the maximum value
-  // of x times the remainder is less than 2^n
-  uint8 maxx = 1;
-  maxx <<= xsize;
+  // of x times q-r is less than 2^n
+  uint8 maxx = (xsize == 64) ? 0 : ((uint8)1) << xsize;
  maxx -= 1;			// Maximum possible x value
-  uint8 tmp;
-  if (n < 64)
-    tmp = power / (d-r);	// r < d => divisor is non-zero
-  else {
-    uint8 unused;
-    if (0 != power2Divide(n,d-r,tmp,unused))
-      return (uintb)d;		// tmp is bigger than 2^64 > maxx
-  }
-  if (tmp<=maxx) return 0;
-  return (uintb)d;
+  uint8 tmp[2];
+  uint8 denom[2];
+  diff += q[0] - r[0];
+  set_u128(denom,diff);
+  udiv128(power,denom, tmp, r);
+  if (0 != tmp[1])
+    return (uintb)q[0];		// tmp is bigger than 2^64 > maxx
+  if (tmp[0]<=maxx) return 0;
+  return (uintb)q[0];
 }

 /// \brief Replace sign-bit extractions from the first given Varnode with the second Varnode
@ -7785,7 +7782,7 @@ bool RuleDivOpt::checkFormOverlap(PcodeOp *op)
    Varnode *cvn = superOp->getIn(1);
    if (!cvn->isConstant()) return true;	// Might be a form where constant has propagated yet
    int4 n,xsize;
-    uintb y;
+    uint8 y[2];
    OpCode extopc;
    Varnode *inVn = findForm(superOp, n, y, xsize, extopc);
    if (inVn != (Varnode *)0) return true;
@ -7797,8 +7794,8 @@ bool RuleDivOpt::checkFormOverlap(PcodeOp *op)
 /// \brief Convert INT_MULT and shift forms into INT_DIV or INT_SDIV
 ///
 /// The unsigned and signed variants are:
-///   - `sub( (zext(V)*c)>>n, 0)   =>  V / (2^n/(c-1))`
-///   - `sub( (sext(V)*c)s>>n, 0)  =>  V s/ (2^n/(c-1))`
+///   - `sub( (zext(V)*c), d) >> e   =>  V / (2^n/(c-1)) where n = d*8 + e`
+///   - `sub( (sext(V)*c), d) s>> e =>  V s/ (2^n/(c-1)) where n = d*8 + e`
 void RuleDivOpt::getOpList(vector<uint4> &oplist) const

 {
@ -7811,7 +7808,7 @@ int4 RuleDivOpt::applyOp(PcodeOp *op,Funcdata &data)

 {
  int4 n,xsize;
-  uintb y;
+  uint8 y[2];
  OpCode extOpc;
  Varnode *inVn = findForm(op,n,y,xsize,extOpc);
  if (inVn == (Varnode *)0) return 0;
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.hh
@ -1264,7 +1264,7 @@ public:
 };

 class RuleDivOpt : public Rule {
-  static uintb calcDivisor(uintb n,uint8 y,int4 xsize);		///< Calculate the divisor
+  static uintb calcDivisor(uintb n,uint8 *y,int4 xsize);		///< Calculate the divisor
  static void moveSignBitExtraction(Varnode *firstVn,Varnode *replaceVn,Funcdata &data);
  static bool checkFormOverlap(PcodeOp *op);	///< If form rooted at given PcodeOp is superseded by an overlapping form
 public:
@ -1275,7 +1275,7 @@ public:
  }
  virtual void getOpList(vector<uint4> &oplist) const;
  virtual int4 applyOp(PcodeOp *op,Funcdata &data);
-  static Varnode *findForm(PcodeOp *op,int4 &n,uintb &y,int4 &xsize,OpCode &extopc);
+  static Varnode *findForm(PcodeOp *op,int4 &n,uint8 *y,int4 &xsize,OpCode &extopc);
 };

 class RuleSignDiv2 : public Rule {
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc
@ -3706,6 +3706,7 @@ static void findSlaSpecs(vector<string> &res, const string &dir, const string &s
 /// \param enforceLocalKeyWord is \b true to force all local variable definitions to use the \b local keyword
 /// \param largeTemporaryWarning is \b true for individual warnings about temporary varnodes that are too large
 /// \param caseSensitiveRegisterNames is \b true if register names are allowed to be case sensitive
+/// \param debugOutput is \b true if the output file is written using the debug (XML) form of the .sla format
 void SleighCompile::setAllOptions(const map<string,string> &defines, bool unnecessaryPcodeWarning,
 				  bool lenientConflict, bool allCollisionWarning,
 				  bool allNopWarning,bool deadTempWarning,bool enforceLocalKeyWord,
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.cc
@ -780,37 +780,55 @@ void Varnode::printRawHeritage(ostream &s,int4 depth) const
    s << endl;
 }

-/// If \b this is a constant, or is extended (INT_ZEXT,INT_SEXT) from a constant,
-/// the \e value of the constant is passed back and a non-negative integer is returned, either:
-///   - 0 for a normal constant Varnode
-///   - 1 for a zero extension (INT_ZEXT) of a normal constant
-///   - 2 for a sign extension (INT_SEXT) of a normal constant
-/// \param val is a reference to the constant value that is passed back
-/// \return the extension code (or -1 if \b this cannot be interpreted as a constant)
-int4 Varnode::isConstantExtended(uintb &val) const
+/// If \b this is a constant, or is a 9 to 16 byte value extended (INT_ZEXT,INT_SEXT,PIECE) from a constant,
+/// the \e value of the constant (currently up to 128 bits) is passed back and \b true is returned.
+/// \param val will hold the 128-bit constant value as two 64-bit pieces, val[0] being the least significant
+/// \return \b true if a constant was recovered (\b false leaves no guarantee about the contents of \b val)
+bool Varnode::isConstantExtended(uint8 *val) const

 {
  if (isConstant()) {
-    val = getOffset();
-    return 0;
+    val[0] = getOffset();	// A plain constant only supplies the low 64 bits
+    val[1] = 0;
+    return true;
  }
-  if (!isWritten()) return -1;
+  if (!isWritten() || size <= 8) return false;	// Extension forms are only examined for written Varnodes bigger than 8 bytes
+  if (size > 16) return false;		// Currently only up to 128-bit values
  OpCode opc = def->code();
  if (opc == CPUI_INT_ZEXT) {
    Varnode *vn0 = def->getIn(0);
    if (vn0->isConstant()) {
-      val = vn0->getOffset();
-      return 1;
+      val[0] = vn0->getOffset();
+      val[1] = 0;		// Zero extension: high 64 bits stay clear
+      return true;
    }
  }
  else if (opc == CPUI_INT_SEXT) {
    Varnode *vn0 = def->getIn(0);
    if (vn0->isConstant()) {
-      val = vn0->getOffset();
-      return 2;
+      val[0] = vn0->getOffset();
+      if (vn0->getSize() < 8)	// Constant is narrower than 64 bits; presumably sign_extend fills out val[0]
+	val[0] = sign_extend(val[0], vn0->getSize(), size);
+      val[1] = (signbit_negative(val[0], 8)) ? 0xffffffffffffffffL : 0;	// High 64 bits are all ones when val[0] tests negative
+      return true;
    }
  }
-  return -1;
+  else if (opc == CPUI_PIECE) {
+    Varnode *vnlo = def->getIn(1);	// Input 1 is taken as the least significant piece
+    if (vnlo->isConstant()) {
+      val[0] = vnlo->getOffset();
+      Varnode *vnhi = def->getIn(0);	// Input 0 is taken as the most significant piece
+      if (vnhi->isConstant()) {
+	val[1] = vnhi->getOffset();
+	if (vnlo->getSize() == 8)	// Pieces already line up with val[0] and val[1]
+	  return true;
+	val[0] |= val[1] << 8*vnlo->getSize();	// Low bits of the high piece fill the rest of val[0]
+	val[1] >>= 8*(8-vnlo->getSize());	// NOTE(review): both shift counts assume the low piece is under 8 bytes - confirm
+	return true;
+      }
+    }
+  }
+  return false;
 }

 /// Make an initial determination of the Datatype of this Varnode. If a Datatype is already
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/varnode.hh
@ -290,7 +290,7 @@ public:
    return (loc.getOffset() == val);
  }

-  int4 isConstantExtended(uintb &val) const; ///< Is \b this an (extended) constant
+  bool isConstantExtended(uint8 *val) const; ///< Is \b this an (extended) constant
  /// Return \b true if this Varnode is linked into the SSA tree
  bool isHeritageKnown(void) const { return ((flags&(Varnode::insert|Varnode::constant|Varnode::annotation))!=0); }
  bool isTypeLock(void) const { return ((flags&Varnode::typelock)!=0); } ///< Does \b this have a locked Datatype?