Initial commit of new StringManager

2025-10-04 10:19:23 +02:00 · 2020-04-17 18:37:40 -04:00 · 2020-04-17 18:37:40 -04:00 · 0eb48e441f
commit 0eb48e441f
parent 6a15520aa5
19 changed files with 740 additions and 236 deletions
--- a/Ghidra/Features/Decompiler/build.gradle
+++ b/Ghidra/Features/Decompiler/build.gradle
@ -271,6 +271,7 @@ model {
 		                include "database.cc"
 		                include "cpool.cc"
 		                include "comment.cc"
+						include "stringmanage.cc"
 		                include "fspec.cc"
 		                include "action.cc"
 		                include "loadimage.cc"
@ -321,6 +322,7 @@ model {
 		                include "cpool_ghidra.cc"
 		                include "ghidra_process.cc"
 		                include "comment_ghidra.cc"
+						include "string_ghidra.cc"
 		         //       include "callgraph.cc"			// uncomment for debug
 		         //       include "ifacedecomp.cc"		// uncomment for debug
 		         //       include "ifaceterm.cc"			// uncomment for debug
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/Makefile
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/Makefile
@ -75,7 +75,7 @@ EXTERNAL_CONSOLEEXT_NAMES=$(subst .cc,,$(notdir $(EXTERNAL_CONSOLEEXT_SOURCE)))
 CORE=	xml space float address pcoderaw translate opcodes globalcontext
 # Additional core files for any projects that decompile
 DECCORE=capability architecture options graph cover block cast typeop database cpool \
-	comment fspec action loadimage grammar varnode op \
+	comment stringmanage fspec action loadimage grammar varnode op \
 	type variable varmap jumptable emulate emulateutil flow userop \
 	funcdata funcdata_block funcdata_op funcdata_varnode pcodeinject \
 	heritage prefersplit rangeutil ruleaction subflow blockaction merge double \
@ -87,7 +87,7 @@ SLEIGH=	sleigh pcodeparse pcodecompile sleighbase slghsymbol \
 # Additional files for the GHIDRA specific build
 GHIDRA=	ghidra_arch inject_ghidra ghidra_translate loadimage_ghidra \
 	typegrp_ghidra database_ghidra ghidra_context cpool_ghidra \
-	ghidra_process comment_ghidra $(GHIDRAEXT_NAMES)
+	ghidra_process comment_ghidra string_ghidra $(GHIDRAEXT_NAMES)
 # Additional files specific to the sleigh compiler
 SLACOMP=slgh_compile slghparse slghscan
 # Additional special files that should not be considered part of the library
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/architecture.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/architecture.cc
@ -100,6 +100,7 @@ Architecture::Architecture(void)
  loader = (LoadImage *)0;
  pcodeinjectlib = (PcodeInjectLibrary *)0;
  commentdb = (CommentDatabase *)0;
+  stringManager = (StringManager *)0;
  cpool = (ConstantPool *)0;
  symboltab = new Database(this);
  context = (ContextDatabase *)0;
@ -152,6 +153,8 @@ Architecture::~Architecture(void)
    delete pcodeinjectlib;
  if (commentdb != (CommentDatabase *)0)
    delete commentdb;
+  if (stringManager != (StringManager *)0)
+    delete stringManager;
  if (cpool != (ConstantPool *)0)
    delete cpool;
  if (context != (ContextDatabase *)0)
@ -268,6 +271,7 @@ void Architecture::clearAnalysis(Funcdata *fd)
  fd->clear();			// Clear stuff internal to function
  // Clear out any analysis generated comments
  commentdb->clearType(fd->getAddress(),Comment::warning|Comment::warningheader);
+  stringManager->clear();
 }

 /// Symbols do not necessarily need to be available for the decompiler.
@ -405,6 +409,7 @@ void Architecture::saveXml(ostream &s) const
  symboltab->saveXml(s);
  context->saveXml(s);
  commentdb->saveXml(s);
+  stringManager->saveXml(s);
  if (!cpool->empty())
    cpool->saveXml(s);
  s << "</save_state>\n";
@ -437,6 +442,8 @@ void Architecture::restoreXml(DocumentStorage &store)
      context->restoreXml(subel,this);
    else if (subel->getName() == "commentdb")
      commentdb->restoreXml(subel,this);
+    else if (subel->getName() == "stringmanage")
+      stringManager->restoreXml(subel,this);
    else if (subel->getName() == "constantpool")
      cpool->restoreXml(subel,*types);
    else if (subel->getName() == "optionslist")
@ -575,6 +582,14 @@ void Architecture::buildCommentDB(DocumentStorage &store)
  commentdb = new CommentDatabaseInternal();
 }

+/// Create the container that holds decoded strings for this architecture.
+/// This default version installs a StringManagerUnicode.
+/// \param store may hold configuration information
+void Architecture::buildStringManager(DocumentStorage &store)
+
+{
+  // NOTE(review): presumably the maximum number of bytes per decoded string,
+  // as with StringManager(int4 max) -- confirm against StringManagerUnicode
+  const int4 maxStringBytes = 2048;
+  stringManager = new StringManagerUnicode(this,maxStringBytes);
+}
+
 /// Some processor models (Java byte-code) need a database of constants.
 /// The database is always built, but may remain empty.
 /// \param store may hold configuration information
@ -1237,6 +1252,7 @@ void Architecture::init(DocumentStorage &store)
  buildContext(store);
  buildTypegrp(store);
  buildCommentDB(store);
+  buildStringManager(store);
  buildConstantPool(store);

  restoreFromSpec(store);
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/architecture.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/architecture.hh
@ -28,6 +28,7 @@
 #include "loadimage.hh"
 #include "globalcontext.hh"
 #include "comment.hh"
+#include "stringmanage.hh"
 #include "userop.hh"
 #include "options.hh"
 #include "transform.hh"
@ -147,6 +148,7 @@ public:
  PcodeInjectLibrary *pcodeinjectlib;	///< Pcode injection manager
  RangeList nohighptr;          ///< Ranges for which high-level pointers are not possible
  CommentDatabase *commentdb;	///< Comments for this architecture
+  StringManager *stringManager;	///< Manager of decoded strings
  ConstantPool *cpool;		///< Deferred constant values
  PrintLanguage *print;	        ///< Current high-level language printer
  vector<PrintLanguage *> printlist;	///< List of high-level language printers supported
@ -227,6 +229,7 @@ protected:

  virtual void buildTypegrp(DocumentStorage &store);		///< Build the data-type factory/container
  virtual void buildCommentDB(DocumentStorage &store);		///< Build the comment database
+  virtual void buildStringManager(DocumentStorage &store);	///< Build the string manager
  virtual void buildConstantPool(DocumentStorage &store);	///< Build the constant pool
  virtual void buildInstructions(DocumentStorage &store);	///< Register the p-code operations
  virtual void buildAction(DocumentStorage &store);		///< Build the Action framework
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/ghidra_arch.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ghidra_arch.cc
@ -615,6 +615,52 @@ void ArchitectureGhidra::getBytes(uint1 *buf,int4 size,const Address &inaddr)
  readResponseEnd(sin);
 }

+/// \brief Retrieve string bytes at the given address from the Ghidra client
+///
+/// A \e getString query carrying the address, the byte limit, and the name and id
+/// of the character data-type is sent to the client.  The response holds a byte
+/// count, encoded as two characters offset from 0x20 (6 bits each), followed by
+/// two characters per byte, each carrying a 4-bit value offset from 'A'.
+/// The whole payload is always read off the stream, but at most \b maxBytes bytes
+/// are stored in the output buffer.
+/// \param buf is the buffer receiving the bytes (must have room for \b maxBytes bytes)
+/// \param addr is the address of the string data
+/// \param ct is the character data-type
+/// \param maxBytes is the maximum number of bytes to return
+/// \return the number of bytes written to \b buf
+uint4 ArchitectureGhidra::getStringData(uint1 *buf,const Address &addr,Datatype *ct,int4 maxBytes)
+
+{
+  sout.write("\000\000\001\004",4);
+  writeStringStream(sout,"getString");
+  sout.write("\000\000\001\016",4); // Beginning of string header
+  addr.saveXml(sout,maxBytes);
+  sout.write("\000\000\001\017",4);
+  writeStringStream(sout,ct->getName());
+  sout.write("\000\000\001\016",4); // Beginning of string header
+  sout << dec << (int8)ct->getId();	// Pass as a signed integer
+  sout.write("\000\000\001\017",4);
+
+  sout.write("\000\000\001\005",4);
+  sout.flush();
+
+  readToResponse(sin);
+  int4 type = readToAnyBurst(sin);
+  uint4 size = 0;
+  if (type == 12) {
+    // Mask each size character to 6 bits so that a malformed character (or EOF)
+    // cannot turn into an enormous byte count
+    int4 c = sin.get();
+    size ^= ((c-0x20) & 0x3f);
+    c = sin.get();
+    size ^= (((c-0x20) & 0x3f)<<6);
+    uint1 *dblbuf = new uint1[size * 2];
+    sin.read((char *)dblbuf,size*2);	// Consume the full payload to stay aligned with the stream
+    uint4 limit = (maxBytes > 0) ? (uint4)maxBytes : 0;
+    if (size > limit)
+      size = limit;			// Never write past the end of the caller's buffer
+    for (uint4 i=0; i < size; i++) {
+      buf[i] = ((dblbuf[i*2]-'A') << 4) | (dblbuf[i*2 + 1]-'A');
+    }
+    delete [] dblbuf;
+  }
+  else if ((type&1)==1) {
+    ostringstream errmsg;
+    errmsg << "GHIDRA has no string in the loadimage at " << addr.getShortcut();
+    addr.printRaw(errmsg);
+    throw DataUnavailError(errmsg.str());
+  }
+  else
+    throw JavaError("alignment","Expecting bytes or end of query response");
+  type = readToAnyBurst(sin);
+  if (type != 13)
+    throw JavaError("alignment","Expecting byte alignment end");
+  readResponseEnd(sin);
+  return size;
+}
+
 /// \brief Retrieve p-code to inject for a specific context
 ///
 /// The particular injection is named and is of one of the types:
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/ghidra_arch.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ghidra_arch.hh
@ -124,6 +124,7 @@ public:

  bool getSendParamMeasures(void) const { return sendParamMeasures; }	///< Get the current setting for emitting parameter info

+  virtual uint4 getStringData(uint1 *buf,const Address &addr,Datatype *ct,int4 maxBytes);
  virtual void printMessage(const string &message) const;

  static void segvHandler(int4 sig);				///< Handler for a segment violation (SIGSEGV) signal
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/ghidra_process.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ghidra_process.cc
@ -245,6 +245,7 @@ void FlushNative::rawAction(void)
  ghidra->symboltab->deleteSubScopes(globscope); // Flush cached function and globals database
  ghidra->types->clearNoncore(); // Reset type information
  ghidra->commentdb->clear();	// Clear any comments
+  ghidra->stringManager->clear();	// Clear string decodings
  ghidra->cpool->clear();
  res = 0;
 }
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc
@ -1164,7 +1164,7 @@ void PrintC::printUnicode(ostream &s,int4 onechar) const
      s << "\\x" << setfill('0') << setw(8) << hex << onechar;
    return;
  }
-  writeUtf8(s, onechar);		// emit normally
+  StringManager::writeUtf8(s, onechar);		// emit normally
 }

 void PrintC::pushType(const Datatype *ct)
@ -1204,32 +1204,6 @@ bool PrintC::doEmitWideCharPrefix(void) const
  return true;
 }

-/// \brief Check if the byte buffer has a (unicode) string terminator
-///
-/// \param buffer is the byte buffer
-/// \param size is the number of bytes in the buffer
-/// \param charsize is the presumed size (in bytes) of character elements
-/// \return \b true if a string terminator is found
-bool PrintC::hasCharTerminator(uint1 *buffer,int4 size,int4 charsize)
-
-{
-  for(int4 i=0;i<size;i+=charsize) {
-    bool isTerminator = true;
-    for(int4 j=0;j<charsize;++j) {
-      if (buffer[i+j] != 0) {	// Non-zero bytes means character can't be a null terminator
-	isTerminator = false;
-	break;
-      }
-    }
-    if (isTerminator) return true;
-  }
-  return false;
-}
-
-#define STR_LITERAL_BUFFER_MAXSIZE 2048
-#define STR_LITERAL_BUFFER_INCREMENT 32
-
-
 /// \brief Print a quoted (unicode) string at the given address.
 ///
 /// Data for the string is obtained directly from the LoadImage.  The bytes are checked
@ -1237,43 +1211,27 @@ bool PrintC::hasCharTerminator(uint1 *buffer,int4 size,int4 charsize)
 /// pass, the string is emitted.
 /// \param s is the output stream to print to
 /// \param addr is the address of the string data within the LoadImage
-/// \param charsize is the number of bytes in an encoded element (i.e. UTF8, UTF16, or UTF32)
+/// \param charType is the underlying character data-type
 /// \return \b true if a proper string was found and printed to the stream
-bool PrintC::printCharacterConstant(ostream &s,const Address &addr,int4 charsize) const
+bool PrintC::printCharacterConstant(ostream &s,const Address &addr,Datatype *charType) const

 {
-  uint1 buffer[STR_LITERAL_BUFFER_MAXSIZE+4]; // Additional buffer for get_codepoint skip readahead
-  int4 curBufferSize = 0;
-  bool foundTerminator = false;
+  const uint1 *buffer;
+  StringManager *manager = glb->stringManager;
  try {
-    do {
-      uint4 newBufferSize = curBufferSize + STR_LITERAL_BUFFER_INCREMENT;
-      glb->loader->loadFill(buffer+curBufferSize,STR_LITERAL_BUFFER_INCREMENT,addr + curBufferSize);
-      foundTerminator = hasCharTerminator(buffer+curBufferSize,STR_LITERAL_BUFFER_INCREMENT,charsize);
-      curBufferSize = newBufferSize;
-    } while ((curBufferSize < STR_LITERAL_BUFFER_MAXSIZE)&&(!foundTerminator));
+    buffer = manager->getStringData(addr, charType);
  } catch(DataUnavailError &err) {
    return false;
  }
-  buffer[curBufferSize] = 0;		// Make sure bytes for final codepoint read are initialized
-  buffer[curBufferSize+1] = 0;
-  buffer[curBufferSize+2] = 0;
-  buffer[curBufferSize+3] = 0;
-  bool bigend = glb->translate->isBigEndian();
-  bool res;
-  if (isCharacterConstant(buffer,curBufferSize,charsize)) {
-    if (doEmitWideCharPrefix() && charsize > 1)
+  if (doEmitWideCharPrefix() && charType->getSize() > 1)
    s << 'L';			// Print symbol indicating wide character
  s << '"';
-    if (!escapeCharacterData(s,buffer,curBufferSize,charsize,bigend))
+  if (!escapeCharacterData(s,buffer,manager->getMaximumBytes(),charType->getSize(),glb->translate->isBigEndian()))
    s << "...\" /* TRUNCATED STRING LITERAL */";
-    else s << '"';
-     
-    res = true;
-  }
  else
-    res = false;
-  return res;
+    s << '"';
+
+  return true;
 }

 void PrintC::resetDefaultsPrintC(void)
@ -1373,7 +1331,7 @@ bool PrintC::pushPtrCharConstant(uintb val,const TypePointer *ct,const Varnode *

  ostringstream str;
  Datatype *subct = ct->getPtrTo();
-  if (!printCharacterConstant(str,stringaddr,subct->getSize()))
+  if (!printCharacterConstant(str,stringaddr,subct))
    return false;		// Can we get a nice ASCII string

  pushAtom(Atom(str.str(),vartoken,EmitXml::const_color,op,vn));
@ -1577,7 +1535,7 @@ void PrintC::pushSymbol(const Symbol *sym,const Varnode *vn,const PcodeOp *op)
      SymbolEntry *entry = sym->getFirstWholeMap();
      if (entry != (SymbolEntry *)0) {
 	ostringstream s;
-	if (printCharacterConstant(s,entry->getAddr(),subct->getSize())) {
+	if (printCharacterConstant(s,entry->getAddr(),subct)) {
 	  pushAtom(Atom(s.str(),vartoken,EmitXml::const_color,op,vn));
 	  return;
 	}
@ -1963,25 +1921,6 @@ void PrintC::setCommentStyle(const string &nm)
    throw LowlevelError("Unknown comment style. Use \"c\" or \"cplusplus\"");
 }

-bool PrintC::isCharacterConstant(const uint1 *buf,int4 size,int4 charsize) const
-
-{
-  // Return true if this looks like a c-string
-  // If the string is encoded in UTF8 or ASCII, we get (on average) a bit of check
-  // per character.  For UTF16, the surrogate reserved area gives at least some check.
-  if (buf == (const uint1 *)0) return false;
-  bool bigend = glb->translate->isBigEndian();
-  int4 i=0;
-  int4 skip = charsize;
-  while(i<size) {
-    int4 codepoint = getCodepoint(buf+i,charsize,bigend,skip);
-    if (codepoint < 0) return false;
-    if (codepoint == 0) break;
-    i += skip;
-  }
-  return true;
-}
-
 /// \brief Emit the definition of the given data-type
 ///
 /// This is currently limited to a 'struct' or 'enum' definitions. The
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh
@ -157,8 +157,7 @@ protected:
  void opFunc(const PcodeOp *op);			///< Push a \e functional expression based on the given p-code op to the RPN stack
  void opTypeCast(const PcodeOp *op);			///< Push the given p-code op using type-cast syntax to the RPN stack
  void opHiddenFunc(const PcodeOp *op);			///< Push the given p-code op as a hidden token
-  static bool hasCharTerminator(uint1 *buffer,int4 size,int4 charsize);
-  bool printCharacterConstant(ostream &s,const Address &addr,int4 charsize) const;
+  bool printCharacterConstant(ostream &s,const Address &addr,Datatype *charType) const;
  void resetDefaultsPrintC(void);			///< Set default values for options specific to PrintC
  virtual void pushConstant(uintb val,const Datatype *ct,
 			    const Varnode *vn,const PcodeOp *op);
@ -204,7 +203,6 @@ public:
  virtual void resetDefaults(void);
  virtual void adjustTypeOperators(void);
  virtual void setCommentStyle(const string &nm);
-  virtual bool isCharacterConstant(const uint1 *buf,int4 size,int4 charsize) const;
  virtual void docTypeDefinitions(const TypeFactory *typegrp);
  virtual void docAllGlobals(void);
  virtual void docSingleGlobal(const Symbol *sym);
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/printjava.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printjava.cc
@ -190,7 +190,7 @@ void PrintJava::printUnicode(ostream &s,int4 onechar) const
      s << "\\ux" << setfill('0') << setw(8) << hex << onechar;
    return;
  }
-  writeUtf8(s, onechar);		// Emit normally
+  StringManager::writeUtf8(s, onechar);		// Emit normally
 }

 void PrintJava::opLoad(const PcodeOp *op)
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc
@ -478,136 +478,6 @@ bool PrintLanguage::unicodeNeedsEscape(int4 codepoint)
  return false;
 }

-/// Encode the given unicode codepoint as UTF8 (1, 2, 3, or 4 bytes) and
-/// write the bytes to the stream.
-/// \param s is the output stream
-/// \param codepoint is the unicode codepoint
-void PrintLanguage::writeUtf8(ostream &s,int4 codepoint)
-
-{
-  uint1 bytes[4];
-  int4 size;
-
-  if (codepoint < 0)
-    throw LowlevelError("Negative unicode codepoint");
-  if (codepoint < 128) {
-    s.put((uint1)codepoint);
-    return;
-  }
-  int4 bits = mostsigbit_set(codepoint) + 1;
-  if (bits > 21)
-    throw LowlevelError("Bad unicode codepoint");
-  if (bits < 12) {	// Encode with two bytes
-    bytes[0] = 0xc0 ^ ((codepoint >> 6)&0x1f);
-    bytes[1] = 0x80 ^ (codepoint & 0x3f);
-    size = 2;
-  }
-  else if (bits < 17) {
-    bytes[0] = 0xe0 ^ ((codepoint >> 12)&0xf);
-    bytes[1] = 0x80 ^ ((codepoint >> 6)&0x3f);
-    bytes[2] = 0x80 ^ (codepoint & 0x3f);
-    size = 3;
-  }
-  else {
-    bytes[0] = 0xf0 ^ ((codepoint >> 18) & 7);
-    bytes[1] = 0x80 ^ ((codepoint >> 12) & 0x3f);
-    bytes[2] = 0x80 ^ ((codepoint >> 6) & 0x3f);
-    bytes[3] = 0x80 ^ (codepoint & 0x3f);
-    size = 4;
-  }
-  s.write((char *)bytes, size);
-}
-
-/// Pull the first two bytes from the byte array and combine them in the indicated endian order
-/// \param buf is the byte array
-/// \param bigend is \b true to request big endian encoding
-/// \return the decoded UTF16 element
-inline int4 PrintLanguage::readUtf16(const uint1 *buf,bool bigend)
-
-{
-  int4 codepoint;
-  if (bigend) {
-    codepoint = buf[0];
-    codepoint <<= 8;
-    codepoint += buf[1];
-  }
-  else {
-    codepoint = buf[1];
-    codepoint <<= 8;
-    codepoint += buf[0];
-  }
-  return codepoint;
-}
-
-/// \brief Extract the next \e unicode \e codepoint from an array of character data
-///
-/// One or more bytes is consumed from the array, and the number of bytes used is passed back.
-/// \param buf is a pointer to the bytes in the character array
-/// \param charsize is 1 for UTF8, 2 for UTF16, or 4 for UTF32
-/// \param bigend is \b true for big endian encoding of the UTF element
-/// \param skip is a reference for passing back the number of bytes consumed
-/// \return the codepoint or -1 if the encoding is invalid
-int4 PrintLanguage::getCodepoint(const uint1 *buf,int4 charsize,bool bigend,int4 &skip)
-
-{
-  int4 codepoint;
-  int4 sk = 0;
-  if (charsize==2) {		// UTF-16
-    codepoint = readUtf16(buf,bigend);
-    sk += 2;
-    if ((codepoint>=0xD800)&&(codepoint<=0xDBFF)) { // high surrogate
-      int4 trail=readUtf16(buf+2,bigend);
-      sk += 2;
-      if ((trail<0xDC00)||(trail>0xDFFF)) return -1; // Bad trail
-      codepoint = (codepoint<<10) + trail + (0x10000 - (0xD800 << 10) - 0xDC00);
-    }
-    else if ((codepoint>=0xDC00)&&(codepoint<=0xDFFF)) return -1; // trail before high
-  }
-  else if (charsize==1) {	// UTF-8
-    int4 val = buf[0];
-    if ((val&0x80)==0) {
-      codepoint = val;
-      sk = 1;
-    }
-    else if ((val&0xe0)==0xc0) {
-      int4 val2 = buf[1];
-      sk = 2;
-      if ((val2&0xc0)!=0x80) return -1; // Not a valid UTF8-encoding
-      codepoint = ((val&0x1f)<<6) | (val2 & 0x3f);
-    }
-    else if ((val&0xf0)==0xe0) {
-      int4 val2 = buf[1];
-      int4 val3 = buf[2];
-      sk = 3;
-      if (((val2&0xc0)!=0x80)||((val3&0xc0)!=0x80)) return -1; // invalid encoding
-      codepoint = ((val&0xf)<<12) | ((val2&0x3f)<<6) | (val3 & 0x3f);
-    }
-    else if ((val&0xf8)==0xf0) {
-      int4 val2 = buf[1];
-      int4 val3 = buf[2];
-      int4 val4 = buf[3];
-      sk = 4;
-      if (((val2&0xc0)!=0x80)||((val3&0xc0)!=0x80)||((val4&0xc0)!=0x80)) return -1;	// invalid encoding
-      codepoint = ((val&7)<<18) | ((val2&0x3f)<<12) | ((val3&0x3f)<<6) | (val4 & 0x3f);
-    }
-    else
-      return -1;
-  }
-  else if (charsize == 4) {	// UTF-32
-    sk = 4;
-    if (bigend)
-      codepoint = (buf[0]<<24) + (buf[1]<<16) + (buf[2]<<8) + buf[3];
-    else
-      codepoint = (buf[3]<<24) + (buf[2]<<16) + (buf[1]<<8) + buf[0];
-  }
-  else
-    return -1;
-  if (codepoint >= 0xd800 && codepoint <= 0xdfff)
-    return -1;		// Reserved for surrogates, invalid codepoints
-  skip = sk;
-  return codepoint;
-}
-
 /// \brief Emit a byte buffer to the stream as unicode characters.
 ///
 /// Characters are emitted until we reach a terminator character or \b count bytes is consumed.
@ -624,7 +494,7 @@ bool PrintLanguage::escapeCharacterData(ostream &s,const uint1 *buf,int4 count,i
  int4 skip = charsize;
  int4 codepoint = 0;
  while(i<count) {
-    codepoint = getCodepoint(buf+i,charsize,bigend,skip);
+    codepoint = StringManager::getCodepoint(buf+i,charsize,bigend,skip);
    if (codepoint == 0 || codepoint == -1) break;
    printUnicode(s,codepoint);
    i += skip;
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh
@ -267,9 +267,6 @@ protected:
  void emitOp(const ReversePolish &entry);				///< Send an operator token from the RPN to the emitter
  void emitAtom(const Atom &atom);					///< Send an variable token from the RPN to the emitter
  static bool unicodeNeedsEscape(int4 codepoint);			///< Determine if the given codepoint needs to be escaped
-  static void writeUtf8(ostream &s,int4 codepoint);			///< Write unicode character to stream in UTF8 encoding
-  static int4 readUtf16(const uint1 *buf,bool bigend);			///< Read a 2-byte UTF16 element from a byte array
-  static int4 getCodepoint(const uint1 *buf,int4 charsize,bool bigend,int4 &skip);
  bool escapeCharacterData(ostream &s,const uint1 *buf,int4 count,int4 charsize,bool bigend) const;
  void recurse(void);							///< Emit from the RPN stack as much as possible
  void opBinary(const OpToken *tok,const PcodeOp *op);			///< Push a binary operator onto the RPN stack
@ -434,14 +431,6 @@ public:
  /// \param nm is the configuration description
  virtual void setCommentStyle(const string &nm)=0;

-  /// \brief Decide is the given byte array looks like a character string
-  ///
-  /// This looks for encodings and/or a terminator that is appropriate for the high-level language
-  /// \param buf is a pointer to the byte array
-  /// \param size is the number of bytes in the array
-  /// \param charsize is the size in bytes of the encoding element (i.e. UTF8, UTF16, etc.) to assume
-  virtual bool isCharacterConstant(const uint1 *buf,int4 size,int4 charsize) const=0;
-
  /// \brief Emit definitions of data-types
  ///
  /// \param typegrp is the container for the data-types that should be defined
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc
@ -6409,14 +6409,8 @@ int4 RulePtrsubCharConstant::applyOp(PcodeOp *op,Funcdata &data)
  if (!scope->isReadOnly(symaddr,1,op->getAddr()))
    return 0;
  // Check if data at the address looks like a string
-  uint1 buffer[128];
-  try {
-    data.getArch()->loader->loadFill(buffer,128,symaddr);
-  } catch(DataUnavailError &err) {
+  if (!data.getArch()->stringManager->isString(symaddr, basetype))
    return 0;
-  }
-  bool isstring = data.getArch()->print->isCharacterConstant(buffer,128,basetype->getSize());
-  if (!isstring) return 0;

  // If we reach here, the PTRSUB should be converted to a (COPY of a) pointer constant.
  bool removeCopy = false;
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/string_ghidra.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/string_ghidra.cc
@ -0,0 +1,41 @@
+/* ###
+ * IP: GHIDRA
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ghidra_string.hh"
+
+/// \param g is the ghidra client interface that string queries are sent through
+/// \param max is the maximum number of bytes to allow in a decoded string
+GhidraStringManager::GhidraStringManager(ArchitectureGhidra *g,int4 max)
+  : StringManager(max), glb(g), testBuffer(new uint1[max])
+{
+}
+
+/// Release the temporary buffer allocated by the constructor
+GhidraStringManager::~GhidraStringManager(void)
+
+{
+  delete [] testBuffer;
+}
+
+/// Return the string bytes stored for the given address, querying the ghidra client
+/// and storing a permanent copy the first time the address is requested.
+/// \param addr is the address of the string data
+/// \param charType is the character data-type passed along to the client
+/// \return the stored copy of the string bytes
+const uint1 *GhidraStringManager::getStringData(const Address &addr,Datatype *charType)
+
+{
+  map<Address,const uint1 *>::iterator cached = stringMap.find(addr);
+  if (cached == stringMap.end()) {
+    // First request for this address: fetch from the client into the scratch buffer
+    int4 numBytes = glb->getStringData(testBuffer, addr, charType, maximumBytes);
+    return mapBuffer(addr, testBuffer, numBytes);
+  }
+  return cached->second;
+}
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/string_ghidra.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/string_ghidra.hh
@ -0,0 +1,39 @@
+/* ###
+ * IP: GHIDRA
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/// \file string_ghidra.hh
+/// \brief Implementation of the StringManager through the ghidra client
+
+#ifndef __STRING_GHIDRA__
+#define __STRING_GHIDRA__
+
+#include "ghidra_arch.hh"
+
+/// \brief Implementation of the StringManager that queries through the ghidra client
+///
+/// This acts as a front end to Ghidra's string formats and encodings.
+/// The client translates any type of string into a UTF8 representation, and this
+/// class stores it for final presentation.  Escaping the UTF8 string is left up
+/// to the PrintLanguage.
+///
+/// NOTE(review): the class holds a raw array pointer but declares no copy constructor
+/// or assignment operator -- presumably instances are never copied; confirm.
+class GhidraStringManager : public StringManager {
+  ArchitectureGhidra *glb;		///< The ghidra client interface
+  uint1 *testBuffer;			///< Temporary storage for storing bytes from client
+public:
+  GhidraStringManager(ArchitectureGhidra *g,int4 max);	///< Constructor
+  virtual ~GhidraStringManager(void);			///< Destructor
+  virtual const uint1 *getStringData(const Address &addr,Datatype *charType);	///< Get string bytes at an address
+};
+
+#endif
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/stringmanage.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/stringmanage.cc
@ -0,0 +1,391 @@
+/* ###
+ * IP: GHIDRA
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "stringmanage.hh"
+#include "architecture.hh"
+
+/// Make a permanent, zero terminated copy of the given bytes and register it in the cache.
+/// The caller is responsible for checking that no buffer is already stored at the
+/// address; an existing entry is overwritten without being freed.
+/// \param addr is the Address to store the buffer at
+/// \param buf is the buffer to be copied into storage
+/// \param size is the number of bytes in the buffer
+/// \return the new permanent copy of the buffer
+const uint1 *StringManager::mapBuffer(const Address &addr,const uint1 *buf,int4 size)
+
+{
+  uint1 *permanent = new uint1[size + 1];	// One extra byte for the terminator
+  memcpy(permanent,buf,size);
+  permanent[size] = 0;
+  stringMap[addr] = permanent;
+  return permanent;
+}
+
+/// Construct an empty string cache.
+/// \param max is the maximum number of bytes to allow in a decoded string
+StringManager::StringManager(int4 max)
+  : maximumBytes(max)
+{
+}
+
+/// Release all the string buffers held by the cache.
+StringManager::~StringManager(void)
+
+{
+  this->clear();
+}
+
+/// Free every cached string buffer and empty the cache.
+/// The map entries are removed after the buffers are deleted, so that no dangling
+/// pointers are left behind.  Without this, a later lookup would return freed memory
+/// and a second call to clear() (for instance from the destructor) would delete
+/// the same buffers twice.
+void StringManager::clear(void)
+
+{
+  map<Address,const uint1 *>::iterator iter;
+
+  for(iter=stringMap.begin();iter!=stringMap.end();++iter) {
+    delete [] (*iter).second;
+  }
+  stringMap.clear();
+}
+
+/// Emit the UTF8 encoding (1, 2, 3, or 4 bytes) of a single unicode codepoint.
+/// An exception is thrown if the codepoint is negative or needs more than 21 bits.
+/// \param s is the output stream
+/// \param codepoint is the unicode codepoint
+void StringManager::writeUtf8(ostream &s,int4 codepoint)
+
+{
+  if (codepoint < 0)
+    throw LowlevelError("Negative unicode codepoint");
+  if (codepoint < 128) {		// 7-bit values are written as a single byte
+    s.put((uint1)codepoint);
+    return;
+  }
+  const int4 numBits = mostsigbit_set(codepoint) + 1;
+  if (numBits > 21)
+    throw LowlevelError("Bad unicode codepoint");
+
+  // Up to 11 bits fit in two bytes, up to 16 bits in three, otherwise four
+  const int4 count = (numBits < 12) ? 2 : ((numBits < 17) ? 3 : 4);
+  uint1 encoded[4];
+  int4 rest = codepoint;
+  for(int4 k=count-1;k>0;--k) {		// Continuation bytes carry 6 bits each, lowest bits last
+    encoded[k] = 0x80 | (rest & 0x3f);
+    rest >>= 6;
+  }
+  // The leading byte holds the remaining high bits beneath a length marker
+  if (count == 2)
+    encoded[0] = 0xc0 | (rest & 0x1f);
+  else if (count == 3)
+    encoded[0] = 0xe0 | (rest & 0xf);
+  else
+    encoded[0] = 0xf0 | (rest & 7);
+  s.write((char *)encoded, count);
+}
+
+/// Test whether string data can be retrieved for the given address.
+/// The test is performed by calling getStringData(), which may cache the decoding.
+/// A DataUnavailError thrown during the retrieval is treated as "not a string".
+/// \param addr is the given address
+/// \param charType is the given character data-type, used as a hint for the encoding
+/// \return \b true if the address represents string data
+bool StringManager::isString(const Address &addr,Datatype *charType)
+
+{
+  try {
+    return (getStringData(addr,charType) != (const uint1 *)0);
+  }
+  catch(DataUnavailError &err) {
+    // The bytes could not be read, fall through and report no string
+  }
+  return false;
+}
+
+/// Write \<stringmanage> tag, with \<string> sub-tags.
+/// Each \<string> tag contains the address tag followed by a \<bytes> tag holding the
+/// cached UTF8 bytes as two-digit hex values, 20 bytes to a line.  The terminating
+/// zero byte is not written.
+/// \param s is the stream to write to
+void StringManager::saveXml(ostream &s) const
+
+{
+  s << "<stringmanage>\n";
+
+  map<Address,const uint1 *>::const_iterator iter1;
+  for(iter1=stringMap.begin();iter1!=stringMap.end();++iter1) {
+    s << "<string>\n";
+    (*iter1).first.saveXml(s);
+    const uint1 *buf = (*iter1).second;
+    s << " <bytes>\n" << setfill('0');
+    for(int4 i=0;;++i) {
+      if (buf[i] == 0) break;		// Cached buffers are always zero terminated
+      s << hex << setw(2) << (int4)buf[i];
+      if (i%20 == 19)
+	s << "\n  ";
+    }
+    s << "\n </bytes>\n";
+    s << "</string>\n";			// Close the tag opened at the top of the loop
+  }
+  s << "</stringmanage>\n";
+}
+
+/// Read \<stringmanage> tag, with \<string> sub-tags.
+/// Every \<string> child is expected to hold an address tag followed by a \<bytes>
+/// tag whose content is a sequence of two-digit hex values separated by optional
+/// whitespace (the format produced by saveXml()).  Children missing either tag
+/// are skipped.  If a string is already cached at an address, the cached copy is kept.
+/// \param el is the root tag element
+/// \param m is the manager for looking up AddressSpaces
+void StringManager::restoreXml(const Element *el,const AddrSpaceManager *m)
+
+{
+  const List &list(el->getChildren());
+  List::const_iterator iter1;
+  for(iter1=list.begin();iter1!=list.end();++iter1) {	// One pass per <string> tag
+    const List &sublist((*iter1)->getChildren());
+    List::const_iterator iter = sublist.begin();
+    if (iter == sublist.end()) continue;
+    Address addr = Address::restoreXml(*iter, m);
+    ++iter;
+    if (iter == sublist.end()) continue;
+    vector<uint1> vec;
+    istringstream is((*iter)->getContent());
+    // Hold the characters as integers so that end-of-stream (a negative value) is
+    // detected even on platforms where plain char is unsigned
+    int4 c1, c2;
+    is >> ws;
+    c1 = is.get();
+    c2 = is.get();
+    while ((c1 > 0) && (c2 > 0)) {
+      if (c1 <= '9')
+	c1 = c1 - '0';
+      else if (c1 <= 'F')
+	c1 = c1 + 10 - 'A';
+      else
+	c1 = c1 + 10 - 'a';
+      if (c2 <= '9')
+	c2 = c2 - '0';
+      else if (c2 <= 'F')
+	c2 = c2 + 10 - 'A';
+      else
+	c2 = c2 + 10 - 'a';
+      vec.push_back((uint1) (c1 * 16 + c2));
+      is >> ws;
+      c1 = is.get();
+      c2 = is.get();
+    }
+    // mapBuffer() requires that nothing is stored at the address yet
+    if (stringMap.find(addr) != stringMap.end()) continue;
+    mapBuffer(addr,vec.data(),(int4)vec.size());
+  }
+}
+
+/// Scan the buffer, one character element at a time, for an element whose bytes are all zero.
+/// Only complete elements are examined: trailing bytes that do not make up a full
+/// element are ignored, so nothing beyond \b size bytes is ever read.
+/// \param buffer is the byte buffer
+/// \param size is the number of bytes in the buffer
+/// \param charsize is the presumed size (in bytes) of character elements
+/// \return \b true if a string terminator is found
+bool StringManager::hasCharTerminator(const uint1 *buffer,int4 size,int4 charsize)
+
+{
+  if (charsize <= 0) return false;	// A non-positive element size would never advance the scan
+  for(int4 i=0;i+charsize<=size;i+=charsize) {
+    bool isTerminator = true;
+    for(int4 j=0;j<charsize;++j) {
+      if (buffer[i+j] != 0) {	// Non-zero bytes means character can't be a null terminator
+	isTerminator = false;
+	break;
+      }
+    }
+    if (isTerminator) return true;
+  }
+  return false;
+}
+
+/// Combine the first two bytes of the array into a single 16-bit element, taking
+/// the most significant byte first for big endian and second for little endian.
+/// \param buf is the byte array
+/// \param bigend is \b true to request big endian encoding
+/// \return the decoded UTF16 element
+inline int4 StringManager::readUtf16(const uint1 *buf,bool bigend)
+
+{
+  const int4 high = bigend ? buf[0] : buf[1];
+  const int4 low = bigend ? buf[1] : buf[0];
+  return (high << 8) + low;
+}
+
+/// One or more bytes is consumed from the array, and the number of bytes used is passed back.
+/// No bounds checking is done here: depending on the encoding, up to 4 bytes starting
+/// at \b buf may be read, so the caller must make sure they are readable.
+/// The \b skip parameter is left untouched on the early \b -1 returns.
+/// \param buf is a pointer to the bytes in the character array
+/// \param charsize is 1 for UTF8, 2 for UTF16, or 4 for UTF32
+/// \param bigend is \b true for big endian encoding of the UTF element
+/// \param skip is a reference for passing back the number of bytes consumed
+/// \return the codepoint or -1 if the encoding is invalid
+int4 StringManager::getCodepoint(const uint1 *buf,int4 charsize,bool bigend,int4 &skip)
+
+{
+  int4 codepoint;
+  int4 sk = 0;			// Bytes consumed so far, only published through skip at the end
+  if (charsize==2) {		// UTF-16
+    codepoint = readUtf16(buf,bigend);
+    sk += 2;
+    if ((codepoint>=0xD800)&&(codepoint<=0xDBFF)) { // high surrogate
+      int4 trail=readUtf16(buf+2,bigend);
+      sk += 2;
+      if ((trail<0xDC00)||(trail>0xDFFF)) return -1; // Bad trail
+      // Equivalent to ((high - 0xD800) << 10) + (trail - 0xDC00) + 0x10000
+      codepoint = (codepoint<<10) + trail + (0x10000 - (0xD800 << 10) - 0xDC00);
+    }
+    else if ((codepoint>=0xDC00)&&(codepoint<=0xDFFF)) return -1; // trail before high
+  }
+  else if (charsize==1) {	// UTF-8
+    int4 val = buf[0];
+    if ((val&0x80)==0) {	// 0xxxxxxx : single byte
+      codepoint = val;
+      sk = 1;
+    }
+    else if ((val&0xe0)==0xc0) {	// 110xxxxx : two byte sequence
+      int4 val2 = buf[1];
+      sk = 2;
+      if ((val2&0xc0)!=0x80) return -1; // Not a valid UTF8-encoding
+      codepoint = ((val&0x1f)<<6) | (val2 & 0x3f);
+    }
+    else if ((val&0xf0)==0xe0) {	// 1110xxxx : three byte sequence
+      int4 val2 = buf[1];
+      int4 val3 = buf[2];
+      sk = 3;
+      if (((val2&0xc0)!=0x80)||((val3&0xc0)!=0x80)) return -1; // invalid encoding
+      codepoint = ((val&0xf)<<12) | ((val2&0x3f)<<6) | (val3 & 0x3f);
+    }
+    else if ((val&0xf8)==0xf0) {	// 11110xxx : four byte sequence
+      int4 val2 = buf[1];
+      int4 val3 = buf[2];
+      int4 val4 = buf[3];
+      sk = 4;
+      if (((val2&0xc0)!=0x80)||((val3&0xc0)!=0x80)||((val4&0xc0)!=0x80)) return -1;	// invalid encoding
+      codepoint = ((val&7)<<18) | ((val2&0x3f)<<12) | ((val3&0x3f)<<6) | (val4 & 0x3f);
+    }
+    else			// Continuation byte in leading position, or a longer (unsupported) form
+      return -1;
+    // NOTE(review): overlong UTF-8 forms are not rejected by the checks above
+  }
+  else if (charsize == 4) {	// UTF-32
+    sk = 4;
+    // NOTE(review): a most significant byte >= 0x80 presumably yields a negative
+    // value other than -1; the decoding loops in this file test for "< 0"
+    if (bigend)
+      codepoint = (buf[0]<<24) + (buf[1]<<16) + (buf[2]<<8) + buf[3];
+    else
+      codepoint = (buf[3]<<24) + (buf[2]<<16) + (buf[1]<<8) + buf[0];
+  }
+  else				// Unsupported element size
+    return -1;
+  if (codepoint >= 0xd800 && codepoint <= 0xdfff)
+    return -1;		// Reserved for surrogates, invalid codepoints
+  skip = sk;
+  return codepoint;
+}
+
+/// Set up the manager and allocate the scratch buffer used for pulling in loadimage bytes.
+/// \param g is the underlying architecture (and loadimage)
+/// \param max is the maximum number of bytes to allow in a decoded string
+StringManagerUnicode::StringManagerUnicode(Architecture *g,int4 max)
+  : StringManager(max), glb(g), testBuffer(new uint1[max])
+{
+}
+
+/// Release the scratch buffer allocated by the constructor.
+StringManagerUnicode::~StringManagerUnicode(void)
+
+{
+  delete [] this->testBuffer;
+}
+
+/// Return the cached string for the address if there is one.  Otherwise bytes are
+/// pulled from the loadimage, 32 at a time, until a terminator is seen or the
+/// maximum number of bytes is reached.  Single byte data is validated and cached as is;
+/// wider encodings are translated to UTF8 first.  A translation that exceeds the
+/// maximum is truncated on a character boundary, so the cached bytes remain valid UTF8.
+/// \param addr is the given address
+/// \param charType is a character data-type indicating the encoding
+/// \return the byte array of UTF8 data (or null if the bytes are not a valid string)
+const uint1 *StringManagerUnicode::getStringData(const Address &addr,Datatype *charType)
+
+{
+  map<Address,const uint1 *>::iterator iter;
+  iter = stringMap.find(addr);
+  if (iter != stringMap.end())
+    return (*iter).second;
+
+  int4 curBufferSize = 0;
+  int4 charsize = charType->getSize();
+  bool foundTerminator = false;
+
+  do {
+    int4 amount = 32;	// Grab 32 bytes of image at a time
+    int4 newBufferSize = curBufferSize + amount;	// Signed, to match maximumBytes
+    if (newBufferSize > maximumBytes) {
+      newBufferSize = maximumBytes;
+      amount = newBufferSize - curBufferSize;
+      if (amount <= 0) break;		// Scratch buffer is full
+    }
+    glb->loader->loadFill(testBuffer+curBufferSize,amount,addr + curBufferSize);
+    foundTerminator = hasCharTerminator(testBuffer+curBufferSize,amount,charsize);
+    curBufferSize = newBufferSize;
+  } while (!foundTerminator);
+
+  const uint1 *resBuffer;
+  if (charsize == 1) {
+    if (!isCharacterConstant(testBuffer,curBufferSize,charsize))
+      return (const uint1 *)0;
+    resBuffer = mapBuffer(addr,testBuffer,curBufferSize);
+  }
+  else {
+    // We need to translate to UTF8
+    ostringstream s;
+    if (!writeUnicode(s, testBuffer, curBufferSize, charsize))
+      return (const uint1 *)0;
+    string resString = s.str();
+    int4 newSize = resString.size();
+    if (newSize > maximumBytes) {
+      newSize = maximumBytes;
+      // Back up while the first dropped byte is a continuation byte (10xxxxxx),
+      // so that a multi-byte character is never cut in half
+      while ((newSize > 0) && ((((uint1)resString[newSize]) & 0xc0) == 0x80))
+	newSize -= 1;
+    }
+    resBuffer = mapBuffer(addr,(const uint1 *)resString.c_str(),newSize);
+  }
+  return resBuffer;
+}
+
+/// Walk the buffer one codepoint at a time, stopping at the first zero codepoint or
+/// at the end of the buffer.  Any element that fails to decode disqualifies the buffer.
+/// For UTF8 or ASCII this gives (on average) a bit of check per character.  For UTF16,
+/// the surrogate reserved area gives at least some check.
+/// \param buf is the byte array to check
+/// \param size is the size of the buffer in bytes
+/// \param charsize is the UTF encoding (1=UTF8, 2=UTF16, 4=UTF32)
+/// \return \b true if the buffer is filled with valid unicode
+bool StringManagerUnicode::isCharacterConstant(const uint1 *buf,int4 size,int4 charsize) const
+
+{
+  if (buf == (const uint1 *)0) return false;
+  const bool isBig = glb->translate->isBigEndian();
+  int4 consumed = charsize;
+  for(int4 pos=0;pos<size;pos+=consumed) {
+    const int4 cp = getCodepoint(buf+pos,charsize,isBig,consumed);
+    if (cp < 0) return false;		// Invalid encoding
+    if (cp == 0) break;			// Terminator
+  }
+  return true;
+}
+
+/// Decode the buffer one codepoint at a time and emit each as UTF8, stopping at the
+/// first zero codepoint or at the end of the buffer.  Characters decoded before an
+/// invalid element have already been written to the stream when \b false is returned.
+/// \param s is the output stream
+/// \param buffer is the given byte buffer
+/// \param size is the number of bytes in the buffer
+/// \param charsize specifies the encoding (1=UTF8 2=UTF16 4=UTF32)
+/// \return \b true if the byte array contains valid unicode
+bool StringManagerUnicode::writeUnicode(ostream &s,uint1 *buffer,int4 size,int4 charsize)
+
+{
+  const bool isBig = glb->translate->isBigEndian();
+  int4 consumed = charsize;
+  for(int4 pos=0;pos<size;pos+=consumed) {
+    const int4 cp = getCodepoint(buffer+pos,charsize,isBig,consumed);
+    if (cp < 0) return false;		// Invalid encoding
+    if (cp == 0) break;			// Terminator
+    writeUtf8(s, cp);
+  }
+  return true;
+}
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/stringmanage.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/stringmanage.hh
@ -0,0 +1,80 @@
+/* ###
+ * IP: GHIDRA
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/// \file stringmanage.hh
+/// \brief Classes for decoding and storing string data
+
+#ifndef __STRING_MANAGE__
+#define __STRING_MANAGE__
+
+#include "type.hh"
+
+class Architecture;
+
+/// \brief Storage for decoding and storing strings associated with an address
+///
+/// Looks at data in the loadimage to determine if it represents a "string".
+/// Decodes the string for presentation in the output.
+/// Stores the decoded string until it is needed for presentation.
+/// The cached buffers are allocated by mapBuffer() and freed by clear().
+class StringManager {
+protected:
+  map<Address,const uint1 *> stringMap;	///< Map from address to string (in UTF8 format)
+  int4 maximumBytes;			///< Maximum bytes (in UTF8 encoding) allowed
+
+  const uint1 *mapBuffer(const Address &addr,const uint1 *buf,int4 size);	///< Move a decoded buffer into storage
+public:
+  StringManager(int4 max);		///< Constructor
+  virtual ~StringManager(void);		///< Destructor
+
+  int4 getMaximumBytes(void) const { return maximumBytes; }	///< Return the maximum bytes allowed in a string decoding
+  void clear(void);			///< Clear out any cached strings
+
+  bool isString(const Address &addr,Datatype *charType);	///< Determine if data at the given address is a string
+
+  /// \brief Retrieve string data at the given address as a UTF8 byte array
+  ///
+  /// If the address does not represent string data, null is returned. Otherwise,
+  /// the string data is fetched, converted to a UTF8 encoding, cached and returned.
+  /// \param addr is the given address
+  /// \param charType is a character data-type indicating the encoding
+  /// \return the byte array of UTF8 data (or null)
+  virtual const uint1 *getStringData(const Address &addr,Datatype *charType)=0;
+
+  void saveXml(ostream &s) const;	///< Save cached strings to a stream as XML
+  void restoreXml(const Element *el,const AddrSpaceManager *m);	///< Restore string cache from XML
+
+  static bool hasCharTerminator(const uint1 *buffer,int4 size,int4 charsize);	///< Check for a unicode string terminator
+  static int4 readUtf16(const uint1 *buf,bool bigend);	///< Read a UTF16 code point from a byte array
+  static void writeUtf8(ostream &s,int4 codepoint);	///< Write unicode character to stream in UTF8 encoding
+  static int4 getCodepoint(const uint1 *buf,int4 charsize,bool bigend,int4 &skip);	///< Extract next \e unicode \e codepoint
+};
+
+/// \brief An implementation of StringManager that understands terminated unicode strings
+///
+/// This class understands UTF8, UTF16, and UTF32 encodings.  It reports a string if it
+/// sees a valid encoding that is null terminated.
+class StringManagerUnicode : public StringManager {
+  Architecture *glb;		///< Underlying architecture
+  uint1 *testBuffer;		///< Temporary buffer for pulling in loadimage bytes
+public:
+  StringManagerUnicode(Architecture *g,int4 max);	///< Constructor
+  virtual ~StringManagerUnicode(void);	///< Destructor
+
+  /// \brief Retrieve string data at the given address, decoded from the loadimage, as a UTF8 byte array
+  virtual const uint1 *getStringData(const Address &addr,Datatype *charType);
+  bool isCharacterConstant(const uint1 *buf,int4 size,int4 charsize) const;	///< Return \b true if buffer looks like unicode
+  bool writeUnicode(ostream &s,uint1 *buffer,int4 size,int4 charsize);	///< Write unicode byte array to stream (as UTF8)
+};
+
+#endif
--- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileCallback.java
+++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileCallback.java
@ -18,6 +18,7 @@ package ghidra.app.decompiler;
 import java.io.IOException;
 import java.io.StringReader;
 import java.math.BigInteger;
+import java.nio.charset.Charset;
 import java.util.ArrayList;

 import javax.xml.parsers.SAXParser;
@ -27,14 +28,15 @@ import org.xml.sax.*;
 import org.xml.sax.helpers.DefaultHandler;

 import ghidra.app.cmd.function.CallDepthChangeInfo;
+import ghidra.docking.settings.Settings;
+import ghidra.docking.settings.SettingsImpl;
 import ghidra.program.disassemble.Disassembler;
 import ghidra.program.model.address.*;
-import ghidra.program.model.data.DataType;
+import ghidra.program.model.data.*;
 import ghidra.program.model.lang.*;
 import ghidra.program.model.lang.ConstantPool.Record;
 import ghidra.program.model.listing.*;
-import ghidra.program.model.mem.MemoryAccessException;
-import ghidra.program.model.mem.MemoryBlock;
+import ghidra.program.model.mem.*;
 import ghidra.program.model.pcode.*;
 import ghidra.program.model.symbol.*;
 import ghidra.util.Msg;
@ -65,6 +67,7 @@ public class DecompileCallback {
 	private AddressFactory addrfactory;
 	private ConstantPool cpool;
 	private PcodeDataTypeManager dtmanage;
+	private Charset utf8Charset;
 	private String nativeMessage;
 	private boolean showNamespace;

@ -84,6 +87,7 @@ public class DecompileCallback {
 		cpool = null;
 		nativeMessage = null;
 		debug = null;
+		utf8Charset = Charset.availableCharsets().get("UTF-8");
 	}

 	private static SAXParser getSAXParser() throws PcodeXMLException {
@ -1177,6 +1181,66 @@ public class DecompileCallback {
 		return listing.getFunctionAt(addr);
 	}

+	/**
+	 * Fetch the string stored at the given address and return it encoded as UTF-8.
+	 * If a string data-type is already laid down on the containing data, it (and its
+	 * settings) is used to decode the bytes.  Otherwise the named data-type is looked up,
+	 * and if it is not itself a string data-type, its length selects a terminated
+	 * 16-bit, 32-bit or byte string format.  The result is truncated to the maximum
+	 * number of bytes given as the size in the address description.
+	 * @param addrString is the XML description of the address and the maximum byte count
+	 * @param dtName is the name of the character data-type
+	 * @param dtId is the id of the character data-type
+	 * @return the UTF-8 encoded bytes, or null if the address cannot be parsed, does not
+	 * physically map, or the data does not decode to a String
+	 */
+	public byte[] getStringData(String addrString, String dtName, String dtId) {
+		Address addr;
+		int maxBytes;
+		try {
+			maxBytes = readXMLSize(addrString);
+			addr = Varnode.readXMLAddress(addrString, addrfactory, funcEntry.getAddressSpace());
+			if (addr == Address.NO_ADDRESS) {
+				throw new PcodeXMLException("Address does not physically map");
+			}
+		}
+		catch (PcodeXMLException e) {
+			Msg.error(this, "Decompiling " + funcEntry + ": " + e.getMessage());
+			return null;
+		}
+		Data data = program.getListing().getDataContaining(addr);
+		Settings settings = SettingsImpl.NO_SETTINGS;
+		AbstractStringDataType dataType = null;
+		if (data != null) {
+			// Prefer the data-type and settings the user has already applied
+			settings = data;
+			if (data.getDataType() instanceof AbstractStringDataType) {
+				dataType = (AbstractStringDataType) data.getDataType();
+			}
+		}
+		if (dataType == null) {
+			DataType dt = dtmanage.findBaseType(dtName, dtId);
+			if (dt instanceof AbstractStringDataType) {
+				dataType = (AbstractStringDataType) dt;
+			}
+			else {
+				if (dt != null) {
+					// Pick a terminated string format based on the character size
+					int size = dt.getLength();
+					if (size == 2) {
+						dataType = TerminatedUnicodeDataType.dataType;
+					}
+					else if (size == 4) {
+						dataType = TerminatedUnicode32DataType.dataType;
+					}
+					else {
+						dataType = TerminatedStringDataType.dataType;
+					}
+				}
+				else {
+					dataType = TerminatedStringDataType.dataType;
+				}
+			}
+		}
+		MemoryBufferImpl buf = new MemoryBufferImpl(program.getMemory(), addr, 64);
+		Object value = dataType.getValue(buf, settings, maxBytes);
+		if (!(value instanceof String)) {
+			return null;
+		}
+		String stringVal = (String) value;
+		byte[] res = stringVal.getBytes(utf8Charset);
+		if (res.length > maxBytes) {
+			byte[] trim = new byte[maxBytes];
+			System.arraycopy(res, 0, trim, 0, maxBytes);
+			res = trim;		// Hand back the truncated copy, not the oversized original
+		}
+		return res;
+	}
+
 //==================================================================================================
 // Inner Classes
 //==================================================================================================	
--- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileProcess.java
+++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileProcess.java
@ -326,7 +326,12 @@ public class DecompileProcess {
 								}
 								break;
 							case 'S':
+								if (name.equals("getString")) {
+									getStringData();
+								}
+								else {
 									getSymbol();					// getSymbol
+								}
 								break;
 							case 'T':
 								if (name.equals("getType")) {
@ -778,6 +783,31 @@ public class DecompileProcess {
 		write(query_response_end);
 	}

+	/**
+	 * Service a string query from the decompiler process.  Three query strings are read
+	 * (address, data-type name, data-type id) and passed to the callback.  A non-empty
+	 * result is written back, between the byte markers, as a two character length
+	 * followed by two characters per byte.  Only the low 12 bits of the length are
+	 * transmitted (two 6-bit digits, each offset by 0x20, low digit first).  Each byte
+	 * is sent as its high and low nibble, each offset from 'A' (65).
+	 * @throws IOException for problems with the underlying stream
+	 */
+	private void getStringData() throws IOException {
+		String addrText = readQueryString();
+		String typeName = readQueryString();
+		String typeId = readQueryString();
+		byte[] utf8 = callback.getStringData(addrText, typeName, typeId);
+		write(query_response_start);
+		boolean hasData = (utf8 != null) && (utf8.length != 0);
+		if (hasData) {
+			int length = utf8.length;
+			int lowDigit = (length & 0x3f) + 0x20;
+			int highDigit = ((length >>> 6) & 0x3f) + 0x20;
+			write(byte_start);
+			write(lowDigit);
+			write(highDigit);
+			byte[] encoded = new byte[utf8.length * 2];
+			int out = 0;
+			for (byte b : utf8) {
+				encoded[out++] = (byte) (((b >> 4) & 0xf) + 65);
+				encoded[out++] = (byte) ((b & 0xf) + 65);
+			}
+			write(encoded);
+			write(byte_end);
+		}
+		write(query_response_end);
+	}
+
 	private void write(byte[] bytes) throws IOException {
 		if (nativeOut == null) {
 			return;