diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc index 2edfd4dbe3..7c3fa0f073 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc @@ -1250,7 +1250,7 @@ bool PrintC::printCharacterConstant(ostream &s,const Address &addr,int4 charsize /// \brief Push a single character constant to the RPN stack /// /// For C, a character constant is usually emitted as the character in single quotes. -/// Handle unicode, wide characters, etc. +/// Handle unicode, wide characters, etc. Characters come in with the compiler's raw encoding. /// \param val is the constant value /// \param ct is data-type attached to the value /// \param vn is the Varnode holding the value @@ -1259,10 +1259,17 @@ void PrintC::pushCharConstant(uintb val,const TypeChar *ct,const Varnode *vn,con { ostringstream t; - if ((ct->getSize()==1)&& - ((val<7)||(val>0x7e)||((val>13)&&(val<0x20)))) // not a good character constant + if ((ct->getSize()==1)&&(val >= 0x80)) { + // For byte characters, the encoding is assumed to be ASCII, UTF-8, or some other + // code-page that extends ASCII. At 0x80 and above, we cannot treat the value as a + // unicode code-point. It's either part of a multi-byte UTF-8 encoding or an unknown + // code-page value. In either case, we print it as an integer. push_integer(val,1,true,vn,op); + } else { + // From here we assume the constant value is a direct unicode code-point. + // The value could be an illegal code-point (surrogates or beyond the max code-point), + // but this will just be emitted as an escape sequence. 
if (doEmitWideCharPrefix() && ct->getSize() > 1) t << 'L'; // Print symbol indicating wide character t << '\''; // char is surrounded with single quotes diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc index 0b46e6dfe0..94fe032e51 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc @@ -446,6 +446,10 @@ bool PrintLanguage::unicodeNeedsEscape(int4 codepoint) if (codepoint == 0x3000) { return true; // ideographic space } + if (codepoint >= 0xd7fc) { // D7FC - D7FF are currently unassigned. + // D800 - DFFF are high and low surrogates, technically illegal. + return true; // Treat as needing to be escaped + } return false; } if (codepoint < 0xf900) { diff --git a/Ghidra/Processors/Dalvik/src/main/java/ghidra/dalvik/dex/inject/ConstantPoolDex.java b/Ghidra/Processors/Dalvik/src/main/java/ghidra/dalvik/dex/inject/ConstantPoolDex.java index cb64e5bac2..f0e3211c86 100644 --- a/Ghidra/Processors/Dalvik/src/main/java/ghidra/dalvik/dex/inject/ConstantPoolDex.java +++ b/Ghidra/Processors/Dalvik/src/main/java/ghidra/dalvik/dex/inject/ConstantPoolDex.java @@ -56,8 +56,9 @@ public class ConstantPoolDex extends ConstantPool { String classString = DexUtil.convertTypeIndexToString(dexHeader, fieldIDItem.getClassIndex()); String[] pathArray = DexUtil.convertClassStringToPathArray("", classString); - if (pathArray != null) + if (pathArray != null) { res.token = pathArray[pathArray.length - 1] + '.' 
+ res.token; + } } DataType fieldDT = dexHeader.getDataType(program, fieldIDItem.getTypeIndex()); @@ -72,13 +73,16 @@ public class ConstantPoolDex extends ConstantPool { private String removeUniquifier(String name) { int len = name.length(); - if (len < 10 || name.charAt(len - 9) != '_') + if (len < 10 || name.charAt(len - 9) != '_') { return name; + } char matchChar = name.charAt(len - 8); - if (matchChar != '5' && matchChar != 'e') + if (matchChar != '5' && matchChar != 'e') { return name; - if (name.charAt(len - 7) != '0') + } + if (name.charAt(len - 7) != '0') { return name; + } return name.substring(0, len - 9); } @@ -103,14 +107,19 @@ public class ConstantPoolDex extends ConstantPool { String classString = DexUtil.convertTypeIndexToString(dexHeader, methodIDItem.getClassIndex()); String[] pathArray = DexUtil.convertClassStringToPathArray("", classString); - if (pathArray != null) + if (pathArray != null) { namespaceString = pathArray[pathArray.length - 1]; + } } - if (namespaceString != null) + if (namespaceString != null) { res.token = namespaceString + '.' + res.token; + } } res.tag = ConstantPool.POINTER_METHOD; - FunctionDefinitionDataType funcDef = new FunctionDefinitionDataType(res.token, dtManager); + // The FunctionDefinition is constructed on the fly, essentially as an anonymous type + // We use an internal naming scheme involving the methodID to avoid name collisions + String defName = res.token + '_' + Integer.toHexString(methodID); + FunctionDefinitionDataType funcDef = new FunctionDefinitionDataType(defName, dtManager); res.type = new PointerDataType(funcDef); res.hasThisPtr = !isStatic;