mirror of
https://github.com/NationalSecurityAgency/ghidra.git
synced 2025-10-05 02:39:44 +02:00
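GT-3407 Fix UnsupportedOperationException with JISAutoDetect charset. #1358
Fixes GitHub issue #1358. Some character sets can only decode bytes to text and do not support the encoding operation (text to bytes), so the charset must be checked before it is used to encode.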
This commit is contained in:
parent
4c57727282
commit
93bcabe582
2 changed files with 39 additions and 22 deletions
|
@ -684,8 +684,8 @@ public class StringDataInstance {
|
|||
|
||||
// if we get the same number of characters out that we put into the decoder,
|
||||
// then there is a good chance of a one-to-one correspondence between original char
|
||||
// and decoded char.
|
||||
boolean canRecoverOriginalCharBytes =
|
||||
// offsets and decoded char offsets.
|
||||
boolean isByteToStringCharEquiv =
|
||||
stringValue.length() == ((stringBytes.length - aci.byteStartOffset) / charSize);
|
||||
|
||||
stringValue = stringLayout.shouldTrimTrailingNulls() ? trimNulls(stringValue) : stringValue;
|
||||
|
@ -701,22 +701,10 @@ public class StringDataInstance {
|
|||
// For each 32bit character in the java string try to add it to the StringRenderBuilder
|
||||
for (int i = 0, strLength = stringValue.length(); i < strLength;) {
|
||||
int codePoint = stringValue.codePointAt(i);
|
||||
byte[] originalCharBytes;
|
||||
if (canRecoverOriginalCharBytes) {
|
||||
originalCharBytes = new byte[charSize];
|
||||
System.arraycopy(stringBytes, i * charSize + aci.byteStartOffset, originalCharBytes,
|
||||
0, charSize);
|
||||
}
|
||||
else {
|
||||
// can't get original bytes, cheat and run the codePoint through the charset
|
||||
// to get what should be the same as the original bytes.
|
||||
String singleCharStr = new String(new int[] { codePoint }, 0, 1);
|
||||
originalCharBytes = convertStringToBytes(singleCharStr, aci);
|
||||
}
|
||||
|
||||
RENDER_ENUM currentCharRenderSetting = renderSetting;
|
||||
if (codePoint == StringUtilities.UNICODE_REPLACEMENT && canRecoverOriginalCharBytes &&
|
||||
isMismatchedCharBytes(originalCharBytes, codePoint)) {
|
||||
if (codePoint == StringUtilities.UNICODE_REPLACEMENT && isByteToStringCharEquiv &&
|
||||
!isReplacementCharAt(stringBytes, i * charSize + aci.byteStartOffset)) {
|
||||
// if this is a true decode error and we can recover the original bytes,
|
||||
// then force the render mode to byte seq.
|
||||
currentCharRenderSetting = RENDER_ENUM.BYTE_SEQ;
|
||||
|
@ -753,7 +741,8 @@ public class StringDataInstance {
|
|||
strBuf.addCodePointChar(codePoint);
|
||||
break;
|
||||
case BYTE_SEQ:
|
||||
strBuf.addByteSeq(originalCharBytes);
|
||||
strBuf.addByteSeq(getOriginalBytes(isByteToStringCharEquiv, i, codePoint,
|
||||
stringBytes, aci));
|
||||
break;
|
||||
case ESC_SEQ:
|
||||
strBuf.addEscapedCodePoint(codePoint);
|
||||
|
@ -779,6 +768,26 @@ public class StringDataInstance {
|
|||
return prefix + strBuf.toString();
|
||||
}
|
||||
|
||||
private byte[] getOriginalBytes(boolean isByteToStringCharEquiv, int charOffset, int codePoint,
|
||||
byte[] stringBytes, AdjustedCharsetInfo aci) {
|
||||
|
||||
if (isByteToStringCharEquiv) {
|
||||
byte[] originalCharBytes = new byte[charSize];
|
||||
System.arraycopy(stringBytes, charOffset * charSize + aci.byteStartOffset,
|
||||
originalCharBytes, 0, charSize);
|
||||
return originalCharBytes;
|
||||
}
|
||||
|
||||
// can't get original bytes, cheat and run the codePoint through the charset
|
||||
// to get what should be the same as the original bytes.
|
||||
String singleCharStr = new String(new int[] { codePoint }, 0, 1);
|
||||
Charset cs = Charset.isSupported(aci.charsetName) ? Charset.forName(aci.charsetName) : null;
|
||||
if (cs == null || !cs.canEncode()) {
|
||||
return null;
|
||||
}
|
||||
return singleCharStr.getBytes(cs);
|
||||
}
|
||||
|
||||
/**
|
||||
* Trims trailing nulls off the end of the string.
|
||||
*
|
||||
|
@ -837,10 +846,13 @@ public class StringDataInstance {
|
|||
StringRenderBuilder.DOUBLE_QUOTE);
|
||||
}
|
||||
|
||||
private boolean isMismatchedCharBytes(byte[] originalCharBytes, int codePoint) {
|
||||
long originalValue = DataConverter.getInstance(buf.isBigEndian()).getValue(
|
||||
originalCharBytes, Math.min(charSize, originalCharBytes.length));
|
||||
return originalValue != codePoint;
|
||||
private boolean isReplacementCharAt(byte[] stringBytes, int byteOffset) {
|
||||
if (byteOffset + charSize > stringBytes.length) {
|
||||
return false;
|
||||
}
|
||||
long origCodePointValue = DataConverter.getInstance(buf.isBigEndian()).getValue(stringBytes,
|
||||
byteOffset, charSize);
|
||||
return origCodePointValue == StringUtilities.UNICODE_REPLACEMENT;
|
||||
}
|
||||
|
||||
private static String getTranslatedStringRepresentation(String translatedString) {
|
||||
|
|
|
@ -112,9 +112,14 @@ public class StringRenderBuilder {
|
|||
* <p>
|
||||
* {@literal { 0, 1, 2 } -> 00,01,02}
|
||||
*
|
||||
* @param bytes
|
||||
* @param bytes to convert to hex and append. If null, append "???"
|
||||
*/
|
||||
public void addByteSeq(byte[] bytes) {
|
||||
if (bytes == null) {
|
||||
ensureByteMode();
|
||||
sb.append("???");
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < bytes.length; i++) {
|
||||
ensureByteMode();
|
||||
String valStr = Integer.toHexString(bytes[i] & 0xff).toUpperCase();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue