GT-3407 Fix UnsupportedOp Exception with JISAutodetect charset. #1358

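Fixes GitHub issue #1358. Some character sets (for example, JISAutoDetect) do not support the encoding operation, so attempting to encode with them raised an UnsupportedOperationException.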
2025-10-05 02:39:44 +02:00 · 2019-12-18 11:05:52 -05:00 · 2019-12-18 11:05:52 -05:00 · 93bcabe582
commit 93bcabe582
parent 4c57727282
2 changed files with 39 additions and 22 deletions
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java
@ -684,8 +684,8 @@ public class StringDataInstance {
 		// if we get the same number of characters out that we put into the decoder,
 		// then its a good chance there is a one-to-one correspondence between original char
-		// and decoded char.
+		// offsets and decoded char offsets.
-		boolean canRecoverOriginalCharBytes =
+		boolean isByteToStringCharEquiv =
 			stringValue.length() == ((stringBytes.length - aci.byteStartOffset) / charSize);
 		stringValue = stringLayout.shouldTrimTrailingNulls() ? trimNulls(stringValue) : stringValue;
@ -701,22 +701,10 @@ public class StringDataInstance {
 		// For each 32bit character in the java string try to add it to the StringRenderBuilder
 		for (int i = 0, strLength = stringValue.length(); i < strLength;) {
 			int codePoint = stringValue.codePointAt(i);
 			byte[] originalCharBytes;
 			if (canRecoverOriginalCharBytes) {
 				originalCharBytes = new byte[charSize];
 				System.arraycopy(stringBytes, i * charSize + aci.byteStartOffset, originalCharBytes,
 					0, charSize);
 			}
 			else {
 				// can't get original bytes, cheat and run the codePoint through the charset
 				// to get what should be the same as the original bytes.
 				String singleCharStr = new String(new int[] { codePoint }, 0, 1);
 				originalCharBytes = convertStringToBytes(singleCharStr, aci);
 			}
 			RENDER_ENUM currentCharRenderSetting = renderSetting;
-			if (codePoint == StringUtilities.UNICODE_REPLACEMENT && canRecoverOriginalCharBytes &&
+			if (codePoint == StringUtilities.UNICODE_REPLACEMENT && isByteToStringCharEquiv &&
-				isMismatchedCharBytes(originalCharBytes, codePoint)) {
+				!isReplacementCharAt(stringBytes, i * charSize + aci.byteStartOffset)) {
 				// if this is a true decode error and we can recover the original bytes,
 				// then force the render mode to byte seq.
 				currentCharRenderSetting = RENDER_ENUM.BYTE_SEQ;
@ -753,7 +741,8 @@ public class StringDataInstance {
 						strBuf.addCodePointChar(codePoint);
 						break;
 					case BYTE_SEQ:
-						strBuf.addByteSeq(originalCharBytes);
+						strBuf.addByteSeq(getOriginalBytes(isByteToStringCharEquiv, i, codePoint,
 							stringBytes, aci));
 						break;
 					case ESC_SEQ:
 						strBuf.addEscapedCodePoint(codePoint);
@ -779,6 +768,26 @@ public class StringDataInstance {
 		return prefix + strBuf.toString();
 	}
 	/**
 	 * Produces the bytes that correspond to a single character of the decoded string, for use
 	 * when the character is rendered as a byte sequence.
 	 * <p>
 	 * When the caller indicates that string character offsets map directly onto byte offsets,
 	 * {@code charSize} bytes are copied out of {@code stringBytes}.  Otherwise the code point
 	 * is encoded with the charset named by {@code aci.charsetName}.
 	 *
 	 * @param isByteToStringCharEquiv true if character offsets in the string can be converted
 	 * directly to offsets in {@code stringBytes}
 	 * @param charOffset offset of the character within the decoded string
 	 * @param codePoint code point of the character at {@code charOffset}
 	 * @param stringBytes bytes that the string was decoded from
 	 * @param aci charset information; supplies the charset name and the starting byte offset
 	 * @return bytes for the character, or null if the charset is not supported or can not
 	 * encode
 	 */
 	private byte[] getOriginalBytes(boolean isByteToStringCharEquiv, int charOffset, int codePoint,
 			byte[] stringBytes, AdjustedCharsetInfo aci) {
 		if (!isByteToStringCharEquiv) {
 			// No direct offset mapping is available, so encode the code point with the
 			// charset to get what should be the same as the original bytes.
 			String codePointStr = new String(new int[] { codePoint }, 0, 1);
 			if (!Charset.isSupported(aci.charsetName)) {
 				return null;
 			}
 			Charset charset = Charset.forName(aci.charsetName);
 			if (!charset.canEncode()) {
 				// decode-only charset, there is no way to produce bytes
 				return null;
 			}
 			return codePointStr.getBytes(charset);
 		}
 
 		// Direct mapping: slice the character's bytes straight out of the source buffer.
 		int srcOffset = charOffset * charSize + aci.byteStartOffset;
 		byte[] charBytes = new byte[charSize];
 		System.arraycopy(stringBytes, srcOffset, charBytes, 0, charSize);
 		return charBytes;
 	}
 	/**
 	 * Trims trailing nulls off the end of the string.
 	 *
@ -837,10 +846,13 @@ public class StringDataInstance {
 			StringRenderBuilder.DOUBLE_QUOTE);
 	}
-	private boolean isMismatchedCharBytes(byte[] originalCharBytes, int codePoint) {
+	private boolean isReplacementCharAt(byte[] stringBytes, int byteOffset) {
 		// Reports whether the charSize-wide value stored in stringBytes at byteOffset is
 		// StringUtilities.UNICODE_REPLACEMENT.
-		long originalValue = DataConverter.getInstance(buf.isBigEndian()).getValue(
+		if (byteOffset + charSize > stringBytes.length) {
-			originalCharBytes, Math.min(charSize, originalCharBytes.length));
+			return false;
-		return originalValue != codePoint;
+		}
 		// At this point a full charSize-wide value fits within stringBytes; read it using the
 		// endianness reported by buf.
 		long origCodePointValue = DataConverter.getInstance(buf.isBigEndian()).getValue(stringBytes,
 			byteOffset, charSize);
 		return origCodePointValue == StringUtilities.UNICODE_REPLACEMENT;
 	}
 	private static String getTranslatedStringRepresentation(String translatedString) {
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringRenderBuilder.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringRenderBuilder.java
@ -112,9 +112,14 @@ public class StringRenderBuilder {
 	 * <p>
 	 * {@literal { 0, 1, 2 } -> 00,01,02}
 	 *
-	 * @param bytes
+	 * @param bytes to convert to hex and append.  If null, append "???"
 	 */
 	public void addByteSeq(byte[] bytes) {
 		if (bytes == null) {
 			ensureByteMode();
 			sb.append("???");
 			return;
 		}
 		for (int i = 0; i < bytes.length; i++) {
 			ensureByteMode();
 			String valStr = Integer.toHexString(bytes[i] & 0xff).toUpperCase();