GT-3407 Fix UnsupportedOperationException with JISAutoDetect charset. #1358

Fixes GitHub issue #1358.


Some character sets don't support the encoding operation.
This commit is contained in:
dev747368 2019-12-18 11:05:52 -05:00
parent 4c57727282
commit 93bcabe582
2 changed files with 39 additions and 22 deletions

View file

@ -684,8 +684,8 @@ public class StringDataInstance {
// if we get the same number of characters out that we put into the decoder, // if we get the same number of characters out that we put into the decoder,
// then its a good chance there is a one-to-one correspondence between original char // then its a good chance there is a one-to-one correspondence between original char
// and decoded char. // offsets and decoded char offsets.
boolean canRecoverOriginalCharBytes = boolean isByteToStringCharEquiv =
stringValue.length() == ((stringBytes.length - aci.byteStartOffset) / charSize); stringValue.length() == ((stringBytes.length - aci.byteStartOffset) / charSize);
stringValue = stringLayout.shouldTrimTrailingNulls() ? trimNulls(stringValue) : stringValue; stringValue = stringLayout.shouldTrimTrailingNulls() ? trimNulls(stringValue) : stringValue;
@ -701,22 +701,10 @@ public class StringDataInstance {
// For each 32bit character in the java string try to add it to the StringRenderBuilder // For each 32bit character in the java string try to add it to the StringRenderBuilder
for (int i = 0, strLength = stringValue.length(); i < strLength;) { for (int i = 0, strLength = stringValue.length(); i < strLength;) {
int codePoint = stringValue.codePointAt(i); int codePoint = stringValue.codePointAt(i);
byte[] originalCharBytes;
if (canRecoverOriginalCharBytes) {
originalCharBytes = new byte[charSize];
System.arraycopy(stringBytes, i * charSize + aci.byteStartOffset, originalCharBytes,
0, charSize);
}
else {
// can't get original bytes, cheat and run the codePoint through the charset
// to get what should be the same as the original bytes.
String singleCharStr = new String(new int[] { codePoint }, 0, 1);
originalCharBytes = convertStringToBytes(singleCharStr, aci);
}
RENDER_ENUM currentCharRenderSetting = renderSetting; RENDER_ENUM currentCharRenderSetting = renderSetting;
if (codePoint == StringUtilities.UNICODE_REPLACEMENT && canRecoverOriginalCharBytes && if (codePoint == StringUtilities.UNICODE_REPLACEMENT && isByteToStringCharEquiv &&
isMismatchedCharBytes(originalCharBytes, codePoint)) { !isReplacementCharAt(stringBytes, i * charSize + aci.byteStartOffset)) {
// if this is a true decode error and we can recover the original bytes, // if this is a true decode error and we can recover the original bytes,
// then force the render mode to byte seq. // then force the render mode to byte seq.
currentCharRenderSetting = RENDER_ENUM.BYTE_SEQ; currentCharRenderSetting = RENDER_ENUM.BYTE_SEQ;
@ -753,7 +741,8 @@ public class StringDataInstance {
strBuf.addCodePointChar(codePoint); strBuf.addCodePointChar(codePoint);
break; break;
case BYTE_SEQ: case BYTE_SEQ:
strBuf.addByteSeq(originalCharBytes); strBuf.addByteSeq(getOriginalBytes(isByteToStringCharEquiv, i, codePoint,
stringBytes, aci));
break; break;
case ESC_SEQ: case ESC_SEQ:
strBuf.addEscapedCodePoint(codePoint); strBuf.addEscapedCodePoint(codePoint);
@ -779,6 +768,26 @@ public class StringDataInstance {
return prefix + strBuf.toString(); return prefix + strBuf.toString();
} }
/**
 * Produces the byte sequence to display for a single character of the decoded string.
 * <p>
 * When the decoded string maps one-to-one onto fixed-size units of the source bytes, the
 * unit is copied straight out of {@code stringBytes}.  Otherwise the code point is
 * re-encoded with the charset named by {@code aci.charsetName}, which should give the same
 * bytes as the original.
 *
 * @param isByteToStringCharEquiv true if each string char corresponds to exactly one
 * {@code charSize}-byte unit of {@code stringBytes}
 * @param charOffset index of the character within the decoded string
 * @param codePoint the code point found at {@code charOffset}
 * @param stringBytes the raw bytes the string was decoded from
 * @param aci supplies the charset name and the byte offset at which character data starts
 * @return the bytes for the character, or {@code null} if they had to be re-encoded and the
 * charset is either not supported or cannot encode
 */
private byte[] getOriginalBytes(boolean isByteToStringCharEquiv, int charOffset, int codePoint,
		byte[] stringBytes, AdjustedCharsetInfo aci) {
	if (!isByteToStringCharEquiv) {
		// The original bytes can't be located directly, so push the code point back
		// through the charset and use whatever it encodes to.
		String encodable = new String(new int[] { codePoint }, 0, 1);
		if (!Charset.isSupported(aci.charsetName)) {
			return null;
		}
		Charset charset = Charset.forName(aci.charsetName);
		// Some charsets are decode-only; asking them to encode would throw.
		if (!charset.canEncode()) {
			return null;
		}
		return encodable.getBytes(charset);
	}

	// One-to-one mapping: slice the character's bytes out of the source array.
	int sourceOffset = charOffset * charSize + aci.byteStartOffset;
	byte[] charBytes = new byte[charSize];
	System.arraycopy(stringBytes, sourceOffset, charBytes, 0, charSize);
	return charBytes;
}
/** /**
* Trims trailing nulls off the end of the string. * Trims trailing nulls off the end of the string.
* *
@ -837,10 +846,13 @@ public class StringDataInstance {
StringRenderBuilder.DOUBLE_QUOTE); StringRenderBuilder.DOUBLE_QUOTE);
} }
private boolean isMismatchedCharBytes(byte[] originalCharBytes, int codePoint) { private boolean isReplacementCharAt(byte[] stringBytes, int byteOffset) {
long originalValue = DataConverter.getInstance(buf.isBigEndian()).getValue( if (byteOffset + charSize > stringBytes.length) {
originalCharBytes, Math.min(charSize, originalCharBytes.length)); return false;
return originalValue != codePoint; }
long origCodePointValue = DataConverter.getInstance(buf.isBigEndian()).getValue(stringBytes,
byteOffset, charSize);
return origCodePointValue == StringUtilities.UNICODE_REPLACEMENT;
} }
private static String getTranslatedStringRepresentation(String translatedString) { private static String getTranslatedStringRepresentation(String translatedString) {

View file

@ -112,9 +112,14 @@ public class StringRenderBuilder {
* <p> * <p>
* {@literal { 0, 1, 2 } -> 00,01,02} * {@literal { 0, 1, 2 } -> 00,01,02}
* *
* @param bytes * @param bytes to convert to hex and append. If null, append "???"
*/ */
public void addByteSeq(byte[] bytes) { public void addByteSeq(byte[] bytes) {
if (bytes == null) {
ensureByteMode();
sb.append("???");
return;
}
for (int i = 0; i < bytes.length; i++) { for (int i = 0; i < bytes.length; i++) {
ensureByteMode(); ensureByteMode();
String valStr = Integer.toHexString(bytes[i] & 0xff).toUpperCase(); String valStr = Integer.toHexString(bytes[i] & 0xff).toUpperCase();