GT-3333, #1255 fix string rendering issue with dataOrg char sizes >

1 byte.

If the language's dataOrg specifies a character size larger than 1 byte,
strings with a charSet that uses just 1 byte (e.g. UTF-8 strings inside a
Java .dex file) will incorrectly treat some of the string bytes as
padding between array elements.

Fixes issue #1255.
This commit is contained in:
dev747368 2019-11-18 12:54:19 -05:00
parent 7cd82462e9
commit 26750e23f2
2 changed files with 38 additions and 8 deletions

View file

@ -15,10 +15,10 @@
*/ */
package ghidra.program.model.data; package ghidra.program.model.data;
import static ghidra.program.model.data.EndianSettingsDefinition.ENDIAN; import static ghidra.program.model.data.EndianSettingsDefinition.*;
import static ghidra.program.model.data.RenderUnicodeSettingsDefinition.RENDER; import static ghidra.program.model.data.RenderUnicodeSettingsDefinition.*;
import static ghidra.program.model.data.StringLayoutEnum.*; import static ghidra.program.model.data.StringLayoutEnum.*;
import static ghidra.program.model.data.TranslationSettingsDefinition.TRANSLATION; import static ghidra.program.model.data.TranslationSettingsDefinition.*;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.util.HashMap; import java.util.HashMap;
@ -176,8 +176,9 @@ public class StringDataInstance {
/** /**
* Creates a string instance using the data in the {@link MemBuffer} and the settings * Creates a string instance using the data in the {@link MemBuffer} and the settings
* pulled from the {@link AbstractStringDataType string data type}. * pulled from the {@link AbstractStringDataType string data type}.
* *
* @param stringDataType {@link AbstractStringDataType} common string base data type. * @param dataType {@link DataType} of the string, either a {@link AbstractStringDataType} derived type
* or an {@link ArrayStringable} element-of-char-array type.
* @param settings {@link Settings} attached to the data location. * @param settings {@link Settings} attached to the data location.
* @param buf {@link MemBuffer} containing the data. * @param buf {@link MemBuffer} containing the data.
* @param length Length passed from the caller to the datatype. -1 indicates a 'probe' * @param length Length passed from the caller to the datatype. -1 indicates a 'probe'
@ -189,9 +190,10 @@ public class StringDataInstance {
this.buf = buf; this.buf = buf;
this.charsetName = getCharsetNameFromDataTypeOrSettings(dataType, settings); this.charsetName = getCharsetNameFromDataTypeOrSettings(dataType, settings);
this.charSize = CharsetInfo.getInstance().getCharsetCharSize(charsetName); this.charSize = CharsetInfo.getInstance().getCharsetCharSize(charsetName);
// NOTE: for now only handle padding for charSize == 1 // NOTE: for now only handle padding for charSize == 1 and the data type is an array of elements, not a "string"
this.paddedCharSize = this.paddedCharSize = (dataType instanceof ArrayStringable) && (charSize == 1) //
charSize == 1 ? getDataOrganization(dataType).getCharSize() : charSize; ? getDataOrganization(dataType).getCharSize()
: charSize;
this.stringLayout = getLayoutFromDataType(dataType); this.stringLayout = getLayoutFromDataType(dataType);
this.showTranslation = TRANSLATION.isShowTranslated(settings); this.showTranslation = TRANSLATION.isShowTranslated(settings);
this.translatedValue = TRANSLATION.getTranslatedValue(settings); this.translatedValue = TRANSLATION.getTranslatedValue(settings);

View file

@ -45,6 +45,19 @@ public class StringDataTypeTest extends AbstractGTest {
private PascalStringDataType pascalString = new PascalStringDataType(); private PascalStringDataType pascalString = new PascalStringDataType();
private PascalUnicodeDataType pascalUtf16String = new PascalUnicodeDataType(); private PascalUnicodeDataType pascalUtf16String = new PascalUnicodeDataType();
/**
 * Test-only data type manager whose {@link #getDataOrganization()} returns the
 * {@link DataOrganization} supplied at construction time, so a test can control
 * the data organization seen by data types created against this manager.
 */
private static class DataOrgDTM extends TestDummyDataTypeManager {
	/** Data organization returned by {@link #getDataOrganization()}; set once in the constructor. */
	private final DataOrganization dataOrg;

	/**
	 * @param dataOrg the {@link DataOrganization} this manager should report
	 */
	public DataOrgDTM(DataOrganization dataOrg) {
		this.dataOrg = dataOrg;
	}

	/**
	 * @return the {@link DataOrganization} passed to the constructor
	 */
	@Override
	public DataOrganization getDataOrganization() {
		return dataOrg;
	}
}
private ByteMemBufferImpl mb(boolean isBE, int... values) { private ByteMemBufferImpl mb(boolean isBE, int... values) {
byte[] bytes = new byte[values.length]; byte[] bytes = new byte[values.length];
for (int i = 0; i < values.length; i++) { for (int i = 0; i < values.length; i++) {
@ -216,6 +229,21 @@ public class StringDataTypeTest extends AbstractGTest {
assertEquals("ab\ucc01\u1202", actual); assertEquals("ab\ucc01\u1202", actual);
} }
/**
 * Reads the three bytes 'a', 'b', 'c' through a {@link StringUTF8DataType} whose
 * data organization has its char size set to 2, and expects the value "abc".
 */
@Test
public void testGetStringValue_utf8_2bytechar_dataorg() {
	// three one-byte values, built with isBE == false
	ByteMemBufferImpl utf8Bytes = mb(false, 'a', 'b', 'c');

	// data organization with a 2 byte character size (ie. JVM)
	DataOrganizationImpl twoByteCharOrg = DataOrganizationImpl.getDefaultOrganization(null);
	twoByteCharOrg.setCharSize(2);

	// UTF-8 string type created against a manager that reports that organization
	StringUTF8DataType utf8Type = new StringUTF8DataType(new DataOrgDTM(twoByteCharOrg));

	String decoded = (String) utf8Type.getValue(utf8Bytes, newset(), utf8Bytes.getLength());

	assertEquals("abc", decoded);
}
@Test @Test
public void testGetStringValue_utf16_le() { public void testGetStringValue_utf16_le() {
ByteMemBufferImpl buf = mb(false, // ByteMemBufferImpl buf = mb(false, //