From 26750e23f221a2129b02348fbaeaf16bcc6a61e0 Mon Sep 17 00:00:00 2001 From: dev747368 <48332326+dev747368@users.noreply.github.com> Date: Mon, 18 Nov 2019 12:54:19 -0500 Subject: [PATCH 1/3] GT-3333, #1255 fix string rendering issue when with dataOrg char sizes > 1 byte. If the language's dataOrg specifies a character size larger than 1 byte, strings with a charSet that uses just 1 byte (ie. UTF-8 strings inside a java .dex file) will incorrectly treat some of the string bytes as padding between array elements. Fixes issue #1255. --- .../model/data/StringDataInstance.java | 18 ++++++------ .../model/data/StringDataTypeTest.java | 28 +++++++++++++++++++ 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java index 016901ee2d..9349f4c117 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java @@ -15,10 +15,10 @@ */ package ghidra.program.model.data; -import static ghidra.program.model.data.EndianSettingsDefinition.ENDIAN; -import static ghidra.program.model.data.RenderUnicodeSettingsDefinition.RENDER; +import static ghidra.program.model.data.EndianSettingsDefinition.*; +import static ghidra.program.model.data.RenderUnicodeSettingsDefinition.*; import static ghidra.program.model.data.StringLayoutEnum.*; -import static ghidra.program.model.data.TranslationSettingsDefinition.TRANSLATION; +import static ghidra.program.model.data.TranslationSettingsDefinition.*; import java.nio.charset.Charset; import java.util.HashMap; @@ -176,8 +176,9 @@ public class StringDataInstance { /** * Creates a string instance using the data in the {@link MemBuffer} and the settings * pulled from the {@link AbstractStringDataType string data type}. - * - * @param stringDataType {@link AbstractStringDataType} common string base data type. + * + * @param dataType {@link DataType} of the string, either a {@link AbstractStringDataType} derived type + * or an {@link ArrayStringable} element-of-char-array type. * @param settings {@link Settings} attached to the data location. * @param buf {@link MemBuffer} containing the data. * @param length Length passed from the caller to the datatype. -1 indicates a 'probe' @@ -189,9 +190,10 @@ public class StringDataInstance { this.buf = buf; this.charsetName = getCharsetNameFromDataTypeOrSettings(dataType, settings); this.charSize = CharsetInfo.getInstance().getCharsetCharSize(charsetName); - // NOTE: for now only handle padding for charSize == 1 - this.paddedCharSize = - charSize == 1 ? getDataOrganization(dataType).getCharSize() : charSize; + // NOTE: for now only handle padding for charSize == 1 and the data type is an array of elements, not a "string" + this.paddedCharSize = (dataType instanceof ArrayStringable) && (charSize == 1) // + ? getDataOrganization(dataType).getCharSize() + : charSize; this.stringLayout = getLayoutFromDataType(dataType); this.showTranslation = TRANSLATION.isShowTranslated(settings); this.translatedValue = TRANSLATION.getTranslatedValue(settings); diff --git a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/model/data/StringDataTypeTest.java b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/model/data/StringDataTypeTest.java index 9b5dd3321a..c568b15e56 100644 --- a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/model/data/StringDataTypeTest.java +++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/model/data/StringDataTypeTest.java @@ -45,6 +45,19 @@ public class StringDataTypeTest extends AbstractGTest { private PascalStringDataType pascalString = new PascalStringDataType(); private PascalUnicodeDataType pascalUtf16String = new PascalUnicodeDataType(); + private static class DataOrgDTM extends TestDummyDataTypeManager { + private DataOrganization dataOrg; + + public DataOrgDTM(DataOrganization dataOrg) { + this.dataOrg = dataOrg; + } + + @Override + public DataOrganization getDataOrganization() { + return dataOrg; + } + } + private ByteMemBufferImpl mb(boolean isBE, int... values) { byte[] bytes = new byte[values.length]; for (int i = 0; i < values.length; i++) { @@ -216,6 +229,21 @@ public class StringDataTypeTest extends AbstractGTest { assertEquals("ab\ucc01\u1202", actual); } + @Test + public void testGetStringValue_utf8_2bytechar_dataorg() { + // test UTF-8 when the dataorg specifies a 2byte character (ie. JVM) + ByteMemBufferImpl buf = mb(false, 'a', 'b', 'c'); + + DataOrganizationImpl dataOrg = DataOrganizationImpl.getDefaultOrganization(null); + dataOrg.setCharSize(2); + DataOrgDTM dtm = new DataOrgDTM(dataOrg); + StringUTF8DataType wideCharUTF8DT = new StringUTF8DataType(dtm); + + String actual = (String) wideCharUTF8DT.getValue(buf, newset(), buf.getLength()); + + assertEquals("abc", actual); + } + @Test public void testGetStringValue_utf16_le() { ByteMemBufferImpl buf = mb(false, // From 3be5949814711f59616e57e1b909868d319b7036 Mon Sep 17 00:00:00 2001 From: dev747368 <48332326+dev747368@users.noreply.github.com> Date: Thu, 21 Nov 2019 14:04:19 -0500 Subject: [PATCH 2/3] GT-3333 more better tests & fix padding logic Add ArrayStringable tests and fix an error in rounding length code --- .../model/data/StringDataInstance.java | 2 +- .../model/data/ArrayStringableTest.java | 112 ++++++++++++++++++ 2 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/model/data/ArrayStringableTest.java diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java index 9349f4c117..7790b93757 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java @@ -509,7 +509,7 @@ public class StringDataInstance { private byte[] getBytesFromMemBuff(MemBuffer memBuffer, int copyLen) { // round copyLen down to multiple of paddedCharSize - copyLen &= ~(paddedCharSize - 1); + copyLen = (copyLen / paddedCharSize) * paddedCharSize; byte[] bytes = new byte[copyLen]; if (memBuffer.getBytes(bytes, 0) != bytes.length) { diff --git a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/model/data/ArrayStringableTest.java b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/model/data/ArrayStringableTest.java new file mode 100644 index 0000000000..5575159165 --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/program/model/data/ArrayStringableTest.java @@ -0,0 +1,112 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.program.model.data; + +import static org.junit.Assert.*; + +import org.junit.Test; + +import generic.test.AbstractGTest; +import ghidra.program.model.address.AddressSpace; +import ghidra.program.model.address.GenericAddressSpace; +import ghidra.program.model.mem.ByteMemBufferImpl; + +public class ArrayStringableTest extends AbstractGTest { + private ByteMemBufferImpl mb(boolean isBE, int... values) { + byte[] bytes = new byte[values.length]; + for (int i = 0; i < values.length; i++) { + bytes[i] = (byte) values[i]; + } + GenericAddressSpace gas = new GenericAddressSpace("test", 32, AddressSpace.TYPE_RAM, 1); + return new ByteMemBufferImpl(gas.getAddress(0), bytes, isBE); + } + + private SettingsBuilder newset() { + return new SettingsBuilder(); + } + + private static class DataOrgDTM extends TestDummyDataTypeManager { + private DataOrganization dataOrg; + + public DataOrgDTM(int charSize) { + DataOrganizationImpl dataOrgImpl = DataOrganizationImpl.getDefaultOrganization(null); + dataOrgImpl.setCharSize(charSize); + + this.dataOrg = dataOrgImpl; + } + + @Override + public DataOrganization getDataOrganization() { + return dataOrg; + } + } + + private Array mkArray(DataTypeManager dtm, int count) { + CharDataType charDT = new CharDataType(dtm); + Array arrayDT = new ArrayDataType(charDT, count, charDT.getLength(), dtm); + + return arrayDT; + } + + + private Array array10char1 = mkArray(new DataOrgDTM(1), 10); + private Array array10char2 = mkArray(new DataOrgDTM(2), 10); + private Array array6char4 = mkArray(new DataOrgDTM(4), 6); + private Array array10char5 = mkArray(new DataOrgDTM(5), 3); + + //------------------------------------------------------------------------------------- + // get string rep of an array of various sized character elements + //------------------------------------------------------------------------------------- + @Test + public void testGetRep_1bytechar() { + // because the char size is 1, default charset will be us-ascii, which matches character element size + ByteMemBufferImpl buf = mb(false, 'h', 'e', 'l', 'l', 'o', 0, 'x', 'y', 0, 0); + + assertEquals("\"hello\"", + array10char1.getRepresentation(buf, newset(), array10char1.getLength())); + } + + @Test + public void testGetRep_2bytechar() { + // because char size is 2, default charset will be utf-16, which matches character element size + ByteMemBufferImpl buf = + mb(false, 'h', 0, 'e', 0, 'l', 0, 'l', 0, 'o', 0, 0, 0, 'x', 'y', 0, 0, 0, 0, 0, 0); + + assertEquals("u\"hello\"", + array10char2.getRepresentation(buf, newset(), array10char2.getLength())); + } + + @Test + public void testGetRep_4bytechar() { + // because char size is 4, default charset will be utf-32, which matches character element size + ByteMemBufferImpl buf = mb(false, 'h', 0, 0, 0, 'e', 0, 0, 0, 'l', 0, 0, 0, 'l', 0, 0, 0, + 'o', 0, 0, 0, 0, 0, 0, 0, 'x', 'y', 0, 0, 0, 0, 0, 0); + + assertEquals("U\"hello\"", + array6char4.getRepresentation(buf, newset(), array6char4.getLength())); + } + + @Test + public void testGetRep_5bytechar() { + // because the char size isn't normal, charset will default to us-ascii, which does not match + // the element size of the array, triggering padding-removal in the stringdatainstance code. + ByteMemBufferImpl buf = + mb(false, 'h', 'x', 'x', 'x', 'x', 'e', 'x', 'x', 'x', 'x', 0, 0, 0, 0, 0); + + assertEquals("\"he\"", + array10char5.getRepresentation(buf, newset(), array10char5.getLength())); + } +} From 09ba78b7a05a3df7a26ae4680a8843695f8278f1 Mon Sep 17 00:00:00 2001 From: dev747368 <48332326+dev747368@users.noreply.github.com> Date: Fri, 22 Nov 2019 13:44:44 -0500 Subject: [PATCH 3/3] GT-3333 minor code review tweaks --- .../ghidra/program/model/data/StringDataInstance.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java index 7790b93757..2ee4c710b0 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java @@ -155,10 +155,10 @@ public class StringDataInstance { private final String translatedValue; private final Endian endianSetting; - private boolean showTranslation; - private RENDER_ENUM renderSetting; + private final boolean showTranslation; + private final RENDER_ENUM renderSetting; - private int length; + private final int length; private final MemBuffer buf; protected StringDataInstance() { @@ -171,6 +171,8 @@ public class StringDataInstance { stringLayout = StringLayoutEnum.FIXED_LEN; endianSetting = null; renderSetting = RENDER_ENUM.ALL; + length = 0; + showTranslation = false; } /** @@ -789,7 +791,7 @@ public class StringDataInstance { * @return String containing the representation of the single char. */ public String getCharRepresentation() { - if (length < charSize) { + if (length < charSize /* also covers case of isProbe() */ ) { return UNKNOWN_DOT_DOT_DOT; }