Merge remote-tracking branch 'origin/GT-3333_dev747368_JVM_utf8_strings'

(fixes #1255)
This commit is contained in:
Ryan Kurtz 2019-11-25 11:49:15 -05:00
commit 7ab75e411e
3 changed files with 157 additions and 13 deletions

View file

@ -15,10 +15,10 @@
*/
package ghidra.program.model.data;
import static ghidra.program.model.data.EndianSettingsDefinition.ENDIAN;
import static ghidra.program.model.data.RenderUnicodeSettingsDefinition.RENDER;
import static ghidra.program.model.data.EndianSettingsDefinition.*;
import static ghidra.program.model.data.RenderUnicodeSettingsDefinition.*;
import static ghidra.program.model.data.StringLayoutEnum.*;
import static ghidra.program.model.data.TranslationSettingsDefinition.TRANSLATION;
import static ghidra.program.model.data.TranslationSettingsDefinition.*;
import java.nio.charset.Charset;
import java.util.HashMap;
@ -155,10 +155,10 @@ public class StringDataInstance {
private final String translatedValue;
private final Endian endianSetting;
private boolean showTranslation;
private RENDER_ENUM renderSetting;
private final boolean showTranslation;
private final RENDER_ENUM renderSetting;
private int length;
private final int length;
private final MemBuffer buf;
protected StringDataInstance() {
@ -171,13 +171,16 @@ public class StringDataInstance {
stringLayout = StringLayoutEnum.FIXED_LEN;
endianSetting = null;
renderSetting = RENDER_ENUM.ALL;
length = 0;
showTranslation = false;
}
/**
* Creates a string instance using the data in the {@link MemBuffer} and the settings
* pulled from the {@link AbstractStringDataType string data type}.
*
* @param stringDataType {@link AbstractStringDataType} common string base data type.
* @param dataType {@link DataType} of the string, either a {@link AbstractStringDataType} derived type
* or an {@link ArrayStringable} element-of-char-array type.
* @param settings {@link Settings} attached to the data location.
* @param buf {@link MemBuffer} containing the data.
* @param length Length passed from the caller to the datatype. -1 indicates a 'probe'
@ -189,9 +192,10 @@ public class StringDataInstance {
this.buf = buf;
this.charsetName = getCharsetNameFromDataTypeOrSettings(dataType, settings);
this.charSize = CharsetInfo.getInstance().getCharsetCharSize(charsetName);
// NOTE: for now only handle padding for charSize == 1
this.paddedCharSize =
charSize == 1 ? getDataOrganization(dataType).getCharSize() : charSize;
// NOTE: for now only handle padding for charSize == 1 and the data type is an array of elements, not a "string"
this.paddedCharSize = (dataType instanceof ArrayStringable) && (charSize == 1) //
? getDataOrganization(dataType).getCharSize()
: charSize;
this.stringLayout = getLayoutFromDataType(dataType);
this.showTranslation = TRANSLATION.isShowTranslated(settings);
this.translatedValue = TRANSLATION.getTranslatedValue(settings);
@ -507,7 +511,7 @@ public class StringDataInstance {
private byte[] getBytesFromMemBuff(MemBuffer memBuffer, int copyLen) {
// round copyLen down to multiple of paddedCharSize
copyLen &= ~(paddedCharSize - 1);
copyLen = (copyLen / paddedCharSize) * paddedCharSize;
byte[] bytes = new byte[copyLen];
if (memBuffer.getBytes(bytes, 0) != bytes.length) {
@ -787,7 +791,7 @@ public class StringDataInstance {
* @return String containing the representation of the single char.
*/
public String getCharRepresentation() {
if (length < charSize) {
if (length < charSize /* also covers case of isProbe() */ ) {
return UNKNOWN_DOT_DOT_DOT;
}

View file

@ -0,0 +1,112 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.program.model.data;
import static org.junit.Assert.*;
import org.junit.Test;
import generic.test.AbstractGTest;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.address.GenericAddressSpace;
import ghidra.program.model.mem.ByteMemBufferImpl;
public class ArrayStringableTest extends AbstractGTest {
private ByteMemBufferImpl mb(boolean isBE, int... values) {
byte[] bytes = new byte[values.length];
for (int i = 0; i < values.length; i++) {
bytes[i] = (byte) values[i];
}
GenericAddressSpace gas = new GenericAddressSpace("test", 32, AddressSpace.TYPE_RAM, 1);
return new ByteMemBufferImpl(gas.getAddress(0), bytes, isBE);
}
private SettingsBuilder newset() {
return new SettingsBuilder();
}
private static class DataOrgDTM extends TestDummyDataTypeManager {
private DataOrganization dataOrg;
public DataOrgDTM(int charSize) {
DataOrganizationImpl dataOrgImpl = DataOrganizationImpl.getDefaultOrganization(null);
dataOrgImpl.setCharSize(charSize);
this.dataOrg = dataOrgImpl;
}
@Override
public DataOrganization getDataOrganization() {
return dataOrg;
}
}
private Array mkArray(DataTypeManager dtm, int count) {
CharDataType charDT = new CharDataType(dtm);
Array arrayDT = new ArrayDataType(charDT, count, charDT.getLength(), dtm);
return arrayDT;
}
private Array array10char1 = mkArray(new DataOrgDTM(1), 10);
private Array array10char2 = mkArray(new DataOrgDTM(2), 10);
private Array array6char4 = mkArray(new DataOrgDTM(4), 6);
private Array array10char5 = mkArray(new DataOrgDTM(5), 3);
//-------------------------------------------------------------------------------------
// get string rep of an array of various sized character elements
//-------------------------------------------------------------------------------------
@Test
public void testGetRep_1bytechar() {
// because the char size is 1, default charset will be us-ascii, which matches character element size
ByteMemBufferImpl buf = mb(false, 'h', 'e', 'l', 'l', 'o', 0, 'x', 'y', 0, 0);
assertEquals("\"hello\"",
array10char1.getRepresentation(buf, newset(), array10char1.getLength()));
}
@Test
public void testGetRep_2bytechar() {
// because char size is 2, default charset will be utf-16, which matches character element size
ByteMemBufferImpl buf =
mb(false, 'h', 0, 'e', 0, 'l', 0, 'l', 0, 'o', 0, 0, 0, 'x', 'y', 0, 0, 0, 0, 0, 0);
assertEquals("u\"hello\"",
array10char2.getRepresentation(buf, newset(), array10char2.getLength()));
}
@Test
public void testGetRep_4bytechar() {
// because char size is 4, default charset will be utf-32, which matches character element size
ByteMemBufferImpl buf = mb(false, 'h', 0, 0, 0, 'e', 0, 0, 0, 'l', 0, 0, 0, 'l', 0, 0, 0,
'o', 0, 0, 0, 0, 0, 0, 0, 'x', 'y', 0, 0, 0, 0, 0, 0);
assertEquals("U\"hello\"",
array6char4.getRepresentation(buf, newset(), array6char4.getLength()));
}
@Test
public void testGetRep_5bytechar() {
// because the char size isn't normal, charset will default to us-ascii, which does not match
// the element size of the array, triggering padding-removal in the stringdatainstance code.
ByteMemBufferImpl buf =
mb(false, 'h', 'x', 'x', 'x', 'x', 'e', 'x', 'x', 'x', 'x', 0, 0, 0, 0, 0);
assertEquals("\"he\"",
array10char5.getRepresentation(buf, newset(), array10char5.getLength()));
}
}

View file

@ -45,6 +45,19 @@ public class StringDataTypeTest extends AbstractGTest {
private PascalStringDataType pascalString = new PascalStringDataType();
private PascalUnicodeDataType pascalUtf16String = new PascalUnicodeDataType();
private static class DataOrgDTM extends TestDummyDataTypeManager {
private DataOrganization dataOrg;
public DataOrgDTM(DataOrganization dataOrg) {
this.dataOrg = dataOrg;
}
@Override
public DataOrganization getDataOrganization() {
return dataOrg;
}
}
private ByteMemBufferImpl mb(boolean isBE, int... values) {
byte[] bytes = new byte[values.length];
for (int i = 0; i < values.length; i++) {
@ -216,6 +229,21 @@ public class StringDataTypeTest extends AbstractGTest {
assertEquals("ab\ucc01\u1202", actual);
}
@Test
public void testGetStringValue_utf8_2bytechar_dataorg() {
// test UTF-8 when the dataorg specifies a 2byte character (ie. JVM)
ByteMemBufferImpl buf = mb(false, 'a', 'b', 'c');
DataOrganizationImpl dataOrg = DataOrganizationImpl.getDefaultOrganization(null);
dataOrg.setCharSize(2);
DataOrgDTM dtm = new DataOrgDTM(dataOrg);
StringUTF8DataType wideCharUTF8DT = new StringUTF8DataType(dtm);
String actual = (String) wideCharUTF8DT.getValue(buf, newset(), buf.getLength());
assertEquals("abc", actual);
}
@Test
public void testGetStringValue_utf16_le() {
ByteMemBufferImpl buf = mb(false, //