mirror of
https://github.com/NationalSecurityAgency/ghidra.git
synced 2025-10-04 18:29:37 +02:00
Merge remote-tracking branch 'origin/GT-3333_dev747368_JVM_utf8_strings'
(fixes #1255)
This commit is contained in:
commit
7ab75e411e
3 changed files with 157 additions and 13 deletions
|
@ -15,10 +15,10 @@
|
||||||
*/
|
*/
|
||||||
package ghidra.program.model.data;
|
package ghidra.program.model.data;
|
||||||
|
|
||||||
import static ghidra.program.model.data.EndianSettingsDefinition.ENDIAN;
|
import static ghidra.program.model.data.EndianSettingsDefinition.*;
|
||||||
import static ghidra.program.model.data.RenderUnicodeSettingsDefinition.RENDER;
|
import static ghidra.program.model.data.RenderUnicodeSettingsDefinition.*;
|
||||||
import static ghidra.program.model.data.StringLayoutEnum.*;
|
import static ghidra.program.model.data.StringLayoutEnum.*;
|
||||||
import static ghidra.program.model.data.TranslationSettingsDefinition.TRANSLATION;
|
import static ghidra.program.model.data.TranslationSettingsDefinition.*;
|
||||||
|
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -155,10 +155,10 @@ public class StringDataInstance {
|
||||||
private final String translatedValue;
|
private final String translatedValue;
|
||||||
private final Endian endianSetting;
|
private final Endian endianSetting;
|
||||||
|
|
||||||
private boolean showTranslation;
|
private final boolean showTranslation;
|
||||||
private RENDER_ENUM renderSetting;
|
private final RENDER_ENUM renderSetting;
|
||||||
|
|
||||||
private int length;
|
private final int length;
|
||||||
private final MemBuffer buf;
|
private final MemBuffer buf;
|
||||||
|
|
||||||
protected StringDataInstance() {
|
protected StringDataInstance() {
|
||||||
|
@ -171,13 +171,16 @@ public class StringDataInstance {
|
||||||
stringLayout = StringLayoutEnum.FIXED_LEN;
|
stringLayout = StringLayoutEnum.FIXED_LEN;
|
||||||
endianSetting = null;
|
endianSetting = null;
|
||||||
renderSetting = RENDER_ENUM.ALL;
|
renderSetting = RENDER_ENUM.ALL;
|
||||||
|
length = 0;
|
||||||
|
showTranslation = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a string instance using the data in the {@link MemBuffer} and the settings
|
* Creates a string instance using the data in the {@link MemBuffer} and the settings
|
||||||
* pulled from the {@link AbstractStringDataType string data type}.
|
* pulled from the {@link AbstractStringDataType string data type}.
|
||||||
*
|
*
|
||||||
* @param stringDataType {@link AbstractStringDataType} common string base data type.
|
* @param dataType {@link DataType} of the string, either a {@link AbstractStringDataType} derived type
|
||||||
|
* or an {@link ArrayStringable} element-of-char-array type.
|
||||||
* @param settings {@link Settings} attached to the data location.
|
* @param settings {@link Settings} attached to the data location.
|
||||||
* @param buf {@link MemBuffer} containing the data.
|
* @param buf {@link MemBuffer} containing the data.
|
||||||
* @param length Length passed from the caller to the datatype. -1 indicates a 'probe'
|
* @param length Length passed from the caller to the datatype. -1 indicates a 'probe'
|
||||||
|
@ -189,9 +192,10 @@ public class StringDataInstance {
|
||||||
this.buf = buf;
|
this.buf = buf;
|
||||||
this.charsetName = getCharsetNameFromDataTypeOrSettings(dataType, settings);
|
this.charsetName = getCharsetNameFromDataTypeOrSettings(dataType, settings);
|
||||||
this.charSize = CharsetInfo.getInstance().getCharsetCharSize(charsetName);
|
this.charSize = CharsetInfo.getInstance().getCharsetCharSize(charsetName);
|
||||||
// NOTE: for now only handle padding for charSize == 1
|
// NOTE: for now only handle padding for charSize == 1 and the data type is an array of elements, not a "string"
|
||||||
this.paddedCharSize =
|
this.paddedCharSize = (dataType instanceof ArrayStringable) && (charSize == 1) //
|
||||||
charSize == 1 ? getDataOrganization(dataType).getCharSize() : charSize;
|
? getDataOrganization(dataType).getCharSize()
|
||||||
|
: charSize;
|
||||||
this.stringLayout = getLayoutFromDataType(dataType);
|
this.stringLayout = getLayoutFromDataType(dataType);
|
||||||
this.showTranslation = TRANSLATION.isShowTranslated(settings);
|
this.showTranslation = TRANSLATION.isShowTranslated(settings);
|
||||||
this.translatedValue = TRANSLATION.getTranslatedValue(settings);
|
this.translatedValue = TRANSLATION.getTranslatedValue(settings);
|
||||||
|
@ -507,7 +511,7 @@ public class StringDataInstance {
|
||||||
|
|
||||||
private byte[] getBytesFromMemBuff(MemBuffer memBuffer, int copyLen) {
|
private byte[] getBytesFromMemBuff(MemBuffer memBuffer, int copyLen) {
|
||||||
// round copyLen down to multiple of paddedCharSize
|
// round copyLen down to multiple of paddedCharSize
|
||||||
copyLen &= ~(paddedCharSize - 1);
|
copyLen = (copyLen / paddedCharSize) * paddedCharSize;
|
||||||
|
|
||||||
byte[] bytes = new byte[copyLen];
|
byte[] bytes = new byte[copyLen];
|
||||||
if (memBuffer.getBytes(bytes, 0) != bytes.length) {
|
if (memBuffer.getBytes(bytes, 0) != bytes.length) {
|
||||||
|
@ -787,7 +791,7 @@ public class StringDataInstance {
|
||||||
* @return String containing the representation of the single char.
|
* @return String containing the representation of the single char.
|
||||||
*/
|
*/
|
||||||
public String getCharRepresentation() {
|
public String getCharRepresentation() {
|
||||||
if (length < charSize) {
|
if (length < charSize /* also covers case of isProbe() */ ) {
|
||||||
return UNKNOWN_DOT_DOT_DOT;
|
return UNKNOWN_DOT_DOT_DOT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,112 @@
|
||||||
|
/* ###
|
||||||
|
* IP: GHIDRA
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package ghidra.program.model.data;
|
||||||
|
|
||||||
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import generic.test.AbstractGTest;
|
||||||
|
import ghidra.program.model.address.AddressSpace;
|
||||||
|
import ghidra.program.model.address.GenericAddressSpace;
|
||||||
|
import ghidra.program.model.mem.ByteMemBufferImpl;
|
||||||
|
|
||||||
|
public class ArrayStringableTest extends AbstractGTest {
|
||||||
|
private ByteMemBufferImpl mb(boolean isBE, int... values) {
|
||||||
|
byte[] bytes = new byte[values.length];
|
||||||
|
for (int i = 0; i < values.length; i++) {
|
||||||
|
bytes[i] = (byte) values[i];
|
||||||
|
}
|
||||||
|
GenericAddressSpace gas = new GenericAddressSpace("test", 32, AddressSpace.TYPE_RAM, 1);
|
||||||
|
return new ByteMemBufferImpl(gas.getAddress(0), bytes, isBE);
|
||||||
|
}
|
||||||
|
|
||||||
|
private SettingsBuilder newset() {
|
||||||
|
return new SettingsBuilder();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class DataOrgDTM extends TestDummyDataTypeManager {
|
||||||
|
private DataOrganization dataOrg;
|
||||||
|
|
||||||
|
public DataOrgDTM(int charSize) {
|
||||||
|
DataOrganizationImpl dataOrgImpl = DataOrganizationImpl.getDefaultOrganization(null);
|
||||||
|
dataOrgImpl.setCharSize(charSize);
|
||||||
|
|
||||||
|
this.dataOrg = dataOrgImpl;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataOrganization getDataOrganization() {
|
||||||
|
return dataOrg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Array mkArray(DataTypeManager dtm, int count) {
|
||||||
|
CharDataType charDT = new CharDataType(dtm);
|
||||||
|
Array arrayDT = new ArrayDataType(charDT, count, charDT.getLength(), dtm);
|
||||||
|
|
||||||
|
return arrayDT;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private Array array10char1 = mkArray(new DataOrgDTM(1), 10);
|
||||||
|
private Array array10char2 = mkArray(new DataOrgDTM(2), 10);
|
||||||
|
private Array array6char4 = mkArray(new DataOrgDTM(4), 6);
|
||||||
|
private Array array10char5 = mkArray(new DataOrgDTM(5), 3);
|
||||||
|
|
||||||
|
//-------------------------------------------------------------------------------------
|
||||||
|
// get string rep of an array of various sized character elements
|
||||||
|
//-------------------------------------------------------------------------------------
|
||||||
|
@Test
|
||||||
|
public void testGetRep_1bytechar() {
|
||||||
|
// because the char size is 1, default charset will be us-ascii, which matches character element size
|
||||||
|
ByteMemBufferImpl buf = mb(false, 'h', 'e', 'l', 'l', 'o', 0, 'x', 'y', 0, 0);
|
||||||
|
|
||||||
|
assertEquals("\"hello\"",
|
||||||
|
array10char1.getRepresentation(buf, newset(), array10char1.getLength()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetRep_2bytechar() {
|
||||||
|
// because char size is 2, default charset will be utf-16, which matches character element size
|
||||||
|
ByteMemBufferImpl buf =
|
||||||
|
mb(false, 'h', 0, 'e', 0, 'l', 0, 'l', 0, 'o', 0, 0, 0, 'x', 'y', 0, 0, 0, 0, 0, 0);
|
||||||
|
|
||||||
|
assertEquals("u\"hello\"",
|
||||||
|
array10char2.getRepresentation(buf, newset(), array10char2.getLength()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetRep_4bytechar() {
|
||||||
|
// because char size is 4, default charset will be utf-32, which matches character element size
|
||||||
|
ByteMemBufferImpl buf = mb(false, 'h', 0, 0, 0, 'e', 0, 0, 0, 'l', 0, 0, 0, 'l', 0, 0, 0,
|
||||||
|
'o', 0, 0, 0, 0, 0, 0, 0, 'x', 'y', 0, 0, 0, 0, 0, 0);
|
||||||
|
|
||||||
|
assertEquals("U\"hello\"",
|
||||||
|
array6char4.getRepresentation(buf, newset(), array6char4.getLength()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetRep_5bytechar() {
|
||||||
|
// because the char size isn't normal, charset will default to us-ascii, which does not match
|
||||||
|
// the element size of the array, triggering padding-removal in the stringdatainstance code.
|
||||||
|
ByteMemBufferImpl buf =
|
||||||
|
mb(false, 'h', 'x', 'x', 'x', 'x', 'e', 'x', 'x', 'x', 'x', 0, 0, 0, 0, 0);
|
||||||
|
|
||||||
|
assertEquals("\"he\"",
|
||||||
|
array10char5.getRepresentation(buf, newset(), array10char5.getLength()));
|
||||||
|
}
|
||||||
|
}
|
|
@ -45,6 +45,19 @@ public class StringDataTypeTest extends AbstractGTest {
|
||||||
private PascalStringDataType pascalString = new PascalStringDataType();
|
private PascalStringDataType pascalString = new PascalStringDataType();
|
||||||
private PascalUnicodeDataType pascalUtf16String = new PascalUnicodeDataType();
|
private PascalUnicodeDataType pascalUtf16String = new PascalUnicodeDataType();
|
||||||
|
|
||||||
|
private static class DataOrgDTM extends TestDummyDataTypeManager {
|
||||||
|
private DataOrganization dataOrg;
|
||||||
|
|
||||||
|
public DataOrgDTM(DataOrganization dataOrg) {
|
||||||
|
this.dataOrg = dataOrg;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataOrganization getDataOrganization() {
|
||||||
|
return dataOrg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private ByteMemBufferImpl mb(boolean isBE, int... values) {
|
private ByteMemBufferImpl mb(boolean isBE, int... values) {
|
||||||
byte[] bytes = new byte[values.length];
|
byte[] bytes = new byte[values.length];
|
||||||
for (int i = 0; i < values.length; i++) {
|
for (int i = 0; i < values.length; i++) {
|
||||||
|
@ -216,6 +229,21 @@ public class StringDataTypeTest extends AbstractGTest {
|
||||||
assertEquals("ab\ucc01\u1202", actual);
|
assertEquals("ab\ucc01\u1202", actual);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetStringValue_utf8_2bytechar_dataorg() {
|
||||||
|
// test UTF-8 when the dataorg specifies a 2byte character (ie. JVM)
|
||||||
|
ByteMemBufferImpl buf = mb(false, 'a', 'b', 'c');
|
||||||
|
|
||||||
|
DataOrganizationImpl dataOrg = DataOrganizationImpl.getDefaultOrganization(null);
|
||||||
|
dataOrg.setCharSize(2);
|
||||||
|
DataOrgDTM dtm = new DataOrgDTM(dataOrg);
|
||||||
|
StringUTF8DataType wideCharUTF8DT = new StringUTF8DataType(dtm);
|
||||||
|
|
||||||
|
String actual = (String) wideCharUTF8DT.getValue(buf, newset(), buf.getLength());
|
||||||
|
|
||||||
|
assertEquals("abc", actual);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetStringValue_utf16_le() {
|
public void testGetStringValue_utf16_le() {
|
||||||
ByteMemBufferImpl buf = mb(false, //
|
ByteMemBufferImpl buf = mb(false, //
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue