GT-3347, PR #1241 - fix convert to char sequence menu item

Finish up integrating fixes for convert to char sequence, make
everything use StringDataInstance instead of StringUtils.toQuoteString.

Fix StringRenderBuilder to use '\x' for ASCII-range escape sequences
(it was using the 2-byte-wide '\u' form for ASCII escape chars).
This commit is contained in:
dev747368 2019-11-21 19:16:22 -05:00
parent 0c97e44bf7
commit 73155ce499
4 changed files with 53 additions and 57 deletions

View file

@ -18,12 +18,8 @@ package ghidra.program.model.data;
import java.math.BigInteger;
import ghidra.docking.settings.*;
import ghidra.program.model.data.RenderUnicodeSettingsDefinition.RENDER_ENUM;
import ghidra.program.model.mem.ByteMemBufferImpl;
import ghidra.program.model.mem.MemBuffer;
import ghidra.program.model.scalar.Scalar;
import ghidra.util.BigEndianDataConverter;
import ghidra.util.LittleEndianDataConverter;
import ghidra.util.StringFormat;
/**
@ -228,6 +224,10 @@ public abstract class AbstractIntegerDataType extends BuiltIn implements ArraySt
return "??";
}
if (getFormatSettingsDefinition().getFormat(settings) == FormatSettingsDefinition.CHAR) {
return StringDataInstance.getCharRepresentation(this, bytes, settings);
}
boolean isBigEndian = ENDIAN.isBigEndian(settings, buf);
if (!isBigEndian) {
@ -238,6 +238,7 @@ public abstract class AbstractIntegerDataType extends BuiltIn implements ArraySt
bytes = flipped;
}
return getRepresentation(new BigInteger(bytes), settings, 8 * length);
}
@ -260,34 +261,8 @@ public abstract class AbstractIntegerDataType extends BuiltIn implements ArraySt
bigInt = bigInt.add(BigInteger.valueOf(2).pow(bitLength));
}
int nominalLen;
if (format == FormatSettingsDefinition.CHAR) {
nominalLen = (bitLength + 7) / 8;
byte[] bytes = getDataOrganization().isBigEndian() ?
BigEndianDataConverter.INSTANCE.getBytes(bigInt, nominalLen) :
LittleEndianDataConverter.INSTANCE.getBytes(bigInt, nominalLen);
if (bytes.length > nominalLen) {
// BigInteger supplied too many bytes
byte[] chars = new byte[nominalLen];
System.arraycopy(bytes, bytes.length - nominalLen, chars, 0, nominalLen);
bytes = chars;
}
else if (bytes.length < nominalLen) {
// BigInteger supplied too few bytes
byte[] chars = new byte[nominalLen];
System.arraycopy(bytes, 0, chars, nominalLen - bytes.length, bytes.length);
bytes = chars;
}
MemBuffer memBuf = new ByteMemBufferImpl(null, bytes, true);
StringDataInstance instance = new StringDataInstance(this, settings, memBuf,
nominalLen, RENDER_ENUM.ESC_SEQ);
return bytes.length == 1 ? instance.getCharRepresentation() :
instance.getStringRepresentation();
}
String valStr;
int nominalLen;
switch (format) {
default:

View file

@ -74,6 +74,24 @@ public class StringDataInstance {
(dt instanceof WideChar16DataType) || (dt instanceof WideChar32DataType);
}
/**
 * Formats the supplied bytes as character data, producing either a single character or a
 * sequence of characters depending on how many bytes were given.
 * <p>
 * The bytes are wrapped in a temporary {@link MemBuffer} and rendered through a
 * {@link StringDataInstance} created with the {@link RENDER_ENUM#ESC_SEQ} rendering setting.
 *
 * @param dataType the {@link DataType} of the element containing the bytes (most likely a
 * ByteDataType)
 * @param bytes the bytes to convert
 * @param settings the {@link Settings} object for the location where the bytes came from,
 * or null
 * @return formatted string (typically with quotes around the contents): single character:
 * 'a', multiple characters: "a\x12bc"
 */
public static String getCharRepresentation(DataType dataType, byte[] bytes, Settings settings) {
    MemBuffer byteBuf = new ByteMemBufferImpl(null, bytes, true);
    int byteCount = bytes.length;
    StringDataInstance sdi =
        new StringDataInstance(dataType, settings, byteBuf, byteCount, RENDER_ENUM.ESC_SEQ);

    if (byteCount == 1) {
        // exactly one byte: render it as a single character
        return sdi.getCharRepresentation();
    }
    // any other length: render the bytes as a character sequence
    return sdi.getCharSequenceRepresentation();
}
/**
* Returns a new {@link StringDataInstance} using the bytes in the data codeunit.
* <p>
@ -210,7 +228,7 @@ public class StringDataInstance {
* pulled from the {@link AbstractStringDataType string data type} but using the given
* {@link RenderUnicodeSettingsDefinition.RENDER_ENUM rendering setting}.
*
* @param stringDataType {@link AbstractStringDataType} common string base data type.
* @param dataType {@link AbstractStringDataType} common string base data type.
* @param settings {@link Settings} attached to the data location.
* @param buf {@link MemBuffer} containing the data.
* @param length Length passed from the caller to the datatype. -1 indicates a 'probe'
@ -662,6 +680,10 @@ public class StringDataInstance {
* @return formatted String
*/
public String getStringRepresentation() {
    // delegate to the shared renderer, asking it to trim nulls
    final boolean trimNulls = true;
    return getStringRep(trimNulls);
}
private String getStringRep(boolean trimNulls) {
if (isProbe() || isBadCharSize() || !buf.isInitializedMemory()) {
return UNKNOWN;
@ -689,7 +711,7 @@ public class StringDataInstance {
StringRenderBuilder strBuf = new StringRenderBuilder(charSize);
stringValue = !isPascal() ? trimNulls(stringValue) : stringValue;
stringValue = !isPascal() && trimNulls ? trimNulls(stringValue) : stringValue;
if (stringValue.isEmpty() || (stringValue.length() == 1 && stringValue.charAt(0) == 0)) {
// force the string renderer into "string" mode so we get empty quotes when done.
strBuf.addString("");
@ -815,6 +837,18 @@ public class StringDataInstance {
return showTranslation;
}
/**
 * Renders the char values held by this instance as a formatted string, leaving any nulls
 * in place rather than stripping them.
 * <p>
 * See {@link #getCharRepresentation()} and {@link #getStringRepresentation()}
 *
 * @return String containing the representation of the char sequence
 */
public String getCharSequenceRepresentation() {
    // same renderer as getStringRepresentation(), but with null trimming switched off
    final boolean trimNulls = false;
    return getStringRep(trimNulls);
}
/**
* Convert a char value in memory into its canonical unicode representation, using
* attached charset and encoding information.

View file

@ -30,6 +30,7 @@ import ghidra.util.StringUtilities;
public class StringRenderBuilder {
public static final char DOUBLE_QUOTE = '"';
public static final char SINGLE_QUOTE = '\'';
private static final int MAX_ASCII = 0x80;
private StringBuilder sb = new StringBuilder();
private boolean byteMode = true;
@ -124,10 +125,9 @@ public class StringRenderBuilder {
*/
public void addEscapedCodePoint(int codePoint) {
ensureTextMode();
char escapeChar = StringUtilities.isAsciiChar(codePoint) ? 'x'
: Character.isBmpCodePoint(codePoint) ? 'u' : 'U';
int cpDigits = StringUtilities.isAsciiChar(codePoint) ? 2
: Character.isBmpCodePoint(codePoint) ? 4 : 8;
char escapeChar =
(codePoint < MAX_ASCII) ? 'x' : Character.isBmpCodePoint(codePoint) ? 'u' : 'U';
int cpDigits = (codePoint < MAX_ASCII) ? 2 : Character.isBmpCodePoint(codePoint) ? 4 : 8;
String s = Integer.toHexString(codePoint).toUpperCase();
sb.append("\\").append(escapeChar);
sb.append(StringUtilities.pad(s, '0', cpDigits));