GT-3347, PR #1241 - fix convert to char sequence menu item

Finish integrating the fixes for "convert to char sequence" and make everything use StringDataInstance instead of StringUtils.toQuoteString. Fix StringRenderBuilder to use '\x' (2 hex digits) for ASCII-range escape sequences; it was previously using the 2-byte-wide escape form for ASCII escape chars.
2025-10-05 02:39:44 +02:00 · 2019-11-21 19:16:22 -05:00 · 2019-11-21 19:16:22 -05:00 · 73155ce499
commit 73155ce499
parent 0c97e44bf7
4 changed files with 53 additions and 57 deletions
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/AbstractIntegerDataType.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/AbstractIntegerDataType.java
@ -18,12 +18,8 @@ package ghidra.program.model.data;
 import java.math.BigInteger;

 import ghidra.docking.settings.*;
-import ghidra.program.model.data.RenderUnicodeSettingsDefinition.RENDER_ENUM;
-import ghidra.program.model.mem.ByteMemBufferImpl;
 import ghidra.program.model.mem.MemBuffer;
 import ghidra.program.model.scalar.Scalar;
-import ghidra.util.BigEndianDataConverter;
-import ghidra.util.LittleEndianDataConverter;
 import ghidra.util.StringFormat;

 /**
@ -228,6 +224,10 @@ public abstract class AbstractIntegerDataType extends BuiltIn implements ArraySt
 			return "??";
 		}

+		if (getFormatSettingsDefinition().getFormat(settings) == FormatSettingsDefinition.CHAR) {
+			return StringDataInstance.getCharRepresentation(this, bytes, settings);
+		}
+
 		boolean isBigEndian = ENDIAN.isBigEndian(settings, buf);

 		if (!isBigEndian) {
@ -238,6 +238,7 @@ public abstract class AbstractIntegerDataType extends BuiltIn implements ArraySt
 			bytes = flipped;
 		}

+
 		return getRepresentation(new BigInteger(bytes), settings, 8 * length);
 	}

@ -260,34 +261,8 @@ public abstract class AbstractIntegerDataType extends BuiltIn implements ArraySt
 			bigInt = bigInt.add(BigInteger.valueOf(2).pow(bitLength));
 		}

-		int nominalLen;
-
-		if (format == FormatSettingsDefinition.CHAR) {
-			nominalLen = (bitLength + 7) / 8;
-			byte[] bytes = getDataOrganization().isBigEndian() ?
-				BigEndianDataConverter.INSTANCE.getBytes(bigInt, nominalLen) :
-					LittleEndianDataConverter.INSTANCE.getBytes(bigInt, nominalLen);
-			if (bytes.length > nominalLen) {
-				// BigInteger supplied too many bytes
-				byte[] chars = new byte[nominalLen];
-				System.arraycopy(bytes, bytes.length - nominalLen, chars, 0, nominalLen);
-				bytes = chars;
-			}
-			else if (bytes.length < nominalLen) {
-				// BigInteger supplied too few bytes
-				byte[] chars = new byte[nominalLen];
-				System.arraycopy(bytes, 0, chars, nominalLen - bytes.length, bytes.length);
-				bytes = chars;
-			}
-
-			MemBuffer memBuf = new ByteMemBufferImpl(null, bytes, true);
-			StringDataInstance instance = new StringDataInstance(this, settings, memBuf,
-					nominalLen, RENDER_ENUM.ESC_SEQ);
-			return bytes.length == 1 ? instance.getCharRepresentation() :
-				instance.getStringRepresentation();
-		}
-
 		String valStr;
+		int nominalLen;

 		switch (format) {
 			default:
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringDataInstance.java
@ -74,6 +74,24 @@ public class StringDataInstance {
 			(dt instanceof WideChar16DataType) || (dt instanceof WideChar32DataType);
 	}

+	/**
+	 * Returns a string representation of the character(s) contained in the byte array, suitable
+	 * for display as a single character, or as a sequence of characters.
+	 * <p>
+	 * The bytes are wrapped in a temporary {@link ByteMemBufferImpl} and rendered through a
+	 * {@link StringDataInstance} created with {@code RENDER_ENUM.ESC_SEQ}. A one-byte array is
+	 * rendered via {@link #getCharRepresentation()}; an array of any other length is rendered via
+	 * {@link #getCharSequenceRepresentation()}.
+	 * <p>
+	 * NOTE(review): the buffer is constructed with {@code null} as its first argument and
+	 * {@code true} as its third; these are presumably "no address" and "big endian" - confirm
+	 * against {@link ByteMemBufferImpl}.
+	 *
+	 * @param dataType the {@link DataType} of the element containing the bytes (most likely a ByteDataType)
+	 * @param bytes the bytes to convert; must not be null, since its length is read unconditionally
+	 * @param settings the {@link Settings} object for the location where the bytes came from, or null
+	 * @return formatted string (typically with quotes around the contents): single character: 'a', multiple characters: "a\x12bc"
+	 */
+	public static String getCharRepresentation(DataType dataType, byte[] bytes, Settings settings) {
+		MemBuffer memBuf = new ByteMemBufferImpl(null, bytes, true);
+		StringDataInstance instance =
+			new StringDataInstance(dataType, settings, memBuf, bytes.length, RENDER_ENUM.ESC_SEQ);
+		return bytes.length == 1 ? instance.getCharRepresentation()
+				: instance.getCharSequenceRepresentation();
+	}
+
 	/**
 	 * Returns a new {@link StringDataInstance} using the bytes in the data codeunit.
 	 * <p>
@ -210,7 +228,7 @@ public class StringDataInstance {
 	 * pulled from the {@link AbstractStringDataType string data type} but using the given
 	 * {@link RenderUnicodeSettingsDefinition.RENDER_ENUM rendering setting}.
 	 *
-	 * @param stringDataType {@link AbstractStringDataType} common string base data type.
+	 * @param dataType {@link DataType} of the data; usually an {@link AbstractStringDataType},
+	 * but other types (for example char-formatted integer types) are also passed.
 	 * @param settings {@link Settings} attached to the data location.
 	 * @param buf {@link MemBuffer} containing the data.
 	 * @param length Length passed from the caller to the datatype.  -1 indicates a 'probe'
@ -662,6 +680,10 @@ public class StringDataInstance {
 	 * @return formatted String
 	 */
 	public String getStringRepresentation() {
+		return getStringRep(true); // true: non-Pascal string values are passed through trimNulls()
+	}
+
+	private String getStringRep(boolean trimNulls) {

 		if (isProbe() || isBadCharSize() || !buf.isInitializedMemory()) {
 			return UNKNOWN;
@ -689,7 +711,7 @@ public class StringDataInstance {

 		StringRenderBuilder strBuf = new StringRenderBuilder(charSize);

-		stringValue = !isPascal() ? trimNulls(stringValue) : stringValue;
+		stringValue = !isPascal() && trimNulls ? trimNulls(stringValue) : stringValue;
 		if (stringValue.isEmpty() || (stringValue.length() == 1 && stringValue.charAt(0) == 0)) {
 			// force the string renderer into "string" mode so we get empty quotes when done.
 			strBuf.addString("");
@ -815,6 +837,18 @@ public class StringDataInstance {
 		return showTranslation;
 	}

+	/**
+	 * Convert a sequence of char values in memory into a formatted string, without
+	 * stripping any nulls.
+	 * <p>
+	 * This is the counterpart of {@link #getStringRepresentation()}: both delegate to the same
+	 * private renderer, but this method passes {@code trimNulls = false} so the string value is
+	 * rendered as-is instead of first being run through null trimming.
+	 * <p>
+	 * See also {@link #getCharRepresentation()} for rendering a single char value.
+	 *
+	 * @return String containing the representation of the char sequence
+	 */
+	public String getCharSequenceRepresentation() {
+		return getStringRep(false);
+	}
+
 	/**
 	 * Convert a char value in memory into its canonical unicode representation, using
 	 * attached charset and encoding information.
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringRenderBuilder.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/data/StringRenderBuilder.java
@ -30,6 +30,7 @@ import ghidra.util.StringUtilities;
 public class StringRenderBuilder {
 	public static final char DOUBLE_QUOTE = '"';
 	public static final char SINGLE_QUOTE = '\'';
+	private static final int MAX_ASCII = 0x80;

 	private StringBuilder sb = new StringBuilder();
 	private boolean byteMode = true;
@ -124,10 +125,9 @@ public class StringRenderBuilder {
 	 */
 	public void addEscapedCodePoint(int codePoint) {
 		ensureTextMode();
-		char escapeChar = StringUtilities.isAsciiChar(codePoint) ? 'x'
-				: Character.isBmpCodePoint(codePoint) ? 'u' : 'U';
-		int cpDigits = StringUtilities.isAsciiChar(codePoint) ? 2
-				: Character.isBmpCodePoint(codePoint) ? 4 : 8;
+		char escapeChar =
+			(codePoint < MAX_ASCII) ? 'x' : Character.isBmpCodePoint(codePoint) ? 'u' : 'U';
+		int cpDigits = (codePoint < MAX_ASCII) ? 2 : Character.isBmpCodePoint(codePoint) ? 4 : 8;
 		String s = Integer.toHexString(codePoint).toUpperCase();
 		sb.append("\\").append(escapeChar);
 		sb.append(StringUtilities.pad(s, '0', cpDigits));