Merge remote-tracking branch 'origin/GP-3210_dev747368_fix_xml_serialization_of_high_unicode'

This commit is contained in:
Ryan Kurtz 2023-03-22 11:26:30 -04:00
commit 93c82e083f
3 changed files with 27 additions and 20 deletions

View file

@ -17,11 +17,12 @@ package ghidra.framework.options;
import static org.junit.Assert.*;
import java.util.Arrays;
import java.util.Date;
import java.awt.Color;
import java.awt.Font;
import java.io.*;
import java.util.Arrays;
import java.util.Date;
import javax.swing.KeyStroke;
@ -342,7 +343,7 @@ public class SaveStateTest extends AbstractGenericTest {
ss.putString(greaterThanLessThanKey, stringWithGreaterThanAndLessThan);
String stringWithLargeHexDigit =
"The following is a large hex digit: \u0128, \u0132, \307 and \253 " +
"The following is a large hex digit: \u0128, \u0132, \307 and \253 \uD835\uDCC8 " +
"with some trailing text ÿ";
String hexDigitKey = "HEX_DIGIT_KEY";
ss.putString(hexDigitKey, stringWithLargeHexDigit);

View file

@ -59,7 +59,8 @@ public class PropertyFileTest extends AbstractGenericTest {
pf.putInt("TestInt", 1234);
pf.putLong("TestLong", 0x12345678);
StringBuffer sb = new StringBuffer("Line1\nLine2\n\"Ugly\" & Special <Values>; ");
StringBuffer sb = new StringBuffer(
"Line1\nLine2\n\"Ugly\" & Special <Values>; \u0128, \u0132, \307 and \253");
for (int i = 1; i < 35; i++) {
sb.append((char) i);
}
@ -70,6 +71,10 @@ public class PropertyFileTest extends AbstractGenericTest {
pf.putString("TestString", URLEncoder.encode(str, "UTF-8"));
// also test plain Unicode values, as well as a supplementary (non-BMP) code point written as a UTF-16 surrogate pair
String string2 = "non-control char values: < & ; > \u00bb \u0128, \u0132, \uD835\uDCC8";
pf.putString("TestString2", string2);
pf.writeState();
PropertyFile pf2 = new PropertyFile(parent, storageName, "/", NAME);
@ -81,6 +86,7 @@ public class PropertyFileTest extends AbstractGenericTest {
assertEquals(1234, pf2.getInt("TestInt", -1));
assertEquals(0x12345678, pf2.getLong("TestLong", -1));
assertEquals(str, URLDecoder.decode(pf2.getString("TestString", null), "UTF-8"));
assertEquals(string2, pf2.getString("TestString2", null));
}

View file

@ -15,12 +15,13 @@
*/
package ghidra.util.xml;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.io.*;
import java.nio.charset.StandardCharsets;
import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
@ -87,19 +88,21 @@ public class XmlUtilities {
* @return the encoded XML string
*/
public static String escapeElementEntities(String xml) {
StringBuffer buffer = new StringBuffer();
for (int i = 0; i < xml.length(); i++) {
char next = xml.charAt(i);
if ((next < ' ') && (next != 0x09) && (next != 0x0A) && (next != 0x0D)) {
StringBuilder buffer = new StringBuilder();
for (int offset = 0; offset < xml.length();) {
int codePoint = xml.codePointAt(offset);
offset += Character.charCount(codePoint);
if ((codePoint < ' ') && (codePoint != 0x09) && (codePoint != 0x0A) && (codePoint != 0x0D)) {
continue;
}
if (next >= 0x7F) {
if (codePoint >= 0x7F) {
buffer.append("&#x");
buffer.append(Integer.toString(next, 16).toUpperCase());
buffer.append(Integer.toString(codePoint, 16).toUpperCase());
buffer.append(";");
continue;
}
switch (next) {
switch (codePoint) {
case '<':
buffer.append(LESS_THAN);
break;
@ -115,11 +118,8 @@ public class XmlUtilities {
case '&':
buffer.append(AMPERSAND);
break;
// Why was 7F deleted
// case 0x7F:
// break;
default:
buffer.append(next);
buffer.appendCodePoint(codePoint);
break;
}
}
@ -137,10 +137,10 @@ public class XmlUtilities {
public static String unEscapeElementEntities(String escapedXMLString) {
Matcher matcher = HEX_DIGIT_PATTERN.matcher(escapedXMLString);
StringBuffer buffy = new StringBuffer();
StringBuilder buffy = new StringBuilder();
while (matcher.find()) {
int intValue = Integer.parseInt(matcher.group(1), 16);
matcher.appendReplacement(buffy, Character.toString((char) intValue));
int codePoint = Integer.parseInt(matcher.group(1), 16);
matcher.appendReplacement(buffy, Character.toString(codePoint));
}
matcher.appendTail(buffy);