Merge remote-tracking branch

'origin/GP-2342_dev747368_cleanup_BinaryReader_string_reading--SQUASHED'
(Closes #4452)
This commit is contained in:
Ryan Kurtz 2022-09-03 01:35:09 -04:00
commit 3ee37fd4df
22 changed files with 578 additions and 296 deletions

View file

@ -15,7 +15,8 @@
*/
package ghidra.app.util.bin;
import java.io.IOException;
import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import ghidra.util.*;
@ -338,79 +339,131 @@ public class BinaryReader {
}
/**
* Reads the Ascii string at the current index and then increments the current
* index by the length of the Ascii string that was found. This method
* expects the string to be null-terminated.
* @return the null-terminated Ascii string at the current index
* Reads a null terminated US-ASCII string starting at the current index,
* advancing the current index by the length of the string that was found.
* <p>
* Note: this method no longer trims() the returned String.
* <p>
*
* @return the US-ASCII string at the current index
* @exception IOException if an I/O error occurs
*/
public String readNextAsciiString() throws IOException {
String s = readAsciiString(currentIndex);
currentIndex += (s.length() + 1);
return s;
return readNextString(StandardCharsets.US_ASCII, 1);
}
/**
* Reads a null terminated Ascii string starting at the current index,
* ending at the first null character or when reaching the
* end of the underlying ByteProvider.
* Reads a fixed length US-ASCII string starting at the current index,
* advancing the current index by the specified fixed length.
* <p>
* The current index is advanced to the next byte after the null terminator.
* Trailing null terminator characters will be removed. (suitable for reading
* a string from a fixed length field that is padded with trailing null chars)
* <p>
* @return the null-terminated Ascii string at the current index
* @exception IOException if an I/O error occurs
*/
public String readNextNullTerminatedAsciiString() throws IOException {
StringBuilder buffer = new StringBuilder();
while (currentIndex < provider.length()) {
byte b = provider.readByte(currentIndex++);
if (b == 0) {
break;
}
buffer.append((char) b);
}
return buffer.toString();
}
/**
* Reads an Ascii string of <code>length</code>
* characters starting at the current index and then increments the current
* index by <code>length</code>.
*
* @return the Ascii string at the current index
* Note: this method no longer trims() the returned String.
* <p>
* @param length number of bytes to read
* @return the US-ASCII string at the current index
*/
public String readNextAsciiString(int length) throws IOException {
String s = readAsciiString(currentIndex, length);
currentIndex += length;
return s;
return readNextString(length, StandardCharsets.US_ASCII, 1);
}
/**
* Reads the Unicode string at the current index and then increments the current
* index by the length of the Unicode string that was found. This method
* expects the string to be double null-terminated ('\0\0').
* @return the null-terminated Ascii string at the current index
* Reads a null-terminated UTF-16 Unicode string at the current index,
* advancing the current index by the length of the string that was found.
* <p>
*
* @return UTF-16 string at the current index
* @exception IOException if an I/O error occurs
*/
public String readNextUnicodeString() throws IOException {
String s = readUnicodeString(currentIndex);
currentIndex += ((s.length() + 1) * 2);
return s;
return readNextString(getUTF16Charset(), 2);
}
/**
* Reads fixed length UTF-16 Unicode string the current index and then increments the current
* {@link #setPointerIndex(int) pointer index} by <code>length</code> elements (length*2 bytes).
* Reads a fixed length UTF-16 Unicode string at the current index,
* advancing the current index by the length of the string that was found.
* <p>
*
* @param charCount number of UTF-16 characters to read (not bytes)
* @return the UTF-16 Unicode string at the current index
* @exception IOException if an I/O error occurs
*/
public String readNextUnicodeString(int length) throws IOException {
String s = readUnicodeString(currentIndex, length);
currentIndex += (length * 2);
return s;
public String readNextUnicodeString(int charCount) throws IOException {
return readNextString(charCount, getUTF16Charset(), 2);
}
/**
* Reads a null-terminated UTF-8 string at the current index,
* advancing the current index by the length of the string that was found.
* <p>
*
* @return UTF-8 string at the current index
* @exception IOException if an I/O error occurs
*/
public String readNextUtf8String() throws IOException {
return readNextString(StandardCharsets.UTF_8, 1);
}
/**
* Reads a fixed length UTF-8 string the current index,
* advancing the current index by the length of the string that was found.
* <p>
*
* @param length number of bytes to read
* @return the UTF-8 string at the current index
* @exception IOException if an I/O error occurs
*/
public String readNextUtf8String(int length) throws IOException {
return readNextString(length, StandardCharsets.UTF_8, 1);
}
/**
* Reads a null terminated string starting at the current index,
* using a specific {@link Charset}, advancing the current index by the length of
* the string that was found.
* <p>
* @param charset {@link Charset}, see {@link StandardCharsets}
* @param charLen number of bytes in each character
* @return the string
* @exception IOException if an I/O error occurs
*/
private String readNextString(Charset charset, int charLen) throws IOException {
byte[] bytes = readUntilNullTerm(currentIndex, charLen);
currentIndex += bytes.length + charLen;
String result = new String(bytes, charset);
return result;
}
/**
* Reads a fixed length string of <code>charCount</code> characters
* starting at the current index, using a specific {@link Charset},
* advancing the current index by the length of the string that was found.
* <p>
* Trailing null terminator characters will be removed. (suitable for reading
* a string from a fixed length field that is padded with trailing null chars)
* <p>
* @param index the index where the string begins
* @param charCount the number of charLen character elements to read
* @param charset {@link Charset}, see {@link StandardCharsets}
* @param charLen number of bytes in each character
* @return the string
* @exception IOException if an I/O error occurs
*/
private String readNextString(int charCount, Charset charset, int charLen) throws IOException {
if (charCount < 0) {
throw new IllegalArgumentException(String.format("Invalid charCount: %d", charCount));
}
byte[] bytes = readByteArray(currentIndex, charCount * charLen);
currentIndex += bytes.length;
int strLen = getLengthWithoutTrailingNullTerms(bytes, charLen);
String result = new String(bytes, 0, strLen, charset);
return result;
}
/**
* Reads a byte array of <code>nElements</code>
* starting at the current index and then increments the current
@ -463,141 +516,91 @@ public class BinaryReader {
return l;
}
////////////////////////////////////////////////////////////////////
//--------------------------------------------------------------------------------------------
// String stuff
//--------------------------------------------------------------------------------------------
private byte[] readUntilNullTerm(long index, int charLen) throws IOException {
long maxPos = provider.length() - charLen;
if (index > maxPos) {
throw new EOFException(String.format("Attempted to read string at 0x%x", index));
}
long curPos = index;
ByteArrayOutputStream baos = new ByteArrayOutputStream();
for (; curPos <= maxPos; curPos += charLen) {
byte[] bytes = readByteArray(curPos, charLen);
if (isNullTerm(bytes, 0, charLen)) {
return baos.toByteArray();
}
baos.write(bytes);
}
throw new EOFException(String.format("Unterminated string at 0x%x..0x%x", index, curPos));
}
private boolean isNullTerm(byte[] bytes, int offset, int charLen) {
for (int i = offset; i < offset + charLen; i++) {
if (bytes[i] != 0) {
return false;
}
}
return true;
}
private int getLengthWithoutTrailingNullTerms(byte[] bytes, int charLen) {
int termPos = bytes.length - charLen;
while (termPos >= 0 && isNullTerm(bytes, termPos, charLen)) {
termPos -= charLen;
}
return termPos + charLen;
}
private Charset getUTF16Charset() {
return isBigEndian() ? StandardCharsets.UTF_16BE : StandardCharsets.UTF_16LE;
}
/**
* Reads an Ascii string starting at <code>index</code>, ending
* at the next character outside the range [32..126] or when
* reaching the end of the underlying ByteProvider.
* Reads a null terminated US-ASCII string, starting at specified index, stopping at
* the first null character.
* <p>
* Note: this method no longer trims() the returned String.
* <p>
* Leading and trailing spaces will be trimmed before the string is returned.
*
* @param index the index where the Ascii string begins
* @return the trimmed Ascii string
* @exception IOException if an I/O error occurs
* @param index starting position of the string
* @return US-ASCII string, excluding the trailing null terminator character
* @throws IOException if error reading bytes
*/
public String readAsciiString(long index) throws IOException {
StringBuilder buffer = new StringBuilder();
long len = provider.length();
while (true) {
if (index == len) {
// reached the end of the bytes and found no non-ascii data
break;
}
byte b = provider.readByte(index++);
if ((b >= 32) && (b <= 126)) {
buffer.append((char) b);
}
else {
break;
}
}
return buffer.toString().trim();
return readString(index, StandardCharsets.US_ASCII, 1);
}
/**
* Returns an Ascii string of <code>length</code> bytes
* starting at <code>index</code>. This method does not
* care about null-terminators. Leading and trailing spaces
* will be trimmed before the string is returned.
* @param index the index where the Ascii string begins
* @param length the length of the Ascii string
* @return the trimmed Ascii string
* Reads an fixed length US-ASCII string starting at <code>index</code>.
* <p>
* Trailing null terminator characters will be removed. (suitable for reading
* a string from a fixed length field that is padded with trailing null chars)
* <p>
* Note: this method no longer trims() the returned String.
* <p>
* @param index where the string begins
* @param length number of bytes to read
* @return the US-ASCII string
* @exception IOException if an I/O error occurs
*/
public String readAsciiString(long index, int length) throws IOException {
byte[] readBytes = provider.readBytes(index, length);
String str = new String(readBytes, StandardCharsets.US_ASCII);
return str.trim();
return readString(index, length, StandardCharsets.US_ASCII, 1);
}
/**
* Reads an Ascii string starting at <code>index</code>, ending
* at the next {@code termChar} character byte or when reaching the end of
* the underlying ByteProvider.
* <p>
* Does NOT trim the string.
* <p>
* @param index the index where the Ascii string begins
* @return the Ascii string (excluding the terminating character)
* @exception IOException if an I/O error occurs
*/
public String readTerminatedString(long index, char termChar) throws IOException {
StringBuilder buffer = new StringBuilder();
long len = provider.length();
while (index < len) {
char c = (char) provider.readByte(index++);
if (c == termChar) {
break;
}
buffer.append(c);
}
return buffer.toString();
}
/**
* Reads an Ascii string starting at <code>index</code>, ending
* at the next character that is one of the specified {@code termChars} or when
* reaching the end of the underlying ByteProvider.
* <p>
* Does NOT trim the string.
* <p>
* @param index the index where the Ascii string begins
* @return the Ascii string (excluding the terminating character)
* @exception IOException if an I/O error occurs
*/
public String readTerminatedString(long index, String termChars) throws IOException {
StringBuilder buffer = new StringBuilder();
long len = provider.length();
while (index < len) {
char c = (char) provider.readByte(index++);
if (termChars.indexOf(c) != -1) {
break;
}
buffer.append(c);
}
return buffer.toString();
}
/**
* Reads an fixed length Ascii string starting at <code>index</code>.
* <p>
* Does NOT trim the string.
* <p>
* @param index the index where the Ascii string begins
* @param len number of bytes to read
* @return the Ascii string
* @exception IOException if an I/O error occurs
*/
public String readFixedLenAsciiString(long index, int len) throws IOException {
byte[] bytes = readByteArray(index, len);
return new String(bytes, StandardCharsets.US_ASCII);
}
/**
* Reads a null-terminated UTF-16 Unicode string starting
* at <code>index</code> using the pre-specified
* {@link #setLittleEndian(boolean) endianness}.
* Reads a null-terminated UTF-16 Unicode string starting at <code>index</code> and using
* the pre-specified {@link #setLittleEndian(boolean) endianness}.
* <p>
* The end of the string is denoted by a two-byte (ie. short) <code>null</code> character.
* <p>
* Leading and trailing spaces will be trimmed before the string is returned.
* <p>
* @param index the index where the UTF-16 Unicode string begins
* @return the trimmed UTF-16 Unicode string
* @param index where the UTF-16 Unicode string begins
* @return the UTF-16 Unicode string
* @exception IOException if an I/O error occurs
*/
public String readUnicodeString(long index) throws IOException {
StringBuilder buffer = new StringBuilder();
while (index < length()) {
int ch = readUnsignedShort(index);
if (ch == 0) {
break;
}
buffer.append((char) ch);
index += 2;
}
return buffer.toString().trim();
return readString(index, getUTF16Charset(), 2);
}
/**
@ -605,26 +608,92 @@ public class BinaryReader {
* starting at <code>index</code>, using the pre-specified
* {@link #setLittleEndian(boolean) endianness}.
* <p>
* This method does not care about null-terminators.
* <p>
* Leading and trailing spaces will be trimmed before the string is returned.
* Trailing null terminator characters will be removed. (suitable for reading
* a string from a fixed length field that is padded with trailing null chars)
* <p>
* @param index the index where the UTF-16 Unicode string begins
* @param length the number of UTF-16 character elements to read.
* @return the trimmed UTF-16 Unicode string
* @param charCount the number of UTF-16 character elements to read.
* @return the UTF-16 Unicode string
* @exception IOException if an I/O error occurs
*/
public String readUnicodeString(long index, int length) throws IOException {
StringBuilder buffer = new StringBuilder(length);
long endOffset = index + (length * 2);
while (index < endOffset) {
int ch = readUnsignedShort(index);
buffer.append((char) ch);
index += 2;
public String readUnicodeString(long index, int charCount) throws IOException {
return readString(index, charCount, getUTF16Charset(), 2);
}
return buffer.toString().trim();
/**
* Reads a null-terminated UTF-8 string starting at <code>index</code>.
* <p>
* @param index where the UTF-8 string begins
* @return the string
* @exception IOException if an I/O error occurs
*/
public String readUtf8String(long index) throws IOException {
return readString(index, StandardCharsets.UTF_8, 1);
}
/**
* Reads a fixed length UTF-8 string of <code>length</code> bytes
* starting at <code>index</code>.
* <p>
* Trailing null terminator characters will be removed. (suitable for reading
* a string from a fixed length field that is padded with trailing null chars)
* <p>
* @param index the index where the UTF-8 string begins
* @param length the number of bytes to read
* @return the string
* @exception IOException if an I/O error occurs
*/
public String readUtf8String(long index, int length) throws IOException {
return readString(index, length, StandardCharsets.UTF_8, 1);
}
/**
* Reads a fixed length string of <code>charCount</code> characters
* starting at <code>index</code>, using a specific {@link Charset}.
* <p>
* Trailing null terminator characters will be removed. (suitable for reading
* a string from a fixed length field that is padded with trailing null chars)
* <p>
* @param index the index where the string begins
* @param charCount the number of charLen character elements to read
* @param charset {@link Charset}, see {@link StandardCharsets}
* @param charLen number of bytes in each character
* @return the string
* @exception IOException if an I/O error occurs
*/
private String readString(long index, int charCount, Charset charset, int charLen)
throws IOException {
if (charCount < 0) {
throw new IllegalArgumentException(String.format("Invalid charCount: %d", charCount));
}
byte[] bytes = readByteArray(index, charCount * charLen);
int strLen = getLengthWithoutTrailingNullTerms(bytes, charLen);
String result = new String(bytes, 0, strLen, charset);
return result;
}
/**
* Reads a null-terminated string starting at <code>index</code>, using a specific
* {@link Charset}.
* <p>
* @param index where the string begins
* @param charset {@link Charset}, see {@link StandardCharsets}
* @param charLen number of bytes in each character
* @return the string
* @exception IOException if an I/O error occurs
*/
private String readString(long index, Charset charset, int charLen) throws IOException {
byte[] bytes = readUntilNullTerm(index, charLen);
String result = new String(bytes, charset);
return result;
}
//--------------------------------------------------------------------------------------------
// end String stuff
//--------------------------------------------------------------------------------------------
/**
* Returns the signed BYTE at <code>index</code>.
* @param index the index where the BYTE begins
@ -801,27 +870,6 @@ public class BinaryReader {
return arr;
}
/**
* Returns the Ascii string array of <code>nElements</code>
* starting at <code>index</code>
* @param index the index where the Ascii Strings begin
* @param nElements the number of array elements
* @return the Ascii String array
* @exception IOException if an I/O error occurs
*/
public String[] readAsciiStringArray(long index, int nElements) throws IOException {
if (nElements < 0) {
throw new IOException("Invalid number of elements specified: " + nElements);
}
String[] arr = new String[nElements];
for (int i = 0; i < nElements; ++i) {
String tmp = readAsciiString(index);
arr[i] = tmp;
index += (tmp == null ? 1 : tmp.length());
}
return arr;
}
/**
* Returns the underlying byte provider.
* @return the underlying byte provider

View file

@ -15,17 +15,17 @@
*/
package ghidra.app.util.bin.format.coff;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.io.IOException;
import java.io.InputStream;
import ghidra.app.util.bin.*;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.data.*;
import ghidra.program.model.lang.Language;
import ghidra.util.*;
import ghidra.util.exception.DuplicateNameException;
import ghidra.util.task.TaskMonitor;
@ -73,18 +73,21 @@ public class CoffSectionHeader implements StructConverter {
}
protected void readName(BinaryReader reader) throws IOException {
byte[] nameBytes = reader.readNextByteArray(CoffConstants.SECTION_NAME_LENGTH);
if (nameBytes[0] == 0 && nameBytes[1] == 0 && nameBytes[2] == 0 && nameBytes[3] == 0) {//if 1st 4 bytes are zero, then lookup name in string table
// Name field is fixed length 8 bytes.
// Can be thought of as union { struct { int32 zeroflag; int32 nameindex; }; char[8] name; }
DataConverter dc = reader.isLittleEndian() ? LittleEndianDataConverter.INSTANCE
: BigEndianDataConverter.INSTANCE;
int nameIndex = dc.getInt(nameBytes, 4);//string table index
if (reader.peekNextInt() == 0) {
// if first 4 bytes are 0, this variant is 2 x int32's. Read 2 int32 values.
reader.readNextInt(); // skip the 0
int nameIndex = reader.readNextInt();
int stringTableIndex = _header.getSymbolTablePointer() +
(_header.getSymbolTableEntries() * CoffConstants.SYMBOL_SIZEOF);
s_name = reader.readAsciiString(stringTableIndex + nameIndex);
}
else {
s_name = (new String(nameBytes)).trim();
// Read 8 chars
// TODO: support "/string_table_offset"?
s_name = reader.readNextAsciiString(CoffConstants.SECTION_NAME_LENGTH).trim();
}
}

View file

@ -94,45 +94,41 @@ public class CoffArchiveMemberHeader implements StructConverter {
* consists of a series of terminated ASCII strings.
* The longnames member is the third archive member
*/
String name =
reader.readFixedLenAsciiString(headerOffset + CAMH_NAME_OFF, CAMH_NAME_LEN).trim();
String name = reader.readAsciiString(headerOffset + CAMH_NAME_OFF, CAMH_NAME_LEN).trim();
/*
* The number of seconds since 1/1/1970 UCT
*/
String dateStr =
reader.readFixedLenAsciiString(headerOffset + CAMH_DATE_OFF, CAMH_DATE_LEN).trim();
String dateStr = reader.readAsciiString(headerOffset + CAMH_DATE_OFF, CAMH_DATE_LEN).trim();
/*
* Ascii integer string or blank
*/
String userId =
reader.readFixedLenAsciiString(headerOffset + CAMH_USERID_OFF, CAMH_USERID_LEN).trim();
reader.readAsciiString(headerOffset + CAMH_USERID_OFF, CAMH_USERID_LEN).trim();
/*
* Ascii integer string or blank
*/
String groupId = reader.readFixedLenAsciiString(headerOffset + CAMH_GROUPID_OFF,
CAMH_GROUPID_LEN).trim();
String groupId =
reader.readAsciiString(headerOffset + CAMH_GROUPID_OFF, CAMH_GROUPID_LEN).trim();
/*
* Ascii integer string of ST_MODE value from the C run-time function _wstat
*/
String mode =
reader.readFixedLenAsciiString(headerOffset + CAMH_MODE_OFF, CAMH_MODE_LEN).trim();
String mode = reader.readAsciiString(headerOffset + CAMH_MODE_OFF, CAMH_MODE_LEN).trim();
/*
* Ascii integer string representing the total size of the archive member,
* not including the header. If the name is stored at the beginning of the
* payload (ie. name == "#1/nnn"), the member's effective size needs to be adjusted.
*/
String sizeStr =
reader.readFixedLenAsciiString(headerOffset + CAMH_SIZE_OFF, CAMH_SIZE_LEN).trim();
String sizeStr = reader.readAsciiString(headerOffset + CAMH_SIZE_OFF, CAMH_SIZE_LEN).trim();
/*
* Two byte Ascii string 0x60 0x0a ("'\n")
*/
String endOfHeader = reader.readFixedLenAsciiString(headerOffset + CAMH_EOH_OFF, CAMH_EOH_LEN);
String endOfHeader = reader.readAsciiString(headerOffset + CAMH_EOH_OFF, CAMH_EOH_LEN);
if (!endOfHeader.equals(CAMH_EOH_MAGIC)) {
throw new IOException("Bad EOH magic string: " + endOfHeader);
@ -151,8 +147,7 @@ public class CoffArchiveMemberHeader implements StructConverter {
try {
int nameLen = Integer.parseInt(name.substring(3));
// name seems to be padded with trailing nulls to put payload at aligned offset
name = StringUtilities.trimTrailingNulls(
reader.readFixedLenAsciiString(payloadOffset, nameLen));
name = reader.readAsciiString(payloadOffset, nameLen);
size -= nameLen;
payloadOffset += nameLen;
} catch ( NumberFormatException nfe ) {

View file

@ -15,14 +15,15 @@
*/
package ghidra.app.util.bin.format.coff.archive;
import java.util.ArrayList;
import java.util.List;
import java.io.IOException;
import ghidra.app.util.bin.*;
import ghidra.program.model.data.*;
import ghidra.util.exception.DuplicateNameException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* A string table that contains the full filenames of COFF archive members who's actual
* filenames can not fit in the fixed-length name
@ -57,7 +58,7 @@ public final class LongNamesMember implements StructConverter {
reader.setPointerIndex(endOfStrings);
while (tmpOffset < endOfStrings) {
String s = reader.readTerminatedString(tmpOffset, LONGNAME_STR_TERM_CHARS);
String s = readTerminatedString(reader, tmpOffset);
tmpOffset += s.length() + 1;
++_nStrings;
lengths.add(s.length() + 1);
@ -70,15 +71,16 @@ public final class LongNamesMember implements StructConverter {
public String getStringAtOffset(ByteProvider provider, long offset) throws IOException {
BinaryReader reader = new BinaryReader(provider, false);
return reader.readTerminatedString(_fileOffset + offset, LONGNAME_STR_TERM_CHARS);
return readTerminatedString(reader, _fileOffset + offset);
}
@Override
public DataType toDataType() throws DuplicateNameException, IOException {
String name = StructConverterUtil.parseName(LongNamesMember.class);
String uniqueName = name + "_" + _nStrings;
Structure struct = new StructureDataType(uniqueName, 0);
for (int i = 0 ; i < _nStrings ; ++i) {
struct.add(STRING, lengths.get(i), "string["+i+"]", null);
for (int i = 0; i < _nStrings; ++i) {
struct.add(STRING, lengths.get(i), "string[" + i + "]", null);
}
return struct;
}
@ -100,4 +102,18 @@ public final class LongNamesMember implements StructConverter {
}
return nm;
}
private static String readTerminatedString(BinaryReader reader, long index) throws IOException {
StringBuilder buffer = new StringBuilder();
long len = reader.length();
while (index < len) {
char c = (char) reader.readByte(index++);
if (LONGNAME_STR_TERM_CHARS.indexOf(c) != -1) {
break;
}
buffer.append(c);
}
return buffer.toString();
}
}

View file

@ -139,7 +139,7 @@ public class DWARFAttributeFactory {
return DWARFBooleanAttribute.TRUE;
case DW_FORM_string:
return new DWARFStringAttribute(reader.readNextNullTerminatedAsciiString());
return new DWARFStringAttribute(reader.readNextUtf8String());
case DW_FORM_strp:
// Note: we can either read the string from the string section (via. the
// string table) here and put it in a DWARFStringAttribute and hope

View file

@ -15,11 +15,12 @@
*/
package ghidra.app.util.bin.format.elf;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.*;
import java.util.function.Consumer;
import java.io.IOException;
import java.io.RandomAccessFile;
import ghidra.app.util.bin.*;
import ghidra.app.util.bin.format.Writeable;
import ghidra.app.util.bin.format.elf.ElfRelocationTable.TableFormat;
@ -1277,24 +1278,17 @@ public class ElfHeader implements StructConverter, Writeable {
}
preLinkImageBase = -1L;
try {
long ptr = reader.getPointerIndex();
long fileLength = reader.getByteProvider().length();
// not enough bytes
if (fileLength < 8) {
return -1;
}
//reader.setPointerIndex(fileLength - 8);
int readInt = reader.readInt(fileLength - 8);
String readAsciiString = reader.readAsciiString(fileLength - 4, 4);
int preLinkImageBaseInt = reader.readInt(fileLength - 8);
String preLinkMagicString = reader.readAsciiString(fileLength - 4, 4).trim();
if (reader.getPointerIndex() != ptr) {
reader.setPointerIndex(ptr);
}
if (readAsciiString.equals("PRE")) {
preLinkImageBase = (readInt) & 0xffffffffL;
if (preLinkMagicString.equals("PRE")) {
preLinkImageBase = Integer.toUnsignedLong(preLinkImageBaseInt);
}
}
catch (IOException e) {

View file

@ -61,7 +61,7 @@ public class ElfStringTable implements ElfFileSection {
if (stringOffset >= length) {
throw new IOException("String read beyond table bounds");
}
return reader.readAsciiString(fileOffset + stringOffset);
return reader.readUtf8String(fileOffset + stringOffset).trim();
}
catch (IOException e) {
header.logError(

View file

@ -128,7 +128,7 @@ public class DyldChainedImport implements StructConverter {
}
public void initString(BinaryReader reader) throws IOException {
symbolName = reader.readNextNullTerminatedAsciiString();
symbolName = reader.readNextAsciiString();
}
}

View file

@ -15,10 +15,11 @@
*/
package ghidra.app.util.bin.format.macho.commands;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.io.IOException;
import ghidra.app.util.bin.BinaryReader;
import ghidra.app.util.bin.format.macho.MachHeader;
import ghidra.app.util.importer.MessageLog;
@ -42,11 +43,10 @@ public class LinkerOptionCommand extends LoadCommand {
super(reader);
count = reader.readNextInt();
linkerOptions = new ArrayList<>(count);
long readerIndex = reader.getPointerIndex();
BinaryReader stringReader = reader.clone();
for (int i = 0; i < count; i++) {
String str = reader.readTerminatedString(readerIndex, '\0');
String str = stringReader.readNextAsciiString();
linkerOptions.add(str);
readerIndex += str.length() + 1;
}
}

View file

@ -63,7 +63,7 @@ public class ArchitectureDataDirectory extends DataDirectory {
Msg.info(this, "Requesting ASCII string of size "+getSize());
return false;
}
copyright = reader.readAsciiString(ptr, getSize());
copyright = reader.readAsciiString(ptr, getSize()).trim();
return true;
}

View file

@ -15,10 +15,11 @@
*/
package ghidra.app.util.bin.format.pe;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.io.IOException;
import ghidra.app.util.bin.BinaryReader;
import ghidra.app.util.bin.ByteProvider;
import ghidra.app.util.bin.format.pe.debug.DebugDirectoryParser;
@ -116,18 +117,16 @@ public class SeparateDebugHeader implements OffsetValidator {
ptr += SectionHeader.IMAGE_SIZEOF_SECTION_HEADER;
}
long tmp = ptr;
BinaryReader stringReader = reader.clone(ptr);
List<String> exportedNameslist = new ArrayList<>();
while (true) {
String str = reader.readAsciiString(tmp);
if (str == null || str.length() == 0) {
String str = stringReader.readNextAsciiString();
if (str.isEmpty()) {
break;
}
tmp += str.length() + 1;
exportedNameslist.add(str);
}
exportedNames = new String[exportedNameslist.size()];
exportedNameslist.toArray(exportedNames);
exportedNames = exportedNameslist.toArray(String[]::new);
ptr += exportedNamesSize;

View file

@ -220,13 +220,10 @@ public class CliBlobMarshalSpec extends CliBlob {
break;
case NATIVE_TYPE_CUSTOMMARSHALER:
customMarshallerGuidOrTypeName =
reader.readTerminatedString(reader.getPointerIndex(), '\0');
customMarshallerTypeName =
reader.readTerminatedString(reader.getPointerIndex(), '\0');
if (reader.peekNextByte() > 0) {
customMarshallerCookie =
reader.readTerminatedString(reader.getPointerIndex(), '\0');
customMarshallerGuidOrTypeName = reader.readNextUtf8String();
customMarshallerTypeName = reader.readNextUtf8String();
if (reader.peekNextByte() != 0) {
customMarshallerCookie = reader.readNextUtf8String();
}
break;
@ -267,6 +264,7 @@ public class CliBlobMarshalSpec extends CliBlob {
break;
case NATIVE_TYPE_CUSTOMMARSHALER:
// TODO: length of these UTF-8 strings isn't the same as number of characters
struct.add(UTF8, customMarshallerGuidOrTypeName.length(), "", "GUID or Type Name");
struct.add(UTF8, customMarshallerGuidOrTypeName.length(), "", "Type Name");
if (customMarshallerCookie.compareTo("") != 0) {

View file

@ -18,7 +18,6 @@ package ghidra.app.util.bin.format.pe.debug;
import java.io.IOException;
import ghidra.app.util.bin.BinaryReader;
import ghidra.util.Conv;
/**
* <pre>
@ -47,7 +46,7 @@ class DataSym32_new extends DebugSymbol {
byte nameLen = reader.readByte(ptr); ptr += BinaryReader.SIZEOF_BYTE;
this.name = reader.readAsciiString(ptr, Conv.byteToInt(nameLen));
this.name = reader.readAsciiString(ptr, Byte.toUnsignedInt(nameLen));
}
int getTypeIndex() {

View file

@ -139,7 +139,7 @@ public class DebugCOFFSymbol implements StructConverter {
//
int shortVal = reader.readInt(index);
if (shortVal != 0) {
name = reader.readAsciiString(index, NAME_LENGTH);
name = reader.readAsciiString(index, NAME_LENGTH).trim();
index += 8;
}
else {

View file

@ -188,7 +188,7 @@ public class DebugCOFFSymbolAux implements StructConverter {
private String name;
private AuxFile(BinaryReader reader, int index) throws IOException {
name = reader.readAsciiString(index, DebugCOFFSymbol.IMAGE_SIZEOF_SYMBOL);
name = reader.readAsciiString(index, DebugCOFFSymbol.IMAGE_SIZEOF_SYMBOL).trim();
}
String getName() {

View file

@ -15,9 +15,10 @@
*/
package ghidra.app.util.bin.format.pe.debug;
import java.io.IOException;
import java.util.ArrayList;
import java.io.IOException;
import ghidra.app.util.bin.BinaryReader;
import ghidra.util.Conv;
@ -80,8 +81,8 @@ public class OMFSrcModuleFile {
cbName = reader.readByte(index);
index += BinaryReader.SIZEOF_BYTE;
name = reader.readAsciiString(index, cbName);
index += cbName;
name = reader.readAsciiString(index, Byte.toUnsignedInt(cbName));
index += Byte.toUnsignedInt(cbName);
for (int i = 0; i < Conv.shortToInt(cSeg); ++i) {
//OMFSrcModuleLine line = new OMFSrcModuleLine(reader, index);

View file

@ -35,8 +35,8 @@ class S_LDATA32_NEW extends DebugSymbol{
byte nameLen = reader.readByte(ptr); ptr += BinaryReader.SIZEOF_BYTE;
this.name = reader.readAsciiString(ptr, Conv.byteToInt(nameLen));
ptr+=nameLen;
this.name = reader.readAsciiString(ptr, Byte.toUnsignedInt(nameLen));
ptr += Byte.toUnsignedInt(nameLen);
int sizeOfPadding = Conv.shortToInt(length) -
BinaryReader.SIZEOF_SHORT -

View file

@ -33,7 +33,8 @@ class S_OBJNAME extends DebugSymbol {
signature = reader.readInt(ptr); ptr += BinaryReader.SIZEOF_INT;
nameLen = reader.readByte(ptr); ptr += BinaryReader.SIZEOF_BYTE;
name = reader.readAsciiString(ptr, Conv.byteToInt(nameLen)); ptr += nameLen + 1;
name = reader.readAsciiString(ptr, Byte.toUnsignedInt(nameLen));
ptr += Byte.toUnsignedInt(nameLen) + 1;
int sizeOfPadding = BinaryReader.SIZEOF_SHORT+
BinaryReader.SIZEOF_INT+

View file

@ -15,13 +15,13 @@
*/
package ghidra.app.util.bin.format.pe.resource;
import java.io.IOException;
import ghidra.app.util.bin.BinaryReader;
import ghidra.app.util.bin.StructConverter;
import ghidra.program.model.data.*;
import ghidra.util.exception.DuplicateNameException;
import java.io.IOException;
/**
* <pre>
* typedef struct _IMAGE_RESOURCE_DIRECTORY_STRING {
@ -42,12 +42,9 @@ public class ResourceDirectoryString implements StructConverter {
* @param index the index where this resource string begins
*/
public ResourceDirectoryString(BinaryReader reader, int index) throws IOException {
length = reader.readShort(index);
nameString = reader.readAsciiString(index+BinaryReader.SIZEOF_SHORT);
if (nameString.length() != length) {
//todo:
throw new IllegalArgumentException("name string length != length");
}
BinaryReader stringReader = reader.clone(index);
length = stringReader.readNextShort();
nameString = stringReader.readNextAsciiString(Short.toUnsignedInt(length));
}
/**
@ -66,6 +63,7 @@ public class ResourceDirectoryString implements StructConverter {
return nameString;
}
@Override
public DataType toDataType() throws DuplicateNameException, IOException {
StructureDataType struct = new StructureDataType(NAME+"_"+length, 0);
struct.add(WORD, "Length", null);

View file

@ -42,8 +42,9 @@ public class ResourceDirectoryStringU implements StructConverter {
* @param index the index where this resource string begins
*/
public ResourceDirectoryStringU(BinaryReader reader, int index) throws IOException {
length = reader.readShort(index);
nameString = reader.readUnicodeString(index+BinaryReader.SIZEOF_SHORT, length);
BinaryReader stringReader = reader.clone(index);
length = stringReader.readNextShort();
nameString = stringReader.readNextUnicodeString(Short.toUnsignedInt(length));
}
/**
@ -62,6 +63,7 @@ public class ResourceDirectoryStringU implements StructConverter {
return nameString;
}
@Override
public DataType toDataType() throws DuplicateNameException, IOException {
StructureDataType struct = new StructureDataType(NAME+"_"+(length*2), 0);
struct.add(WORD, "Length", null);

View file

@ -18,8 +18,10 @@ package ghidra.app.util.bin;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import java.io.EOFException;
import java.io.IOException;
import org.junit.Assert;
import org.junit.Test;
import ghidra.util.NumberUtil;
@ -276,33 +278,68 @@ public class BinaryReaderTest {
// UTF-16 Unicode String
// ------------------------------------------------------------------------------------
@Test
public void testReadUnicodeString_LE() throws IOException {
public void test_ReadUnicodeString_fixedlen_LE() throws IOException {
BinaryReader br = br(true, 1, 1, 1, 'A', 0, 'B', 0, 'C', 0, 0, 0x80, 0);
assertEquals("ABC\u8000", br.readUnicodeString(3, 4));
}
@Test
public void testReadUnicodeString_BE() throws IOException {
public void test_ReadUnicodeString__fixedlen_BE() throws IOException {
BinaryReader br = br(false, 1, 1, 1, 0, 'A', 0, 'B', 0, 'C', 0x80, 0, 0);
assertEquals("ABC\u8000", br.readUnicodeString(3, 4));
}
@Test
public void testReadTerminatedUnicodeString_LE() throws IOException {
public void test_ReadUnicodeString_LE() throws IOException {
BinaryReader br = br(true, 1, 1, 1, 'A', 0, 'B', 0, 'C', 0, 0, 0x80, 0, 0);
assertEquals("ABC\u8000", br.readUnicodeString(3));
}
@Test
public void testReadTerminatedUnicodeString_BE() throws IOException {
public void test_ReadUnicodeString_BE() throws IOException {
BinaryReader br = br(false, 1, 1, 1, 0, 'A', 0, 'B', 0, 'C', 0x80, 0, 0, 0);
assertEquals("ABC\u8000", br.readUnicodeString(3));
}
@Test
public void testReadNextUnicodeString_LE() throws IOException {
public void test_ReadUnicodeString_EmptyStr() throws IOException {
BinaryReader br = br(false, 1, 1, 1, 0, 0, 0, 'B', 0, 'C', 0x80, 0, 0, 0);
assertEquals("", br.readUnicodeString(3));
}
@Test(expected = EOFException.class)
public void test_ReadUnicodeString_EofTerminated() throws IOException {
BinaryReader br = br(false, 1, 1, 1, 0, 'A', 0, 'B', 0, 'C');
br.readUnicodeString(3);
}
@Test(expected = EOFException.class)
public void test_ReadUnicodeString_Missing_Full_Terminator() throws IOException {
BinaryReader br = br(false, 0, 'A', 0, 'B', 0, 'C', 0);
br.readUnicodeString(0);
}
@Test(expected = EOFException.class)
public void test_ReadUnicodeString_AtEof() throws IOException {
BinaryReader br = br(false, 1, 1, 1, 0, 'A', 0, 'B', 0, 'C');
br.readUnicodeString(9);
}
@Test(expected = EOFException.class)
public void test_ReadUnicodeString_PastEof() throws IOException {
BinaryReader br = br(false, 1, 1, 1, 0, 'A', 0, 'B', 0, 'C');
br.readUnicodeString(10);
}
@Test
public void test_ReadNextUnicodeString_LE() throws IOException {
BinaryReader br =
br(true, 1, 1, 1, 'A', 0, 'B', 0, 'C', 0, 0, 0x80, 0, 0, /* magic flag value */ 42);
br.setPointerIndex(3);
@ -311,7 +348,7 @@ public class BinaryReaderTest {
}
@Test
public void testReadNextUnicodeString_BE() throws IOException {
public void test_ReadNextUnicodeString_BE() throws IOException {
BinaryReader br =
br(false, 1, 1, 1, 0, 'A', 0, 'B', 0, 'C', 0x80, 0, 0, 0, /* magic flag value */ 42);
br.setPointerIndex(3);
@ -319,4 +356,194 @@ public class BinaryReaderTest {
assertEquals(42, br.readNextUnsignedByte());
}
@Test
public void test_ReadNextUnicodeString_WithTrailingWhitespace() throws IOException {
BinaryReader br =
br(false, 1, 1, 1, 0, 'A', 0, 'B', 0, 'C', 0, ' ', 0, 0, /* magic flag value */ 42);
br.setPointerIndex(3);
assertEquals("ABC ", br.readNextUnicodeString());
assertEquals(42, br.readNextUnsignedByte());
}
@Test
public void test_ReadNextUnicodeString_Empty() throws IOException {
BinaryReader br =
br(false, 1, 1, 1, 0, 0, 0, 'B', 0, 'C', 0, 0, /* magic flag value */ 42);
br.setPointerIndex(3);
assertEquals("", br.readNextUnicodeString());
assertEquals("BC", br.readNextUnicodeString());
assertEquals(42, br.readNextUnsignedByte());
}
@Test
public void test_ReadNextUnicodeString_Fixedlen() throws IOException {
BinaryReader br =
br(false, 1, 1, 1, 0, 'A', 0, 'B', 0, 'C', 0, ' ', /* magic flag value */ 42);
br.setPointerIndex(3);
assertEquals("ABC ", br.readNextUnicodeString(4));
assertEquals(42, br.readNextUnsignedByte());
}
@Test
public void test_ReadNextUnicodeString_Fixedlen_Nullterms() throws IOException {
BinaryReader br =
br(false, 1, 1, 1, 0, 'A', 0, 'B', 0, 'C', 0, 0, 0, 0, /* magic flag value */ 42);
br.setPointerIndex(3);
assertEquals("ABC", br.readNextUnicodeString(5));
assertEquals(42, br.readNextUnsignedByte());
}
@Test
public void test_ReadNextUnicodeString_Fixedlen_Emptystr() throws IOException {
BinaryReader br =
br(false, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* magic flag value */ 42);
br.setPointerIndex(3);
assertEquals("", br.readNextUnicodeString(5));
assertEquals(42, br.readNextUnsignedByte());
}
@Test
public void test_ReadNextUnicodeString_NullTerm_AtEof() throws IOException {
BinaryReader br = br(false, 0, 'A', 0, 'B', 0, 'C', 0, 0);
assertEquals("ABC", br.readNextUnicodeString());
try {
br.readNextUnicodeString();
Assert.fail();
}
catch (IOException e) {
// good
}
}
@Test
public void test_ReadNextUnicodeString_NullTerm_ToEof() throws IOException {
BinaryReader br = br(false, 0, 'A', 0, 'B', 0, 'C');
try {
br.readNextUnicodeString();
Assert.fail();
}
catch (EOFException e) {
// good
}
}
// ------------------------------------------------------------------------------------
// 'Ascii' Strings
// ------------------------------------------------------------------------------------
@Test
public void test_ReadAsciiString_NullTerm() throws IOException {
BinaryReader br = br(true, 'A', 'B', 'C', 0, /* magic flag */ 42);
assertEquals("ABC", br.readAsciiString(0));
}
@Test(expected = EOFException.class)
public void test_ReadAsciiString_Not_Terminated() throws IOException {
BinaryReader br = br(true, 'A', 'B', 'C');
assertEquals("", br.readAsciiString(0));
}
@Test(expected = EOFException.class)
public void test_ReadAsciiString_AtEof() throws IOException {
BinaryReader br = br(true, 'A', 'B', 'C');
assertEquals("", br.readAsciiString(3));
}
@Test(expected = EOFException.class)
public void test_ReadAsciiString_PastEof() throws IOException {
BinaryReader br = br(true, 'A', 'B', 'C');
br.readAsciiString(4);
}
@Test
public void test_ReadAsciiString_NotTrimed() throws IOException {
BinaryReader br = br(true, ' ', 'A', 'B', 'C', ' ', 0, /* magic flag */ 42);
assertEquals(" ABC ", br.readAsciiString(0));
}
@Test
public void test_ReadAsciiString_tab_terminates() throws IOException {
// tests the readAsciiString() doesn't terminate on a '\t' tab char
BinaryReader br = br(true, 'A', 'B', 'C', '\t', 'D', 'E', 'F', 0, /* magic flag */ 42);
assertEquals("ABC\tDEF", br.readAsciiString(0));
}
@Test
public void test_ReadNextAsciiString() throws IOException {
BinaryReader br = br(true, ' ', 'A', 'B', 'C', ' ', 0, /* magic flag */ 42);
assertEquals(" ABC ", br.readNextAsciiString());
assertEquals(42, br.readNextUnsignedByte());
}
@Test
public void test_ReadNextAsciiString_EmtpyStr() throws IOException {
BinaryReader br = br(true, ' ', 'A', 'B', 'C', ' ', 0, /* magic flag */ 42);
br.setPointerIndex(5);
assertEquals("", br.readNextAsciiString());
assertEquals(42, br.readNextUnsignedByte());
}
@Test
public void test_ReadNextAsciiString_AtEof() throws IOException {
BinaryReader br = br(true, 'A', 'B', 'C', 0);
assertEquals("ABC", br.readNextAsciiString());
try {
br.readNextAsciiString();
Assert.fail();
}
catch (IOException e) {
// good
}
}
@Test
public void test_ReadNextAsciiString_FixedLength() throws IOException {
BinaryReader br = br(true, 'A', 'B', 'C', 'D', 'E', 'F', 0, /* magic flag */ 42);
assertEquals("ABC", br.readNextAsciiString(3));
assertEquals("DEF", br.readNextAsciiString(4));
assertEquals(42, br.readNextByte());
}
@Test
public void test_ReadNextAsciiString_FixedLength_With_whitespace() throws IOException {
BinaryReader br = br(true, 'A', 'B', ' ', '\0', 'D', 'E', 'F', 0, /* magic flag */ 42);
assertEquals("AB ", br.readNextAsciiString(4));
assertEquals("DEF", br.readNextAsciiString());
assertEquals(42, br.readNextByte());
}
@Test
public void test_ReadNextAsciiString_FixedLength_EmptyStr() throws IOException {
BinaryReader br = br(true, 0, 0, 0, 'D', 'E', 'F', 0, /* magic flag */ 42);
assertEquals("", br.readNextAsciiString(3));
assertEquals("DEF", br.readNextAsciiString());
assertEquals(42, br.readNextByte());
}
@Test(expected = EOFException.class)
public void test_ReadAsciiString_ToEof() throws IOException {
BinaryReader br = br(true, 'A', 'B', 'C');
br.readAsciiString(0);
}
@Test
public void test_ReadUtf8String() throws IOException {
// tests variable length decoding
BinaryReader br = br(true, -22, -87, -107, -61, -65, 0);
assertEquals("\uaa55\u00ff", br.readUtf8String(0));
}
@Test
public void test_ReadNextUtf8String() throws IOException {
// tests variable length decoding
BinaryReader br = br(true, -22, -87, -107, -61, -65, 0, -22, -87, -107, 0);
assertEquals("\uaa55\u00ff", br.readNextUtf8String());
assertEquals("\uaa55", br.readNextUtf8String());
}
@Test
public void test_ReadUtf8String_bad_data() throws IOException {
// tests variable length decoding
BinaryReader br = br(true, -22, -87, 'A', 0);
assertEquals("\ufffdA" /* unicode replacement char, 'A'*/, br.readUtf8String(0));
}
}

View file

@ -15,12 +15,13 @@
*/
package ghidra.app.util.bin;
import static org.junit.Assert.*;
import static org.junit.Assert.assertEquals;
import java.util.Arrays;
import java.io.File;
import java.io.IOException;
import java.nio.file.AccessMode;
import java.util.Arrays;
import org.junit.Test;
@ -50,7 +51,7 @@ public class FileByteProviderTest extends AbstractGenericTest {
@Test
public void testSmallRead() throws IOException {
File file1 = createTempFileForTest("file1");
FileUtilities.writeStringToFile(file1, "testing\nsecond line");
FileUtilities.writeStringToFile(file1, "testing\0second line\0");
try (FileByteProvider fbp = new FileByteProvider(file1, null, AccessMode.READ)) {
BinaryReader br = new BinaryReader(fbp, true);
assertEquals("testing", br.readAsciiString(0));