1
0
Fork 0
mirror of https://github.com/codedread/bitjs synced 2025-10-04 10:09:16 +02:00

Fix issue #19: Support data descriptors in unzip.

This commit is contained in:
codedread 2020-04-30 23:54:21 -07:00
parent 97fcf8d630
commit 2db5b3b906
6 changed files with 123 additions and 54 deletions

View file

@ -31,7 +31,7 @@ let allLocalFiles = null;
let logToConsole = false; let logToConsole = false;
// Progress variables. // Progress variables.
let currentFilename = ""; let currentFilename = '';
let currentFileNumber = 0; let currentFileNumber = 0;
let currentBytesUnarchivedInFile = 0; let currentBytesUnarchivedInFile = 0;
let currentBytesUnarchived = 0; let currentBytesUnarchived = 0;
@ -64,6 +64,7 @@ const zCentralFileHeaderSignature = 0x02014b50;
const zDigitalSignatureSignature = 0x05054b50; const zDigitalSignatureSignature = 0x05054b50;
const zEndOfCentralDirSignature = 0x06054b50; const zEndOfCentralDirSignature = 0x06054b50;
const zEndOfCentralDirLocatorSignature = 0x07064b50; const zEndOfCentralDirLocatorSignature = 0x07064b50;
const zDataDescriptorSignature = 0x08074b50;
// mask for getting the Nth bit (zero-based) // mask for getting the Nth bit (zero-based)
const BIT = [0x01, 0x02, 0x04, 0x08, const BIT = [0x01, 0x02, 0x04, 0x08,
@ -99,48 +100,95 @@ class ZipLocalFile {
this.extraField = null; this.extraField = null;
if (this.extraFieldLength > 0) { if (this.extraFieldLength > 0) {
this.extraField = bstream.readString(this.extraFieldLength); this.extraField = bstream.readString(this.extraFieldLength);
//info(" extra field=" + this.extraField);
} }
// read in the compressed data // Data descriptor is present if this bit is set, compressed size should be zero.
this.hasDataDescriptor = ((this.generalPurpose & BIT[3]) !== 0);
if (this.hasDataDescriptor &&
(this.crc32 !== 0 || this.compressedSize !== 0 || this.uncompressedSize !== 0)) {
err('Zip local file with a data descriptor and non-zero crc/compressedSize/uncompressedSize');
}
// Read in the compressed data if we have no data descriptor.
this.fileData = null; this.fileData = null;
if (this.compressedSize > 0) { let descriptorSize = 0;
this.fileData = new Uint8Array(bstream.readBytes(this.compressedSize)); if (this.hasDataDescriptor) {
// Hold on to a reference to the bstream, since that is where the compressed file data begins.
let savedBstream = bstream.tee();
// Seek ahead one byte at a time, looking for the next local file header signature or the end
// of all local files.
let foundDataDescriptor = false;
let numBytesSeeked = 0;
while (!foundDataDescriptor) {
while (bstream.peekNumber(4) !== zLocalFileHeaderSignature &&
bstream.peekNumber(4) !== zArchiveExtraDataSignature &&
bstream.peekNumber(4) !== zCentralFileHeaderSignature) {
numBytesSeeked++;
bstream.readBytes(1);
} }
// TODO: deal with data descriptor if present (we currently assume no data descriptor!) // Copy all the read bytes into a buffer and examine the last 16 bytes to see if they are the
// "This descriptor exists only if bit 3 of the general purpose bit flag is set" // data descriptor.
// But how do you figure out how big the file data is if you don't know the compressedSize let bufferedByteArr = savedBstream.peekBytes(numBytesSeeked);
// from the header?!? const descriptorStream = new bitjs.io.ByteStream(bufferedByteArr.buffer, numBytesSeeked - 16, 16);
if ((this.generalPurpose & BIT[3]) != 0) { const maybeDescriptorSig = descriptorStream.readNumber(4);
this.crc32 = bstream.readNumber(4); const maybeCrc32 = descriptorStream.readNumber(4);
this.compressedSize = bstream.readNumber(4); const maybeCompressedSize = descriptorStream.readNumber(4);
this.uncompressedSize = bstream.readNumber(4); const maybeUncompressedSize = descriptorStream.readNumber(4);
// From the PKZIP App Note: "The signature value 0x08074b50 is also used by some ZIP
// implementations as a marker for the Data Descriptor record".
if (maybeDescriptorSig === zDataDescriptorSignature) {
if (maybeCompressedSize === (numBytesSeeked - 16)) {
foundDataDescriptor = true;
descriptorSize = 16;
} }
} else if (maybeCompressedSize === (numBytesSeeked - 12)) {
foundDataDescriptor = true;
descriptorSize = 12;
}
if (foundDataDescriptor) {
this.crc32 = maybeCrc32;
this.compressedSize = maybeCompressedSize;
this.uncompressedSize = maybeUncompressedSize;
}
}
bstream = savedBstream;
}
this.fileData = new Uint8Array(bstream.readBytes(this.compressedSize));
bstream.readBytes(descriptorSize);
// Now that we have all the bytes for this file, we can print out some information. // Now that we have all the bytes for this file, we can print out some information.
if (logToConsole) { if (logToConsole) {
info("Zip Local File Header:"); info('Zip Local File Header:');
info(" version=" + this.version); info(` version=${this.version}`);
info(" general purpose=" + this.generalPurpose); info(` general purpose=${this.generalPurpose}`);
info(" compression method=" + this.compressionMethod); info(` compression method=${this.compressionMethod}`);
info(" last mod file time=" + this.lastModFileTime); info(` last mod file time=${this.lastModFileTime}`);
info(" last mod file date=" + this.lastModFileDate); info(` last mod file date=${this.lastModFileDate}`);
info(" crc32=" + this.crc32); info(` crc32=${this.crc32}`);
info(" compressed size=" + this.compressedSize); info(` compressed size=${this.compressedSize}`);
info(" uncompressed size=" + this.uncompressedSize); info(` uncompressed size=${this.uncompressedSize}`);
info(" file name length=" + this.fileNameLength); info(` file name length=${this.fileNameLength}`);
info(" extra field length=" + this.extraFieldLength); info(` extra field length=${this.extraFieldLength}`);
info(" filename = '" + this.filename + "'"); info(` filename = '${this.filename}'`);
info(` hasDataDescriptor = ${this.hasDataDescriptor}`);
} }
} }
// determine what kind of compressed data we have and decompress // determine what kind of compressed data we have and decompress
unzip() { unzip() {
if (!this.fileData) {
err('unzip() called on a file with out compressed file data');
}
// Zip Version 1.0, no compression (store only) // Zip Version 1.0, no compression (store only)
if (this.compressionMethod == 0) { if (this.compressionMethod == 0) {
if (logToConsole) { if (logToConsole) {
info("ZIP v" + this.version + ", store only: " + this.filename + " (" + this.compressedSize + " bytes)"); info(`ZIP v${this.version}, store only: ${this.filename} (${this.compressedSize} bytes)`);
} }
currentBytesUnarchivedInFile = this.compressedSize; currentBytesUnarchivedInFile = this.compressedSize;
currentBytesUnarchived += this.compressedSize; currentBytesUnarchived += this.compressedSize;
@ -148,12 +196,14 @@ class ZipLocalFile {
// version == 20, compression method == 8 (DEFLATE) // version == 20, compression method == 8 (DEFLATE)
else if (this.compressionMethod == 8) { else if (this.compressionMethod == 8) {
if (logToConsole) { if (logToConsole) {
info("ZIP v2.0, DEFLATE: " + this.filename + " (" + this.compressedSize + " bytes)"); info(`ZIP v2.0, DEFLATE: ${this.filename} (${this.compressedSize} bytes)`);
} }
this.fileData = inflate(this.fileData, this.uncompressedSize); this.fileData = inflate(this.fileData, this.uncompressedSize);
} }
else { else {
err("UNSUPPORTED VERSION/FORMAT: ZIP v" + this.version + ", compression method=" + this.compressionMethod + ": " + this.filename + " (" + this.compressedSize + " bytes)"); err(`UNSUPPORTED VERSION/FORMAT: ZIP v${this.version}, ` +
`compression method=${this.compressionMethod}: ` +
`${this.filename} (${this.compressedSize} bytes)`);
this.fileData = null; this.fileData = null;
} }
} }
@ -165,7 +215,7 @@ class ZipLocalFile {
function getHuffmanCodes(bitLengths) { function getHuffmanCodes(bitLengths) {
// ensure bitLengths is an array containing at least one element // ensure bitLengths is an array containing at least one element
if (typeof bitLengths != typeof [] || bitLengths.length < 1) { if (typeof bitLengths != typeof [] || bitLengths.length < 1) {
err("Error! getHuffmanCodes() called with an invalid array"); err('Error! getHuffmanCodes() called with an invalid array');
return null; return null;
} }
@ -179,7 +229,7 @@ function getHuffmanCodes(bitLengths) {
const length = bitLengths[i]; const length = bitLengths[i];
// test to ensure each bit length is a positive, non-zero number // test to ensure each bit length is a positive, non-zero number
if (typeof length != typeof 1 || length < 0) { if (typeof length != typeof 1 || length < 0) {
err("bitLengths contained an invalid number in getHuffmanCodes(): " + length + " of type " + (typeof length)); err(`bitLengths contained an invalid number in getHuffmanCodes(): ${length} of type ${typeof length}`);
return null; return null;
} }
// increment the appropriate bitlength count // increment the appropriate bitlength count
@ -281,8 +331,8 @@ function decodeSymbol(bstream, hcTable) {
break; break;
} }
if (len > hcTable.maxLength) { if (len > hcTable.maxLength) {
err("Bit stream out of sync, didn't find a Huffman Code, length was " + len + err(`Bit stream out of sync, didn't find a Huffman Code, length was ${len} ` +
" and table only max code length of " + hcTable.maxLength); `and table only max code length of ${hcTable.maxLength}`);
break; break;
} }
} }
@ -508,7 +558,7 @@ function inflate(compressedData, numDecompressedBytes) {
const hcDistanceTable = getHuffmanCodes(distanceCodeLengths); const hcDistanceTable = getHuffmanCodes(distanceCodeLengths);
blockSize = inflateBlockData(bstream, hcLiteralTable, hcDistanceTable, buffer); blockSize = inflateBlockData(bstream, hcLiteralTable, hcDistanceTable, buffer);
} else { // error } else { // error
err("Error! Encountered deflate block of type 3"); err('Error! Encountered deflate block of type 3');
return null; return null;
} }
@ -523,11 +573,13 @@ function inflate(compressedData, numDecompressedBytes) {
return buffer.data; return buffer.data;
} }
function unzip() { function archiveUnzip() {
let bstream = bytestream.tee(); let bstream = bytestream.tee();
// loop until we don't see any more local files // loop until we don't see any more local files or we find a data descriptor.
while (bstream.peekNumber(4) == zLocalFileHeaderSignature) { while (bstream.peekNumber(4) == zLocalFileHeaderSignature) {
// Note that this could throw an error if the bstream overflows, which is caught in the
// message handler.
const oneLocalFile = new ZipLocalFile(bstream); const oneLocalFile = new ZipLocalFile(bstream);
// this should strip out directories/folders // this should strip out directories/folders
if (oneLocalFile && oneLocalFile.uncompressedSize > 0 && oneLocalFile.fileData) { if (oneLocalFile && oneLocalFile.uncompressedSize > 0 && oneLocalFile.fileData) {
@ -557,7 +609,7 @@ function unzip() {
// archive extra data record // archive extra data record
if (bstream.peekNumber(4) == zArchiveExtraDataSignature) { if (bstream.peekNumber(4) == zArchiveExtraDataSignature) {
if (logToConsole) { if (logToConsole) {
info(" Found an Archive Extra Data Signature"); info(' Found an Archive Extra Data Signature');
} }
// skipping this record for now // skipping this record for now
@ -568,9 +620,9 @@ function unzip() {
// central directory structure // central directory structure
// TODO: handle the rest of the structures (Zip64 stuff) // TODO: handle the rest of the structures (Zip64 stuff)
if (bytestream.peekNumber(4) == zCentralFileHeaderSignature) { if (bstream.peekNumber(4) == zCentralFileHeaderSignature) {
if (logToConsole) { if (logToConsole) {
info(" Found a Central File Header"); info(' Found a Central File Header');
} }
// read all file headers // read all file headers
@ -609,7 +661,7 @@ function unzip() {
// digital signature // digital signature
if (bstream.peekNumber(4) == zDigitalSignatureSignature) { if (bstream.peekNumber(4) == zDigitalSignatureSignature) {
if (logToConsole) { if (logToConsole) {
info(" Found a Digital Signature"); info(' Found a Digital Signature');
} }
bstream.readNumber(4); bstream.readNumber(4);
@ -661,7 +713,7 @@ onmessage = function (event) {
} }
if (unarchiveState === UnarchiveState.NOT_STARTED) { if (unarchiveState === UnarchiveState.NOT_STARTED) {
currentFilename = ""; currentFilename = '';
currentFileNumber = 0; currentFileNumber = 0;
currentBytesUnarchivedInFile = 0; currentBytesUnarchivedInFile = 0;
currentBytesUnarchived = 0; currentBytesUnarchived = 0;
@ -680,7 +732,7 @@ onmessage = function (event) {
if (unarchiveState === UnarchiveState.UNARCHIVING || if (unarchiveState === UnarchiveState.UNARCHIVING ||
unarchiveState === UnarchiveState.WAITING) { unarchiveState === UnarchiveState.WAITING) {
try { try {
unzip(); archiveUnzip();
} catch (e) { } catch (e) {
if (typeof e === 'string' && e.startsWith('Error! Overflowed')) { if (typeof e === 'string' && e.startsWith('Error! Overflowed')) {
// Overrun the buffer. // Overrun the buffer.

View file

@ -11,6 +11,7 @@ import { assertEquals, runTests } from './muther.js';
const testInputs = { const testInputs = {
'testUnzipDeflate': 'archive-testfiles/test-unzip-deflate.json', 'testUnzipDeflate': 'archive-testfiles/test-unzip-deflate.json',
'testUnzipDescriptor': 'archive-testfiles/test-unzip-descriptor.json',
'testUnzipStore': 'archive-testfiles/test-unzip-store.json', 'testUnzipStore': 'archive-testfiles/test-unzip-store.json',
'testUnrarM1': 'archive-testfiles/test-unrar-m1.json', 'testUnrarM1': 'archive-testfiles/test-unrar-m1.json',
'testUnrarM2': 'archive-testfiles/test-unrar-m2.json', 'testUnrarM2': 'archive-testfiles/test-unrar-m2.json',
@ -23,6 +24,9 @@ const testInputs = {
'testUntar': 'archive-testfiles/test-untar-1.json', 'testUntar': 'archive-testfiles/test-untar-1.json',
}; };
// TODO: It is an error for the Unarchiver worker not to terminate or send a FINISH event.
// We need to be able to test that here.
const testSuite = { tests: {} }; const testSuite = { tests: {} };
for (let testName in testInputs) { for (let testName in testInputs) {
const testInputFilename = testInputs[testName]; const testInputFilename = testInputs[testName];
@ -37,7 +41,9 @@ for (let testName in testInputs) {
atob(testFile.archivedFile).split(',').map(str => parseInt(str))); atob(testFile.archivedFile).split(',').map(str => parseInt(str)));
const unarchivedFile = new Uint8Array( const unarchivedFile = new Uint8Array(
atob(testFile.unarchivedFile).split(',').map(str => parseInt(str))); atob(testFile.unarchivedFile).split(',').map(str => parseInt(str)));
const unarchiver = getUnarchiver(archivedFile.buffer, '../'); const unarchiver = getUnarchiver(archivedFile.buffer, {
pathToBitJS: '../',
});
unarchiver.addEventListener(UnarchiveEventType.EXTRACT, evt => { unarchiver.addEventListener(UnarchiveEventType.EXTRACT, evt => {
const theUnarchivedFile = evt.unarchivedFile.fileData; const theUnarchivedFile = evt.unarchivedFile.fileData;
try { try {

View file

@ -0,0 +1,5 @@
# Test files for unarchivers.
1. Create a zip or rar file with just one file inside it.
2. Use test-uploader.html and choose the archived file and the unarchived file.
3. Paste that JSON output into a test JSON file.

View file

@ -0,0 +1,4 @@
window.archiveTestFile = {
"archivedFile": "ODAsNzUsMyw0LDIwLDAsOCwwLDgsMCwyNiw0MCwzNyw3MSwwLDAsMCwwLDAsMCwwLDAsNjQsMSwwLDAsMTAsMCwyOCwwLDExNSwxMDEsOTksMTExLDExMCwxMDAsNDYsMTE2LDEyMCwxMTYsODUsODQsOSwwLDMsMTE2LDIxNywyMzQsODUsOTUsMTkwLDE3MSw5NCwxMTcsMTIwLDExLDAsMSw0LDI0MiwxODIsMSwwLDQsODMsOTUsMSwwLDExLDExOCwxMTcsMjQ2LDI0NywxMTUsODEsOCwzOCwxNDMsMjI2LDIyOSwzNCw4MywyMjcsMTc2LDIwOSwxNSwwLDgwLDc1LDcsOCwyMjQsOCwyLDkwLDE5LDAsMCwwLDY0LDEsMCwwLDgwLDc1LDEsMiwzMCwzLDIwLDAsOCwwLDgsMCwyNiw0MCwzNyw3MSwyMjQsOCwyLDkwLDE5LDAsMCwwLDY0LDEsMCwwLDEwLDAsMjQsMCwwLDAsMCwwLDEsMCwwLDAsMTY0LDEyOSwwLDAsMCwwLDExNSwxMDEsOTksMTExLDExMCwxMDAsNDYsMTE2LDEyMCwxMTYsODUsODQsNSwwLDMsMTE2LDIxNywyMzQsODUsMTE3LDEyMCwxMSwwLDEsNCwyNDIsMTgyLDEsMCw0LDgzLDk1LDEsMCw4MCw3NSw1LDYsMCwwLDAsMCwxLDAsMSwwLDgwLDAsMCwwLDEwMywwLDAsMCwwLDA=",
"unarchivedFile": "ODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMTMsMTAsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMTMsMTAsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMTMsMTAsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMTMsMTAsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMTMsMTA="
}

View file

@ -12,5 +12,7 @@
<input id="unarchive-uploader" type="file" disabled> <input id="unarchive-uploader" type="file" disabled>
<span id="unarchive-uploader-label">Select unarchived file</span> <span id="unarchive-uploader-label">Select unarchived file</span>
</div> </div>
<div id='json' style='white-space: pre'>
</div>
</body> </body>
</html> </html>

View file

@ -62,7 +62,7 @@ function output() {
json += ' "archivedFile": "' + archivedFileAsText + '",\n'; json += ' "archivedFile": "' + archivedFileAsText + '",\n';
json += ' "unarchivedFile": "' + unarchivedFileAsText + '"\n'; json += ' "unarchivedFile": "' + unarchivedFileAsText + '"\n';
json += '}'; json += '}';
window.open('data:application/json;utf8,' + json); document.getElementById('json').textContent = json;
} }
// To turn the base64 string back into bytes: // To turn the base64 string back into bytes: