diff --git a/archive/unzip.js b/archive/unzip.js index 697167b..3f63944 100644 --- a/archive/unzip.js +++ b/archive/unzip.js @@ -31,7 +31,7 @@ let allLocalFiles = null; let logToConsole = false; // Progress variables. -let currentFilename = ""; +let currentFilename = ''; let currentFileNumber = 0; let currentBytesUnarchivedInFile = 0; let currentBytesUnarchived = 0; @@ -64,6 +64,7 @@ const zCentralFileHeaderSignature = 0x02014b50; const zDigitalSignatureSignature = 0x05054b50; const zEndOfCentralDirSignature = 0x06054b50; const zEndOfCentralDirLocatorSignature = 0x07064b50; +const zDataDescriptorSignature = 0x08074b50; // mask for getting the Nth bit (zero-based) const BIT = [0x01, 0x02, 0x04, 0x08, @@ -99,48 +100,95 @@ class ZipLocalFile { this.extraField = null; if (this.extraFieldLength > 0) { this.extraField = bstream.readString(this.extraFieldLength); - //info(" extra field=" + this.extraField); } - // read in the compressed data + // Data descriptor is present if this bit is set, compressed size should be zero. + this.hasDataDescriptor = ((this.generalPurpose & BIT[3]) !== 0); + if (this.hasDataDescriptor && + (this.crc32 !== 0 || this.compressedSize !== 0 || this.uncompressedSize !== 0)) { + err('Zip local file with a data descriptor and non-zero crc/compressedSize/uncompressedSize'); + } + + // Read in the compressed data if we have no data descriptor. this.fileData = null; - if (this.compressedSize > 0) { - this.fileData = new Uint8Array(bstream.readBytes(this.compressedSize)); + let descriptorSize = 0; + if (this.hasDataDescriptor) { + // Hold on to a reference to the bstream, since that is where the compressed file data begins. + let savedBstream = bstream.tee(); + + // Seek ahead one byte at a time, looking for the next local file header signature or the end + // of all local files. 
+ let foundDataDescriptor = false; + let numBytesSeeked = 0; + while (!foundDataDescriptor) { + while (bstream.peekNumber(4) !== zLocalFileHeaderSignature && + bstream.peekNumber(4) !== zArchiveExtraDataSignature && + bstream.peekNumber(4) !== zCentralFileHeaderSignature) { + numBytesSeeked++; + bstream.readBytes(1); + } + + // Copy all the read bytes into a buffer and examine the last 16 bytes to see if they are the + // data descriptor. + let bufferedByteArr = savedBstream.peekBytes(numBytesSeeked); + const descriptorStream = new bitjs.io.ByteStream(bufferedByteArr.buffer, numBytesSeeked - 16, 16); + const maybeDescriptorSig = descriptorStream.readNumber(4); + const maybeCrc32 = descriptorStream.readNumber(4); + const maybeCompressedSize = descriptorStream.readNumber(4); + const maybeUncompressedSize = descriptorStream.readNumber(4); + + // From the PKZIP App Note: "The signature value 0x08074b50 is also used by some ZIP + // implementations as a marker for the Data Descriptor record". + if (maybeDescriptorSig === zDataDescriptorSignature) { + if (maybeCompressedSize === (numBytesSeeked - 16)) { + foundDataDescriptor = true; + descriptorSize = 16; + } + } else if (maybeCompressedSize === (numBytesSeeked - 12)) { + foundDataDescriptor = true; + descriptorSize = 12; + } + + if (foundDataDescriptor) { + this.crc32 = maybeCrc32; + this.compressedSize = maybeCompressedSize; + this.uncompressedSize = maybeUncompressedSize; + } + } + bstream = savedBstream; } - // TODO: deal with data descriptor if present (we currently assume no data descriptor!) - // "This descriptor exists only if bit 3 of the general purpose bit flag is set" - // But how do you figure out how big the file data is if you don't know the compressedSize - // from the header?!? 
- if ((this.generalPurpose & BIT[3]) != 0) { - this.crc32 = bstream.readNumber(4); - this.compressedSize = bstream.readNumber(4); - this.uncompressedSize = bstream.readNumber(4); - } + this.fileData = new Uint8Array(bstream.readBytes(this.compressedSize)); + bstream.readBytes(descriptorSize); // Now that we have all the bytes for this file, we can print out some information. if (logToConsole) { - info("Zip Local File Header:"); - info(" version=" + this.version); - info(" general purpose=" + this.generalPurpose); - info(" compression method=" + this.compressionMethod); - info(" last mod file time=" + this.lastModFileTime); - info(" last mod file date=" + this.lastModFileDate); - info(" crc32=" + this.crc32); - info(" compressed size=" + this.compressedSize); - info(" uncompressed size=" + this.uncompressedSize); - info(" file name length=" + this.fileNameLength); - info(" extra field length=" + this.extraFieldLength); - info(" filename = '" + this.filename + "'"); + info('Zip Local File Header:'); + info(` version=${this.version}`); + info(` general purpose=${this.generalPurpose}`); + info(` compression method=${this.compressionMethod}`); + info(` last mod file time=${this.lastModFileTime}`); + info(` last mod file date=${this.lastModFileDate}`); + info(` crc32=${this.crc32}`); + info(` compressed size=${this.compressedSize}`); + info(` uncompressed size=${this.uncompressedSize}`); + info(` file name length=${this.fileNameLength}`); + info(` extra field length=${this.extraFieldLength}`); + info(` filename = '${this.filename}'`); + info(` hasDataDescriptor = ${this.hasDataDescriptor}`); } } // determine what kind of compressed data we have and decompress unzip() { + if (!this.fileData) { + err('unzip() called on a file with out compressed file data'); + } + // Zip Version 1.0, no compression (store only) if (this.compressionMethod == 0) { if (logToConsole) { - info("ZIP v" + this.version + ", store only: " + this.filename + " (" + this.compressedSize + " bytes)"); + 
info(`ZIP v${this.version}, store only: ${this.filename} (${this.compressedSize} bytes)`); } currentBytesUnarchivedInFile = this.compressedSize; currentBytesUnarchived += this.compressedSize; @@ -148,12 +196,14 @@ class ZipLocalFile { // version == 20, compression method == 8 (DEFLATE) else if (this.compressionMethod == 8) { if (logToConsole) { - info("ZIP v2.0, DEFLATE: " + this.filename + " (" + this.compressedSize + " bytes)"); + info(`ZIP v2.0, DEFLATE: ${this.filename} (${this.compressedSize} bytes)`); } this.fileData = inflate(this.fileData, this.uncompressedSize); } else { - err("UNSUPPORTED VERSION/FORMAT: ZIP v" + this.version + ", compression method=" + this.compressionMethod + ": " + this.filename + " (" + this.compressedSize + " bytes)"); + err(`UNSUPPORTED VERSION/FORMAT: ZIP v${this.version}, ` + + `compression method=${this.compressionMethod}: ` + + `${this.filename} (${this.compressedSize} bytes)`); this.fileData = null; } } @@ -165,7 +215,7 @@ class ZipLocalFile { function getHuffmanCodes(bitLengths) { // ensure bitLengths is an array containing at least one element if (typeof bitLengths != typeof [] || bitLengths.length < 1) { - err("Error! getHuffmanCodes() called with an invalid array"); + err('Error! 
getHuffmanCodes() called with an invalid array'); return null; } @@ -179,7 +229,7 @@ function getHuffmanCodes(bitLengths) { const length = bitLengths[i]; // test to ensure each bit length is a positive, non-zero number if (typeof length != typeof 1 || length < 0) { - err("bitLengths contained an invalid number in getHuffmanCodes(): " + length + " of type " + (typeof length)); + err(`bitLengths contained an invalid number in getHuffmanCodes(): ${length} of type ${typeof length}`); return null; } // increment the appropriate bitlength count @@ -281,8 +331,8 @@ function decodeSymbol(bstream, hcTable) { break; } if (len > hcTable.maxLength) { - err("Bit stream out of sync, didn't find a Huffman Code, length was " + len + - " and table only max code length of " + hcTable.maxLength); + err(`Bit stream out of sync, didn't find a Huffman Code, length was ${len} ` + + `and table only max code length of ${hcTable.maxLength}`); break; } } @@ -508,7 +558,7 @@ function inflate(compressedData, numDecompressedBytes) { const hcDistanceTable = getHuffmanCodes(distanceCodeLengths); blockSize = inflateBlockData(bstream, hcLiteralTable, hcDistanceTable, buffer); } else { // error - err("Error! Encountered deflate block of type 3"); + err('Error! Encountered deflate block of type 3'); return null; } @@ -523,11 +573,13 @@ function inflate(compressedData, numDecompressedBytes) { return buffer.data; } -function unzip() { +function archiveUnzip() { let bstream = bytestream.tee(); - // loop until we don't see any more local files + // loop until we don't see any more local files or we find a data descriptor. while (bstream.peekNumber(4) == zLocalFileHeaderSignature) { + // Note that this could throw an error if the bstream overflows, which is caught in the + // message handler. 
const oneLocalFile = new ZipLocalFile(bstream); // this should strip out directories/folders if (oneLocalFile && oneLocalFile.uncompressedSize > 0 && oneLocalFile.fileData) { @@ -557,7 +609,7 @@ function unzip() { // archive extra data record if (bstream.peekNumber(4) == zArchiveExtraDataSignature) { if (logToConsole) { - info(" Found an Archive Extra Data Signature"); + info(' Found an Archive Extra Data Signature'); } // skipping this record for now @@ -568,9 +620,9 @@ function unzip() { // central directory structure // TODO: handle the rest of the structures (Zip64 stuff) - if (bytestream.peekNumber(4) == zCentralFileHeaderSignature) { + if (bstream.peekNumber(4) == zCentralFileHeaderSignature) { if (logToConsole) { - info(" Found a Central File Header"); + info(' Found a Central File Header'); } // read all file headers @@ -609,7 +661,7 @@ function unzip() { // digital signature if (bstream.peekNumber(4) == zDigitalSignatureSignature) { if (logToConsole) { - info(" Found a Digital Signature"); + info(' Found a Digital Signature'); } bstream.readNumber(4); @@ -661,7 +713,7 @@ onmessage = function (event) { } if (unarchiveState === UnarchiveState.NOT_STARTED) { - currentFilename = ""; + currentFilename = ''; currentFileNumber = 0; currentBytesUnarchivedInFile = 0; currentBytesUnarchived = 0; @@ -680,7 +732,7 @@ onmessage = function (event) { if (unarchiveState === UnarchiveState.UNARCHIVING || unarchiveState === UnarchiveState.WAITING) { try { - unzip(); + archiveUnzip(); } catch (e) { if (typeof e === 'string' && e.startsWith('Error! Overflowed')) { // Overrun the buffer. 
diff --git a/tests/archive-test.js b/tests/archive-test.js index 91ec4c2..e977891 100644 --- a/tests/archive-test.js +++ b/tests/archive-test.js @@ -11,6 +11,7 @@ import { assertEquals, runTests } from './muther.js'; const testInputs = { 'testUnzipDeflate': 'archive-testfiles/test-unzip-deflate.json', + 'testUnzipDescriptor': 'archive-testfiles/test-unzip-descriptor.json', 'testUnzipStore': 'archive-testfiles/test-unzip-store.json', 'testUnrarM1': 'archive-testfiles/test-unrar-m1.json', 'testUnrarM2': 'archive-testfiles/test-unrar-m2.json', @@ -23,6 +24,9 @@ const testInputs = { 'testUntar': 'archive-testfiles/test-untar-1.json', }; +// TODO: It is an error for the Unarchiver worker not to terminate or send a FINISH event. +// We need to be able to test that here. + const testSuite = { tests: {} }; for (let testName in testInputs) { const testInputFilename = testInputs[testName]; @@ -37,7 +41,9 @@ for (let testName in testInputs) { atob(testFile.archivedFile).split(',').map(str => parseInt(str))); const unarchivedFile = new Uint8Array( atob(testFile.unarchivedFile).split(',').map(str => parseInt(str))); - const unarchiver = getUnarchiver(archivedFile.buffer, '../'); + const unarchiver = getUnarchiver(archivedFile.buffer, { + pathToBitJS: '../', + }); unarchiver.addEventListener(UnarchiveEventType.EXTRACT, evt => { const theUnarchivedFile = evt.unarchivedFile.fileData; try { diff --git a/tests/archive-testfiles/README.md b/tests/archive-testfiles/README.md new file mode 100644 index 0000000..f965951 --- /dev/null +++ b/tests/archive-testfiles/README.md @@ -0,0 +1,5 @@ +# Test files for unarchivers. + + 1. Create a zip or rar file with just one file inside it. + 2. Use test-uploader.html and choose the archived file and the unarchived file. + 3. Paste that JSON output into a test JSON file. 
diff --git a/tests/archive-testfiles/test-unzip-descriptor.json b/tests/archive-testfiles/test-unzip-descriptor.json new file mode 100644 index 0000000..71fc9a4 --- /dev/null +++ b/tests/archive-testfiles/test-unzip-descriptor.json @@ -0,0 +1,4 @@ +window.archiveTestFile = { + "archivedFile": "ODAsNzUsMyw0LDIwLDAsOCwwLDgsMCwyNiw0MCwzNyw3MSwwLDAsMCwwLDAsMCwwLDAsNjQsMSwwLDAsMTAsMCwyOCwwLDExNSwxMDEsOTksMTExLDExMCwxMDAsNDYsMTE2LDEyMCwxMTYsODUsODQsOSwwLDMsMTE2LDIxNywyMzQsODUsOTUsMTkwLDE3MSw5NCwxMTcsMTIwLDExLDAsMSw0LDI0MiwxODIsMSwwLDQsODMsOTUsMSwwLDExLDExOCwxMTcsMjQ2LDI0NywxMTUsODEsOCwzOCwxNDMsMjI2LDIyOSwzNCw4MywyMjcsMTc2LDIwOSwxNSwwLDgwLDc1LDcsOCwyMjQsOCwyLDkwLDE5LDAsMCwwLDY0LDEsMCwwLDgwLDc1LDEsMiwzMCwzLDIwLDAsOCwwLDgsMCwyNiw0MCwzNyw3MSwyMjQsOCwyLDkwLDE5LDAsMCwwLDY0LDEsMCwwLDEwLDAsMjQsMCwwLDAsMCwwLDEsMCwwLDAsMTY0LDEyOSwwLDAsMCwwLDExNSwxMDEsOTksMTExLDExMCwxMDAsNDYsMTE2LDEyMCwxMTYsODUsODQsNSwwLDMsMTE2LDIxNywyMzQsODUsMTE3LDEyMCwxMSwwLDEsNCwyNDIsMTgyLDEsMCw0LDgzLDk1LDEsMCw4MCw3NSw1LDYsMCwwLDAsMCwxLDAsMSwwLDgwLDAsMCwwLDEwMywwLDAsMCwwLDA=", + "unarchivedFile": 
"ODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMTMsMTAsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMTMsMTAsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMTMsMTAsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMTMsMTAsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMTMsMTA=" +} \ No newline at end of file diff --git a/tests/test-uploader.html b/tests/test-uploader.html index e05ae20..5fe470e 100644 --- a/tests/test-uploader.html +++ b/tests/test-uploader.html @@ -12,5 +12,7 @@ Select unarchived file +
+
diff --git a/tests/test-uploader.js b/tests/test-uploader.js index 9bdddc5..f1b5c2f 100644 --- a/tests/test-uploader.js +++ b/tests/test-uploader.js @@ -36,11 +36,11 @@ function init() { function getArchivedFile(evt) { const filelist = evt.target.files; const fr = new FileReader(); - fr.onload = function() { - const arr = new Uint8Array(fr.result); - archivedFileAsText = btoa(arr); - archiveUploaderEl.setAttribute('disabled', 'true'); - unarchiveUploaderEl.removeAttribute('disabled'); + fr.onload = function () { + const arr = new Uint8Array(fr.result); + archivedFileAsText = btoa(arr); + archiveUploaderEl.setAttribute('disabled', 'true'); + unarchiveUploaderEl.removeAttribute('disabled'); }; fr.readAsArrayBuffer(filelist[0]); } @@ -48,11 +48,11 @@ function getArchivedFile(evt) { function getUnarchivedFile(evt) { const filelist = evt.target.files; const fr = new FileReader(); - fr.onload = function() { - const arr = new Uint8Array(fr.result); - unarchivedFileAsText = btoa(arr); - unarchiveUploaderEl.setAttribute('disabled', 'true'); - output(); + fr.onload = function () { + const arr = new Uint8Array(fr.result); + unarchivedFileAsText = btoa(arr); + unarchiveUploaderEl.setAttribute('disabled', 'true'); + output(); }; fr.readAsArrayBuffer(filelist[0]); } @@ -62,7 +62,7 @@ function output() { json += ' "archivedFile": "' + archivedFileAsText + '",\n'; json += ' "unarchivedFile": "' + unarchivedFileAsText + '"\n'; json += '}'; - window.open('data:application/json;utf8,' + json); + document.getElementById('json').textContent = json; } // To turn the base64 string back into bytes: