Mirror of https://github.com/codedread/bitjs (synced 2025-10-04 10:09:16 +02:00)
Fix issue #19: Support data descriptors in unzip.
commit 2db5b3b906 (parent 97fcf8d630)
6 changed files with 123 additions and 54 deletions

archive/unzip.js (134 lines changed)
@@ -31,7 +31,7 @@ let allLocalFiles = null;
 let logToConsole = false;

 // Progress variables.
-let currentFilename = "";
+let currentFilename = '';
 let currentFileNumber = 0;
 let currentBytesUnarchivedInFile = 0;
 let currentBytesUnarchived = 0;
@@ -64,6 +64,7 @@ const zCentralFileHeaderSignature = 0x02014b50;
 const zDigitalSignatureSignature = 0x05054b50;
 const zEndOfCentralDirSignature = 0x06054b50;
 const zEndOfCentralDirLocatorSignature = 0x07064b50;
+const zDataDescriptorSignature = 0x08074b50;

 // mask for getting the Nth bit (zero-based)
 const BIT = [0x01, 0x02, 0x04, 0x08,
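Each of these signature constants is the little-endian encoding of a two-byte "PK" marker plus a record type; the newly added zDataDescriptorSignature, 0x08074b50, appears on disk as the bytes 50 4b 07 08 ("PK\x07\x08"). A minimal standalone sketch (not part of unzip.js) that recovers the on-disk byte order from one of these constants:

```js
// Standalone illustration: recover the on-disk (little-endian) byte order of a signature constant.
function signatureBytes(sig) {
  return [sig & 0xff, (sig >>> 8) & 0xff, (sig >>> 16) & 0xff, (sig >>> 24) & 0xff];
}

console.log(signatureBytes(0x08074b50).map(b => b.toString(16).padStart(2, '0')).join(' '));
// -> "50 4b 07 08"
```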
@@ -99,48 +100,95 @@ class ZipLocalFile {
     this.extraField = null;
     if (this.extraFieldLength > 0) {
       this.extraField = bstream.readString(this.extraFieldLength);
       //info(" extra field=" + this.extraField);
     }

-    // read in the compressed data
+    // Data descriptor is present if this bit is set, compressed size should be zero.
+    this.hasDataDescriptor = ((this.generalPurpose & BIT[3]) !== 0);
+    if (this.hasDataDescriptor &&
+      (this.crc32 !== 0 || this.compressedSize !== 0 || this.uncompressedSize !== 0)) {
+      err('Zip local file with a data descriptor and non-zero crc/compressedSize/uncompressedSize');
+    }
+
+    // Read in the compressed data if we have no data descriptor.
     this.fileData = null;
     if (this.compressedSize > 0) {
-      this.fileData = new Uint8Array(bstream.readBytes(this.compressedSize));
-    }
-
-    // TODO: deal with data descriptor if present (we currently assume no data descriptor!)
-    // "This descriptor exists only if bit 3 of the general purpose bit flag is set"
-    // But how do you figure out how big the file data is if you don't know the compressedSize
-    // from the header?!?
-    if ((this.generalPurpose & BIT[3]) != 0) {
-      this.crc32 = bstream.readNumber(4);
-      this.compressedSize = bstream.readNumber(4);
-      this.uncompressedSize = bstream.readNumber(4);
-    }
+      let descriptorSize = 0;
+      if (this.hasDataDescriptor) {
+        // Hold on to a reference to the bstream, since that is where the compressed file data begins.
+        let savedBstream = bstream.tee();
+
+        // Seek ahead one byte at a time, looking for the next local file header signature or the end
+        // of all local files.
+        let foundDataDescriptor = false;
+        let numBytesSeeked = 0;
+        while (!foundDataDescriptor) {
+          while (bstream.peekNumber(4) !== zLocalFileHeaderSignature &&
+            bstream.peekNumber(4) !== zArchiveExtraDataSignature &&
+            bstream.peekNumber(4) !== zCentralFileHeaderSignature) {
+            numBytesSeeked++;
+            bstream.readBytes(1);
+          }
+
+          // Copy all the read bytes into a buffer and examine the last 16 bytes to see if they are the
+          // data descriptor.
+          let bufferedByteArr = savedBstream.peekBytes(numBytesSeeked);
+          const descriptorStream = new bitjs.io.ByteStream(bufferedByteArr.buffer, numBytesSeeked - 16, 16);
+          const maybeDescriptorSig = descriptorStream.readNumber(4);
+          const maybeCrc32 = descriptorStream.readNumber(4);
+          const maybeCompressedSize = descriptorStream.readNumber(4);
+          const maybeUncompressedSize = descriptorStream.readNumber(4);
+
+          // From the PKZIP App Note: "The signature value 0x08074b50 is also used by some ZIP
+          // implementations as a marker for the Data Descriptor record".
+          if (maybeDescriptorSig === zDataDescriptorSignature) {
+            if (maybeCompressedSize === (numBytesSeeked - 16)) {
+              foundDataDescriptor = true;
+              descriptorSize = 16;
+            }
+          } else if (maybeCompressedSize === (numBytesSeeked - 12)) {
+            foundDataDescriptor = true;
+            descriptorSize = 12;
+          }
+
+          if (foundDataDescriptor) {
+            this.crc32 = maybeCrc32;
+            this.compressedSize = maybeCompressedSize;
+            this.uncompressedSize = maybeUncompressedSize;
+          }
+        }
+        bstream = savedBstream;
+      }
+
+      this.fileData = new Uint8Array(bstream.readBytes(this.compressedSize));
+      bstream.readBytes(descriptorSize);
+    }

+    // Now that we have all the bytes for this file, we can print out some information.
     if (logToConsole) {
-      info("Zip Local File Header:");
-      info(" version=" + this.version);
-      info(" general purpose=" + this.generalPurpose);
-      info(" compression method=" + this.compressionMethod);
-      info(" last mod file time=" + this.lastModFileTime);
-      info(" last mod file date=" + this.lastModFileDate);
-      info(" crc32=" + this.crc32);
-      info(" compressed size=" + this.compressedSize);
-      info(" uncompressed size=" + this.uncompressedSize);
-      info(" file name length=" + this.fileNameLength);
-      info(" extra field length=" + this.extraFieldLength);
-      info(" filename = '" + this.filename + "'");
+      info('Zip Local File Header:');
+      info(` version=${this.version}`);
+      info(` general purpose=${this.generalPurpose}`);
+      info(` compression method=${this.compressionMethod}`);
+      info(` last mod file time=${this.lastModFileTime}`);
+      info(` last mod file date=${this.lastModFileDate}`);
+      info(` crc32=${this.crc32}`);
+      info(` compressed size=${this.compressedSize}`);
+      info(` uncompressed size=${this.uncompressedSize}`);
+      info(` file name length=${this.fileNameLength}`);
+      info(` extra field length=${this.extraFieldLength}`);
+      info(` filename = '${this.filename}'`);
+      info(` hasDataDescriptor = ${this.hasDataDescriptor}`);
     }
   }

   // determine what kind of compressed data we have and decompress
   unzip() {
     if (!this.fileData) {
       err('unzip() called on a file with out compressed file data');
     }

     // Zip Version 1.0, no compression (store only)
     if (this.compressionMethod == 0) {
       if (logToConsole) {
-        info("ZIP v" + this.version + ", store only: " + this.filename + " (" + this.compressedSize + " bytes)");
+        info(`ZIP v${this.version}, store only: ${this.filename} (${this.compressedSize} bytes)`);
       }
       currentBytesUnarchivedInFile = this.compressedSize;
       currentBytesUnarchived += this.compressedSize;
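The 12-byte versus 16-byte distinction above comes from the two data descriptor layouts the ZIP format allows: crc-32, compressed size, and uncompressed size (4 bytes each), optionally preceded by the 0x08074b50 signature. Because a crc-32 value could coincidentally equal the signature, the signature alone is ambiguous, which is why the code also compares the recorded compressed size against the number of bytes seeked. A standalone sketch of reading both layouts from the tail of a byte buffer (illustrative only; the code above uses bitjs.io.ByteStream rather than DataView):

```js
// Illustrative only: parse a data descriptor sitting at the end of `bytes` (a Uint8Array),
// handling both the 12-byte (no signature) and 16-byte (signature-prefixed) layouts.
function readDataDescriptor(bytes) {
  const dv = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  // With or without the optional signature, the last 12 bytes are crc-32, compressed size,
  // and uncompressed size; the signature (if present) sits in the 4 bytes before them.
  const offset = bytes.byteLength - 12;
  const hasSignature = bytes.byteLength >= 16 &&
    dv.getUint32(bytes.byteLength - 16, true) === 0x08074b50;
  return {
    size: hasSignature ? 16 : 12,
    crc32: dv.getUint32(offset, true),
    compressedSize: dv.getUint32(offset + 4, true),
    uncompressedSize: dv.getUint32(offset + 8, true),
  };
}
```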
@@ -148,12 +196,14 @@ class ZipLocalFile {
     // version == 20, compression method == 8 (DEFLATE)
     else if (this.compressionMethod == 8) {
       if (logToConsole) {
-        info("ZIP v2.0, DEFLATE: " + this.filename + " (" + this.compressedSize + " bytes)");
+        info(`ZIP v2.0, DEFLATE: ${this.filename} (${this.compressedSize} bytes)`);
       }
       this.fileData = inflate(this.fileData, this.uncompressedSize);
     }
     else {
-      err("UNSUPPORTED VERSION/FORMAT: ZIP v" + this.version + ", compression method=" + this.compressionMethod + ": " + this.filename + " (" + this.compressedSize + " bytes)");
+      err(`UNSUPPORTED VERSION/FORMAT: ZIP v${this.version}, ` +
+        `compression method=${this.compressionMethod}: ` +
+        `${this.filename} (${this.compressedSize} bytes)`);
       this.fileData = null;
     }
   }
@@ -165,7 +215,7 @@ class ZipLocalFile {
 function getHuffmanCodes(bitLengths) {
   // ensure bitLengths is an array containing at least one element
   if (typeof bitLengths != typeof [] || bitLengths.length < 1) {
-    err("Error! getHuffmanCodes() called with an invalid array");
+    err('Error! getHuffmanCodes() called with an invalid array');
     return null;
   }
@@ -179,7 +229,7 @@ function getHuffmanCodes(bitLengths) {
     const length = bitLengths[i];
     // test to ensure each bit length is a positive, non-zero number
     if (typeof length != typeof 1 || length < 0) {
-      err("bitLengths contained an invalid number in getHuffmanCodes(): " + length + " of type " + (typeof length));
+      err(`bitLengths contained an invalid number in getHuffmanCodes(): ${length} of type ${typeof length}`);
       return null;
     }
     // increment the appropriate bitlength count
@@ -281,8 +331,8 @@ function decodeSymbol(bstream, hcTable) {
       break;
     }
     if (len > hcTable.maxLength) {
-      err("Bit stream out of sync, didn't find a Huffman Code, length was " + len +
-        " and table only max code length of " + hcTable.maxLength);
+      err(`Bit stream out of sync, didn't find a Huffman Code, length was ${len} ` +
+        `and table only max code length of ${hcTable.maxLength}`);
       break;
     }
   }
@@ -508,7 +558,7 @@ function inflate(compressedData, numDecompressedBytes) {
       const hcDistanceTable = getHuffmanCodes(distanceCodeLengths);
       blockSize = inflateBlockData(bstream, hcLiteralTable, hcDistanceTable, buffer);
     } else { // error
-      err("Error! Encountered deflate block of type 3");
+      err('Error! Encountered deflate block of type 3');
       return null;
     }
@@ -523,11 +573,13 @@ function inflate(compressedData, numDecompressedBytes) {
   return buffer.data;
 }

-function unzip() {
+function archiveUnzip() {
   let bstream = bytestream.tee();

-  // loop until we don't see any more local files
+  // loop until we don't see any more local files or we find a data descriptor.
   while (bstream.peekNumber(4) == zLocalFileHeaderSignature) {
+    // Note that this could throw an error if the bstream overflows, which is caught in the
+    // message handler.
     const oneLocalFile = new ZipLocalFile(bstream);
     // this should strip out directories/folders
     if (oneLocalFile && oneLocalFile.uncompressedSize > 0 && oneLocalFile.fileData) {
@@ -557,7 +609,7 @@ function unzip() {
   // archive extra data record
   if (bstream.peekNumber(4) == zArchiveExtraDataSignature) {
     if (logToConsole) {
-      info(" Found an Archive Extra Data Signature");
+      info(' Found an Archive Extra Data Signature');
     }

     // skipping this record for now
@@ -568,9 +620,9 @@ function unzip() {

   // central directory structure
   // TODO: handle the rest of the structures (Zip64 stuff)
-  if (bytestream.peekNumber(4) == zCentralFileHeaderSignature) {
+  if (bstream.peekNumber(4) == zCentralFileHeaderSignature) {
     if (logToConsole) {
-      info(" Found a Central File Header");
+      info(' Found a Central File Header');
     }

     // read all file headers
@@ -609,7 +661,7 @@ function unzip() {
   // digital signature
   if (bstream.peekNumber(4) == zDigitalSignatureSignature) {
     if (logToConsole) {
-      info(" Found a Digital Signature");
+      info(' Found a Digital Signature');
     }

     bstream.readNumber(4);
@@ -661,7 +713,7 @@ onmessage = function (event) {
   }

   if (unarchiveState === UnarchiveState.NOT_STARTED) {
-    currentFilename = "";
+    currentFilename = '';
     currentFileNumber = 0;
     currentBytesUnarchivedInFile = 0;
     currentBytesUnarchived = 0;
@@ -680,7 +732,7 @@ onmessage = function (event) {
   if (unarchiveState === UnarchiveState.UNARCHIVING ||
     unarchiveState === UnarchiveState.WAITING) {
     try {
-      unzip();
+      archiveUnzip();
     } catch (e) {
       if (typeof e === 'string' && e.startsWith('Error! Overflowed')) {
         // Overrun the buffer.

@@ -11,6 +11,7 @@ import { assertEquals, runTests } from './muther.js';

 const testInputs = {
   'testUnzipDeflate': 'archive-testfiles/test-unzip-deflate.json',
+  'testUnzipDescriptor': 'archive-testfiles/test-unzip-descriptor.json',
   'testUnzipStore': 'archive-testfiles/test-unzip-store.json',
   'testUnrarM1': 'archive-testfiles/test-unrar-m1.json',
   'testUnrarM2': 'archive-testfiles/test-unrar-m2.json',
@@ -23,6 +24,9 @@ const testInputs = {
   'testUntar': 'archive-testfiles/test-untar-1.json',
 };

+// TODO: It is an error for the Unarchiver worker not to terminate or send a FINISH event.
+// We need to be able to test that here.
+
 const testSuite = { tests: {} };
 for (let testName in testInputs) {
   const testInputFilename = testInputs[testName];
@@ -37,7 +41,9 @@ for (let testName in testInputs) {
       atob(testFile.archivedFile).split(',').map(str => parseInt(str)));
     const unarchivedFile = new Uint8Array(
       atob(testFile.unarchivedFile).split(',').map(str => parseInt(str)));
-    const unarchiver = getUnarchiver(archivedFile.buffer, '../');
+    const unarchiver = getUnarchiver(archivedFile.buffer, {
+      pathToBitJS: '../',
+    });
     unarchiver.addEventListener(UnarchiveEventType.EXTRACT, evt => {
       const theUnarchivedFile = evt.unarchivedFile.fileData;
       try {
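For context, a condensed sketch of driving the unarchiver the way this test does. The import path, the FINISH listener, and the start() call are assumptions based on the rest of bitjs and are not shown in this hunk:

```js
// Sketch only; the import path and start() call are assumptions, not part of this diff.
import { getUnarchiver, UnarchiveEventType } from '../archive/archive.js';

const unarchiver = getUnarchiver(archivedFile.buffer, { pathToBitJS: '../' });
unarchiver.addEventListener(UnarchiveEventType.EXTRACT, evt => {
  // evt.unarchivedFile.fileData holds the decompressed bytes as a Uint8Array.
  console.log(evt.unarchivedFile.filename, evt.unarchivedFile.fileData.byteLength);
});
unarchiver.addEventListener(UnarchiveEventType.FINISH, () => console.log('done'));
unarchiver.start();
```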

tests/archive-testfiles/README.md (new file, 5 lines)
@@ -0,0 +1,5 @@
# Test files for unarchivers.

1. Create a zip or rar file with just one file inside it.
2. Use test-uploader.html and choose the archived file and the unarchived file.
3. Paste that JSON output into a test json file.
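The JSON produced by the uploader has the shape sketched below (values shortened for illustration). The byte arrays are stored as btoa() of their comma-separated decimal values, which is why the tests decode them with atob(...).split(','):

```js
// Shortened illustration of a generated test file; real base64 strings are much longer.
window.archiveTestFile = {
  "archivedFile": "ODAsNzUs...",    // btoa("80,75,...") - the bytes of the archived file
  "unarchivedFile": "ODMsNjks..."   // btoa("83,69,...") - the bytes of the file inside it
}
```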

tests/archive-testfiles/test-unzip-descriptor.json (new file, 4 lines)
@@ -0,0 +1,4 @@
window.archiveTestFile = {
"archivedFile": "ODAsNzUsMyw0LDIwLDAsOCwwLDgsMCwyNiw0MCwzNyw3MSwwLDAsMCwwLDAsMCwwLDAsNjQsMSwwLDAsMTAsMCwyOCwwLDExNSwxMDEsOTksMTExLDExMCwxMDAsNDYsMTE2LDEyMCwxMTYsODUsODQsOSwwLDMsMTE2LDIxNywyMzQsODUsOTUsMTkwLDE3MSw5NCwxMTcsMTIwLDExLDAsMSw0LDI0MiwxODIsMSwwLDQsODMsOTUsMSwwLDExLDExOCwxMTcsMjQ2LDI0NywxMTUsODEsOCwzOCwxNDMsMjI2LDIyOSwzNCw4MywyMjcsMTc2LDIwOSwxNSwwLDgwLDc1LDcsOCwyMjQsOCwyLDkwLDE5LDAsMCwwLDY0LDEsMCwwLDgwLDc1LDEsMiwzMCwzLDIwLDAsOCwwLDgsMCwyNiw0MCwzNyw3MSwyMjQsOCwyLDkwLDE5LDAsMCwwLDY0LDEsMCwwLDEwLDAsMjQsMCwwLDAsMCwwLDEsMCwwLDAsMTY0LDEyOSwwLDAsMCwwLDExNSwxMDEsOTksMTExLDExMCwxMDAsNDYsMTE2LDEyMCwxMTYsODUsODQsNSwwLDMsMTE2LDIxNywyMzQsODUsMTE3LDEyMCwxMSwwLDEsNCwyNDIsMTgyLDEsMCw0LDgzLDk1LDEsMCw4MCw3NSw1LDYsMCwwLDAsMCwxLDAsMSwwLDgwLDAsMCwwLDEwMywwLDAsMCwwLDA=",
"unarchivedFile": "ODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMTMsMTAsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMTMsMTAsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMTMsMTAsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMTMsMTAsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMzIsODMsNjksNjcsNzksNzgsNjgsMTMsMTA="
}

@@ -12,5 +12,7 @@
     <input id="unarchive-uploader" type="file" disabled>
     <span id="unarchive-uploader-label">Select unarchived file</span>
   </div>
+  <div id='json' style='white-space: pre'>
+  </div>
 </body>
 </html>

@@ -36,7 +36,7 @@ function init() {
 function getArchivedFile(evt) {
   const filelist = evt.target.files;
   const fr = new FileReader();
-  fr.onload = function() {
+  fr.onload = function () {
     const arr = new Uint8Array(fr.result);
     archivedFileAsText = btoa(arr);
     archiveUploaderEl.setAttribute('disabled', 'true');
@@ -48,7 +48,7 @@ function getArchivedFile(evt) {
 function getUnarchivedFile(evt) {
   const filelist = evt.target.files;
   const fr = new FileReader();
-  fr.onload = function() {
+  fr.onload = function () {
     const arr = new Uint8Array(fr.result);
     unarchivedFileAsText = btoa(arr);
     unarchiveUploaderEl.setAttribute('disabled', 'true');
@@ -62,7 +62,7 @@ function output() {
   json += ' "archivedFile": "' + archivedFileAsText + '",\n';
   json += ' "unarchivedFile": "' + unarchivedFileAsText + '"\n';
   json += '}';
-  window.open('data:application/json;utf8,' + json);
+  document.getElementById('json').textContent = json;
 }

 // To turn the base64 string back into bytes:
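The snippet that originally follows this comment is cut off in this view. Purely as an illustration, a decode consistent with how the unarchive tests above consume these strings might look like:

```js
// Illustration only (the original snippet is truncated here): rebuild the bytes from
// one of the base64 strings, mirroring the test code earlier in this commit.
const bytes = new Uint8Array(atob(archivedFileAsText).split(',').map(s => parseInt(s, 10)));
```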