// Mirror of https://github.com/codedread/bitjs
// Synced 2025-10-03 17:49:16 +02:00 (414 lines, 14 KiB, JavaScript)
/**
 * unzip.js
 *
 * Licensed under the MIT License
 *
 * Copyright(c) 2011 Google Inc.
 * Copyright(c) 2011 antimatter15
 *
 * Reference Documentation:
 *
 * ZIP format: http://www.pkware.com/documents/casestudies/APPNOTE.TXT
 * DEFLATE format: http://tools.ietf.org/html/rfc1951
 */
|
|
|
|
import { ByteStream } from '../io/bytestream.js';
|
|
import { ARCHIVE_EXTRA_DATA_SIG, CENTRAL_FILE_HEADER_SIG, CRC32_MAGIC_NUMBER,
|
|
DATA_DESCRIPTOR_SIG, DIGITAL_SIGNATURE_SIG, END_OF_CENTRAL_DIR_SIG,
|
|
LOCAL_FILE_HEADER_SIG } from './common.js';
|
|
import { inflate } from './inflate.js';
|
|
|
|
/**
 * Lifecycle states of the unarchiver. The current value is kept in the
 * module-level `unarchiveState` variable.
 */
const UnarchiveState = {
  // No bytes have been processed yet.
  NOT_STARTED: 0,
  // The archive is being parsed.
  UNARCHIVING: 1,
  // Parsing ran past the bytes received so far; it resumes on the next message.
  WAITING: 2,
  // The whole archive has been processed and 'finish' was posted.
  FINISHED: 3,
};
|
|
|
|
/**
 * Port used to post messages back to the host. Assigned by connect() and
 * cleared by disconnect().
 * @type {MessagePort}
 */
let hostPort;

// State - consider putting these into a class.

// One of the UnarchiveState values.
let unarchiveState = UnarchiveState.NOT_STARTED;

/**
 * Bytes received so far; created on the first message, appended to afterwards.
 * @type {ByteStream}
 */
let bytestream = null;

// Every ZipLocalFile extracted so far (an array once unarchiving has started).
let allLocalFiles = null;

// When true, diagnostic details are emitted while parsing.
let logToConsole = false;

// Progress variables.
let currentFilename = '';
let currentFileNumber = 0;
let currentBytesUnarchivedInFile = 0;
let currentBytesUnarchived = 0;
let totalUncompressedBytesInArchive = 0;
let totalFilesInArchive = 0;
|
|
|
|
// Helper functions.
|
|
/**
 * Posts an informational message to the host.
 * @param {string} str Text of the message.
 */
const info = (str) => {
  hostPort.postMessage({
    type: 'info',
    msg: str,
  });
};
|
|
/**
 * Posts an error message to the host. Only reports; it does not throw.
 * @param {string} str Text of the error.
 */
const err = (str) => {
  hostPort.postMessage({
    type: 'error',
    msg: str,
  });
};
|
|
/**
 * Posts a 'progress' message to the host containing a snapshot of the
 * module-level progress counters plus the number of bytes read so far from
 * the byte stream.
 */
const postProgress = () => {
  hostPort.postMessage({
    type: 'progress',
    currentFilename,
    currentFileNumber,
    currentBytesUnarchivedInFile,
    currentBytesUnarchived,
    totalUncompressedBytesInArchive,
    totalFilesInArchive,
    totalCompressedBytesRead: bytestream.getNumBytesRead(),
  });
};
|
|
|
|
// Masks for isolating the Nth bit (zero-based) of a 16-bit value:
// BIT[n] === 1 << n for n in 0..15.
const BIT = Array.from({ length: 16 }, (_, n) => 1 << n);
|
|
|
|
/**
 * One local file record of a ZIP archive: the fields of its local file header
 * plus the (still compressed) file data that follows the header.
 */
class ZipLocalFile {
  /**
   * Parses a local file record from the stream. The stream must be positioned
   * at a local file header signature. On return the stream has been advanced
   * past the header, the file data and, if present, the data descriptor.
   *
   * Stream reads may throw when not enough bytes are available yet; such
   * errors are left for the caller to handle.
   *
   * @param {ByteStream} bstream
   */
  constructor(bstream) {
    if (!bstream || typeof bstream !== 'object' || typeof bstream.readNumber !== 'function') {
      // A constructor cannot make `new` yield null, so an unusable stream
      // leaves the instance without any fields (no fileData, no sizes).
      return;
    }

    bstream.readNumber(4); // swallow signature
    this.version = bstream.readNumber(2);
    this.generalPurpose = bstream.readNumber(2);
    this.compressionMethod = bstream.readNumber(2);
    this.lastModFileTime = bstream.readNumber(2);
    this.lastModFileDate = bstream.readNumber(2);
    this.crc32 = bstream.readNumber(4);
    this.compressedSize = bstream.readNumber(4);
    this.uncompressedSize = bstream.readNumber(4);
    this.fileNameLength = bstream.readNumber(2);
    this.extraFieldLength = bstream.readNumber(2);

    this.filename = null;
    if (this.fileNameLength > 0) {
      this.filename = bstream.readString(this.fileNameLength);
    }

    this.extraField = null;
    if (this.extraFieldLength > 0) {
      this.extraField = bstream.readString(this.extraFieldLength);
    }

    // Data descriptor is present if this bit is set, compressed size should be zero.
    this.hasDataDescriptor = ((this.generalPurpose & BIT[3]) !== 0);
    if (this.hasDataDescriptor &&
        (this.crc32 !== 0 || this.compressedSize !== 0 || this.uncompressedSize !== 0)) {
      err('Zip local file with a data descriptor and non-zero crc/compressedSize/uncompressedSize');
    }

    /** @type {Uint8Array} */
    this.fileData = null;
    let descriptorSize = 0;
    // Stream the file data is finally read from.
    let dataStream = bstream;

    if (this.hasDataDescriptor) {
      // The sizes are only known from the descriptor that FOLLOWS the data, so
      // keep a second cursor at the start of the data and scan ahead with
      // bstream until the descriptor is located.
      const savedBstream = bstream.tee();

      let foundDataDescriptor = false;
      let numBytesSeeked = 0;
      while (!foundDataDescriptor) {
        // Seek ahead one byte at a time, looking for the next local file
        // header signature or the end of all local files.
        while (bstream.peekNumber(4) !== LOCAL_FILE_HEADER_SIG &&
               bstream.peekNumber(4) !== ARCHIVE_EXTRA_DATA_SIG &&
               bstream.peekNumber(4) !== CENTRAL_FILE_HEADER_SIG) {
          numBytesSeeked++;
          bstream.readBytes(1);
        }

        // A descriptor is at least 12 bytes (crc32, compressed size,
        // uncompressed size), optionally preceded by a 4-byte signature.
        // With fewer bytes scanned there is nothing to examine yet.
        if (numBytesSeeked >= 12) {
          const signatureFits = numBytesSeeked >= 16;
          const tailLength = signatureFits ? 16 : 12;

          // Copy all the scanned bytes and examine the tail to see whether it
          // is the data descriptor.
          const bufferedByteArr = savedBstream.peekBytes(numBytesSeeked);
          const descriptorStream = new ByteStream(
              bufferedByteArr.buffer,
              bufferedByteArr.byteOffset + numBytesSeeked - tailLength,
              tailLength);
          const maybeDescriptorSig = signatureFits ? descriptorStream.readNumber(4) : null;
          const maybeCrc32 = descriptorStream.readNumber(4);
          const maybeCompressedSize = descriptorStream.readNumber(4);
          const maybeUncompressedSize = descriptorStream.readNumber(4);

          // From the PKZIP App Note: "The signature value 0x08074b50 is also used by some ZIP
          // implementations as a marker for the Data Descriptor record".
          if (maybeDescriptorSig === DATA_DESCRIPTOR_SIG) {
            if (maybeCompressedSize === (numBytesSeeked - 16)) {
              foundDataDescriptor = true;
              descriptorSize = 16;
            }
          } else if (maybeCompressedSize === (numBytesSeeked - 12)) {
            foundDataDescriptor = true;
            descriptorSize = 12;
          }

          if (foundDataDescriptor) {
            this.crc32 = maybeCrc32;
            this.compressedSize = maybeCompressedSize;
            this.uncompressedSize = maybeUncompressedSize;
          }
        }

        if (!foundDataDescriptor) {
          // The signature we stopped at was not preceded by a valid
          // descriptor (e.g. the byte pattern occurred inside the compressed
          // data). Step over it; otherwise the scan above would stop at the
          // very same position forever.
          numBytesSeeked++;
          bstream.readBytes(1);
        }
      }
      dataStream = savedBstream;
    }

    this.fileData = new Uint8Array(dataStream.readBytes(this.compressedSize));
    dataStream.readBytes(descriptorSize);

    // Now that we have all the bytes for this file, we can print out some information.
    if (logToConsole) {
      info('Zip Local File Header:');
      info(` version=${this.version}`);
      info(` general purpose=${this.generalPurpose}`);
      info(` compression method=${this.compressionMethod}`);
      info(` last mod file time=${this.lastModFileTime}`);
      info(` last mod file date=${this.lastModFileDate}`);
      info(` crc32=${this.crc32}`);
      info(` compressed size=${this.compressedSize}`);
      info(` uncompressed size=${this.uncompressedSize}`);
      info(` file name length=${this.fileNameLength}`);
      info(` extra field length=${this.extraFieldLength}`);
      info(` filename = '${this.filename}'`);
      info(` hasDataDescriptor = ${this.hasDataDescriptor}`);
    }
  }

  /**
   * Determines what kind of compressed data this file holds and decompresses
   * it in place: afterwards `fileData` holds the uncompressed bytes, or null
   * if the compression method is unsupported (an error is reported to the
   * host in that case). Also updates the module-level progress counters.
   *
   * @returns {Promise<void>}
   */
  async unzip() {
    if (!this.fileData) {
      err('unzip() called on a file with out compressed file data');
      // Nothing to decompress; do not hand a missing buffer to inflate().
      return;
    }

    if (this.compressionMethod === 0) {
      // Zip Version 1.0, no compression (store only)
      if (logToConsole) {
        info(`ZIP v${this.version}, store only: ${this.filename} (${this.compressedSize} bytes)`);
      }
      currentBytesUnarchivedInFile = this.compressedSize;
      currentBytesUnarchived += this.compressedSize;
    } else if (this.compressionMethod === 8) {
      // version == 20, compression method == 8 (DEFLATE)
      if (logToConsole) {
        info(`ZIP v2.0, DEFLATE: ${this.filename} (${this.compressedSize} bytes)`);
      }
      this.fileData = await inflate(this.fileData, this.uncompressedSize);
      // Count inflated entries too, so progress advances for DEFLATE
      // archives and not only for stored entries.
      currentBytesUnarchivedInFile = this.uncompressedSize;
      currentBytesUnarchived += this.uncompressedSize;
    } else {
      err(`UNSUPPORTED VERSION/FORMAT: ZIP v${this.version}, ` +
          `compression method=${this.compressionMethod}: ` +
          `${this.filename} (${this.compressedSize} bytes)`);
      this.fileData = null;
    }
  }
}
|
|
|
|
/**
 * Walks the archive from the current position of the module-level
 * `bytestream`: extracts every local file (posting an 'extract' message and
 * progress for each), then reads past the archive extra data record, the
 * central directory, the digital signature and the end-of-central-directory
 * record when present, and finally posts 'finish' with the archive comment.
 *
 * Stream reads may throw when the buffered bytes run out; that error is
 * caught in the message handler.
 */
async function archiveUnzip() {
  const stream = bytestream.tee();

  // Reads consecutive little fixed-size numeric fields, in layout order, into
  // a fresh object.
  const readRecord = (layout) => {
    const record = {};
    for (const [fieldName, byteCount] of layout) {
      record[fieldName] = stream.readNumber(byteCount);
    }
    return record;
  };

  // loop until we don't see any more local files or we find a data descriptor.
  while (stream.peekNumber(4) === LOCAL_FILE_HEADER_SIG) {
    // Note that this could throw an error if the stream overflows, which is
    // caught in the message handler.
    const localFile = new ZipLocalFile(stream);

    // this should strip out directories/folders
    const isExtractable = localFile && localFile.uncompressedSize > 0 && localFile.fileData;
    if (!isExtractable) {
      continue;
    }

    // Reaching this point without an error means the data for a local file
    // was read successfully, so the actual bytestream can be moved forward.
    bytestream = stream.tee();

    allLocalFiles.push(localFile);
    totalUncompressedBytesInArchive += localFile.uncompressedSize;

    // update progress
    currentFilename = localFile.filename;
    currentFileNumber = allLocalFiles.length - 1;
    currentBytesUnarchivedInFile = 0;

    // Actually do the unzipping.
    await localFile.unzip();

    if (localFile.fileData != null) {
      hostPort.postMessage({ type: 'extract', unarchivedFile: localFile }, [localFile.fileData.buffer]);
      postProgress();
    }
  }
  totalFilesInArchive = allLocalFiles.length;

  // archive extra data record
  if (stream.peekNumber(4) === ARCHIVE_EXTRA_DATA_SIG) {
    if (logToConsole) {
      info(' Found an Archive Extra Data Signature');
    }

    // skipping this record for now
    stream.readNumber(4);
    const archiveExtraFieldLength = stream.readNumber(4);
    stream.readString(archiveExtraFieldLength);
  }

  // central directory structure
  // TODO: handle the rest of the structures (Zip64 stuff)
  if (stream.peekNumber(4) === CENTRAL_FILE_HEADER_SIG) {
    if (logToConsole) {
      info(' Found a Central File Header');
    }

    const centralHeaderLayout = [
      ['versionMadeBy', 2],
      ['versionNeededToExtract', 2],
      ['generalPurposeBitFlag', 2],
      ['compressionMethod', 2],
      ['lastModFileTime', 2],
      ['lastModFileDate', 2],
      ['crc32', 4],
      ['compressedSize', 4],
      ['uncompressedSize', 4],
      ['fileNameLength', 2],
      ['extraFieldLength', 2],
      ['fileCommentLength', 2],
      ['diskNumberStart', 2],
      ['internalFileAttributes', 2],
      ['externalFileAttributes', 4],
      ['relativeOffset', 4],
    ];

    // read all file headers
    while (stream.peekNumber(4) === CENTRAL_FILE_HEADER_SIG) {
      stream.readNumber(4); // signature
      const centralHeader = readRecord(centralHeaderLayout);
      centralHeader.fileName = stream.readString(centralHeader.fileNameLength);
      centralHeader.extraField = stream.readString(centralHeader.extraFieldLength);
      centralHeader.fileComment = stream.readString(centralHeader.fileCommentLength);
      if (logToConsole) {
        console.log('Central Directory File Header:');
        for (const field in centralHeader) {
          console.log(` ${field} = ${centralHeader[field]}`);
        }
      }
    }
  }

  // digital signature
  if (stream.peekNumber(4) === DIGITAL_SIGNATURE_SIG) {
    if (logToConsole) {
      info(' Found a Digital Signature');
    }

    stream.readNumber(4);
    const sizeOfSignature = stream.readNumber(2);
    stream.readString(sizeOfSignature); // digital signature data
  }

  const metadata = {};
  if (stream.peekNumber(4) === END_OF_CENTRAL_DIR_SIG) {
    stream.readNumber(4); // signature
    const endRecord = readRecord([
      ['numberOfThisDisk', 2],
      ['diskWhereCentralDirectoryStarts', 2],
      ['numberOfCentralDirectoryRecordsOnThisDisk', 2],
      ['totalNumberOfCentralDirectoryRecords', 2],
      ['sizeOfCentralDirectory', 4],
      ['offsetOfStartOfCentralDirectory', 4],
      ['commentLength', 2],
    ]);
    endRecord.comment = stream.readString(endRecord.commentLength);
    if (logToConsole) {
      console.log('End of Central Dir Signature:');
      for (const field in endRecord) {
        console.log(` ${field} = ${endRecord[field]}`);
      }
    }
    metadata.comment = endRecord.comment;
  }

  postProgress();

  bytestream = stream.tee();

  unarchiveState = UnarchiveState.FINISHED;
  hostPort.postMessage({ type: 'finish', metadata });
}
|
|
|
|
// Matches the string thrown when a read runs past the bytes buffered so far.
// Whitespace after "Error!" is matched loosely so that spacing differences in
// the message cannot turn a recoverable "need more bytes" condition into a
// fatal error.
const OVERFLOW_ERROR_PATTERN = /^Error!\s+Overflowed/;

/**
 * Handles a message from the host carrying archive bytes.
 *
 * event.data.file has the first ArrayBuffer.
 * event.data.bytes has all subsequent ArrayBuffers.
 * event.data.logToConsole (optional) turns diagnostic output on or off.
 *
 * The first message creates the byte stream, resets the progress counters and
 * posts 'start'; later messages append to the stream. Unarchiving is then
 * (re)attempted. If it runs out of bytes, the state becomes WAITING and the
 * next message retries; any other error is logged and rethrown.
 *
 * @param {MessageEvent} event
 * @returns {Promise<void>}
 */
const onmessage = async function (event) {
  const bytes = event.data.file || event.data.bytes;
  logToConsole = !!event.data.logToConsole;

  // This is the very first time we have been called. Initialize the bytestream.
  if (!bytestream) {
    bytestream = new ByteStream(bytes);
  } else {
    bytestream.push(bytes);
  }

  if (unarchiveState === UnarchiveState.NOT_STARTED) {
    currentFilename = '';
    currentFileNumber = 0;
    currentBytesUnarchivedInFile = 0;
    currentBytesUnarchived = 0;
    totalUncompressedBytesInArchive = 0;
    totalFilesInArchive = 0;
    allLocalFiles = [];

    hostPort.postMessage({ type: 'start' });

    unarchiveState = UnarchiveState.UNARCHIVING;

    postProgress();
  }

  if (unarchiveState === UnarchiveState.UNARCHIVING ||
      unarchiveState === UnarchiveState.WAITING) {
    try {
      await archiveUnzip();
    } catch (e) {
      if (typeof e === 'string' && OVERFLOW_ERROR_PATTERN.test(e)) {
        // Overrun the buffer: wait for more bytes from the host.
        unarchiveState = UnarchiveState.WAITING;
      } else {
        console.error('Found an error while unzipping');
        console.dir(e);
        throw e;
      }
    }
  }
};
|
|
|
|
/**
 * Connect the host to the unzip implementation with the given MessagePort:
 * the port is remembered for outgoing messages and this module's message
 * handler is installed on it.
 * @param {MessagePort} port
 * @throws {string} If a port is already connected.
 */
export function connect(port) {
  const alreadyConnected = Boolean(hostPort);
  if (alreadyConnected) {
    throw `hostPort already connected in unzip.js`;
  }

  hostPort = port;
  // hostPort and port are the same object at this point.
  hostPort.onmessage = onmessage;
}
|
|
|
|
/**
 * Forgets the connected port and puts every piece of module state back to its
 * initial value, so that connect() may be called again.
 * NOTE(review): the message handler installed on the old port is not removed
 * here - confirm callers close or discard the port.
 * @throws {string} If no port is currently connected.
 */
export function disconnect() {
  const isConnected = Boolean(hostPort);
  if (!isConnected) {
    throw `hostPort was not connected in unzip.js`;
  }

  hostPort = null;

  // Unarchiving state.
  unarchiveState = UnarchiveState.NOT_STARTED;
  bytestream = null;
  allLocalFiles = null;
  logToConsole = false;

  // Progress variables.
  currentFilename = '';
  currentFileNumber = 0;
  currentBytesUnarchivedInFile = 0;
  currentBytesUnarchived = 0;
  totalUncompressedBytesInArchive = 0;
  totalFilesInArchive = 0;
}
|