1
0
Fork 0
mirror of https://github.com/codedread/bitjs synced 2025-10-03 17:49:16 +02:00

Rework decompression implementations to not be Workers, but instead communicate to the host via a MessagePort. This doesn't address issue #44 directly yet (still need a way to swap out port connection with a non-Web Worker).

This commit is contained in:
Jeff Schiller 2023-12-11 00:12:42 -08:00
parent 1ab380c7c4
commit df5c5cfa0f
6 changed files with 185 additions and 107 deletions

View file

@ -33,7 +33,7 @@ export const UnarchiveEventType = {
/**
* An unarchive event.
*/
export class UnarchiveEvent extends Event {
export class UnarchiveEvent extends Event {
/**
* @param {string} type The event type.
*/
@ -45,7 +45,7 @@ export const UnarchiveEventType = {
/**
* Updates all Archiver listeners that an append has occurred.
*/
export class UnarchiveAppendEvent extends UnarchiveEvent {
export class UnarchiveAppendEvent extends UnarchiveEvent {
/**
* @param {number} numBytes The number of bytes appended.
*/
@ -167,18 +167,34 @@ export class UnarchiveExtractEvent extends UnarchiveEvent {
* Base class for all Unarchivers.
*/
export class Unarchiver extends EventTarget {
/**
* A handle to the decompressor implementation context.
* @type {Worker|*}
* @private
*/
implRef_;
/**
* The client-side port that sends messages to, and receives messages from the
* decompressor implementation.
* @type {MessagePort}
* @private
*/
port_;
/**
* @param {ArrayBuffer} arrayBuffer The Array Buffer. Note that this ArrayBuffer must not be
* referenced once it is sent to the Unarchiver, since it is marked as Transferable and sent
* to the Worker.
* @param {Function(string):Worker} createWorkerFn A function that creates a Worker from a script file.
* to the decompress implementation.
* @param {Function(string, MessagePort):Promise<*>} connectPortFn A function that takes a path
* to a JS decompression implementation (unzip.js) and connects it to a MessagePort.
* @param {Object|string} options An optional object of options, or a string representing where
* the BitJS files are located. The string version of this argument is deprecated.
* Available options:
* 'pathToBitJS': A string indicating where the BitJS files are located.
* 'debug': A boolean where true indicates that the archivers should log debug output.
*/
constructor(arrayBuffer, createWorkerFn, options = {}) {
constructor(arrayBuffer, connectPortFn, options = {}) {
super();
if (typeof options === 'string') {
@ -195,11 +211,11 @@ export class UnarchiveExtractEvent extends UnarchiveEvent {
this.ab = arrayBuffer;
/**
* A factory method that creates a Worker that does the unarchive work.
* @type {Function(string): Worker}
* A factory method that connects a port to the decompress implementation.
* @type {Function(MessagePort): Promise<*>}
* @private
*/
this.createWorkerFn_ = createWorkerFn;
this.connectPortFn_ = connectPortFn;
/**
* The path to the BitJS files.
@ -213,13 +229,6 @@ export class UnarchiveExtractEvent extends UnarchiveEvent {
* @type {boolean}
*/
this.debugMode_ = !!(options.debug);
/**
* Private web worker initialized during start().
* @private
* @type {Worker}
*/
this.worker_ = null;
}
/**
@ -241,7 +250,7 @@ export class UnarchiveExtractEvent extends UnarchiveEvent {
}
/**
* Create an UnarchiveEvent out of the object sent back from the Worker.
* Create an UnarchiveEvent out of the object sent back from the implementation.
* @param {Object} obj
* @returns {UnarchiveEvent}
* @private
@ -276,70 +285,71 @@ export class UnarchiveExtractEvent extends UnarchiveEvent {
* @param {Object} obj
* @private
*/
handleWorkerEvent_(obj) {
handlePortEvent_(obj) {
const type = obj.type;
if (type && Object.values(UnarchiveEventType).includes(type)) {
const evt = this.createUnarchiveEvent_(obj);
this.dispatchEvent(evt);
if (evt.type == UnarchiveEventType.FINISH) {
this.worker_.terminate();
this.stop();
}
} else {
console.log(`Unknown object received from worker: ${obj}`);
console.log(`Unknown object received from port: ${obj}`);
}
}
/**
* Starts the unarchive in a separate Web Worker thread and returns immediately.
* Starts the unarchive by connecting the ports and sending the first ArrayBuffer.
*/
start() {
const me = this;
const scriptFileName = this.pathToBitJS_ + this.getScriptFileName();
if (scriptFileName) {
this.worker_ = this.createWorkerFn_(scriptFileName);
const messageChannel = new MessageChannel();
this.port_ = messageChannel.port1;
this.connectPortFn_(this.pathToBitJS_,
this.getScriptFileName(),
messageChannel.port2).then((implRef) => {
this.implRef_ = implRef;
this.worker_.onerror = function (e) {
console.log('Worker error: message = ' + e.message);
this.port_.onerror = function (e) {
console.log('Impl error: message = ' + e.message);
throw e;
};
this.worker_.onmessage = function (e) {
this.port_.onmessage = function (e) {
if (typeof e.data == 'string') {
// Just log any strings the workers pump our way.
// Just log any strings the port pumps our way.
console.log(e.data);
} else {
me.handleWorkerEvent_(e.data);
me.handlePortEvent_(e.data);
}
};
const ab = this.ab;
this.worker_.postMessage({
this.port_.postMessage({
file: ab,
logToConsole: this.debugMode_,
}, [ab]);
this.ab = null;
}
this.ab = null;
});
}
// TODO: Create a startSync() method that does not use a worker for Node.
/**
* Adds more bytes to the unarchiver's Worker thread.
* Adds more bytes to the unarchiver.
* @param {ArrayBuffer} ab The ArrayBuffer with more bytes in it. If opt_transferable is
* set to true, this ArrayBuffer must not be referenced after calling update(), since it
* is marked as Transferable and sent to the Worker.
* is marked as Transferable and sent to the implementation.
* @param {boolean=} opt_transferable Optional boolean whether to mark this ArrayBuffer
* as a Tranferable object, which means it can no longer be referenced outside of
* the Worker thread.
* the implementation context.
*/
update(ab, opt_transferable = false) {
const numBytes = ab.byteLength;
if (this.worker_) {
if (this.port_) {
// Send the ArrayBuffer over, and mark it as a Transferable object if necessary.
if (opt_transferable) {
this.worker_.postMessage({ bytes: ab }, [ab]);
this.port_.postMessage({ bytes: ab }, [ab]);
} else {
this.worker_.postMessage({ bytes: ab });
this.port_.postMessage({ bytes: ab });
}
}
@ -347,18 +357,25 @@ export class UnarchiveExtractEvent extends UnarchiveEvent {
}
/**
* Terminates the Web Worker for this Unarchiver and returns immediately.
* Closes the port to the decompressor implementation and terminates it.
*/
stop() {
if (this.worker_) {
this.worker_.terminate();
if (this.port_) {
this.port_.close();
this.port_ = null;
}
if (this.implRef_) {
if (this.implRef_ instanceof Worker) {
this.implRef_.terminate();
this.implRef_ = null;
}
}
}
}
export class UnzipperInternal extends Unarchiver {
constructor(arrayBuffer, createWorkerFn, options) {
super(arrayBuffer, createWorkerFn, options);
constructor(arrayBuffer, connectPortFn, options) {
super(arrayBuffer, connectPortFn, options);
}
getMIMEType() { return 'application/zip'; }
@ -366,8 +383,8 @@ export class UnzipperInternal extends Unarchiver {
}
export class UnrarrerInternal extends Unarchiver {
constructor(arrayBuffer, createWorkerFn, options) {
super(arrayBuffer, createWorkerFn, options);
constructor(arrayBuffer, connectPortFn, options) {
super(arrayBuffer, connectPortFn, options);
}
getMIMEType() { return 'application/x-rar-compressed'; }
@ -375,8 +392,8 @@ export class UnrarrerInternal extends Unarchiver {
}
export class UntarrerInternal extends Unarchiver {
constructor(arrayBuffer, createWorkerFn, options) {
super(arrayBuffer, createWorkerFn, options);
constructor(arrayBuffer, connectPortFn, options) {
super(arrayBuffer, connectPortFn, options);
}
getMIMEType() { return 'application/x-tar'; }
@ -387,12 +404,12 @@ export class UntarrerInternal extends Unarchiver {
* Factory method that creates an unarchiver based on the byte signature found
* in the arrayBuffer.
* @param {ArrayBuffer} ab
* @param {Function(string):Worker} createWorkerFn A function that creates a Worker from a script file.
* @param {Function(string):Promise<*>} connectPortFn A function that connects the impl port.
* @param {Object|string} options An optional object of options, or a string representing where
* the path to the unarchiver script files.
* @returns {Unarchiver}
*/
export function getUnarchiverInternal(ab, createWorkerFn, options = {}) {
export function getUnarchiverInternal(ab, connectPortFn, options = {}) {
if (ab.byteLength < 10) {
return null;
}
@ -401,11 +418,11 @@ export class UntarrerInternal extends Unarchiver {
const mimeType = findMimeType(ab);
if (mimeType === 'application/x-rar-compressed') { // Rar!
unarchiver = new UnrarrerInternal(ab, createWorkerFn, options);
unarchiver = new UnrarrerInternal(ab, connectPortFn, options);
} else if (mimeType === 'application/zip') { // PK (Zip)
unarchiver = new UnzipperInternal(ab, createWorkerFn, options);
unarchiver = new UnzipperInternal(ab, connectPortFn, options);
} else { // Try with tar
unarchiver = new UntarrerInternal(ab, createWorkerFn, options);
unarchiver = new UntarrerInternal(ab, connectPortFn, options);
}
return unarchiver;
}

View file

@ -39,46 +39,50 @@ export {
*/
/**
* The goal is to make this testable - send getUnarchiver() an array buffer of
* an archive, call start on the unarchiver, expect the returned result.
*
* Problem: It relies on Web Workers, and that won't work in a nodejs context.
* Solution: Make archive.js very thin, have it feed web-specific things into
* an internal module that is isomorphic JavaScript.
*
* TODO:
* - write unit tests for archive-internal.js that use the nodejs Worker
* equivalent.
* - maybe use @pgriess/node-webworker or @audreyt/node-webworker-threads or
* just node's worker_threads ?
*/
* Creates a WebWorker with the given decompressor implementation (i.e. unzip.js)
* and transfers a MessagePort for communication. Returns a Promise to the Worker.
* @param {string} pathToBitJS The path to the bitjs folder.
* @param {string} implFilename The decompressor implementation filename
* relative to the bitjs root (e.g. archive/unzip.js)
* @param {MessagePort} implPort The MessagePort to connect to the decompressor
* implementation.
* @returns {Promise<*>} Returns a Promise that resolves to the Worker object.
*/
const connectPortFn = (pathToBitJS, implFilename, implPort) => {
return new Promise((resolve, reject) => {
const worker = new Worker(pathToBitJS + 'archive/unarchiver-webworker.js', {
type: 'module'
});
const createWorkerFn = (scriptFilename) => new Worker(scriptFilename, { type: 'module' });
worker.postMessage({ implSrc: (pathToBitJS + implFilename), }, [implPort]);
resolve(worker);
});
};
// Thin wrappers of compressors for clients who want to construct a specific
// Thin wrappers of decompressors for clients who want to construct a specific
// unarchiver themselves rather than use getUnarchiver().
export class Unzipper extends UnzipperInternal {
constructor(ab, options) { super(ab, createWorkerFn, options); }
constructor(ab, options) { super(ab, connectPortFn, options); }
}
export class Unrarrer extends UnrarrerInternal {
constructor(ab, options) { super(ab, createWorkerFn, options); }
constructor(ab, options) { super(ab, connectPortFn, options); }
}
export class Untarrer extends UntarrerInternal {
constructor(ab, options) { super(ab, createWorkerFn, options); }
constructor(ab, options) { super(ab, connectPortFn, options); }
}
/**
* Factory method that creates an unarchiver based on the byte signature found
* in the arrayBuffer.
* in the ArrayBuffer.
* @param {ArrayBuffer} ab The ArrayBuffer to unarchive. Note that this ArrayBuffer
* must not be referenced after calling this method, as the ArrayBuffer is marked
* as Transferable and sent to a Worker thread once start() is called.
* must not be referenced after calling this method, as the ArrayBuffer may be
* tranferred to a different JS context once start() is called.
* @param {Object|string} options An optional object of options, or a string
* representing where the path to the unarchiver script files.
* @returns {Unarchiver}
*/
export function getUnarchiver(ab, options = {}) {
return getUnarchiverInternal(ab, createWorkerFn, options);
return getUnarchiverInternal(ab, connectPortFn, options);
}

View file

@ -0,0 +1,21 @@
/**
* unarchiver-webworker.js
*
* Licensed under the MIT License
*
* Copyright(c) 2023 Google Inc.
*/
/**
* A WebWorker wrapper for a decompress implementation. Upon creation and being
* sent its first message, it dynamically loads the correct decompressor and
* connects the message port
*/
/** @type {MessagePort} */
let implPort;
onmessage = async (evt) => {
const module = await import(evt.data.implSrc);
module.connect(evt.ports[0]);
};

View file

@ -11,9 +11,6 @@
// of a BitStream so that it throws properly when not enough bytes are
// present.
// This file expects to be invoked as a Worker (see onmessage below).
// TODO: Make this a plain ES Module and then write a thin wrapper around it for Worker-ization.
// This will allow us to write proper unit tests for it.
import { BitStream } from '../io/bitstream.js';
import { ByteStream } from '../io/bytestream.js';
import { ByteBuffer } from '../io/bytebuffer.js';
@ -26,6 +23,9 @@ const UnarchiveState = {
FINISHED: 3,
};
/** @type {MessagePort} */
let hostPort;
// State - consider putting these into a class.
let unarchiveState = UnarchiveState.NOT_STARTED;
let bytestream = null;
@ -42,13 +42,13 @@ let totalFilesInArchive = 0;
// Helper functions.
const info = function (str) {
postMessage({ type: 'info', msg: str });
hostPort.postMessage({ type: 'info', msg: str });
};
const err = function (str) {
postMessage({ type: 'error', msg: str });
hostPort.postMessage({ type: 'error', msg: str });
};
const postProgress = function () {
postMessage({
hostPort.postMessage({
type: 'progress',
currentFilename,
currentFileNumber,
@ -1380,7 +1380,7 @@ function unrar() {
localFile.unrar();
if (localFile.isValid) {
postMessage({ type: 'extract', unarchivedFile: localFile }, [localFile.fileData.buffer]);
hostPort.postMessage({ type: 'extract', unarchivedFile: localFile }, [localFile.fileData.buffer]);
postProgress();
}
} else if (localFile.header.packSize == 0 && localFile.header.unpackedSize == 0) {
@ -1398,7 +1398,7 @@ function unrar() {
// event.data.file has the first ArrayBuffer.
// event.data.bytes has all subsequent ArrayBuffers.
onmessage = function (event) {
const onmessage = function (event) {
const bytes = event.data.file || event.data.bytes;
logToConsole = !!event.data.logToConsole;
@ -1413,7 +1413,7 @@ onmessage = function (event) {
totalUncompressedBytesInArchive = 0;
totalFilesInArchive = 0;
allLocalFiles = [];
postMessage({ type: 'start' });
hostPort.postMessage({ type: 'start' });
} else {
bytestream.push(bytes);
}
@ -1443,7 +1443,7 @@ onmessage = function (event) {
try {
unrar();
unarchiveState = UnarchiveState.FINISHED;
postMessage({ type: 'finish', metadata: {} });
hostPort.postMessage({ type: 'finish', metadata: {} });
} catch (e) {
if (typeof e === 'string' && e.startsWith('Error! Overflowed')) {
if (logToConsole) {
@ -1459,3 +1459,15 @@ onmessage = function (event) {
}
}
};
/**
* Connect the host to the unrar implementation with the given MessagePort.
* @param {MessagePort} port
*/
export function connect(port) {
if (hostPort) {
throw `hostPort already connected`;
}
hostPort = port;
port.onmessage = onmessage;
}

View file

@ -10,9 +10,6 @@
* TAR format: http://www.gnu.org/software/automake/manual/tar/Standard.html
*/
// This file expects to be invoked as a Worker (see onmessage below).
// TODO: Make this a plain ES Module and then write a thin wrapper around it for Worker-ization.
// This will allow us to write proper unit tests for it.
import { ByteStream } from '../io/bytestream.js';
const UnarchiveState = {
@ -22,6 +19,9 @@ const UnarchiveState = {
FINISHED: 3,
};
/** @type {MessagePort} */
let hostPort;
// State - consider putting these into a class.
let unarchiveState = UnarchiveState.NOT_STARTED;
let bytestream = null;
@ -38,13 +38,13 @@ let totalFilesInArchive = 0;
// Helper functions.
const info = function (str) {
postMessage({ type: 'info', msg: str });
hostPort.postMessage({ type: 'info', msg: str });
};
const err = function (str) {
postMessage({ type: 'error', msg: str });
hostPort.postMessage({ type: 'error', msg: str });
};
const postProgress = function () {
postMessage({
hostPort.postMessage({
type: 'progress',
currentFilename,
currentFileNumber,
@ -156,7 +156,7 @@ const untar = function () {
currentFileNumber = totalFilesInArchive++;
currentBytesUnarchivedInFile = oneLocalFile.size;
currentBytesUnarchived += oneLocalFile.size;
postMessage({ type: 'extract', unarchivedFile: oneLocalFile }, [oneLocalFile.fileData.buffer]);
hostPort.postMessage({ type: 'extract', unarchivedFile: oneLocalFile }, [oneLocalFile.fileData.buffer]);
postProgress();
}
}
@ -169,7 +169,7 @@ const untar = function () {
// event.data.file has the first ArrayBuffer.
// event.data.bytes has all subsequent ArrayBuffers.
onmessage = function (event) {
const onmessage = function (event) {
const bytes = event.data.file || event.data.bytes;
logToConsole = !!event.data.logToConsole;
@ -189,7 +189,7 @@ onmessage = function (event) {
totalFilesInArchive = 0;
allLocalFiles = [];
postMessage({ type: 'start' });
hostPort.postMessage({ type: 'start' });
unarchiveState = UnarchiveState.UNARCHIVING;
@ -201,7 +201,7 @@ onmessage = function (event) {
try {
untar();
unarchiveState = UnarchiveState.FINISHED;
postMessage({ type: 'finish', metadata: {} });
hostPort.postMessage({ type: 'finish', metadata: {} });
} catch (e) {
if (typeof e === 'string' && e.startsWith('Error! Overflowed')) {
// Overrun the buffer.
@ -214,3 +214,15 @@ onmessage = function (event) {
}
}
};
/**
* Connect the host to the untar implementation with the given MessagePort.
* @param {MessagePort} port
*/
export function connect(port) {
if (hostPort) {
throw `hostPort already connected`;
}
hostPort = port;
port.onmessage = onmessage;
}

View file

@ -12,9 +12,6 @@
* DEFLATE format: http://tools.ietf.org/html/rfc1951
*/
// This file expects to be invoked as a Worker (see onmessage below).
// TODO: Make this a plain ES Module and then write a thin wrapper around it for Worker-ization.
// This will allow us to write proper unit tests for it.
import { BitStream } from '../io/bitstream.js';
import { ByteBuffer } from '../io/bytebuffer.js';
import { ByteStream } from '../io/bytestream.js';
@ -26,6 +23,9 @@ const UnarchiveState = {
FINISHED: 3,
};
/** @type {MessagePort} */
let hostPort;
// State - consider putting these into a class.
let unarchiveState = UnarchiveState.NOT_STARTED;
let bytestream = null;
@ -42,13 +42,13 @@ let totalFilesInArchive = 0;
// Helper functions.
const info = function (str) {
postMessage({ type: 'info', msg: str });
hostPort.postMessage({ type: 'info', msg: str });
};
const err = function (str) {
postMessage({ type: 'error', msg: str });
hostPort.postMessage({ type: 'error', msg: str });
};
const postProgress = function () {
postMessage({
hostPort.postMessage({
type: 'progress',
currentFilename,
currentFileNumber,
@ -628,7 +628,7 @@ function archiveUnzip() {
oneLocalFile.unzip();
if (oneLocalFile.fileData != null) {
postMessage({ type: 'extract', unarchivedFile: oneLocalFile }, [oneLocalFile.fileData.buffer]);
hostPort.postMessage({ type: 'extract', unarchivedFile: oneLocalFile }, [oneLocalFile.fileData.buffer]);
postProgress();
}
}
@ -725,12 +725,12 @@ function archiveUnzip() {
bytestream = bstream.tee();
unarchiveState = UnarchiveState.FINISHED;
postMessage({ type: 'finish', metadata });
hostPort.postMessage({ type: 'finish', metadata });
}
// event.data.file has the first ArrayBuffer.
// event.data.bytes has all subsequent ArrayBuffers.
onmessage = function (event) {
const onmessage = function (event) {
const bytes = event.data.file || event.data.bytes;
logToConsole = !!event.data.logToConsole;
@ -751,7 +751,7 @@ onmessage = function (event) {
currentBytesUnarchived = 0;
allLocalFiles = [];
postMessage({ type: 'start' });
hostPort.postMessage({ type: 'start' });
unarchiveState = UnarchiveState.UNARCHIVING;
@ -774,3 +774,15 @@ onmessage = function (event) {
}
}
};
/**
* Connect the host to the unzip implementation with the given MessagePort.
* @param {MessagePort} port
*/
export function connect(port) {
if (hostPort) {
throw `hostPort already connected`;
}
hostPort = port;
port.onmessage = onmessage;
}