From 813b154e8c495ff7b372be7f2c26f16b546b1120 Mon Sep 17 00:00:00 2001 From: Jeff Schiller Date: Sun, 4 Feb 2024 20:54:51 -0800 Subject: [PATCH] For issue #48, add Gunzipper that relies on DecompressionStream('gzip'). --- archive/decompress.js | 27 ++++ archive/gunzip.js | 125 ++++++++++++++++++ tests/archive-decompress.spec.js | 28 +++- .../archive-testfiles/sample-1-slowest.txt.gz | Bin 0 -> 176 bytes 4 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 archive/gunzip.js create mode 100644 tests/archive-testfiles/sample-1-slowest.txt.gz diff --git a/archive/decompress.js b/archive/decompress.js index 0f45f44..3d7dcfb 100644 --- a/archive/decompress.js +++ b/archive/decompress.js @@ -319,6 +319,31 @@ export class Untarrer extends Unarchiver { getScriptFileName() { return './untar.js'; }; } +/** + * IMPORTANT NOTES for Gunzipper: + * 1) A Gunzipper will only ever emit one EXTRACT event, because a gzipped file only ever contains + * a single file. + * 2) If the gzipped file does not include the original filename as a FNAME block, then the + * UnarchivedFile in the UnarchiveExtractEvent will not include a filename. It will be up to the + * client to re-assemble the filename (if needed). + * 3) update() is not supported on a Gunzipper, since the current implementation relies on runtime + * support for DecompressionStream('gzip'), which can throw hard-to-detect errors when it is + * given only part of a file. + * 4) PROGRESS events are not yet supported in Gunzipper. + */ +export class Gunzipper extends Unarchiver { + /** + * @param {ArrayBuffer} ab + * @param {UnarchiverOptions} options + */ + constructor(ab, options = {}) { + super(ab, options); + } + + getMIMEType() { return 'application/gzip'; } + getScriptFileName() { return './gunzip.js'; } +} + // TODO(2.0): When up-revving to a major new version, remove the string type for options.
/**
@@ -344,6 +369,8 @@ export function getUnarchiver(ab, options = {}) {
     unarchiver = new Unrarrer(ab, options);
   } else if (mimeType === 'application/zip') { // PK (Zip)
     unarchiver = new Unzipper(ab, options);
+  } else if (mimeType === 'application/gzip') { // GZIP
+    unarchiver = new Gunzipper(ab, options);
   } else { // Try with tar
     unarchiver = new Untarrer(ab, options);
   }
diff --git a/archive/gunzip.js b/archive/gunzip.js
new file mode 100644
index 0000000..2f32631
--- /dev/null
+++ b/archive/gunzip.js
@@ -0,0 +1,125 @@
+/**
+ * gunzip.js
+ *
+ * Licensed under the MIT License
+ *
+ * Copyright(c) 2024 Google Inc.
+ *
+ * Reference Documentation:
+ *
+ * https://www.ietf.org/rfc/rfc1952.txt
+ */
+
+import { BitStream } from '../io/bitstream.js';
+import { ByteStream } from '../io/bytestream.js';
+
+/** @type {MessagePort} */
+let hostPort;
+
+/** @type {ByteStream} */
+let bstream = null;
+// undefined unless a FNAME block is present.
+let filename;
+
+const err = str => hostPort.postMessage({ type: 'error', msg: str });
+
+// Reads the gzip header (RFC 1952) for an optional FNAME, inflates the whole buffer with the native
+// DecompressionStream('gzip'), then posts 'extract' and 'finish'; a bad header posts 'error' instead.
+async function gunzip() {
+  const sig = bstream.readBytes(2);
+  if (sig[0] !== 0x1F || sig[1] !== 0x8B) {
+    err(`First two bytes not 0x1F, 0x8B: ${sig[0].toString(16)} ${sig[1].toString(16)}`);
+    return;
+  }
+  const compressionMethod = bstream.readNumber(1);
+  if (compressionMethod !== 8) {
+    err(`Compression method ${compressionMethod} not supported`);
+    return;
+  }
+
+  // Parse the GZIP header to see if we can find a filename (FNAME block).
+  const flags = new BitStream(bstream.readBytes(1).buffer);
+  flags.skip(1); // skip FTEXT bit
+  const fhcrc = flags.readBits(1);
+  const fextra = flags.readBits(1);
+  const fname = flags.readBits(1);
+  const fcomment = flags.readBits(1);
+
+  bstream.skip(4); // MTIME
+  bstream.skip(1); // XFL
+  bstream.skip(1); // OS
+
+  if (fextra) {
+    const xlen = bstream.readNumber(2);
+    bstream.skip(xlen);
+  }
+
+  if (fname) {
+    // Find the null-terminator byte.
+    let numBytes = 0;
+    const findNull = bstream.tee();
+    while (findNull.readNumber(1) !== 0) numBytes++;
+    filename = bstream.readString(numBytes);
+    bstream.skip(1); // Consume the NUL terminator so the next header field starts at the right byte.
+  }
+
+  if (fcomment) {
+    // Find the null-terminator byte.
+    let numBytes = 0;
+    const findNull = bstream.tee();
+    while (findNull.readNumber(1) !== 0) numBytes++;
+    bstream.skip(numBytes + 1); // COMMENT plus its NUL terminator
+  }
+
+  if (fhcrc) bstream.skip(2); // CRC16
+
+  // Now try to use native implementation of INFLATE, if supported by the runtime.
+  // The stream is handed the entire buffer (header included), not just the bytes left in bstream.
+  const blob = new Blob([bstream.bytes.buffer]);
+  const decompressedStream = blob.stream().pipeThrough(new DecompressionStream('gzip'));
+  const fileData = new Uint8Array(await new Response(decompressedStream).arrayBuffer());
+  const unarchivedFile = { filename, fileData };
+  hostPort.postMessage({ type: 'extract', unarchivedFile }, [fileData.buffer]);
+
+  // TODO: Support chunked decompression?
+  // TODO: Fall through to non-native implementation via inflate() ?
+  hostPort.postMessage({ type: 'finish', metadata: {} });
+}
+
+// event.data.file has the first ArrayBuffer.
+const onmessage = async function (event) {
+  if (bstream) {
+    err(`Gunzipper does not support calling update() with more bytes. Send the whole file with start().`);
+    return;
+  }
+  bstream = new ByteStream(event.data.file);
+  bstream.setLittleEndian(true);
+  try {
+    await gunzip();
+  } catch (e) { // Nothing awaits this handler, so report the failure to the host instead.
+    err(`Gunzip failed: ${e instanceof Error ? e.message : e}`);
+  }
+};
+
+/**
+ * Connect the host to the gunzip implementation with the given MessagePort.
+ * @param {MessagePort} port
+ * @throws {Error} If a port is already connected.
+ */
+export function connect(port) {
+  if (hostPort) {
+    throw new Error(`connect(): hostPort already connected in gunzip.js`);
+  }
+  hostPort = port;
+  port.onmessage = onmessage;
+}
+
+/** Detaches from the host port and resets module state so connect() can be called again. */
+export function disconnect() {
+  if (!hostPort) {
+    throw new Error(`disconnect(): hostPort was not connected in gunzip.js`);
+  }
+  hostPort = null;
+  bstream = null;
+  filename = undefined;
+}
diff --git a/tests/archive-decompress.spec.js b/tests/archive-decompress.spec.js
index 8cd5db3..c0c38d5 100644
--- a/tests/archive-decompress.spec.js
+++ b/tests/archive-decompress.spec.js
@@ -2,7 +2,7 @@
 import * as fs from 'node:fs';
 import 'mocha';
 import { expect } from 'chai';
-import { Unarchiver, Unrarrer, Untarrer, Unzipper, getUnarchiver } from '../archive/decompress.js';
+import { Gunzipper, Unarchiver, getUnarchiver } from '../archive/decompress.js';
 
 const PATH = `tests/archive-testfiles/`;
 
@@ -69,4 +69,30 @@ describe('bitjs.archive.decompress', () => {
       expect(extractEvtFiredForOnExtract).equals(true);
     });
   }
+
+  describe('gunzip', () => {
+    it('can unzip a file', async () => {
+      const bufs = new Map(inputArrayBuffers);
+      const nodeBuf = fs.readFileSync(`${PATH}sample-1-slowest.txt.gz`);
+      const ab = nodeBuf.buffer.slice(nodeBuf.byteOffset, nodeBuf.byteOffset + nodeBuf.length);
+      const gunzipper = getUnarchiver(ab, {debug: true});
+      expect(gunzipper instanceof Gunzipper).equals(true);
+      let extractEvtFiredForOnExtract = false;
+
+      gunzipper.onExtract(evt => {
+        extractEvtFiredForOnExtract = true;
+        const {filename, fileData} = evt.unarchivedFile;
+        expect(filename).equals('sample-1.txt');
+
+        const expected = new Uint8Array(bufs.get('sample-1.txt')); // indexable bytes of the fixture
+        expect(fileData.byteLength).equals(expected.byteLength);
+        for (let b = 0; b < fileData.byteLength; ++b) {
+          expect(fileData[b]).equals(expected[b]);
+        }
+      });
+
+      await gunzipper.start();
+      expect(extractEvtFiredForOnExtract).equals(true);
+    });
+  });
 });
diff --git a/tests/archive-testfiles/sample-1-slowest.txt.gz
b/tests/archive-testfiles/sample-1-slowest.txt.gz new file mode 100644 index 0000000000000000000000000000000000000000..f284e4fe8baa3add300d489a0b539c9224ace169 GIT binary patch literal 176 zcmV;h08jrPiwFp?_Izam19M?*aBO8QF)nm?bO7y--3`Mq2!!7~#SKtp8ZUqx8l(76 zHc_iiUz}Eb>jsf{f&1V%A7#LbFetagP<|@zl6Zli+OMBfqP{V>)MJa#icC8rbS9Ou zxtFw)9LTjBUz9tKDcvxn1Vd|XHX2e3B2Cv2X$p4hQTq;?)5oQ|??b1zM$&U*JzA1L e%9CI1^#akwIe+E2a$Gt7=6D08o|T;F0{{TAT~QtY literal 0 HcmV?d00001