From 7746b8c03fafa4025fec2d42fbb280ff3849663e Mon Sep 17 00:00:00 2001 From: Jeff Schiller Date: Fri, 14 Jan 2022 12:17:03 -0800 Subject: [PATCH] Tweak the file signature sniffer so byte tree initialization happens lazily. --- archive/decompress-internal.js | 6 +++ archive/decompress.js | 8 ++-- file/sniffer.js | 75 +++++++++++++++++++++------------- 3 files changed, 57 insertions(+), 32 deletions(-) diff --git a/archive/decompress-internal.js b/archive/decompress-internal.js index 0049deb..50f2f06 100644 --- a/archive/decompress-internal.js +++ b/archive/decompress-internal.js @@ -11,6 +11,12 @@ import { findMimeType } from '../file/sniffer.js'; +/** + * @typedef UnarchivedFile + * @property {string} filename + * @property {Uint8Array} fileData + */ + /** * The UnarchiveEvent types. */ diff --git a/archive/decompress.js b/archive/decompress.js index e5cc5e6..2aa1b06 100644 --- a/archive/decompress.js +++ b/archive/decompress.js @@ -33,10 +33,10 @@ export { */ /** -* @typedef UnarchivedFile -* @property {string} filename -* @property {Uint8Array} fileData -*/ + * @typedef UnarchivedFile + * @property {string} filename + * @property {Uint8Array} fileData + */ /** * The goal is to make this testable - send getUnarchiver() an array buffer of diff --git a/file/sniffer.js b/file/sniffer.js index 69046e1..eeed7a1 100644 --- a/file/sniffer.js +++ b/file/sniffer.js @@ -38,6 +38,10 @@ const fileSignatures = { // * an OGG container can be resolved to OGG Audio, OGG Video // * an HEIF container can be resolved to AVIF, HEIC +/** + * Represents a single byte in the tree. If this node terminates a known MIME type (see magic + * numbers above), then the mimeType field will be set. + */ class Node { /** @param {number} value */ constructor(value) { @@ -47,49 +51,64 @@ class Node { } } -// Top-level node in the tree. -const root = new Node(); +/** Top-level node in the byte tree. */ +let root = null; +/** The maximum depth of the byte tree. 
*/ let maxDepth = 0; -// Construct the tree, erroring if overlapping mime types are possible. -for (const mimeType in fileSignatures) { - for (const signature of fileSignatures[mimeType]) { - let curNode = root; - let depth = 0; - for (const byte of signature) { - if (curNode.children[byte] === undefined) { - if (byte === '??' && !curNode.children['??'] && Object.keys(curNode.children).length > 0) { - throw 'Cannot add a placeholder child to a node that has non-placeholder children'; - } else if (byte !== '??' && curNode.children['??']) { - throw 'Cannot add a non-placeholder child to a node that has a placeholder child'; +/** + * This function initializes the byte tree. It is lazily called upon findMimeType(), but if you care + * about when the tree initializes (like in startup, etc), you can call it yourself here. + */ +export function initialize() { + root = new Node(); + + // Construct the tree, erroring if overlapping mime types are possible. + for (const mimeType in fileSignatures) { + for (const signature of fileSignatures[mimeType]) { + let curNode = root; + let depth = 0; + for (const byte of signature) { + if (curNode.children[byte] === undefined) { + if (byte === '??' && !curNode.children['??'] && Object.keys(curNode.children).length > 0) { + throw 'Cannot add a placeholder child to a node that has non-placeholder children'; + } else if (byte !== '??' 
&& curNode.children['??']) { + throw 'Cannot add a non-placeholder child to a node that has a placeholder child'; + } + curNode.children[byte] = new Node(byte); } - curNode.children[byte] = new Node(byte); + depth++; + curNode = curNode.children[byte]; + } // for each byte + + if (maxDepth < depth) { + maxDepth = depth; } - depth++; - curNode = curNode.children[byte]; - } // for each byte - if (maxDepth < depth) { - maxDepth = depth; - } - - if (curNode.mimeType) { - throw `File signature collision: ${curNode.mimeType} overlaps with ${mimeType}`; - } else if (Object.keys(curNode.children).length > 0) { - throw `${mimeType} signature is not unique, it collides with other mime types`; - } - curNode.mimeType = mimeType; - } // for each signature + if (curNode.mimeType) { + throw `File signature collision: ${curNode.mimeType} overlaps with ${mimeType}`; + } else if (Object.keys(curNode.children).length > 0) { + throw `${mimeType} signature is not unique, it collides with other mime types`; + } + curNode.mimeType = mimeType; + } // for each signature + } } /** + * Finds the likely MIME type represented by the ArrayBuffer. * @param {ArrayBuffer} ab * @return {string} The MIME type of the buffer, or undefined. */ export function findMimeType(ab) { + if (!root) { + initialize(); + } + const depth = ab.byteLength < maxDepth ? ab.byteLength : maxDepth; const arr = new Uint8Array(ab).subarray(0, depth); let curNode = root; + // Step through bytes, updating curNode as it walks down the byte tree. for (const byte of arr) { // If this node has a placeholder child, just step into it. if (curNode.children['??']) {