1
0
Fork 0
mirror of https://github.com/codedread/bitjs synced 2025-10-04 01:59:15 +02:00

Tweak the file signature sniffer so byte tree initialization happens lazily.

This commit is contained in:
Jeff Schiller 2022-01-14 12:17:03 -08:00
parent 96ba25aef7
commit 7746b8c03f
3 changed files with 57 additions and 32 deletions

View file

@ -11,6 +11,12 @@
import { findMimeType } from '../file/sniffer.js'; import { findMimeType } from '../file/sniffer.js';
/**
* @typedef UnarchivedFile
* @property {string} filename
* @property {Uint8Array} fileData
*/
/** /**
* The UnarchiveEvent types. * The UnarchiveEvent types.
*/ */

View file

@ -33,10 +33,10 @@ export {
*/ */
/** /**
* @typedef UnarchivedFile * @typedef UnarchivedFile
* @property {string} filename * @property {string} filename
* @property {Uint8Array} fileData * @property {Uint8Array} fileData
*/ */
/** /**
* The goal is to make this testable - send getUnarchiver() an array buffer of * The goal is to make this testable - send getUnarchiver() an array buffer of

View file

@ -38,6 +38,10 @@ const fileSignatures = {
// * an OGG container can be resolved to OGG Audio, OGG Video // * an OGG container can be resolved to OGG Audio, OGG Video
// * an HEIF container can be resolved to AVIF, HEIC // * an HEIF container can be resolved to AVIF, HEIC
/**
* Represents a single byte in the tree. If this node terminates a known MIME type (see magic
* numbers above), then the mimeType field will be set.
*/
class Node { class Node {
/** @param {number} value */ /** @param {number} value */
constructor(value) { constructor(value) {
@ -47,49 +51,64 @@ class Node {
} }
} }
// Top-level node in the tree. /** Top-level node in the byte tree. */
const root = new Node(); let root = null;
/** The maximum depth of the byte tree. */
let maxDepth = 0; let maxDepth = 0;
// Construct the tree, erroring if overlapping mime types are possible. /**
for (const mimeType in fileSignatures) { * This function initializes the byte tree. It is called lazily by findMimeType(), but if you care
for (const signature of fileSignatures[mimeType]) { * about when the tree is initialized (e.g. during startup), you can call it yourself.
let curNode = root; */
let depth = 0; export function initialize() {
for (const byte of signature) { root = new Node();
if (curNode.children[byte] === undefined) {
if (byte === '??' && !curNode.children['??'] && Object.keys(curNode.children).length > 0) { // Construct the tree, erroring if overlapping mime types are possible.
throw 'Cannot add a placeholder child to a node that has non-placeholder children'; for (const mimeType in fileSignatures) {
} else if (byte !== '??' && curNode.children['??']) { for (const signature of fileSignatures[mimeType]) {
throw 'Cannot add a non-placeholder child to a node that has a placeholder child'; let curNode = root;
let depth = 0;
for (const byte of signature) {
if (curNode.children[byte] === undefined) {
if (byte === '??' && !curNode.children['??'] && Object.keys(curNode.children).length > 0) {
throw 'Cannot add a placeholder child to a node that has non-placeholder children';
} else if (byte !== '??' && curNode.children['??']) {
throw 'Cannot add a non-placeholder child to a node that has a placeholder child';
}
curNode.children[byte] = new Node(byte);
} }
curNode.children[byte] = new Node(byte); depth++;
curNode = curNode.children[byte];
} // for each byte
if (maxDepth < depth) {
maxDepth = depth;
} }
depth++;
curNode = curNode.children[byte];
} // for each byte
if (maxDepth < depth) { if (curNode.mimeType) {
maxDepth = depth; throw `File signature collision: ${curNode.mimeType} overlaps with ${mimeType}`;
} } else if (Object.keys(curNode.children).length > 0) {
throw `${mimeType} signature is not unique, it collides with other mime types`;
if (curNode.mimeType) { }
throw `File signature collision: ${curNode.mimeType} overlaps with ${mimeType}`; curNode.mimeType = mimeType;
} else if (Object.keys(curNode.children).length > 0) { } // for each signature
throw `${mimeType} signature is not unique, it collides with other mime types`; }
}
curNode.mimeType = mimeType;
} // for each signature
} }
/** /**
* Finds the likely MIME type represented by the ArrayBuffer.
* @param {ArrayBuffer} ab * @param {ArrayBuffer} ab
* @return {string} The MIME type of the buffer, or undefined. * @return {string} The MIME type of the buffer, or undefined.
*/ */
export function findMimeType(ab) { export function findMimeType(ab) {
if (!root) {
initializeTree();
}
const depth = ab.byteLength < maxDepth ? ab.byteLength : maxDepth; const depth = ab.byteLength < maxDepth ? ab.byteLength : maxDepth;
const arr = new Uint8Array(ab).subarray(0, depth); const arr = new Uint8Array(ab).subarray(0, depth);
let curNode = root; let curNode = root;
// Step through bytes, updating curNode as it walks down the byte tree.
for (const byte of arr) { for (const byte of arr) {
// If this node has a placeholder child, just step into it. // If this node has a placeholder child, just step into it.
if (curNode.children['??']) { if (curNode.children['??']) {