1
0
Fork 0
mirror of https://github.com/codedread/bitjs synced 2025-10-03 17:49:16 +02:00
bitjs/file/sniffer.js
2021-06-20 12:59:11 -07:00

103 lines
3.5 KiB
JavaScript

/**
* File Sniffer.
* Makes an attempt to resolve a byte stream into a MIME type.
*
* Licensed under the MIT License
*
* Copyright(c) 2020 Google Inc.
*/
// Magic-number table keyed by MIME type. The entries are a selection from
// https://en.wikipedia.org/wiki/List_of_file_signatures.
// Each MIME type maps to a list of signatures, since one format may begin in several ways.
// A signature is a list of byte values; the string '??' means "any byte at this position".
const fileSignatures = {
  // Document formats.
  'application/pdf': [
    [0x25, 0x50, 0x44, 0x46, 0x2d], // "%PDF-"
  ],

  // Archive formats.
  // NOTE(review): the matcher in this file compares from the first byte of the buffer, while
  // the "ustar" magic is usually described as sitting at offset 257 of a tar header - confirm
  // that tar detection behaves as intended.
  'application/x-tar': [
    [0x75, 0x73, 0x74, 0x61, 0x72, 0x00, 0x30, 0x30], // "ustar" NUL "00"
    [0x75, 0x73, 0x74, 0x61, 0x72, 0x20, 0x20, 0x00], // "ustar" SP SP NUL
  ],

  // Compressed archive formats.
  'application/x-7z-compressed': [
    [0x37, 0x7a, 0xbc, 0xaf, 0x27, 0x1c], // "7z" followed by four binary bytes
  ],
  'application/x-bzip2': [
    [0x42, 0x5a, 0x68], // "BZh"
  ],
  'application/x-rar-compressed': [
    [0x52, 0x61, 0x72, 0x21, 0x1a, 0x07], // "Rar!" followed by two binary bytes
  ],
  'application/zip': [
    [0x50, 0x4b, 0x03, 0x04], // "PK" 03 04
    [0x50, 0x4b, 0x05, 0x06], // "PK" 05 06
    [0x50, 0x4b, 0x07, 0x08], // "PK" 07 08
  ],

  // Image formats.
  'image/bmp': [
    [0x42, 0x4d], // "BM"
  ],
  'image/gif': [
    [0x47, 0x49, 0x46, 0x38], // "GIF8"
  ],
  'image/jpeg': [
    [0xff, 0xd8, 0xff],
  ],
  'image/png': [
    [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a], // 0x89 "PNG" CR LF 0x1A LF
  ],
  'image/webp': [
    // "RIFF", four arbitrary bytes, then "WEBP".
    [0x52, 0x49, 0x46, 0x46, '??', '??', '??', '??', 0x57, 0x45, 0x42, 0x50],
  ],

  // Audio/Video formats.
  'application/ogg': [
    [0x4f, 0x67, 0x67, 0x53], // "OggS"
  ],
  'audio/mpeg': [
    [0xff, 0xfb],
    [0xff, 0xf3],
    [0xff, 0xf2],
    [0x49, 0x44, 0x33], // "ID3"
  ],
};
// TODO: Eventually add support for various container formats so that:
// * an OGG container can be resolved to OGG Audio, OGG Video
// * an HEIF container can be resolved to AVIF, HEIC
/**
 * A single node of the signature lookup tree.
 *  - value:    what was passed to the constructor (undefined when nothing is passed).
 *  - children: child nodes, stored as properties of a plain object.
 *  - mimeType: starts out undefined; assigned from outside the class.
 */
class Node {
  /** @param {number} value */
  constructor(value) {
    // Properties are created in the order value, children, mimeType.
    Object.assign(this, {
      value,
      children: {},
      mimeType: undefined,
    });
  }
}
// Top-level node in the tree. It stands for "no bytes consumed yet" and has no value.
const root = new Node();
// Length of the longest signature in the table; set while the tree is built below.
let maxDepth = 0;
// Construct the tree at module load, throwing an Error if the table contains signatures that
// would make the lookup ambiguous:
//  * two identical signatures,
//  * one signature being a prefix of another (in either registration order),
//  * a placeholder ('??') and a concrete byte competing for the same position,
//  * an empty signature.
for (const [mimeType, signatures] of Object.entries(fileSignatures)) {
  for (const signature of signatures) {
    if (signature.length === 0) {
      throw new Error(`${mimeType} has an empty signature`);
    }

    let curNode = root;
    for (const byte of signature) {
      // A previously registered, shorter signature ends on this node while the current one
      // still has bytes left: the shorter one would always win, so the longer one could
      // never be matched.
      if (curNode.mimeType) {
        throw new Error(`File signature collision: ${curNode.mimeType} overlaps with ${mimeType}`);
      }

      if (curNode.children[byte] === undefined) {
        // A node holds either a single placeholder child or only concrete-byte children,
        // never a mix of the two.
        const hasChildren = Object.keys(curNode.children).length > 0;
        if (byte === '??' && hasChildren) {
          throw new Error('Cannot add a placeholder child to a node that has non-placeholder children');
        }
        if (byte !== '??' && curNode.children['??']) {
          throw new Error('Cannot add a non-placeholder child to a node that has a placeholder child');
        }
        curNode.children[byte] = new Node(byte);
      }
      curNode = curNode.children[byte];
    } // for each byte

    // One tree level is descended per signature element, so depth equals signature length.
    maxDepth = Math.max(maxDepth, signature.length);

    if (curNode.mimeType) {
      // The exact same byte sequence is already registered.
      throw new Error(`File signature collision: ${curNode.mimeType} overlaps with ${mimeType}`);
    }
    if (Object.keys(curNode.children).length > 0) {
      // This signature is a prefix of a longer, previously registered one.
      throw new Error(`${mimeType} signature is not unique, it collides with other mime types`);
    }
    curNode.mimeType = mimeType;
  } // for each signature
}
/**
 * Matches the leading bytes of a buffer against the signature tree.
 * The walk starts at the first byte and stops at the first node that completes a signature.
 * @param {ArrayBuffer} ab
 * @return {string|undefined} The MIME type of the buffer, or undefined if no signature matches.
 */
export function findMimeType(ab) {
  // Only the first maxDepth bytes are inspected.
  const depth = ab.byteLength < maxDepth ? ab.byteLength : maxDepth;
  const arr = new Uint8Array(ab).subarray(0, depth);
  let curNode = root;
  for (const byte of arr) {
    // A placeholder child accepts any byte; otherwise the byte itself must have a child.
    const next = curNode.children['??'] || curNode.children[byte];
    if (next === undefined) return undefined;
    curNode = next;
    // Checked after every step, including placeholder steps, so that a signature ending in a
    // placeholder can still be reported.
    if (curNode.mimeType) return curNode.mimeType;
  }
  // The buffer ended before any signature was completed.
  return undefined;
}