// Mirror of https://github.com/codedread/bitjs (synced 2025-10-03 17:49:16 +02:00; 103 lines, 3.5 KiB, JavaScript).
/**
 * File Sniffer.
 * Makes an attempt to resolve a byte stream into a MIME type.
 *
 * Licensed under the MIT License
 *
 * Copyright(c) 2020 Google Inc.
 */

// A selection from https://en.wikipedia.org/wiki/List_of_file_signatures.
// Mapping of MIME type to magic numbers. Each file type can have multiple signatures.
// Every signature is a list of byte values that is compared against the start of a buffer.
// '??' is used as a placeholder value that stands in for any single byte.
const fileSignatures = {
  // Document formats.
  'application/pdf': [[0x25, 0x50, 0x44, 0x46, 0x2d]],
  // Archive formats:
  // NOTE(review): the 'ustar' magic normally sits at byte offset 257 of a tar header, while
  // signatures in this table are matched from offset 0 - confirm these entries can match real
  // archives.
  'application/x-tar': [
    [0x75, 0x73, 0x74, 0x61, 0x72, 0x00, 0x30, 0x30],
    [0x75, 0x73, 0x74, 0x61, 0x72, 0x20, 0x20, 0x00],
  ],
  // Compressed archive formats.
  'application/x-7z-compressed': [[0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C]],
  'application/x-bzip2': [[0x42, 0x5A, 0x68]],
  'application/x-rar-compressed': [[0x52, 0x61, 0x72, 0x21, 0x1A, 0x07]],
  'application/zip': [[0x50, 0x4B, 0x03, 0x04], [0x50, 0x4B, 0x05, 0x06], [0x50, 0x4B, 0x07, 0x08]],
  // Image formats.
  'image/bmp': [[0x42, 0x4D]],
  'image/gif': [[0x47, 0x49, 0x46, 0x38]],
  'image/jpeg': [[0xFF, 0xD8, 0xFF]],
  'image/png': [[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]],
  // "RIFF", four bytes of chunk size (any value), then "WEBP".
  'image/webp': [[0x52, 0x49, 0x46, 0x46, '??', '??', '??', '??', 0x57, 0x45, 0x42, 0x50]],
  // Audio/Video formats.
  'application/ogg': [[0x4F, 0x67, 0x67, 0x53]],
  'audio/mpeg': [[0xFF, 0xFB], [0xFF, 0xF3], [0xFF, 0xF2], [0x49, 0x44, 0x33]],
};

// TODO: Eventually add support for various container formats so that:
// * an OGG container can be resolved to OGG Audio, OGG Video
// * an HEIF container can be resolved to AVIF, HEIC

/**
 * A single node of the signature tree. Each node stands for one byte position in a file
 * signature; a path from the top of the tree down to a node that carries a mimeType spells out
 * one complete signature.
 */
class Node {
  /**
   * @param {number|string} [value] The byte value this node matches, or the string '??' for a
   *     placeholder position. Left undefined for the top-level node of the tree.
   */
  constructor(value) {
    /** @type {number|string|undefined} */
    this.value = value;
    /**
     * Child nodes keyed by the byte value (or '??') they match.
     * @type {Object<string, Node>}
     */
    this.children = {};
    /**
     * Set only on the node that terminates a signature.
     * @type {string|undefined}
     */
    this.mimeType = undefined;
  }
}

// Top-level node in the tree. It matches no byte itself; its children are the possible first
// bytes of every known signature.
const root = new Node();
// Length of the longest signature, i.e. the most bytes a lookup ever has to examine.
let maxDepth = 0;

// Construct the tree, erroring if overlapping mime types are possible.
for (const mimeType of Object.keys(fileSignatures)) {
  for (const signature of fileSignatures[mimeType]) {
    let curNode = root;
    let depth = 0;
    for (const byte of signature) {
      if (curNode.children[byte] === undefined) {
        // A placeholder matches any byte, so it may never share a parent with concrete byte
        // values: the lookup could not tell which branch to follow.
        if (byte === '??' && Object.keys(curNode.children).length > 0) {
          throw new Error('Cannot add a placeholder child to a node that has non-placeholder children');
        } else if (byte !== '??' && curNode.children['??']) {
          throw new Error('Cannot add a non-placeholder child to a node that has a placeholder child');
        }
        curNode.children[byte] = new Node(byte);
      }
      depth++;
      curNode = curNode.children[byte];
    } // for each byte

    if (maxDepth < depth) {
      maxDepth = depth;
    }

    // The node reached by the last byte must be a fresh leaf: it may neither already terminate
    // another signature nor be an interior node of a longer one.
    if (curNode.mimeType) {
      throw new Error(`File signature collision: ${curNode.mimeType} overlaps with ${mimeType}`);
    } else if (Object.keys(curNode.children).length > 0) {
      throw new Error(`${mimeType} signature is not unique, it collides with other mime types`);
    }
    curNode.mimeType = mimeType;
  } // for each signature
}

/**
 * Sniffs the MIME type of a buffer by walking its leading bytes down the signature tree.
 * At most maxDepth bytes (the length of the longest known signature) are examined.
 * @param {ArrayBuffer} ab The bytes to inspect.
 * @return {string|undefined} The MIME type of the buffer, or undefined if the leading bytes do
 *     not match any known signature (including when the buffer ends before a signature does).
 */
export function findMimeType(ab) {
  const depth = ab.byteLength < maxDepth ? ab.byteLength : maxDepth;
  const arr = new Uint8Array(ab).subarray(0, depth);
  let curNode = root;
  for (const byte of arr) {
    // A placeholder child matches any byte. Tree construction guarantees that a placeholder is
    // the only child of its parent, so it can be preferred unconditionally.
    const next = curNode.children['??'] || curNode.children[byte];
    if (next === undefined) return undefined;
    curNode = next;
    // Checked after every step, placeholder or not, so that a signature whose final position
    // is a placeholder still resolves.
    if (curNode.mimeType) return curNode.mimeType;
  }
  // Ran out of bytes before reaching the end of any signature.
  return undefined;
}