1
0
Fork 0
mirror of https://github.com/codedread/bitjs synced 2025-10-06 10:49:55 +02:00

Add some better documentation for the file sniffer.

This commit is contained in:
Jeff Schiller 2023-02-15 22:52:02 -08:00
parent 83a07c970d
commit 871915f070

View file

@ -7,35 +7,57 @@
* Copyright(c) 2020 Google Inc. * Copyright(c) 2020 Google Inc.
*/ */
// A selection from https://en.wikipedia.org/wiki/List_of_file_signatures. // There are basically two major "container" families for modern audio-visual formats:
// 1) the ISO-BMFF family (MP4, HEVC, AVIF, etc)
// 2) the Matroska family (MKV, WebM) -- note that WebP is a RIFF container, not Matroska
// The ISO-BMFF container needs special processing because of its "compatible brands" array :(
// The Matroska container needs special processing because the sub-type can appear anywhere :(
// NOTE: Because the ICO format also starts with a couple of zero bytes, this tree will rely on the
// File Type box never going beyond 255 bytes in length; a longer box seems unlikely according to
// https://dev.to/alfg/a-quick-dive-into-mp4-57fo.
// 'ISO-BMFF': [[0x00, 0x00, 0x00, '??', 0x66, 0x74, 0x79, 0x70]], // box_length, then 'ftyp'
// 'MATROSKA': [[0x1A, 0x45, 0xDF, 0xA3]]
// A subset of "modern" formats from https://en.wikipedia.org/wiki/List_of_file_signatures.
// Mapping of MIME type to magic numbers. Each file type can have multiple signatures. // Mapping of MIME type to magic numbers. Each file type can have multiple signatures.
// '??' is used as a placeholder value. // '??' is used as a placeholder value.
const fileSignatures = { const fileSignatures = {
// Document formats. // Document formats.
'application/pdf': [[0x25, 0x50, 0x44, 0x46, 0x2d]], 'application/pdf': [[0x25, 0x50, 0x44, 0x46, 0x2d]], // '%PDF-'
// Archive formats: // Archive formats:
'application/x-tar': [ 'application/x-tar': [ // 'ustar'
[0x75, 0x73, 0x74, 0x61, 0x72, 0x00, 0x30, 0x30], [0x75, 0x73, 0x74, 0x61, 0x72, 0x00, 0x30, 0x30],
[0x75, 0x73, 0x74, 0x61, 0x72, 0x20, 0x20, 0x00], [0x75, 0x73, 0x74, 0x61, 0x72, 0x20, 0x20, 0x00],
], ],
'application/x-7z-compressed': [[0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C]], 'application/x-7z-compressed': [[0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C]], // '7z'
'application/x-bzip2': [[0x42, 0x5A, 0x68]], 'application/x-bzip2': [[0x42, 0x5A, 0x68]], // 'BZh'
'application/x-rar-compressed': [[0x52, 0x61, 0x72, 0x21, 0x1A, 0x07]], 'application/x-rar-compressed': [[0x52, 0x61, 0x72, 0x21, 0x1A, 0x07]], // 'Rar!'
'application/zip': [[0x50, 0x4B, 0x03, 0x04], [0x50, 0x4B, 0x05, 0x06], [0x50, 0x4B, 0x07, 0x08]], 'application/zip': [ // 'PK'
[0x50, 0x4B, 0x03, 0x04],
[0x50, 0x4B, 0x05, 0x06],
[0x50, 0x4B, 0x07, 0x08],
],
// Image formats. // Image formats.
'image/bmp': [[0x42, 0x4D]], 'image/bmp': [[0x42, 0x4D]], // 'BM'
'image/gif': [[0x47, 0x49, 0x46, 0x38]], 'image/gif': [[0x47, 0x49, 0x46, 0x38]], // 'GIF8'
'image/jpeg': [[0xFF, 0xD8, 0xFF]], 'image/jpeg': [[0xFF, 0xD8, 0xFF]],
'image/png': [[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]], 'image/png': [[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]],
'image/webp': [[0x52, 0x49, 0x46, 0x46, '??', '??', '??', '??', 0x57, 0x45, 0x42, 0x50]], 'image/webp': [[0x52, 0x49, 0x46, 0x46, '??', '??', '??', '??', 0x57, 0x45, 0x42, 0x50]], // 'RIFF....WEBP'
'image/x-icon': [[0x00, 0x00, 0x01, 0x00]], 'image/x-icon': [[0x00, 0x00, 0x01, 0x00]],
// Audio/Video formats. // Audio/Video formats.
'application/ogg': [[0x4F, 0x67, 0x67, 0x53]], 'application/ogg': [[0x4F, 0x67, 0x67, 0x53]], // 'OggS'
'audio/flac': [[0x66, 0x4C, 0x61, 0x43]], 'audio/flac': [[0x66, 0x4C, 0x61, 0x43]], // 'fLaC'
'audio/mpeg': [[0xFF, 0xFB], [0xFF, 0xF3], [0xFF, 0xF2], [0x49, 0x44, 0x33]], 'audio/mpeg': [
[0xFF, 0xFB],
[0xFF, 0xF3],
[0xFF, 0xF2],
[0x49, 0x44, 0x33], // 'ID3'
],
}; };
// TODO: Eventually add support for various container formats so that: // TODO: Eventually add support for various container formats so that: