/*
 * Copyright(c) 2020 Google Inc.
 */

// There are basically two major "container" families for modern audio-visual formats:
//  1) the ISO-BMFF family (MP4, HEVC, AVIF, etc)
//  2) the Matroska family (MKV, WebM)
// WebP belongs to neither family: its signature below is RIFF-based ('RIFF....WEBP').

// The ISO-BMFF container needs special processing because of its "compatible brands" array :(
// The Matroska container needs special processing because the sub-type can appear anywhere :(

// NOTE: Because the ICO format also starts with a couple of zero bytes, this tree will rely on
//       the File Type box never exceeding 255 bytes in length (i.e. the first three bytes of
//       box_length always being zero). A longer box seems unlikely according to
//       https://dev.to/alfg/a-quick-dive-into-mp4-57fo.
//  'ISO-BMFF': [[0x00, 0x00, 0x00, '??', 0x66, 0x74, 0x79, 0x70]], // box_length, then 'ftyp'
//  'MATROSKA': [[0x1A, 0x45, 0xDF, 0xA3]]

// A subset of "modern" formats from https://en.wikipedia.org/wiki/List_of_file_signatures.

/**
 * Mapping of MIME type to magic numbers. Each file type can have multiple signatures.
 *
 * Shape: every key is a MIME type string; every value is an array of signatures; every
 * signature is an array whose elements are either a byte value (0x00-0xFF) or the string '??'.
 * '??' is used as a placeholder value (presumably "any byte" - confirm against the code that
 * consumes this table, which is not part of this excerpt).
 */
const fileSignatures = {
  // Document formats.
  'application/pdf': [[0x25, 0x50, 0x44, 0x46, 0x2d]], // '%PDF-'

  // Archive formats:
  // NOTE(review): in the tar header layout the 'ustar' magic sits at byte offset 257 rather
  // than at the start of the file - TODO confirm the matcher accounts for that.
  'application/x-tar': [ // 'ustar'
    [0x75, 0x73, 0x74, 0x61, 0x72, 0x00, 0x30, 0x30],
    [0x75, 0x73, 0x74, 0x61, 0x72, 0x20, 0x20, 0x00],
  ],
  'application/x-7z-compressed': [[0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C]], // '7z'
  'application/x-bzip2': [[0x42, 0x5A, 0x68]], // 'BZh'
  'application/x-rar-compressed': [[0x52, 0x61, 0x72, 0x21, 0x1A, 0x07]], // 'Rar!'
  'application/zip': [ // 'PK'
    [0x50, 0x4B, 0x03, 0x04],
    [0x50, 0x4B, 0x05, 0x06],
    [0x50, 0x4B, 0x07, 0x08],
  ],

  // Image formats.
  'image/bmp': [[0x42, 0x4D]], // 'BM'
  'image/gif': [[0x47, 0x49, 0x46, 0x38]], // 'GIF8'
  'image/jpeg': [[0xFF, 0xD8, 0xFF]],
  'image/png': [[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]], // 0x89, 'PNG', CR LF, 0x1A, LF
  'image/webp': [[0x52, 0x49, 0x46, 0x46, '??', '??', '??', '??', 0x57, 0x45, 0x42, 0x50]], // 'RIFF....WEBP'
  'image/x-icon': [[0x00, 0x00, 0x01, 0x00]],

  // Audio/Video formats.
  'application/ogg': [[0x4F, 0x67, 0x67, 0x53]], // 'OggS'
  'audio/flac': [[0x66, 0x4C, 0x61, 0x43]], // 'fLaC'
  'audio/mpeg': [
    [0xFF, 0xFB],
    [0xFF, 0xF3],
    [0xFF, 0xF2],
    [0x49, 0x44, 0x33], // 'ID3'
  ],
};

// TODO: Eventually add support for various container formats so that: