From 1f8ae56690bb23573280caaeea0849dd4d0e48d9 Mon Sep 17 00:00:00 2001 From: Borewit Date: Thu, 19 Dec 2024 18:14:45 +0100 Subject: [PATCH] Sync and improve custom detector documentation --- core.d.ts | 34 +++++++++++++++++--------------- readme.md | 59 +++++++++++++++++++++++++++++++++---------------------- 2 files changed, 53 insertions(+), 40 deletions(-) diff --git a/core.d.ts b/core.d.ts index d67ee617..09d1393b 100644 --- a/core.d.ts +++ b/core.d.ts @@ -116,29 +116,31 @@ export declare function fileTypeFromBlob(blob: Blob): Promise { - const unicornHeader = [85, 78, 73, 67, 79, 82, 78]; // 'UNICORN' as decimal string + const unicornHeader = [85, 78, 73, 67, 79, 82, 78]; // "UNICORN" in ASCII decimal - const buffer = Buffer.alloc(7); + const buffer = new Uint8Array(unicornHeader.length); await tokenizer.peekBuffer(buffer, {length: unicornHeader.length, mayBeLess: true}); if (unicornHeader.every((value, index) => value === buffer[index])) { return {ext: 'unicorn', mime: 'application/unicorn'}; @@ -148,15 +150,15 @@ const customDetectors = [ }, ]; -const buffer = Buffer.from('UNICORN'); +const buffer = new Uint8Array([85, 78, 73, 67, 79, 82, 78]); const parser = new FileTypeParser({customDetectors}); const fileType = await parser.fromBuffer(buffer); -console.log(fileType); +console.log(fileType); // {ext: 'unicorn', mime: 'application/unicorn'} ``` -@param tokenizer - The [tokenizer](https://github.com/Borewit/strtok3#tokenizer) used to read the file content from. -@param fileType - The file type detected by the standard detections or a previous custom detection, or `undefined`` if no matching file type could be found. -@returns The detected file type, or `undefined` when there is no match. +@param tokenizer - The [tokenizer](https://github.com/Borewit/strtok3#tokenizer) used to read file content. +@param fileType - The file type detected by standard or previous custom detectors, or `undefined` if no match is found. 
+@returns The detected file type, or `undefined` if no match is found. */ export type Detector = (tokenizer: ITokenizer, fileType?: FileTypeResult) => Promise<FileTypeResult | undefined>; diff --git a/readme.md b/readme.md index e416b466..2ead1f0e 100644 --- a/readme.md +++ b/readme.md @@ -340,44 +340,55 @@ Returns a `Set` of supported MIME types. ## Custom detectors -A custom detector is a function that allows specifying custom detection mechanisms. +A custom file type detector. -Detectors can be added via the constructor options, or by adding it directly to `FileTypeParser#detectors`. +Detectors can be added via the constructor options or by directly modifying `FileTypeParser#detectors`. -The detectors provided via the constructor options, are called before the default detectors are called. +Detectors provided through the constructor options are executed before the default detectors. -Custom detectors can be used to add new `FileTypeResults` or to modify return behaviour of existing `FileTypeResult` detections. +Custom detectors allow for: +- Introducing new `FileTypeResult` entries. +- Modifying the detection behavior of existing `FileTypeResult` types. -If the detector returns `undefined`, there are 2 possible scenarios: +### Detector Execution Flow +If a detector returns `undefined`, the following rules apply: -1. The detector has not read from the tokenizer, it will be proceeded with the next available detector. -2. The detector has read from the tokenizer (`tokenizer.position` has been increased). - In that case no further detectors will be executed and the final conclusion is that file-type returns undefined. - Note that this an exceptional scenario, as the detector takes the opportunity from any other detector to determine the file type. +1. **No Tokenizer Interaction**: If the detector does not modify the tokenizer's position, the next detector in the sequence is executed. +2. 
**Tokenizer Interaction**: If the detector modifies the tokenizer's position (`tokenizer.position` is advanced), no further detectors are executed. In this case, the file type remains `undefined`, as subsequent detectors cannot evaluate the content. This is an exceptional scenario, as it prevents any other detectors from determining the file type. -Example detector array which can be extended and provided to each public method via the `fileTypeOptions` argument: +### Example Usage +Below is an example of a custom detector array. This can be passed to the `FileTypeParser` via the `fileTypeOptions` argument. ```js import {FileTypeParser} from 'file-type'; -const customDetector = async tokenizer => { - const unicornHeader = [85, 78, 73, 67, 79, 82, 78]; // 'UNICORN' as decimal string +const customDetectors = [ + async tokenizer => { + const unicornHeader = [85, 78, 73, 67, 79, 82, 78]; // "UNICORN" in ASCII decimal - const buffer = new Uint8Array(7); - await tokenizer.peekBuffer(buffer, {length: unicornHeader.length, mayBeLess: true}); + const buffer = new Uint8Array(unicornHeader.length); + await tokenizer.peekBuffer(buffer, {length: unicornHeader.length, mayBeLess: true}); + if (unicornHeader.every((value, index) => value === buffer[index])) { + return {ext: 'unicorn', mime: 'application/unicorn'}; + } - if (unicornHeader.every((value, index) => value === buffer[index])) { - return {ext: 'unicorn', mime: 'application/unicorn'}; - } + return undefined; + }, +]; - return undefined; -}; - -const buffer = new Uint8Array(new TextEncoder().encode('UNICORN')); -const parser = new FileTypeParser(); -parser.detectors.unshift(customDetector); // Make customDetector the first detector +const buffer = new Uint8Array([85, 78, 73, 67, 79, 82, 78]); +const parser = new FileTypeParser({customDetectors}); const fileType = await parser.fromBuffer(buffer); -console.log(fileType); +console.log(fileType); // {ext: 'unicorn', mime: 'application/unicorn'} +``` + +```ts +/** +@param 
tokenizer - The [tokenizer](https://github.com/Borewit/strtok3#tokenizer) used to read file content. +@param fileType - The file type detected by standard or previous custom detectors, or `undefined` if no match is found. +@returns The detected file type, or `undefined` if no match is found. +*/ +export type Detector = (tokenizer: ITokenizer, fileType?: FileTypeResult) => Promise<FileTypeResult | undefined>; ``` ## Abort signal