From 1c66e0ab72290af7b0b470947388a5f1ad1ee287 Mon Sep 17 00:00:00 2001 From: Borewit Date: Fri, 3 Jan 2025 11:42:04 +0100 Subject: [PATCH] Rename false positive probability classifications: - Confident (`core`): Represents detections with a high degree of certainty in identifying the correct file type. - Imprecise (`core.imprecise`): Represents detections with limited supporting data, resulting in a higher likelihood of false positives. --- .github/pull_request_template.md | 4 +++- core.js | 12 +++++++----- readme.md | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 84c18d8d..1607fe55 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -5,11 +5,13 @@ If you're adding support for a new file type, please follow the below steps: - Add the file extension to the `extensions` array in `supported.js`. - Add the file's MIME type to the `types` array in `supported.js`. - Add the file type detection logic to the `core.js` file +- Determine the appropriate detection confidence category: + - `detectConfident()`: Detections with a high degree of certainty in identifying the correct file type + - `detectImprecise()`: Detections with limited supporting data, resulting in a higher likelihood of false positives - Respect the sequence: - Signature with shorter sample size (counted from offset 0 until the last required byte position) will be executed first. - Only the initial determination for the file type counts for the sequence. - Existing signatures requiring same sample length (same *signature group*) will be tested prior to your new detections. Yours will be last. (rational: common formats first). -- Unsafe detection (higher risk false positive detection) will be performed after safe detections, they should be added to `detectUnsafe()` - Add the file extension to the `Supported file types` section of the readme in alphabetical order, in the format ```- [``](URL) - Format name```, for example, ```- [`png`](https://en.wikipedia.org/wiki/Portable_Network_Graphics) - Portable Network Graphics``` - Add the file extension to the `keywords` array in the `package.json` file. - Run `$ npm test` to ensure the tests pass. diff --git a/core.js b/core.js index a89c12e5..cfd4815f 100644 --- a/core.js +++ b/core.js @@ -130,8 +130,8 @@ export async function fileTypeStream(webStream, options) { export class FileTypeParser { constructor(options) { this.detectors = [...(options?.customDetectors ?? []), - {id: 'core.safe', detect: this.detectCore}, - {id: 'core.unsafe', detect: this.detectUnsafe}]; + {id: 'core', detect: this.detectConfident}, + {id: 'core.imprecise', detect: this.detectImprecise}]; this.tokenizerOptions = { abortSignal: options?.signal, }; @@ -233,7 +233,8 @@ export class FileTypeParser { return this.check(stringToBytes(header), options); } - detectCore = async tokenizer => { + // Detections with a high degree of certainty in identifying the correct file type + detectConfident = async tokenizer => { this.buffer = new Uint8Array(reasonableDetectionSizeInBytes); // Keep reading until EOF if the file size is unknown. @@ -323,7 +324,7 @@ export class FileTypeParser { if (this.check([0xEF, 0xBB, 0xBF])) { // UTF-8-BOM // Strip off UTF-8-BOM this.tokenizer.ignore(3); - return this.detectCore(tokenizer); + return this.detectConfident(tokenizer); } if (this.check([0x47, 0x49, 0x46])) { @@ -1590,7 +1591,8 @@ export class FileTypeParser { } }; - detectUnsafe = async tokenizer => { + // Detections with limited supporting data, resulting in a higher likelihood of false positives + detectImprecise = async tokenizer => { this.buffer = new Uint8Array(reasonableDetectionSizeInBytes); // Read initial sample size of 8 bytes diff --git a/readme.md b/readme.md index cfc4ee1d..43915f15 100644 --- a/readme.md +++ b/readme.md @@ -365,7 +365,7 @@ Below is an example of a custom detector array. This can be passed to the `FileT import {FileTypeParser} from 'file-type'; const unicornDetector = { - id: 'unicorn', + id: 'unicorn', // May be used to recognize the detector in the detector list async detect(tokenizer) { const unicornHeader = [85, 78, 73, 67, 79, 82, 78]; // "UNICORN" in ASCII decimal