From d56d117d144caa681635960a390ac9404f5652b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=93lafur=20P=C3=A1ll=20Geirsson?= Date: Tue, 11 Jul 2023 11:33:13 +0200 Subject: [PATCH] Skip indexing large files above 1mb, add option `--max-file-byte-size` (#271) * Add configurable option to skip large files Previously, scip-typescript indexed all files regardless of file size. This could result in scip-typescript stalling progress to index very large files that were (frequently) auto-generated. This commit changes the default behavior to skip indexing files that are larger than 1mb, and makes this threshold configurable via the new `--max-file-byte-size` flag. * Print out when large files are skipped --- src/CommandLineOptions.ts | 27 ++++++++++++++--- src/FileIndexer.ts | 18 ++++++++++++ src/parseHumanByteSizeIntoNumber.test.ts | 37 ++++++++++++++++++++++++ src/parseHumanByteSizeIntoNumber.ts | 32 ++++++++++++++++++++ 4 files changed, 110 insertions(+), 4 deletions(-) create mode 100644 src/parseHumanByteSizeIntoNumber.test.ts create mode 100644 src/parseHumanByteSizeIntoNumber.ts diff --git a/src/CommandLineOptions.ts b/src/CommandLineOptions.ts index 53880baf..e8b1a2f9 100644 --- a/src/CommandLineOptions.ts +++ b/src/CommandLineOptions.ts @@ -4,6 +4,7 @@ import ts from 'typescript' import packageJson from '../package.json' +import { parseHumanByteSizeIntoNumber } from './parseHumanByteSizeIntoNumber' import * as scip from './scip' /** Configuration options to index a multi-project workspace. 
*/ @@ -14,6 +15,8 @@ export interface MultiProjectOptions { yarnBerryWorkspaces: boolean pnpmWorkspaces: boolean globalCaches: boolean + maxFileByteSize?: string + maxFileByteSizeNumber?: number cwd: string output: string indexedProjects: Set @@ -36,7 +39,7 @@ export interface GlobalCache { } export function mainCommand( - indexAction: (projects: string[], otpions: MultiProjectOptions) => void + indexAction: (projects: string[], options: MultiProjectOptions) => void ): Command { const command = new Command() command @@ -67,12 +70,28 @@ export function mainCommand( '--no-global-caches', 'whether to disable global caches between TypeScript projects' ) + .option( + '--max-file-byte-size ', + 'skip files that have a larger byte size than the provided value. Supported formats: 1kb, 1mb, 1gb.', + '1mb' + ) .argument('[projects...]') .action((parsedProjects, parsedOptions) => { - indexAction( - parsedProjects as string[], - parsedOptions as MultiProjectOptions + const options = parsedOptions as MultiProjectOptions + + // Parse and validate human-provided --max-file-byte-size value + options.maxFileByteSizeNumber = parseHumanByteSizeIntoNumber( + options.maxFileByteSize ?? '1mb' ) + if (isNaN(options.maxFileByteSizeNumber)) { + console.error( + `invalid byte size '${options.maxFileByteSize}'. 
To fix this problem, change the value of the flag --max-file-byte-size to use a valid byte size format: 1kb, 1mb, 1gb.` + ) + process.exitCode = 1 + return + } + + indexAction(parsedProjects as string[], options) }) return command } diff --git a/src/FileIndexer.ts b/src/FileIndexer.ts index 1f889d42..044e8c53 100644 --- a/src/FileIndexer.ts +++ b/src/FileIndexer.ts @@ -15,6 +15,7 @@ import { } from './Descriptor' import { Input } from './Input' import { Packages } from './Packages' +import { formatByteSizeAsHumanReadable } from './parseHumanByteSizeIntoNumber' import { Range } from './Range' import * as scip from './scip' import { ScipSymbol } from './ScipSymbol' @@ -42,6 +43,23 @@ export class FileIndexer { // if (!this.sourceFile.fileName.includes('constructor')) { // return // } + + const byteSize = Buffer.from(this.sourceFile.getText()).length + if ( + this.options.maxFileByteSizeNumber && + byteSize > this.options.maxFileByteSizeNumber + ) { + const humanSize = formatByteSizeAsHumanReadable(byteSize) + const humanMaxSize = formatByteSizeAsHumanReadable( + this.options.maxFileByteSizeNumber + ) + console.log( + `info: skipping file '${this.sourceFile.fileName}' because it has byte size ${humanSize} that exceeds the maximum threshold ${humanMaxSize}. ` + + 'If you intended to index this file, use the flag --max-file-byte-size to configure the maximum file size threshold.' 
// Decimal (SI) multipliers: "1kb" means 1000 bytes, not 1024.
const kilo = 1_000
const mega = 1_000_000
const giga = 1_000_000_000

// A plain, non-negative decimal number such as "15", "1.2" or ".5".
// Hoisted so the pattern is compiled once.
const plainDecimalNumber = /^(?:\d+(?:\.\d*)?|\.\d+)$/

/**
 * Parses a human-written byte size such as `1001`, `1.2kb`, `3MB` or `2Gb`
 * into a number of bytes.
 *
 * The unit suffix is case-insensitive and optional; a bare number is
 * interpreted as bytes. Units are decimal (1kb = 1000 bytes).
 *
 * @param humanByteSize the user-provided size, e.g. the value of the
 * `--max-file-byte-size` flag.
 * @returns the size in bytes, or `NaN` when the input is not a non-negative
 * decimal number optionally followed by `kb`, `mb` or `gb`. Callers are
 * expected to check the result with `isNaN`.
 */
export function parseHumanByteSizeIntoNumber(humanByteSize: string): number {
  let value = humanByteSize.trim().toLowerCase()
  let multiplier = 1
  if (value.endsWith('kb')) {
    multiplier = kilo
    value = value.slice(0, -2)
  } else if (value.endsWith('mb')) {
    multiplier = mega
    value = value.slice(0, -2)
  } else if (value.endsWith('gb')) {
    multiplier = giga
    value = value.slice(0, -2)
  }
  // Tolerate whitespace between the number and the unit ("1 mb").
  value = value.trim()
  // Number.parseFloat silently ignores trailing garbage ("15tb" parses as 15),
  // so the remaining text is validated in full before it is converted.
  if (!plainDecimalNumber.test(value)) {
    return Number.NaN
  }
  return Number.parseFloat(value) * multiplier
}

/**
 * Formats a byte count using the largest decimal unit (`gb`, `mb`, `kb`) that
 * yields a value of at least 1; smaller counts are printed as a bare number.
 *
 * @param byteSize the size in bytes.
 * @returns the formatted size, e.g. `1mb` for 1_000_000 or `1.5kb` for 1_500.
 */
export function formatByteSizeAsHumanReadable(byteSize: number): string {
  // `>=` so that exact unit boundaries use the larger unit
  // (1_000_000 is "1mb" rather than "1000kb").
  if (byteSize >= giga) {
    return `${byteSize / giga}gb`
  }
  if (byteSize >= mega) {
    return `${byteSize / mega}mb`
  }
  if (byteSize >= kilo) {
    return `${byteSize / kilo}kb`
  }
  return byteSize.toString()
}