Skip to content

Commit

Permalink
fix: include zero byte tiffs when validating files TDE-1348 (#1156)
Browse files Browse the repository at this point in the history
#### Motivation

Zero byte tiffs are currently skipped silently when validating files.

#### Modification

Allow file readers to opt in to listing zero byte files.

#### Checklist

_If not applicable, provide explanation of why._

- [ ] Tests updated
- [ ] Docs updated
- [ ] Issue linked in Title
  • Loading branch information
blacha authored Dec 9, 2024
1 parent 19bd251 commit a1e54de
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,22 @@ describe('validate', () => {
]);
});

it('should fail with 0 byte tiffs', async () => {
  // A zero byte tiff must cause loading to fail, not be silently skipped
  await fsa.write('/tmp/empty/foo.tiff', Buffer.from(''));

  const handlerArgs = {
    ...baseArguments,
    location: ['/tmp/empty/'],
    retile: false,
    validate: true,
    scale: 1000,
    forceOutput: true,
  };
  // Capture the rejection so the error message can be asserted on directly
  const outcome = await commandTileIndexValidate.handler(handlerArgs).catch((err: Error) => err);

  assert.ok(String(outcome).startsWith('Error: Tiff loading failed: '));
});

it('should not fail if duplicate tiles are detected but --retile is used', async (t) => {
// Input source/a/AS21_1000_0101.tiff source/b/AS21_1000_0101.tiff
t.mock.method(TiffLoader, 'load', () =>
Expand Down
3 changes: 2 additions & 1 deletion src/commands/tileindex-validate/tileindex.validate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ export const TiffLoader = {
* @returns Initialized tiff
*/
async load(locations: string[], args?: FileFilter): Promise<Tiff[]> {
const files = await getFiles(locations, args);
// Include 0 byte files and filter them out with {@see isTiff}
const files = await getFiles(locations, { ...args, sizeMin: 0 });
const tiffLocations = files.flat().filter(isTiff);
const startTime = performance.now();
logger.info({ count: tiffLocations.length }, 'Tiff:Load:Start');
Expand Down
6 changes: 6 additions & 0 deletions src/utils/__test__/chunk.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,10 @@ describe('getFiles', () => {
const files = await getFiles(['gf://a/;gf://b/\ngf://c/']);
assert.deepEqual(files, [['gf://a/a.txt', 'gf://b/b.txt', 'gf://c/c.txt']]);
});

it('should skip zero byte files by default', async () => {
  // Zero byte files are excluded unless the caller opts in with sizeMin: 0
  await fsa.write('gf://a/a.txt', Buffer.from(''));

  const defaultListing = await getFiles(['gf://a/']);
  assert.deepEqual(defaultListing, []);

  const withZeroByteFiles = await getFiles(['gf://a/'], { sizeMin: 0 });
  assert.deepEqual(withZeroByteFiles, [['gf://a/a.txt']]);
});
});
59 changes: 51 additions & 8 deletions src/utils/chunk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,52 @@ export function splitPaths(paths: string[]): string[] {
return paths.map((m) => m.split(PathSplitCharacters)).flat();
}

export type FileFilter = { include?: string; exclude?: string; limit?: number; group?: number; groupSize?: string };
/** Options controlling which files are listed and how they are grouped */
export type FileFilter = {
  /** Only include files matching this pattern — NOTE(review): pattern semantics are applied by asyncFilter; confirm regex vs glob there */
  include?: string;
  /** Exclude files matching this pattern — NOTE(review): pattern semantics are applied by asyncFilter; confirm regex vs glob there */
  exclude?: string;

  /**
   * Limit the number of output files
   *
   * @default -1 - No limit
   */
  limit?: number;

  /**
   * Group files into groups of at most this many items
   *
   * @default -1 - No limit
   */
  group?: number;

  /**
   * Group files into groups of at most this total size, see {@link parseSize}
   *
   * @example
   * ```
   * 5GB // 5 GB chunks
   * ```
   *
   * @default -1 - No limit
   */
  groupSize?: string;

  /**
   * Files smaller than this size (in bytes) are ignored
   *
   * @example
   *
   * ```typescript
   * if(file.size < sizeMin) continue
   * ```
   * @default 1 - Zero byte files are skipped unless the caller opts in with `sizeMin: 0`
   */
  sizeMin?: number;
};
export async function getFiles(paths: string[], args: FileFilter = {}): Promise<string[][]> {
const limit = args.limit ?? -1; // no limit by default
const maxSize = parseSize(args.groupSize ?? '-1');
const minSize = args.sizeMin ?? 1; // ignore 0 byte files
const groupSize = parseSize(args.groupSize ?? '-1');
const maxLength = args.group ?? -1;
const outputFiles: FileSizeInfo[] = [];

Expand All @@ -76,18 +118,19 @@ export async function getFiles(paths: string[], args: FileFilter = {}): Promise<
const fileList = await fsa.toArray(asyncFilter(fsa.details(targetPath), args));
logger.info({ path: targetPath, fileCount: fileList.length }, 'List:Count');

let size = 0;
let totalSize = 0;
for (const file of fileList) {
// Skip empty files
if (file.size === 0) continue;
if (file.size != null) size += file.size;
if (file.size != null) {
if (file.size < minSize) continue;
totalSize += file.size;
}
outputFiles.push(file);
if (limit > 0 && outputFiles.length >= limit) break;
}
if (limit > 0 && outputFiles.length >= limit) break;

logger.info({ path: targetPath, fileCount: fileList.length, totalSize: size }, 'List:Size');
logger.info({ path: targetPath, fileCount: fileList.length, totalSize }, 'List:Size');
}

return chunkFiles(outputFiles, maxLength, maxSize);
return chunkFiles(outputFiles, maxLength, groupSize);
}

0 comments on commit a1e54de

Please sign in to comment.