Skip to content

Commit

Permalink
fix(stac-validate): cache json schema objects to reduce network failu…
Browse files Browse the repository at this point in the history
…res TDE-1212 (#1029)

#### Motivation

_What does this change aim to achieve?_

We are seeing quite a few network-related failures when validating JSON
schema objects.

#### Modification

Cache JSON schema objects locally when the Docker container is built.

_Why is this change being made? What implications or other
considerations are there?_

#### Checklist

_If not applicable, provide explanation of why._

- [ ] Tests updated
- [ ] Docs updated
- [ ] Issue linked in Title
  • Loading branch information
blacha authored Jul 23, 2024
1 parent f40e69a commit 0338068
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 3 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
node_modules
build
build
json-schema-cache
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,7 @@ ADD package.json package-lock.json /app/
RUN npm install --omit=dev
ADD build/src /app/

# Cache a copy of the STAC JSON schemas by triggering a validation run
RUN node /app/index.js stac-validate https://nz-imagery.s3-ap-southeast-2.amazonaws.com/new-zealand/new-zealand_2020-2021_10m/rgb/2193/collection.json

ENTRYPOINT ["node", "/app/index.js"]
27 changes: 25 additions & 2 deletions src/commands/stac-validate/stac.validate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { fsa } from '@chunkd/fs';
import Ajv, { DefinedError, SchemaObject, ValidateFunction } from 'ajv';
import { fastFormats } from 'ajv-formats/dist/formats.js';
import { boolean, command, flag, number, option, restPositionals, string } from 'cmd-ts';
import { createHash } from 'crypto';
import { dirname, join } from 'path';
import { performance } from 'perf_hooks';
import * as st from 'stac-ts';
Expand All @@ -13,6 +14,27 @@ import { hashStream } from '../../utils/hash.js';
import { Sha256Prefix } from '../../utils/hash.js';
import { config, registerCli, verbose } from '../common.js';

/**
 * Load a JSON schema, preferring a local on-disk copy over the remote host.
 *
 * STAC validation can trigger lots of schema requests, so every schema that is
 * fetched is also written into `./json-schema-cache/` (relative to the current
 * working directory), keyed by the sha256 hex digest of its url. Later calls for
 * the same url are served from that file.
 *
 * The cache is strictly best-effort:
 * - any failure to read the cached copy falls back to loading from `url`
 * - any failure to write the cached copy is ignored, the freshly loaded schema
 *   is still returned
 *
 * @param url location of the JSON schema to load
 * @returns the schema object from the cache if it can be read, otherwise the object loaded from `url`
 * @throws whatever `fsa.readJson` rejects with when the schema is not cached and loading `url` fails
 */
async function readSchema(url: string): Promise<object> {
  const cacheId = createHash('sha256').update(url).digest('hex');
  const cachePath = `./json-schema-cache/${cacheId}.json`;

  try {
    return await fsa.readJson<object>(cachePath);
  } catch {
    // Cache miss or unreadable cache entry, fall through and load from the source url
  }

  const schema = await fsa.readJson<object>(url);

  try {
    await fsa.write(cachePath, JSON.stringify(schema));
  } catch {
    // A cache that cannot be written (eg read-only file system) must not fail
    // a validation whose schema was loaded successfully
  }

  return schema;
}

export const commandStacValidate = command({
name: 'stac-validate',
description: 'Validate STAC files',
Expand Down Expand Up @@ -78,8 +100,9 @@ export const commandStacValidate = command({
strict: args.strict,
loadSchema: (uri: string): Promise<SchemaObject> => {
let existing = Schemas.get(uri);

if (existing == null) {
existing = fsa.readJson(uri);
existing = readSchema(uri);
Schemas.set(uri, existing);
}
return existing;
Expand All @@ -99,7 +122,7 @@ export const commandStacValidate = command({
if (schema != null) return schema;
let existing = ajvSchema.get(uri);
if (existing == null) {
existing = fsa.readJson<object>(uri).then((json) => ajv.compileAsync(json));
existing = readSchema(uri).then((json) => ajv.compileAsync(json));
ajvSchema.set(uri, existing);
}
return existing;
Expand Down

0 comments on commit 0338068

Please sign in to comment.