Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace JsonSchema with ajv for dictionary validation #304

Merged
merged 1 commit into from
Nov 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .eslintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"plugin:jsonc/recommended-with-json"
],
"parserOptions": {
"ecmaVersion": 9,
"ecmaVersion": 11,
"sourceType": "script",
"ecmaFeatures": {
"globalReturn": false,
Expand Down Expand Up @@ -401,7 +401,8 @@
"DynamicProperty": "readonly",
"EventDispatcher": "readonly",
"EventListenerCollection": "readonly",
"Logger": "readonly"
"Logger": "readonly",
"import": "readonly"
}
},
{
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ dictionaries/
/playwright/.cache/
/test/playwright/__screenshots__/
ext/manifest.json
ext/lib/validate-schemas.js
16 changes: 15 additions & 1 deletion dev/build.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ const childProcess = require('child_process');
const util = require('./util');
const {getAllFiles, getArgs, testMain} = util;
const {ManifestUtil} = require('./manifest-util');

const Ajv = require('ajv');
const standaloneCode = require('ajv/dist/standalone').default;

async function createZip(directory, excludeFiles, outputFileName, sevenZipExes, onUpdate, dryRun) {
try {
Expand Down Expand Up @@ -130,6 +131,19 @@ async function build(buildDir, extDir, manifestUtil, variantNames, manifestPath,
process.stdout.write(message);
};

process.stdout.write('Building schema validators using ajv\n');
const schemaDir = path.join(extDir, 'data/schemas/');
const schemaFileNames = fs.readdirSync(schemaDir);
const schemas = schemaFileNames.map((schemaFileName) => JSON.parse(fs.readFileSync(path.join(schemaDir, schemaFileName))));
const ajv = new Ajv({schemas: schemas, code: {source: true, esm: true}});
const moduleCode = standaloneCode(ajv);

// https://github.com/ajv-validator/ajv/issues/2209
const patchedModuleCode = moduleCode.replaceAll('require("ajv/dist/runtime/ucs2length").default', 'import("/lib/ucs2length.js").default');

fs.writeFileSync(path.join(extDir, 'lib/validate-schemas.js'), patchedModuleCode);


process.stdout.write(`Version: ${yomitanVersion}...\n`);

for (const variantName of variantNames) {
Expand Down
1 change: 1 addition & 0 deletions ext/data/schemas/custom-audio-list-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "customAudioList",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": [
Expand Down
1 change: 1 addition & 0 deletions ext/data/schemas/dictionary-index-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "dictionaryIndex",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"description": "Index file containing information about the data contained in the dictionary.",
Expand Down
3 changes: 2 additions & 1 deletion ext/data/schemas/dictionary-kanji-bank-v1-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "dictionaryKanjiBankV1",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"description": "Data file containing kanji information.",
Expand Down Expand Up @@ -30,4 +31,4 @@
"description": "A meaning for the kanji character."
}
}
}
}
3 changes: 2 additions & 1 deletion ext/data/schemas/dictionary-kanji-bank-v3-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "dictionaryKanjiBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"description": "Data file containing kanji information.",
Expand Down Expand Up @@ -42,4 +43,4 @@
}
]
}
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "dictionaryKanjiMetaBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"frequency": {
Expand Down
1 change: 1 addition & 0 deletions ext/data/schemas/dictionary-tag-bank-v3-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "dictionaryTagBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"description": "Data file containing tag information for terms and kanji.",
Expand Down
1 change: 1 addition & 0 deletions ext/data/schemas/dictionary-term-bank-v1-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "dictionaryTermBankV1",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"description": "Data file containing term information.",
Expand Down
1 change: 1 addition & 0 deletions ext/data/schemas/dictionary-term-bank-v3-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "dictionaryTermBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"structuredContent": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "dictionaryTermMetaBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"frequency": {
Expand Down
1 change: 1 addition & 0 deletions ext/data/schemas/options-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "options",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": [
Expand Down
99 changes: 25 additions & 74 deletions ext/js/language/dictionary-importer.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

/* global
* JSZip
* JsonSchema
* MediaUtil
*/

Expand Down Expand Up @@ -51,8 +50,10 @@ class DictionaryImporter {

const index = JSON.parse(await indexFile.async('string'));

const indexSchema = await this._getSchema('/data/schemas/dictionary-index-schema.json');
this._validateJsonSchema(index, indexSchema, indexFileName);
const ajvSchemas = await import('/lib/validate-schemas.js');
if (!ajvSchemas.dictionaryIndex(index)) {
throw this._formatAjvSchemaError(ajvSchemas.dictionaryIndex, indexFileName);
}

const dictionaryTitle = index.title;
const version = index.format || index.version;
Expand All @@ -75,8 +76,7 @@ class DictionaryImporter {

// Load schemas
this._progressNextStep(0);
const dataBankSchemaPaths = this._getDataBankSchemaPaths(version);
const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path)));
const dataBankSchemas = this._getDataBankSchemas(version);

// Files
const termFiles = this._getArchiveFiles(archive, 'term_bank_?.json');
Expand All @@ -87,11 +87,11 @@ class DictionaryImporter {

// Load data
this._progressNextStep(termFiles.length + termMetaFiles.length + kanjiFiles.length + kanjiMetaFiles.length + tagFiles.length);
const termList = await this._readFileSequence(termFiles, convertTermBankEntry, dataBankSchemas[0], dictionaryTitle);
const termMetaList = await this._readFileSequence(termMetaFiles, convertTermMetaBankEntry, dataBankSchemas[1], dictionaryTitle);
const kanjiList = await this._readFileSequence(kanjiFiles, convertKanjiBankEntry, dataBankSchemas[2], dictionaryTitle);
const kanjiMetaList = await this._readFileSequence(kanjiMetaFiles, convertKanjiMetaBankEntry, dataBankSchemas[3], dictionaryTitle);
const tagList = await this._readFileSequence(tagFiles, convertTagBankEntry, dataBankSchemas[4], dictionaryTitle);
const termList = await this._readFileSequence(ajvSchemas, termFiles, convertTermBankEntry, dataBankSchemas[0], dictionaryTitle);
const termMetaList = await this._readFileSequence(ajvSchemas, termMetaFiles, convertTermMetaBankEntry, dataBankSchemas[1], dictionaryTitle);
const kanjiList = await this._readFileSequence(ajvSchemas, kanjiFiles, convertKanjiBankEntry, dataBankSchemas[2], dictionaryTitle);
const kanjiMetaList = await this._readFileSequence(ajvSchemas, kanjiMetaFiles, convertKanjiMetaBankEntry, dataBankSchemas[3], dictionaryTitle);
const tagList = await this._readFileSequence(ajvSchemas, tagFiles, convertTagBankEntry, dataBankSchemas[4], dictionaryTitle);
this._addOldIndexTags(index, tagList, dictionaryTitle);

// Prefix wildcard support
Expand Down Expand Up @@ -214,68 +214,27 @@ class DictionaryImporter {
return summary;
}

async _getSchema(fileName) {
const schema = await this._fetchJsonAsset(fileName);
return new JsonSchema(schema);
}

_validateJsonSchema(value, schema, fileName) {
try {
schema.validate(value);
} catch (e) {
throw this._formatSchemaError(e, fileName);
}
}

_formatSchemaError(e, fileName) {
const valuePathString = this._getSchemaErrorPathString(e.valueStack, 'dictionary');
const schemaPathString = this._getSchemaErrorPathString(e.schemaStack, 'schema');

const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`);
e2.data = e;
/**
 * Builds the error that is thrown when a dictionary file fails schema validation.
 * The message only names the offending file; the validation details are attached
 * to the returned error as its `data` property.
 * @param {*} schema Object whose `errors` property is copied onto the error. Callers
 *   visible in this change pass a validator function taken from `/lib/validate-schemas.js`;
 *   presumably ajv stores the failures of the most recent validation call on
 *   `errors` - TODO confirm against the ajv documentation.
 * @param {string} fileName Name of the dictionary file that failed validation.
 * @returns {Error} A new error with `data` set to `schema.errors`.
 */
_formatAjvSchemaError(schema, fileName) {
const e2 = new Error(`Dictionary has invalid data in '${fileName}'`);
// Keep the raw validator output available to whoever catches the error.
e2.data = schema.errors;

return e2;
}

_getSchemaErrorPathString(infoList, base='') {
let result = base;
for (const {path} of infoList) {
const pathArray = Array.isArray(path) ? path : [path];
for (const pathPart of pathArray) {
if (pathPart === null) {
result = base;
} else {
switch (typeof pathPart) {
case 'string':
if (result.length > 0) {
result += '.';
}
result += pathPart;
break;
case 'number':
result += `[${pathPart}]`;
break;
}
}
}
}
return result;
}

_getDataBankSchemaPaths(version) {
_getDataBankSchemas(version) {
const termBank = (
version === 1 ?
'/data/schemas/dictionary-term-bank-v1-schema.json' :
'/data/schemas/dictionary-term-bank-v3-schema.json'
'dictionaryTermBankV1' :
'dictionaryTermBankV3'
);
const termMetaBank = '/data/schemas/dictionary-term-meta-bank-v3-schema.json';
const termMetaBank = 'dictionaryTermMetaBankV3';
const kanjiBank = (
version === 1 ?
'/data/schemas/dictionary-kanji-bank-v1-schema.json' :
'/data/schemas/dictionary-kanji-bank-v3-schema.json'
'dictionaryKanjiBankV1' :
'dictionaryKanjiBankV3'
);
const kanjiMetaBank = '/data/schemas/dictionary-kanji-meta-bank-v3-schema.json';
const tagBank = '/data/schemas/dictionary-tag-bank-v3-schema.json';
const kanjiMetaBank = 'dictionaryKanjiMetaBankV3';
const tagBank = 'dictionaryTagBankV3';

return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank];
}
Expand Down Expand Up @@ -539,28 +498,20 @@ class DictionaryImporter {
return results;
}

async _readFileSequence(files, convertEntry, schema, dictionaryTitle) {
async _readFileSequence(ajvSchemas, files, convertEntry, schemaName, dictionaryTitle) {
const progressData = this._progressData;
let count = 0;
let startIndex = 0;
if (typeof this._onProgress === 'function') {
schema.progressInterval = 1000;
schema.progress = (s) => {
const index = s.getValueStackLength() > 1 ? s.getValueStackItem(1).path : 0;
progressData.index = startIndex + (index / count);
this._progress();
};
}

const results = [];
for (const file of files) {
const entries = JSON.parse(await file.async('string'));

count = Array.isArray(entries) ? Math.max(entries.length, 1) : 1;
startIndex = progressData.index;
this._progress();

this._validateJsonSchema(entries, schema, file.name);
if (!ajvSchemas[schemaName](entries)) {
throw this._formatAjvSchemaError(ajvSchemas[schemaName], file.name);
}

progressData.index = startIndex + 1;
this._progress();
Expand Down
16 changes: 16 additions & 0 deletions ext/lib/ucs2length.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/**
 * Measures a string in characters rather than UTF-16 code units: a high
 * surrogate immediately followed by a low surrogate is counted once.
 * Unpaired surrogates are each counted as one character.
 * @param {string} str The string to measure.
 * @returns {number} The number of characters in `str`.
 */
export default function ucs2length(str) {
    const unitCount = str.length;
    let index = 0;
    let characters = 0;
    while (index < unitCount) {
        const lead = str.charCodeAt(index);
        index += 1;
        characters += 1;

        // Only a high surrogate with a code unit after it can start a pair.
        const leadIsHighSurrogate = lead >= 0xd800 && lead <= 0xdbff;
        if (!leadIsHighSurrogate || index >= unitCount) {
            continue;
        }

        // Consume the following unit only when it is a low surrogate, so the
        // pair contributes a single character to the total.
        const trail = str.charCodeAt(index);
        if ((trail & 0xfc00) === 0xdc00) {
            index += 1;
        }
    }
    return characters;
}
33 changes: 32 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
},
"devDependencies": {
"@playwright/test": "^1.39.0",
"ajv": "^8.11.0",
"@types/node": "^20.8.10",
"ajv": "^8.12.0",
"browserify": "^17.0.0",
"css": "^3.0.0",
"eslint": "^8.52.0",
Expand Down
Loading