From 376151096431d4362e4baaacf0cef4a534e169f7 Mon Sep 17 00:00:00 2001 From: Darius Jahandarie Date: Fri, 3 Nov 2023 23:32:33 +0900 Subject: [PATCH] Replace JsonSchema with ajv for dictionary validation --- .eslintrc.json | 5 +- .gitignore | 1 + dev/build.js | 16 ++- .../schemas/custom-audio-list-schema.json | 1 + ext/data/schemas/dictionary-index-schema.json | 1 + .../dictionary-kanji-bank-v1-schema.json | 3 +- .../dictionary-kanji-bank-v3-schema.json | 3 +- .../dictionary-kanji-meta-bank-v3-schema.json | 1 + .../dictionary-tag-bank-v3-schema.json | 1 + .../dictionary-term-bank-v1-schema.json | 1 + .../dictionary-term-bank-v3-schema.json | 1 + .../dictionary-term-meta-bank-v3-schema.json | 1 + ext/data/schemas/options-schema.json | 1 + ext/js/language/dictionary-importer.js | 99 +++++-------------- ext/lib/ucs2length.js | 16 +++ package-lock.json | 33 ++++++- package.json | 3 +- 17 files changed, 106 insertions(+), 81 deletions(-) create mode 100644 ext/lib/ucs2length.js diff --git a/.eslintrc.json b/.eslintrc.json index 56bbcf0952..a7fb842b07 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -5,7 +5,7 @@ "plugin:jsonc/recommended-with-json" ], "parserOptions": { - "ecmaVersion": 9, + "ecmaVersion": 11, "sourceType": "script", "ecmaFeatures": { "globalReturn": false, @@ -401,7 +401,8 @@ "DynamicProperty": "readonly", "EventDispatcher": "readonly", "EventListenerCollection": "readonly", - "Logger": "readonly" + "Logger": "readonly", + "import": "readonly" } }, { diff --git a/.gitignore b/.gitignore index 405fead0aa..426db4ad30 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ dictionaries/ /playwright/.cache/ /test/playwright/__screenshots__/ ext/manifest.json +ext/lib/validate-schemas.js diff --git a/dev/build.js b/dev/build.js index 24b1e2d0f5..3bfb5418f0 100644 --- a/dev/build.js +++ b/dev/build.js @@ -24,7 +24,8 @@ const childProcess = require('child_process'); const util = require('./util'); const {getAllFiles, getArgs, testMain} = util; const {ManifestUtil} = require('./manifest-util'); - +const Ajv = require('ajv'); +const standaloneCode = require('ajv/dist/standalone').default; async function createZip(directory, excludeFiles, outputFileName, sevenZipExes, onUpdate, dryRun) { try { @@ -130,6 +131,19 @@ async function build(buildDir, extDir, manifestUtil, variantNames, manifestPath, process.stdout.write(message); }; + process.stdout.write('Building schema validators using ajv\n'); + const schemaDir = path.join(extDir, 'data/schemas/'); + const schemaFileNames = fs.readdirSync(schemaDir); + const schemas = schemaFileNames.map((schemaFileName) => JSON.parse(fs.readFileSync(path.join(schemaDir, schemaFileName)))); + const ajv = new Ajv({schemas: schemas, code: {source: true, esm: true}}); + const moduleCode = standaloneCode(ajv); + + // https://github.com/ajv-validator/ajv/issues/2209 + const patchedModuleCode = moduleCode.replaceAll('require("ajv/dist/runtime/ucs2length").default', 'import("/lib/ucs2length.js").default'); + + fs.writeFileSync(path.join(extDir, 'lib/validate-schemas.js'), patchedModuleCode); + + process.stdout.write(`Version: ${yomitanVersion}...\n`); for (const variantName of variantNames) { diff --git a/ext/data/schemas/custom-audio-list-schema.json b/ext/data/schemas/custom-audio-list-schema.json index 2cb3ca78b1..885ad08761 100644 --- a/ext/data/schemas/custom-audio-list-schema.json +++ b/ext/data/schemas/custom-audio-list-schema.json @@ -1,4 +1,5 @@ { + "$id": "customAudioList", "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "required": [ diff --git a/ext/data/schemas/dictionary-index-schema.json b/ext/data/schemas/dictionary-index-schema.json index a8ca0f2375..98b2714396 100644 --- a/ext/data/schemas/dictionary-index-schema.json +++ b/ext/data/schemas/dictionary-index-schema.json @@ -1,4 +1,5 @@ { + "$id": "dictionaryIndex", "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "description": "Index file containing information about the data contained in the dictionary.", diff --git a/ext/data/schemas/dictionary-kanji-bank-v1-schema.json b/ext/data/schemas/dictionary-kanji-bank-v1-schema.json index 5aca2d6ac7..d506a19d6a 100644 --- a/ext/data/schemas/dictionary-kanji-bank-v1-schema.json +++ b/ext/data/schemas/dictionary-kanji-bank-v1-schema.json @@ -1,4 +1,5 @@ { + "$id": "dictionaryKanjiBankV1", "$schema": "http://json-schema.org/draft-07/schema#", "type": "array", "description": "Data file containing kanji information.", @@ -30,4 +31,4 @@ "description": "A meaning for the kanji character." } } -} \ No newline at end of file +} diff --git a/ext/data/schemas/dictionary-kanji-bank-v3-schema.json b/ext/data/schemas/dictionary-kanji-bank-v3-schema.json index ee5082946b..763ce3b142 100644 --- a/ext/data/schemas/dictionary-kanji-bank-v3-schema.json +++ b/ext/data/schemas/dictionary-kanji-bank-v3-schema.json @@ -1,4 +1,5 @@ { + "$id": "dictionaryKanjiBankV3", "$schema": "http://json-schema.org/draft-07/schema#", "type": "array", "description": "Data file containing kanji information.", @@ -42,4 +43,4 @@ } ] } -} \ No newline at end of file +} diff --git a/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json b/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json index e478de93ff..d8f5031bd6 100644 --- a/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json +++ b/ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json @@ -1,4 +1,5 @@ { + "$id": "dictionaryKanjiMetaBankV3", "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { "frequency": { diff --git a/ext/data/schemas/dictionary-tag-bank-v3-schema.json b/ext/data/schemas/dictionary-tag-bank-v3-schema.json index f77211190b..ab6e3377ee 100644 --- a/ext/data/schemas/dictionary-tag-bank-v3-schema.json +++ b/ext/data/schemas/dictionary-tag-bank-v3-schema.json @@ -1,4 +1,5 @@ { + "$id": "dictionaryTagBankV3", "$schema": "http://json-schema.org/draft-07/schema#", "type": "array", "description": "Data file containing tag information for terms and kanji.", diff --git a/ext/data/schemas/dictionary-term-bank-v1-schema.json b/ext/data/schemas/dictionary-term-bank-v1-schema.json index 9366e9ff61..ab4c49f613 100644 --- a/ext/data/schemas/dictionary-term-bank-v1-schema.json +++ b/ext/data/schemas/dictionary-term-bank-v1-schema.json @@ -1,4 +1,5 @@ { + "$id": "dictionaryTermBankV1", "$schema": "http://json-schema.org/draft-07/schema#", "type": "array", "description": "Data file containing term information.", diff --git a/ext/data/schemas/dictionary-term-bank-v3-schema.json b/ext/data/schemas/dictionary-term-bank-v3-schema.json index 335144c7b4..7d0b4868cf 100644 --- a/ext/data/schemas/dictionary-term-bank-v3-schema.json +++ b/ext/data/schemas/dictionary-term-bank-v3-schema.json @@ -1,4 +1,5 @@ { + "$id": "dictionaryTermBankV3", "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { "structuredContent": { diff --git a/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json b/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json index eb4d3feda9..86e4af9331 100644 --- a/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json +++ b/ext/data/schemas/dictionary-term-meta-bank-v3-schema.json @@ -1,4 +1,5 @@ { + "$id": "dictionaryTermMetaBankV3", "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { "frequency": { diff --git a/ext/data/schemas/options-schema.json b/ext/data/schemas/options-schema.json index 601f5d06ec..8ccbfa94e1 100644 --- a/ext/data/schemas/options-schema.json +++ b/ext/data/schemas/options-schema.json @@ -1,4 +1,5 @@ { + "$id": "options", "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", "required": [ diff --git a/ext/js/language/dictionary-importer.js b/ext/js/language/dictionary-importer.js index 718d9f1cf3..0cf3d5f5bd 100644 --- a/ext/js/language/dictionary-importer.js +++ b/ext/js/language/dictionary-importer.js @@ -18,7 +18,6 @@ /* global * JSZip - * JsonSchema * MediaUtil */ @@ -51,8 +50,10 @@ class DictionaryImporter { const index = JSON.parse(await indexFile.async('string')); - const indexSchema = await this._getSchema('/data/schemas/dictionary-index-schema.json'); - this._validateJsonSchema(index, indexSchema, indexFileName); + const ajvSchemas = await import('/lib/validate-schemas.js'); + if (!ajvSchemas.dictionaryIndex(index)) { + throw this._formatAjvSchemaError(ajvSchemas.dictionaryIndex, indexFileName); + } const dictionaryTitle = index.title; const version = index.format || index.version; @@ -75,8 +76,7 @@ class DictionaryImporter { // Load schemas this._progressNextStep(0); - const dataBankSchemaPaths = this._getDataBankSchemaPaths(version); - const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path))); + const dataBankSchemas = this._getDataBankSchemas(version); // Files const termFiles = this._getArchiveFiles(archive, 'term_bank_?.json'); @@ -87,11 +87,11 @@ class DictionaryImporter { // Load data this._progressNextStep(termFiles.length + termMetaFiles.length + kanjiFiles.length + kanjiMetaFiles.length + tagFiles.length); - const termList = await this._readFileSequence(termFiles, convertTermBankEntry, dataBankSchemas[0], dictionaryTitle); - const termMetaList = await this._readFileSequence(termMetaFiles, convertTermMetaBankEntry, dataBankSchemas[1], dictionaryTitle); - const kanjiList = await this._readFileSequence(kanjiFiles, convertKanjiBankEntry, dataBankSchemas[2], dictionaryTitle); - const kanjiMetaList = await this._readFileSequence(kanjiMetaFiles, convertKanjiMetaBankEntry, dataBankSchemas[3], dictionaryTitle); - const tagList = await this._readFileSequence(tagFiles, convertTagBankEntry, dataBankSchemas[4], dictionaryTitle); + const termList = await this._readFileSequence(ajvSchemas, termFiles, convertTermBankEntry, dataBankSchemas[0], dictionaryTitle); + const termMetaList = await this._readFileSequence(ajvSchemas, termMetaFiles, convertTermMetaBankEntry, dataBankSchemas[1], dictionaryTitle); + const kanjiList = await this._readFileSequence(ajvSchemas, kanjiFiles, convertKanjiBankEntry, dataBankSchemas[2], dictionaryTitle); + const kanjiMetaList = await this._readFileSequence(ajvSchemas, kanjiMetaFiles, convertKanjiMetaBankEntry, dataBankSchemas[3], dictionaryTitle); + const tagList = await this._readFileSequence(ajvSchemas, tagFiles, convertTagBankEntry, dataBankSchemas[4], dictionaryTitle); this._addOldIndexTags(index, tagList, dictionaryTitle); // Prefix wildcard support @@ -214,68 +214,27 @@ class DictionaryImporter { return summary; } - async _getSchema(fileName) { - const schema = await this._fetchJsonAsset(fileName); - return new JsonSchema(schema); - } - - _validateJsonSchema(value, schema, fileName) { - try { - schema.validate(value); - } catch (e) { - throw this._formatSchemaError(e, fileName); - } - } - - _formatSchemaError(e, fileName) { - const valuePathString = this._getSchemaErrorPathString(e.valueStack, 'dictionary'); - const schemaPathString = this._getSchemaErrorPathString(e.schemaStack, 'schema'); - - const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`); - e2.data = e; + _formatAjvSchemaError(schema, fileName) { + const e2 = new Error(`Dictionary has invalid data in '${fileName}'`); + e2.data = schema.errors; return e2; } - _getSchemaErrorPathString(infoList, base='') { - let result = base; - for (const {path} of infoList) { - const pathArray = Array.isArray(path) ? path : [path]; - for (const pathPart of pathArray) { - if (pathPart === null) { - result = base; - } else { - switch (typeof pathPart) { - case 'string': - if (result.length > 0) { - result += '.'; - } - result += pathPart; - break; - case 'number': - result += `[${pathPart}]`; - break; - } - } - } - } - return result; - } - - _getDataBankSchemaPaths(version) { + _getDataBankSchemas(version) { const termBank = ( version === 1 ? - '/data/schemas/dictionary-term-bank-v1-schema.json' : - '/data/schemas/dictionary-term-bank-v3-schema.json' + 'dictionaryTermBankV1' : + 'dictionaryTermBankV3' ); - const termMetaBank = '/data/schemas/dictionary-term-meta-bank-v3-schema.json'; + const termMetaBank = 'dictionaryTermMetaBankV3'; const kanjiBank = ( version === 1 ? - '/data/schemas/dictionary-kanji-bank-v1-schema.json' : - '/data/schemas/dictionary-kanji-bank-v3-schema.json' + 'dictionaryKanjiBankV1' : + 'dictionaryKanjiBankV3' ); - const kanjiMetaBank = '/data/schemas/dictionary-kanji-meta-bank-v3-schema.json'; - const tagBank = '/data/schemas/dictionary-tag-bank-v3-schema.json'; + const kanjiMetaBank = 'dictionaryKanjiMetaBankV3'; + const tagBank = 'dictionaryTagBankV3'; return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank]; } @@ -539,28 +498,20 @@ class DictionaryImporter { return results; } - async _readFileSequence(files, convertEntry, schema, dictionaryTitle) { + async _readFileSequence(ajvSchemas, files, convertEntry, schemaName, dictionaryTitle) { const progressData = this._progressData; - let count = 0; let startIndex = 0; - if (typeof this._onProgress === 'function') { - schema.progressInterval = 1000; - schema.progress = (s) => { - const index = s.getValueStackLength() > 1 ? s.getValueStackItem(1).path : 0; - progressData.index = startIndex + (index / count); - this._progress(); - }; - } const results = []; for (const file of files) { const entries = JSON.parse(await file.async('string')); - count = Array.isArray(entries) ? Math.max(entries.length, 1) : 1; startIndex = progressData.index; this._progress(); - this._validateJsonSchema(entries, schema, file.name); + if (!ajvSchemas[schemaName](entries)) { + throw this._formatAjvSchemaError(ajvSchemas[schemaName], file.name); + } progressData.index = startIndex + 1; this._progress(); diff --git a/ext/lib/ucs2length.js b/ext/lib/ucs2length.js new file mode 100644 index 0000000000..120a64d453 --- /dev/null +++ b/ext/lib/ucs2length.js @@ -0,0 +1,16 @@ +export default function ucs2length(str) { + const len = str.length; + let length = 0; + let pos = 0; + let value; + while (pos < len) { + length++; + value = str.charCodeAt(pos++); + if (value >= 0xd800 && value <= 0xdbff && pos < len) { + // high surrogate, and there is a next character + value = str.charCodeAt(pos); + if ((value & 0xfc00) === 0xdc00) pos++; // low surrogate + } + } + return length; +} diff --git a/package-lock.json b/package-lock.json index 24e49c867b..6f6581b791 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,7 +11,8 @@ "license": "GPL-3.0-or-later", "devDependencies": { "@playwright/test": "^1.39.0", - "ajv": "^8.11.0", + "@types/node": "^20.8.10", + "ajv": "^8.12.0", "browserify": "^17.0.0", "css": "^3.0.0", "eslint": "^8.52.0", @@ -523,6 +524,15 @@ "integrity": "sha512-jhuKLIRrhvCPLqwPcx6INqmKeiA5EWrsCOPhrlFSrbrmU4ZMPjj5Ul/oLCMDO98XRUIwVm78xICz4EPCektzeQ==", "dev": true }, + "node_modules/@types/node": { + "version": "20.8.10", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.8.10.tgz", + "integrity": "sha512-TlgT8JntpcbmKUFzjhsyhGfP2fsiz1Mv56im6enJ905xG1DAYesxJaeSbGqQmAw8OWPdhyJGhGSQGKRNJ45u9w==", + "dev": true, + "dependencies": { + "undici-types": "~5.26.4" + } + }, "node_modules/@types/normalize-package-data": { "version": "2.4.1", "resolved": "https://registry.npmjs.org/@types/normalize-package-data/-/normalize-package-data-2.4.1.tgz", @@ -5246,6 +5256,12 @@ "undeclared-identifiers": "bin.js" } }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "dev": true + }, "node_modules/universalify": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.2.0.tgz", @@ -5985,6 +6001,15 @@ "integrity": "sha512-jhuKLIRrhvCPLqwPcx6INqmKeiA5EWrsCOPhrlFSrbrmU4ZMPjj5Ul/oLCMDO98XRUIwVm78xICz4EPCektzeQ==", "dev": true }, + "@types/node": { + "version": "20.8.10", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.8.10.tgz", + "integrity": "sha512-TlgT8JntpcbmKUFzjhsyhGfP2fsiz1Mv56im6enJ905xG1DAYesxJaeSbGqQmAw8OWPdhyJGhGSQGKRNJ45u9w==", + "dev": true, + "requires": { + "undici-types": "~5.26.4" + } + }, "@types/normalize-package-data": { "version": "2.4.1", "resolved": "https://registry.npmjs.org/@types/normalize-package-data/-/normalize-package-data-2.4.1.tgz", @@ -9581,6 +9606,12 @@ "xtend": "^4.0.1" } }, + "undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "dev": true + }, "universalify": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.2.0.tgz", diff --git a/package.json b/package.json index 22f5bd5646..ee95d38834 100644 --- a/package.json +++ b/package.json @@ -36,7 +36,8 @@ }, "devDependencies": { "@playwright/test": "^1.39.0", - "ajv": "^8.11.0", + "@types/node": "^20.8.10", + "ajv": "^8.12.0", "browserify": "^17.0.0", "css": "^3.0.0", "eslint": "^8.52.0",