From 502f71c89d101788f2b2618b10e6084847784478 Mon Sep 17 00:00:00 2001 From: Cashew <52880648+cashewnuttynuts@users.noreply.github.com> Date: Fri, 12 Jul 2024 22:10:37 +0700 Subject: [PATCH] Sort dictionary entries by number of text processors applied (#1200) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * sort by text processing chain length * write dictionary data * fix wrong variantsMap initialization * write dictionary data * fix logic bug * fix logic * move textprocessing comparison up * add textProcessorRuleChainCandidates to TermDictionaryEntry * remove comment * Update ext/js/language/translator.js Co-authored-by: Stefan Vuković Signed-off-by: Cashew <52880648+cashewnuttynuts@users.noreply.github.com> * remove unused variable * add text replacements to TextProcessorRuleChain * write dictionary data --------- Signed-off-by: Cashew <52880648+cashewnuttynuts@users.noreply.github.com> Co-authored-by: Stefan Vuković --- ext/js/language/translator.js | 125 ++++++-- test/data/translator-test-results.json | 424 +++++++++++++++++++++++++ types/ext/dictionary.d.ts | 6 + types/ext/translation-internal.d.ts | 6 + 4 files changed, 536 insertions(+), 25 deletions(-) diff --git a/ext/js/language/translator.js b/ext/js/language/translator.js index b99374089f..ef9c3b541b 100644 --- a/ext/js/language/translator.js +++ b/ext/js/language/translator.js @@ -242,7 +242,7 @@ export class Translator { /** @type {import('translation-internal').TermDictionaryEntry[]} */ const dictionaryEntries = []; const ids = new Set(); - for (const {databaseEntries, originalText, transformedText, deinflectedText, inflectionRuleChainCandidates} of deinflections) { + for (const {databaseEntries, originalText, transformedText, deinflectedText, textProcessorRuleChainCandidates, inflectionRuleChainCandidates} of deinflections) { if (databaseEntries.length === 0) { continue; } originalTextLength = Math.max(originalTextLength, originalText.length); for (const 
databaseEntry of databaseEntries) { @@ -259,12 +259,13 @@ export class Translator { continue; } if (transformedText.length > existingTransformedLength) { - dictionaryEntries.splice(existingIndex, 1, this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, originalText, transformedText, deinflectedText, inflectionRuleChainCandidates, true, enabledDictionaryMap, tagAggregator)); + dictionaryEntries.splice(existingIndex, 1, this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, originalText, transformedText, deinflectedText, textProcessorRuleChainCandidates, inflectionRuleChainCandidates, true, enabledDictionaryMap, tagAggregator)); } else { this._mergeInflectionRuleChains(existingEntry, inflectionRuleChainCandidates); + this._mergeTextProcessorRuleChains(existingEntry, textProcessorRuleChainCandidates); } } else { - const dictionaryEntry = this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, originalText, transformedText, deinflectedText, inflectionRuleChainCandidates, true, enabledDictionaryMap, tagAggregator); + const dictionaryEntry = this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, originalText, transformedText, deinflectedText, textProcessorRuleChainCandidates, inflectionRuleChainCandidates, true, enabledDictionaryMap, tagAggregator); dictionaryEntries.push(dictionaryEntry); ids.add(id); } @@ -291,6 +292,23 @@ export class Translator { return null; } + /** + * @param {import('translation-internal').TermDictionaryEntry} existingEntry + * @param {import('translation-internal').TextProcessorRuleChainCandidate[]} textProcessorRuleChainCandidates + */ + _mergeTextProcessorRuleChains(existingEntry, textProcessorRuleChainCandidates) { + const existingChains = existingEntry.textProcessorRuleChainCandidates; + + for (const textProcessorRules of textProcessorRuleChainCandidates) { + const duplicate = existingChains.find((existingChain) => { + return this._areArraysEqualIgnoreOrder(existingChain, textProcessorRules); + }); + if 
(!duplicate) { + existingEntry.textProcessorRuleChainCandidates.push(textProcessorRules); + } + } + } + /** * @param {import('translation-internal').TermDictionaryEntry} existingEntry * @param {import('translation-internal').InflectionRuleChainCandidate[]} inflectionRuleChainCandidates @@ -348,7 +366,7 @@ export class Translator { let deinflections = ( options.deinflect ? this._getAlgorithmDeinflections(text, options) : - [this._createDeinflection(text, text, text, 0, [])] + [this._createDeinflection(text, text, text, 0, [], [])] ); if (deinflections.length === 0) { return []; } @@ -381,7 +399,7 @@ export class Translator { /** @type {import('translation-internal').DatabaseDeinflection[]} */ const dictionaryDeinflections = []; for (const deinflection of deinflections) { - const {originalText, transformedText, inflectionRuleChainCandidates: algorithmChains, databaseEntries} = deinflection; + const {originalText, transformedText, textProcessorRuleChainCandidates, inflectionRuleChainCandidates: algorithmChains, databaseEntries} = deinflection; for (const entry of databaseEntries) { const {dictionary, definitions} = entry; const entryDictionary = enabledDictionaryMap.get(dictionary); @@ -399,7 +417,7 @@ export class Translator { }; }); - const dictionaryDeinflection = this._createDeinflection(originalText, transformedText, formOf, 0, inflectionRuleChainCandidates); + const dictionaryDeinflection = this._createDeinflection(originalText, transformedText, formOf, 0, textProcessorRuleChainCandidates, inflectionRuleChainCandidates); dictionaryDeinflections.push(dictionaryDeinflection); } } @@ -491,17 +509,24 @@ export class Translator { ) { const preprocessedTextVariants = this._getTextVariants(rawSource, textPreprocessors, this._getTextReplacementsVariants(options), sourceCache); - for (const source of preprocessedTextVariants) { + for (const [source, preprocessorRuleChainCandidates] of preprocessedTextVariants) { for (const deinflection of 
this._multiLanguageTransformer.transform(language, source)) { const {trace, conditions} = deinflection; const postprocessedTextVariants = this._getTextVariants(deinflection.text, textPostprocessors, [null], sourceCache); - for (const transformedText of postprocessedTextVariants) { + for (const [transformedText, postprocessorRuleChainCandidates] of postprocessedTextVariants) { /** @type {import('translation-internal').InflectionRuleChainCandidate} */ const inflectionRuleChainCandidate = { source: 'algorithm', inflectionRules: trace.map((frame) => frame.transform), }; - deinflections.push(this._createDeinflection(rawSource, source, transformedText, conditions, [inflectionRuleChainCandidate])); + + // Every combination of preprocessor rule candidates and postprocessor rule candidates + const textProcessorRuleChainCandidates = preprocessorRuleChainCandidates.flatMap( + (preprocessorRuleChainCandidate) => postprocessorRuleChainCandidates.map( + (postprocessorRuleChainCandidate) => [...preprocessorRuleChainCandidate, ...postprocessorRuleChainCandidate], + ), + ); + deinflections.push(this._createDeinflection(rawSource, source, transformedText, conditions, textProcessorRuleChainCandidates, [inflectionRuleChainCandidate])); } } } @@ -514,26 +539,43 @@ export class Translator { * @param {import('language').TextProcessorWithId[]} textProcessors * @param {(import('translation').FindTermsTextReplacement[] | null)[]} textReplacements * @param {import('translation-internal').TextCache} textCache - * @returns {Set} + * @returns {import('translation-internal').VariantAndTextProcessorRuleChainCandidatesMap} */ _getTextVariants(text, textProcessors, textReplacements, textCache) { - let variants = new Set([text]); - for (const textReplacement of textReplacements) { + /** @type {import('translation-internal').VariantAndTextProcessorRuleChainCandidatesMap} */ + let variantsMap = new Map([ + [text, [[]]], + ]); + + for (const [id, textReplacement] of textReplacements.entries()) { if 
(textReplacement === null) { continue; } - variants.add(this._applyTextReplacements(text, textReplacement)); + variantsMap.set(this._applyTextReplacements(text, textReplacement), [['Text Replacement' + ' ' + id]]); } for (const {id, textProcessor: {process, options}} of textProcessors) { - /** @type {Set} */ - const newVariants = new Set(); - for (const variant of variants) { + /** @type {import('translation-internal').VariantAndTextProcessorRuleChainCandidatesMap} */ + const newVariantsMap = new Map(); + for (const [variant, currentPreprocessorRuleChainCandidates] of variantsMap) { for (const option of options) { const processed = this._getProcessedText(textCache, variant, id, option, process); - newVariants.add(processed); + const existingCandidates = newVariantsMap.get(processed); + + // Ignore if applying the textProcessor doesn't change the source + if (processed === variant) { + if (typeof existingCandidates === 'undefined') { + newVariantsMap.set(processed, currentPreprocessorRuleChainCandidates); + } else { + newVariantsMap.set(processed, existingCandidates); + } + } else if (typeof existingCandidates === 'undefined') { + newVariantsMap.set(processed, currentPreprocessorRuleChainCandidates.map((candidate) => [...candidate, id])); + } else { + newVariantsMap.set(processed, [...existingCandidates, ...currentPreprocessorRuleChainCandidates.map((candidate) => [...candidate, id])]); + } } } - variants = newVariants; + variantsMap = newVariantsMap; } - return variants; + return variantsMap; } /** @@ -618,11 +660,12 @@ export class Translator { * @param {string} transformedText * @param {string} deinflectedText * @param {number} conditions + * @param {import('translation-internal').TextProcessorRuleChainCandidate[]} textProcessorRuleChainCandidates * @param {import('translation-internal').InflectionRuleChainCandidate[]} inflectionRuleChainCandidates * @returns {import('translation-internal').DatabaseDeinflection} */ - _createDeinflection(originalText, 
transformedText, deinflectedText, conditions, inflectionRuleChainCandidates) { - return {originalText, transformedText, deinflectedText, conditions, inflectionRuleChainCandidates, databaseEntries: []}; + _createDeinflection(originalText, transformedText, deinflectedText, conditions, textProcessorRuleChainCandidates, inflectionRuleChainCandidates) { + return {originalText, transformedText, deinflectedText, conditions, textProcessorRuleChainCandidates, inflectionRuleChainCandidates, databaseEntries: []}; } // Term dictionary entry grouping @@ -697,7 +740,7 @@ export class Translator { if (ids.has(id)) { continue; } const {term} = databaseEntry; - const dictionaryEntry = this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, term, term, term, [], false, enabledDictionaryMap, tagAggregator); + const dictionaryEntry = this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, term, term, term, [], [], false, enabledDictionaryMap, tagAggregator); dictionaryEntries.push(dictionaryEntry); ids.add(id); ungroupedDictionaryEntriesMap.delete(id); @@ -770,7 +813,7 @@ export class Translator { for (const {ids, dictionaryEntries} of target.groups) { if (ids.has(id)) { continue; } - const dictionaryEntry = this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, sourceText, sourceText, sourceText, [], false, enabledDictionaryMap, tagAggregator); + const dictionaryEntry = this._createTermDictionaryEntryFromDatabaseEntry(databaseEntry, sourceText, sourceText, sourceText, [], [], false, enabledDictionaryMap, tagAggregator); dictionaryEntries.push(dictionaryEntry); ids.add(id); ungroupedDictionaryEntriesMap.delete(id); @@ -1592,6 +1635,7 @@ export class Translator { /** * @param {boolean} isPrimary + * @param {import('translation-internal').TextProcessorRuleChainCandidate[]} textProcessorRuleChainCandidates * @param {import('translation-internal').InflectionRuleChainCandidate[]} inflectionRuleChainCandidates * @param {number} score * @param {number} 
dictionaryIndex @@ -1602,10 +1646,11 @@ export class Translator { * @param {import('dictionary').TermDefinition[]} definitions * @returns {import('translation-internal').TermDictionaryEntry} */ - _createTermDictionaryEntry(isPrimary, inflectionRuleChainCandidates, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount, maxOriginalTextLength, headwords, definitions) { + _createTermDictionaryEntry(isPrimary, textProcessorRuleChainCandidates, inflectionRuleChainCandidates, score, dictionaryIndex, dictionaryPriority, sourceTermExactMatchCount, maxOriginalTextLength, headwords, definitions) { return { type: 'term', isPrimary, + textProcessorRuleChainCandidates, inflectionRuleChainCandidates, score, frequencyOrder: 0, @@ -1625,13 +1670,14 @@ export class Translator { * @param {string} originalText * @param {string} transformedText * @param {string} deinflectedText + * @param {import('translation-internal').TextProcessorRuleChainCandidate[]} textProcessorRuleChainCandidates * @param {import('translation-internal').InflectionRuleChainCandidate[]} inflectionRuleChainCandidates * @param {boolean} isPrimary * @param {Map} enabledDictionaryMap * @param {TranslatorTagAggregator} tagAggregator * @returns {import('translation-internal').TermDictionaryEntry} */ - _createTermDictionaryEntryFromDatabaseEntry(databaseEntry, originalText, transformedText, deinflectedText, inflectionRuleChainCandidates, isPrimary, enabledDictionaryMap, tagAggregator) { + _createTermDictionaryEntryFromDatabaseEntry(databaseEntry, originalText, transformedText, deinflectedText, textProcessorRuleChainCandidates, inflectionRuleChainCandidates, isPrimary, enabledDictionaryMap, tagAggregator) { const { matchType, matchSource, @@ -1665,6 +1711,7 @@ export class Translator { return this._createTermDictionaryEntry( isPrimary, + textProcessorRuleChainCandidates, inflectionRuleChainCandidates, score, dictionaryIndex, @@ -1708,20 +1755,30 @@ export class Translator { const definitions = []; /** 
@type {?Map} */ const definitionsMap = checkDuplicateDefinitions ? new Map() : null; + let inflections = null; + let textProcesses = null; for (const {dictionaryEntry, headwordIndexMap} of definitionEntries) { score = Math.max(score, dictionaryEntry.score); dictionaryIndex = Math.min(dictionaryIndex, dictionaryEntry.dictionaryIndex); dictionaryPriority = Math.max(dictionaryPriority, dictionaryEntry.dictionaryPriority); + if (dictionaryEntry.isPrimary) { isPrimary = true; maxOriginalTextLength = Math.max(maxOriginalTextLength, dictionaryEntry.maxOriginalTextLength); + const dictionaryEntryInflections = dictionaryEntry.inflectionRuleChainCandidates; + const dictionaryEntryTextProcesses = dictionaryEntry.textProcessorRuleChainCandidates; + if (inflections === null || dictionaryEntryInflections.length < inflections.length) { inflections = dictionaryEntryInflections; } + if (textProcesses === null || dictionaryEntryTextProcesses.length < textProcesses.length) { + textProcesses = dictionaryEntryTextProcesses; + } } + if (definitionsMap !== null) { this._addTermDefinitions(definitions, definitionsMap, dictionaryEntry.definitions, headwordIndexMap, tagAggregator); } else { @@ -1743,6 +1800,7 @@ export class Translator { return this._createTermDictionaryEntry( isPrimary, + textProcesses !== null ? textProcesses : [], inflections !== null ? 
inflections : [], score, dictionaryIndex, @@ -1931,6 +1989,10 @@ export class Translator { let i = v2.maxOriginalTextLength - v1.maxOriginalTextLength; if (i !== 0) { return i; } + // Sort by length of the shortest text processing chain + i = this._getShortestTextProcessingChainLength(v1.textProcessorRuleChainCandidates) - this._getShortestTextProcessingChainLength(v2.textProcessorRuleChainCandidates); + if (i !== 0) { return i; } + // Sort by length of the shortest inflection chain i = this._getShortestInflectionChainLength(v1.inflectionRuleChainCandidates) - this._getShortestInflectionChainLength(v2.inflectionRuleChainCandidates); if (i !== 0) { return i; } @@ -2130,6 +2192,19 @@ export class Translator { } } + /** + * @param {import('translation-internal').TextProcessorRuleChainCandidate[]} inflectionRuleChainCandidates + * @returns {number} + */ + _getShortestTextProcessingChainLength(inflectionRuleChainCandidates) { + if (inflectionRuleChainCandidates.length === 0) { return 0; } + let length = Number.MAX_SAFE_INTEGER; + for (const candidate of inflectionRuleChainCandidates) { + length = Math.min(length, candidate.length); + } + return length; + } + /** * @param {import('translation-internal').InflectionRuleChainCandidate[]} inflectionRuleChainCandidates * @returns {number} diff --git a/test/data/translator-test-results.json b/test/data/translator-test-results.json index d93a8a66c8..68a74bb8f4 100644 --- a/test/data/translator-test-results.json +++ b/test/data/translator-test-results.json @@ -291,6 +291,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -459,6 +462,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -646,6 +652,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { 
"source": "algorithm", @@ -816,6 +825,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -986,6 +998,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -1156,6 +1171,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -1326,6 +1344,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -1494,6 +1515,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -1681,6 +1705,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -1875,6 +1902,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -2069,6 +2099,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -2263,6 +2296,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -2457,6 +2493,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -2632,6 +2671,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -2807,6 +2849,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], 
"inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -2982,6 +3027,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -3157,6 +3205,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -3325,6 +3376,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -3512,6 +3566,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -3630,6 +3687,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -3804,6 +3864,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -3985,6 +4048,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "convertHiraganaToKatakana" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -4159,6 +4227,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -4329,6 +4400,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -4505,6 +4579,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -4675,6 +4752,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -4851,6 +4931,9 @@ { "type": "term", 
"isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -5045,6 +5128,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -5239,6 +5325,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -5414,6 +5503,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -5595,6 +5687,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -5789,6 +5884,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -5983,6 +6081,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -6158,6 +6259,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -6339,6 +6443,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -6467,6 +6574,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -6529,6 +6639,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -6591,6 +6704,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -6653,6 
+6769,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -6715,6 +6834,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -6782,6 +6904,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -6849,6 +6974,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -6916,6 +7044,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -6983,6 +7114,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -7045,6 +7179,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -7113,6 +7250,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -7355,6 +7495,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -7597,6 +7740,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -7820,6 +7966,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -8043,6 +8192,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { 
"source": "algorithm", @@ -8211,6 +8363,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -8398,6 +8553,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -8858,6 +9016,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -9276,6 +9437,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -9444,6 +9608,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -9631,6 +9798,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -9846,6 +10016,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -10061,6 +10234,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -10276,6 +10452,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -10491,6 +10670,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -10666,6 +10848,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -10841,6 +11026,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + 
], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -11016,6 +11204,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -11191,6 +11382,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -11359,6 +11553,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -11546,6 +11743,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -11740,6 +11942,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -11934,6 +12141,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -12128,6 +12340,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -12322,6 +12539,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -12497,6 +12719,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -12672,6 +12899,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -12847,6 +13079,11 @@ { "type": 
"term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -13022,6 +13259,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -13190,6 +13432,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -13377,6 +13624,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -13571,6 +13823,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -13765,6 +14022,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -13959,6 +14221,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -14153,6 +14420,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -14328,6 +14600,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -14503,6 +14780,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -14678,6 +14960,11 @@ { "type": "term", 
"isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -14853,6 +15140,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -15021,6 +15313,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -15208,6 +15505,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -15321,6 +15623,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -15429,6 +15736,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "Text Replacement 1" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -15546,6 +15858,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -16006,6 +16321,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -16430,6 +16748,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -16584,6 +16905,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -16684,6 +17008,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", 
@@ -16784,6 +17111,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -16884,6 +17214,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -16984,6 +17317,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -17101,6 +17437,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -17217,6 +17556,9 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -17322,6 +17664,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "alphanumericWidthVariants" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -17387,6 +17734,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "alphanumericWidthVariants" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -17468,6 +17820,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "alphanumericWidthVariants" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -17549,6 +17906,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "alphabeticToHiragana" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -17719,6 +18081,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "alphabeticToHiragana" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -17895,6 +18262,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + 
"convertHiraganaToKatakana" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -18065,6 +18437,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "convertHiraganaToKatakana" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -18241,6 +18618,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "convertHiraganaToKatakana" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -18350,6 +18732,12 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "convertHalfWidthCharacters", + "convertHiraganaToKatakana" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -18520,6 +18908,12 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "convertHalfWidthCharacters", + "convertHiraganaToKatakana" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -18696,6 +19090,15 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "collapseEmphaticSequences" + ], + [ + "convertHiraganaToKatakana", + "collapseEmphaticSequences" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -18788,6 +19191,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "capitalizeFirstLetter" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -18869,6 +19277,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "decapitalize" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -18950,6 +19363,12 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "disassembleHangul", + "reassembleHangul" + ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", @@ -19033,6 +19452,11 @@ { "type": "term", "isPrimary": true, + "textProcessorRuleChainCandidates": [ + [ + "capitalizeFirstLetter" 
+ ] + ], "inflectionRuleChainCandidates": [ { "source": "algorithm", diff --git a/types/ext/dictionary.d.ts b/types/ext/dictionary.d.ts index 060e6907b5..34a22cc215 100644 --- a/types/ext/dictionary.d.ts +++ b/types/ext/dictionary.d.ts @@ -207,6 +207,10 @@ export type TermDictionaryEntry = { * original search text, while non-primary sources originate from related terms. */ isPrimary: boolean; + /** + * Ways that a looked-up word might be transformed into this term. + */ + textProcessorRuleChainCandidates: textProcessorRuleChainCandidate[]; /** * Ways that a looked-up word might be an inflected form of this term. */ @@ -258,6 +262,8 @@ export type InflectionRuleChainCandidate = { inflectionRules: InflectionRuleChain; }; +type textProcessorRuleChainCandidate = string[]; + export type InflectionRuleChain = InflectionRule[]; export type InflectionRule = { diff --git a/types/ext/translation-internal.d.ts b/types/ext/translation-internal.d.ts index 05821a6426..6cfa97d24e 100644 --- a/types/ext/translation-internal.d.ts +++ b/types/ext/translation-internal.d.ts @@ -40,8 +40,13 @@ export type TextDeinflectionOptionsArrays = [ emphatic: [collapseEmphatic: boolean, collapseEmphaticFull: boolean][], ]; +export type TextProcessorRuleChainCandidate = string[]; + +export type VariantAndTextProcessorRuleChainCandidatesMap = Map<string, TextProcessorRuleChainCandidate[]>; + export type TermDictionaryEntry = Omit<Dictionary.TermDictionaryEntry, 'inflectionRuleChainCandidates'> & { inflectionRuleChainCandidates: InflectionRuleChainCandidate[]; + textProcessorRuleChainCandidates: TextProcessorRuleChainCandidate[]; }; export type InflectionRuleChainCandidate = { @@ -54,6 +59,7 @@ export type DatabaseDeinflection = { transformedText: string; deinflectedText: string; conditions: number; + textProcessorRuleChainCandidates: TextProcessorRuleChainCandidate[]; inflectionRuleChainCandidates: InflectionRuleChainCandidate[]; databaseEntries: DictionaryDatabase.TermEntry[]; };