From 0c43e10320acada4c62cc96c912f294eff565fd2 Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Sat, 20 Jan 2024 17:05:47 +0100 Subject: [PATCH 1/2] fix deinflection bug --- ext/data/deinflect.json | 48 +++++++++++++++++----------------- ext/js/language/deinflector.js | 16 +++++++----- test/deinflector.test.js | 2 +- types/ext/deinflector.d.ts | 2 +- 4 files changed, 35 insertions(+), 33 deletions(-) diff --git a/ext/data/deinflect.json b/ext/data/deinflect.json index 49ad4b72f4..884dbcc12c 100644 --- a/ext/data/deinflect.json +++ b/ext/data/deinflect.json @@ -324,28 +324,28 @@ {"kanaIn": "な", "kanaOut": "", "rulesIn": [], "rulesOut": ["v1", "v5", "vk", "vs", "vz"]} ], "masu stem": [ - {"kanaIn": "い", "kanaOut": "いる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "え", "kanaOut": "える", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "き", "kanaOut": "きる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "ぎ", "kanaOut": "ぎる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "け", "kanaOut": "ける", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "げ", "kanaOut": "げる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "じ", "kanaOut": "じる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "せ", "kanaOut": "せる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "ぜ", "kanaOut": "ぜる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "ち", "kanaOut": "ちる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "て", "kanaOut": "てる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "で", "kanaOut": "でる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "に", "kanaOut": "にる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "ね", "kanaOut": "ねる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "ひ", "kanaOut": "ひる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "び", "kanaOut": "びる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "へ", "kanaOut": "へる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "べ", "kanaOut": "べる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "み", "kanaOut": "みる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "め", "kanaOut": "める", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "り", "kanaOut": "りる", "rulesIn": [], "rulesOut": ["v1"]}, - {"kanaIn": "れ", "kanaOut": "れる", "rulesIn": [], "rulesOut": ["v1"]}, + {"kanaIn": "い", "kanaOut": "いる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "え", "kanaOut": "える", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "き", "kanaOut": "きる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "ぎ", "kanaOut": "ぎる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "け", "kanaOut": "ける", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "げ", "kanaOut": "げる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "じ", "kanaOut": "じる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "せ", "kanaOut": "せる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "ぜ", "kanaOut": "ぜる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "ち", "kanaOut": "ちる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "て", "kanaOut": "てる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "で", "kanaOut": "でる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "に", "kanaOut": "にる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "ね", "kanaOut": "ねる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "ひ", "kanaOut": "ひる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "び", "kanaOut": "びる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "へ", "kanaOut": "へる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "べ", "kanaOut": "べる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "み", "kanaOut": "みる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "め", "kanaOut": "める", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "り", "kanaOut": "りる", "rulesIn": [], "rulesOut": ["v1d"]}, + {"kanaIn": "れ", "kanaOut": "れる", "rulesIn": [], "rulesOut": ["v1d"]}, {"kanaIn": "い", "kanaOut": "う", "rulesIn": [], "rulesOut": ["v5"]}, {"kanaIn": "き", "kanaOut": "く", "rulesIn": [], "rulesOut": ["v5"]}, {"kanaIn": "ぎ", "kanaOut": "ぐ", "rulesIn": [], "rulesOut": ["v5"]}, @@ -603,10 +603,10 @@ "progressive or perfect": [ {"kanaIn": "ている", "kanaOut": "て", "rulesIn": ["v1"], "rulesOut": ["iru"]}, {"kanaIn": "ておる", "kanaOut": "て", "rulesIn": ["v5"], "rulesOut": ["iru"]}, - {"kanaIn": "てる", "kanaOut": "て", "rulesIn": ["v1"], "rulesOut": ["iru"]}, + {"kanaIn": "てる", "kanaOut": "て", "rulesIn": ["v1p"], "rulesOut": ["iru"]}, {"kanaIn": "でいる", "kanaOut": "で", "rulesIn": ["v1"], "rulesOut": ["iru"]}, {"kanaIn": "でおる", "kanaOut": "で", "rulesIn": ["v5"], "rulesOut": ["iru"]}, - {"kanaIn": "でる", "kanaOut": "で", "rulesIn": ["v1"], "rulesOut": ["iru"]}, + {"kanaIn": "でる", "kanaOut": "で", "rulesIn": ["v1p"], "rulesOut": ["iru"]}, {"kanaIn": "とる", "kanaOut": "て", "rulesIn": ["v5"], "rulesOut": ["iru"]}, {"kanaIn": "ないでいる", "kanaOut": "ない", "rulesIn": ["v1"], "rulesOut": ["adj-i"]} ], diff --git a/ext/js/language/deinflector.js b/ext/js/language/deinflector.js index 7d75576d23..b52b7f5b98 100644 --- a/ext/js/language/deinflector.js +++ b/ext/js/language/deinflector.js @@ -20,13 +20,15 @@ export class Deinflector { /* eslint-disable no-multi-spaces */ /** @type {Map} @readonly */ static _ruleTypes = new Map([ - ['v1', /** @type {import('translation-internal').DeinflectionRuleFlags} */ (0b00000001)], // Verb ichidan - ['v5', /** @type {import('translation-internal').DeinflectionRuleFlags} */ (0b00000010)], // Verb godan - ['vs', /** @type {import('translation-internal').DeinflectionRuleFlags} */ (0b00000100)], // Verb suru - ['vk', /** @type {import('translation-internal').DeinflectionRuleFlags} */ (0b00001000)], // Verb kuru - ['vz', /** @type {import('translation-internal').DeinflectionRuleFlags} */ (0b00010000)], // Verb zuru - ['adj-i', /** @type {import('translation-internal').DeinflectionRuleFlags} */ (0b00100000)], // Adjective i - ['iru', /** @type {import('translation-internal').DeinflectionRuleFlags} */ (0b01000000)] // Intermediate -iru endings for progressive or perfect tense + ['v1', /** @type {import('translation-internal').DeinflectionRuleFlags} */ (0b000000011)], // Verb ichidan + ['v1d', /** @type {import('translation-internal').DeinflectionRuleFlags} */ (0b000000010)], // Verb ichidan dictionary form + ['v1p', /** @type {import('translation-internal').DeinflectionRuleFlags} */ (0b000000001)], // Verb ichidan progressive or perfect + ['v5', /** @type {import('translation-internal').DeinflectionRuleFlags} */ (0b000000100)], // Verb godan + ['vs', /** @type {import('translation-internal').DeinflectionRuleFlags} */ (0b000001000)], // Verb suru + ['vk', /** @type {import('translation-internal').DeinflectionRuleFlags} */ (0b000010000)], // Verb kuru + ['vz', /** @type {import('translation-internal').DeinflectionRuleFlags} */ (0b000100000)], // Verb zuru + ['adj-i', /** @type {import('translation-internal').DeinflectionRuleFlags} */ (0b001000000)], // Adjective i + ['iru', /** @type {import('translation-internal').DeinflectionRuleFlags} */ (0b010000000)] // Intermediate -iru endings for progressive or perfect tense ]); /* eslint-enable no-multi-spaces */ diff --git a/test/deinflector.test.js b/test/deinflector.test.js index b00625cfa2..e8b1aa4e94 100644 --- a/test/deinflector.test.js +++ b/test/deinflector.test.js @@ -115,7 +115,7 @@ function testDeinflections() { {term: '食べる', source: '食べたり', rule: 'v1', reasons: ['-tari']}, {term: '食べる', source: '食べず', rule: 'v1', reasons: ['-zu']}, {term: '食べる', source: '食べぬ', rule: 'v1', reasons: ['-nu']}, - {term: '食べる', source: '食べ', rule: 'v1', reasons: ['masu stem']}, + {term: '食べる', source: '食べ', rule: 'v1d', reasons: ['masu stem']}, {term: '食べる', source: '食べましょう', rule: 'v1', reasons: ['polite volitional']}, {term: '食べる', source: '食べよう', rule: 'v1', reasons: ['volitional']}, // ['causative passive'] diff --git a/types/ext/deinflector.d.ts b/types/ext/deinflector.d.ts index 4e5f007b5e..5defbf7959 100644 --- a/types/ext/deinflector.d.ts +++ b/types/ext/deinflector.d.ts @@ -17,7 +17,7 @@ import type * as TranslationInternal from './translation-internal'; -export type ReasonTypeRaw = 'v1' | 'v5' | 'vs' | 'vk' | 'vz' | 'adj-i' | 'iru'; +export type ReasonTypeRaw = 'v1' | 'v1d' | 'v1p' | 'v5' | 'vs' | 'vk' | 'vz' | 'adj-i' | 'iru'; export type ReasonsRaw = { [reason: string]: { From 7e8f6f5f982889faf0b24cf8e485df120ec3d8c1 Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Sat, 20 Jan 2024 20:59:43 +0100 Subject: [PATCH 2/2] add test --- test/deinflector.test.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/deinflector.test.js b/test/deinflector.test.js index e8b1aa4e94..e90704f10e 100644 --- a/test/deinflector.test.js +++ b/test/deinflector.test.js @@ -31,7 +31,7 @@ const dirname = path.dirname(fileURLToPath(import.meta.url)); * @param {Deinflector} deinflector * @param {string} source * @param {string} expectedTerm - * @param {string} expectedRule + * @param {string|undefined} expectedRule * @param {string[]|undefined} expectedReasons * @returns {{has: false, reasons: null, rules: null}|{has: true, reasons: string[], rules: number}} */ @@ -895,7 +895,9 @@ function testDeinflections() { {term: 'くる', source: 'くさせられない', rule: 'vk'}, {term: 'かわいい', source: 'かわいげ', rule: 'adj-i', reasons: ['-ge']}, - {term: '可愛い', source: 'かわいげ', rule: 'adj-i', reasons: ['-ge']} + {term: '可愛い', source: 'かわいげ', rule: 'adj-i', reasons: ['-ge']}, + + {term: '食べる', source: '食べて', reasons: ['-te', 'progressive or perfect', 'masu stem']} ] }, {