From 3a8ee001df7a55664f664a15b8fb7611de5e6a1b Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Wed, 19 Jun 2024 09:50:04 +0200 Subject: [PATCH 1/2] [th] add test case --- data/test/dict/th/en/tag_bank_1.json | 9 +++++++++ data/test/dict/th/en/term_bank_1.json | 14 ++++++++++++++ data/test/ipa/th/en/tag_bank_1.json | 1 + data/test/kaikki/th-en.json | 1 + data/test/tidy/th-en-forms-0.json | 4 ++++ data/test/tidy/th-en-lemmas.json | 19 +++++++++++++++++++ 6 files changed, 48 insertions(+) create mode 100644 data/test/dict/th/en/tag_bank_1.json create mode 100644 data/test/dict/th/en/term_bank_1.json create mode 100644 data/test/ipa/th/en/tag_bank_1.json create mode 100644 data/test/kaikki/th-en.json create mode 100644 data/test/tidy/th-en-forms-0.json create mode 100644 data/test/tidy/th-en-lemmas.json diff --git a/data/test/dict/th/en/tag_bank_1.json b/data/test/dict/th/en/tag_bank_1.json new file mode 100644 index 0000000..3f9ba19 --- /dev/null +++ b/data/test/dict/th/en/tag_bank_1.json @@ -0,0 +1,9 @@ +[ + [ + "char", + "", + 0, + "character", + 0 + ] +] \ No newline at end of file diff --git a/data/test/dict/th/en/term_bank_1.json b/data/test/dict/th/en/term_bank_1.json new file mode 100644 index 0000000..5549a3e --- /dev/null +++ b/data/test/dict/th/en/term_bank_1.json @@ -0,0 +1,14 @@ +[ + [ + "็", + "", + "char", + "character", + 0, + [ + "It is a Thai symbol that is called Mai Taikhu (ไม้ไต่คู้). It is used to shorten the written form of the vowel เ-ะ to เ-็. The word that has the form initial consonant + เ ะ + end consonant will be shortened to เ-็X.\n" + ], + 0, + "" + ] +] \ No newline at end of file diff --git a/data/test/ipa/th/en/tag_bank_1.json b/data/test/ipa/th/en/tag_bank_1.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/data/test/ipa/th/en/tag_bank_1.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/data/test/kaikki/th-en.json b/data/test/kaikki/th-en.json new file mode 100644 index 0000000..dfc5d0f --- /dev/null +++ b/data/test/kaikki/th-en.json @@ -0,0 +1 @@ +{"pos": "character", "head_templates": [{"name": "head", "args": {"1": "th", "2": "letter", "sc": "Thai"}, "expansion": "็"}], "word": "็", "lang": "Thai", "lang_code": "th", "senses": [{"raw_glosses": ["It is a Thai symbol that is called Mai Taikhu (ไม้ไต่คู้). It is used to shorten the written form of the vowel เ-ะ to เ-็. The word that has the form initial consonant + เ ะ + end consonant will be shortened to เ-็X.\n"], "glosses": ["It is a Thai symbol that is called Mai Taikhu (ไม้ไต่คู้). It is used to shorten the written form of the vowel เ-ะ to เ-็. The word that has the form initial consonant + เ ะ + end consonant will be shortened to เ-็X.\nFor example:\nเป็น is the shortened form of เปะน.", "It is a Thai symbol that is called Mai Taikhu (ไม้ไต่คู้). It is used to shorten the written form of the vowel เ-ะ to เ-็. The word that has the form initial consonant + เ ะ + end consonant will be shortened to เ-็X."], "tags": ["letter"], "id": "en-็-th-character-84RE3RJR"}, {"raw_glosses": ["It is a Thai symbol that is called Mai Taikhu (ไม้ไต่คู้). It is used to shorten the written form of the vowel เ-ะ to เ-็. The word that has the form initial consonant + เ ะ + end consonant will be shortened to เ-็X.\n"], "glosses": ["It is a Thai symbol that is called Mai Taikhu (ไม้ไต่คู้). It is used to shorten the written form of the vowel เ-ะ to เ-็. The word that has the form initial consonant + เ ะ + end consonant will be shortened to เ-็X.\nFor example:\nเป็น is the shortened form of เปะน.", "For example"], "tags": ["letter"], "id": "en-็-th-character-JsJLj2js", "categories": [{"name": "Thai entries with incorrect language header", "kind": "other", "parents": ["Entries with incorrect language header", "Entry maintenance"], "source": "w+disamb", "_dis": "29 42 29"}, {"name": "Thai terms with redundant script codes", "kind": "other", "parents": ["Terms with redundant script codes", "Entry maintenance"], "source": "w+disamb", "_dis": "30 40 30"}]}, {"raw_glosses": ["It is a Thai symbol that is called Mai Taikhu (ไม้ไต่คู้). It is used to shorten the written form of the vowel เ-ะ to เ-็. The word that has the form initial consonant + เ ะ + end consonant will be shortened to เ-็X.\n"], "glosses": ["It is a Thai symbol that is called Mai Taikhu (ไม้ไต่คู้). It is used to shorten the written form of the vowel เ-ะ to เ-็. The word that has the form initial consonant + เ ะ + end consonant will be shortened to เ-็X.\nFor example:\nเป็น is the shortened form of เปะน.", "เป็น is the shortened form of เปะน."], "tags": ["letter"], "id": "en-็-th-character-ePGdQcyW"}]} diff --git a/data/test/tidy/th-en-forms-0.json b/data/test/tidy/th-en-forms-0.json new file mode 100644 index 0000000..f4ab01d --- /dev/null +++ b/data/test/tidy/th-en-forms-0.json @@ -0,0 +1,4 @@ +{ + "_type": "map", + "map": [] +} \ No newline at end of file diff --git a/data/test/tidy/th-en-lemmas.json b/data/test/tidy/th-en-lemmas.json new file mode 100644 index 0000000..8d1eea4 --- /dev/null +++ b/data/test/tidy/th-en-lemmas.json @@ -0,0 +1,19 @@ +{ + "็": { + "็": { + "character": { + "ipa": [], + "senses": [ + { + "glosses": [ + "It is a Thai symbol that is called Mai Taikhu (ไม้ไต่คู้). It is used to shorten the written form of the vowel เ-ะ to เ-็. The word that has the form initial consonant + เ ะ + end consonant will be shortened to เ-็X.\n" + ], + "tags": [ + "letter" + ] + } + ] + } + } + } +} \ No newline at end of file From 45bcc9d5fd63c16e2888bb5822efa36707b7bc74 Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Wed, 19 Jun 2024 09:53:02 +0200 Subject: [PATCH 2/2] fix test bug when no ipa --- 4-make-yomitan.test.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/4-make-yomitan.test.js b/4-make-yomitan.test.js index dd42199..b561c99 100644 --- a/4-make-yomitan.test.js +++ b/4-make-yomitan.test.js @@ -38,12 +38,14 @@ for (const {iso: sourceIso} of languages){ const testTermTags = JSON.parse(readFileSync(`data/test/temp/dict/tag_bank_1.json`, 'utf8')); const testTerms = JSON.parse(readFileSync(`data/test/temp/dict/term_bank_1.json`, 'utf8')); const testIpaTags = JSON.parse(readFileSync(`data/test/temp/ipa/tag_bank_1.json`, 'utf8')); - const testIpa = JSON.parse(readFileSync(`data/test/temp/ipa/term_meta_bank_1.json`, 'utf8')); + const testIpaFile = `data/test/temp/ipa/term_meta_bank_1.json`; + const testIpa = existsSync(testIpaFile) ? JSON.parse(readFileSync(testIpaFile, 'utf8')) : null; const validTermTags = JSON.parse(readFileSync(`data/test/dict/${sourceIso}/${targetIso}/tag_bank_1.json`, 'utf8')); const validTerms = JSON.parse(readFileSync(`data/test/dict/${sourceIso}/${targetIso}/term_bank_1.json`, 'utf8')); const validIpaTags = JSON.parse(readFileSync(`data/test/ipa/${sourceIso}/${targetIso}/tag_bank_1.json`, 'utf8')); - const validIpa = JSON.parse(readFileSync(`data/test/ipa/${sourceIso}/${targetIso}/term_meta_bank_1.json`, 'utf8')); + const validIpaFile = `data/test/ipa/${sourceIso}/${targetIso}/term_meta_bank_1.json`; + const validIpa = existsSync(validIpaFile) ? JSON.parse(readFileSync(validIpaFile, 'utf8')) : null; describe(`Converting tidy ${sourceIso}-${targetIso} to yomitan format`, () => { test('should have valid term tags', () => {