From 47a7a4860bc4d6ed0c14d75e7e7e2fa4a9b60822 Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Wed, 9 Oct 2024 18:08:12 +0200 Subject: [PATCH 01/13] baseline --- data/test/dict/sq/en/tag_bank_1.json | 7 + data/test/dict/sq/en/term_bank_1.json | 169 +++++++++++++++++ data/test/dict/sq/en/term_bank_2.json | 197 +++++++++++++++++++ data/test/ipa/sq/en/tag_bank_1.json | 10 +- data/test/ipa/sq/en/term_meta_bank_1.json | 46 +++++ data/test/kaikki/sq-en.json | 3 +- data/test/tidy/sq-en-forms-0.json | 126 +++++++++++++ data/test/tidy/sq-en-lemmas.json | 219 ++++++++++++++++++++++ 8 files changed, 775 insertions(+), 2 deletions(-) diff --git a/data/test/dict/sq/en/tag_bank_1.json b/data/test/dict/sq/en/tag_bank_1.json index 0143cd3..895bfd1 100644 --- a/data/test/dict/sq/en/tag_bank_1.json +++ b/data/test/dict/sq/en/tag_bank_1.json @@ -12,5 +12,12 @@ -1, "masculine", 1 + ], + [ + "fem", + "", + -1, + "feminine", + 1 ] ] \ No newline at end of file diff --git a/data/test/dict/sq/en/term_bank_1.json b/data/test/dict/sq/en/term_bank_1.json index d9ba885..4672f1e 100644 --- a/data/test/dict/sq/en/term_bank_1.json +++ b/data/test/dict/sq/en/term_bank_1.json @@ -10,5 +10,174 @@ ], 0, "" + ], + [ + "gjuhë", + "", + "n fem", + "n", + 0, + [ + { + "type": "structured-content", + "content": [ + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": "tongue (organ)" + }, + { + "tag": "div", + "data": { + "listType": "ol" + }, + "style": { + "marginLeft": 2 + }, + "content": [ + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": [ + { + "tag": "span", + "data": { + "listType": "number" + }, + "content": "1. " + }, + "(figurative) speech, talking" + ] + }, + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": [ + { + "tag": "span", + "data": { + "listType": "number" + }, + "content": "2. " + }, + "strip of land" + ] + }, + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": [ + { + "tag": "span", + "data": { + "listType": "number" + }, + "content": "3. " + }, + "bell clapper, clanger, tongue" + ] + } + ] + } + ] + }, + { + "type": "structured-content", + "content": [ + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": "language, tongue" + }, + { + "tag": "div", + "data": { + "listType": "ol" + }, + "style": { + "marginLeft": 2 + }, + "content": [ + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": [ + { + "tag": "span", + "data": { + "listType": "number" + }, + "content": "1. " + }, + "register, speech, style" + ] + }, + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": [ + { + "tag": "span", + "data": { + "listType": "number" + }, + "content": "2. " + }, + "language (generally, any form of communication)" + ] + }, + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": [ + { + "tag": "span", + "data": { + "listType": "number" + }, + "content": "3. " + }, + "(colloquial) local dialect" + ] + }, + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": [ + { + "tag": "span", + "data": { + "listType": "number" + }, + "content": "4. " + }, + "(colloquial) Albanian, as a subject in school" + ] + } + ] + } + ] + } + ], + 0, + "" ] ] \ No newline at end of file diff --git a/data/test/dict/sq/en/term_bank_2.json b/data/test/dict/sq/en/term_bank_2.json index 7f01320..f9af542 100644 --- a/data/test/dict/sq/en/term_bank_2.json +++ b/data/test/dict/sq/en/term_bank_2.json @@ -34,5 +34,202 @@ ], 0, "" + ], + [ + "gjuhëra/gjuhëna", + "", + "non-lemma", + "", + 0, + [ + [ + "gjuhë", + [ + "plural", + "dialectal" + ] + ] + ], + 0, + "" + ], + [ + "gjuha", + "", + "non-lemma", + "", + 0, + [ + [ + "gjuhë", + [ + "definite" + ] + ], + [ + "gjuhë", + [ + "nominative", + "singular", + "definite" + ] + ] + ], + 0, + "" + ], + [ + "gjuhët", + "", + "non-lemma", + "", + 0, + [ + [ + "gjuhë", + [ + "accusative", + "plural", + "definite" + ] + ], + [ + "gjuhë", + [ + "nominative", + "plural", + "definite" + ] + ] + ], + 0, + "" + ], + [ + "gjuhën", + "", + "non-lemma", + "", + 0, + [ + [ + "gjuhë", + [ + "accusative", + "singular", + "definite" + ] + ] + ], + 0, + "" + ], + [ + "gjuhe", + "", + "non-lemma", + "", + 0, + [ + [ + "gjuhë", + [ + "singular", + "indefinite", + "ablative" + ] + ], + [ + "gjuhë", + [ + "dative", + "singular", + "indefinite" + ] + ] + ], + 0, + "" + ], + [ + "gjuhës", + "", + "non-lemma", + "", + 0, + [ + [ + "gjuhë", + [ + "singular", + "definite", + "ablative" + ] + ], + [ + "gjuhë", + [ + "dative", + "singular", + "definite" + ] + ] + ], + 0, + "" + ], + [ + "gjuhëve", + "", + "non-lemma", + "", + 0, + [ + [ + "gjuhë", + [ + "plural", + "definite", + "ablative" + ] + ], + [ + "gjuhë", + [ + "dative", + "plural", + "definite" + ] + ], + [ + "gjuhë", + [ + "dative", + "plural", + "indefinite" + ] + ] + ], + 0, + "" + ], + [ + "gjuhësh", + "", + "non-lemma", + "", + 0, + [ + [ + "gjuhë", + [ + "plural", + "indefinite", + "ablative" + ] + ] + ], + 0, + "" ] ] \ No newline at end of file diff --git a/data/test/ipa/sq/en/tag_bank_1.json b/data/test/ipa/sq/en/tag_bank_1.json index 0637a08..0dd99bc 100644 --- a/data/test/ipa/sq/en/tag_bank_1.json +++ b/data/test/ipa/sq/en/tag_bank_1.json @@ -1 +1,9 @@ -[] \ No newline at end of file +[ + [ + "Gheg", + "dialect", + 0, + "Gheg", + 0 + ] +] \ No newline at end of file diff --git a/data/test/ipa/sq/en/term_meta_bank_1.json b/data/test/ipa/sq/en/term_meta_bank_1.json index 27aa487..32e1f48 100644 --- a/data/test/ipa/sq/en/term_meta_bank_1.json +++ b/data/test/ipa/sq/en/term_meta_bank_1.json @@ -11,5 +11,51 @@ } ] } + ], + [ + "gjuhë", + "ipa", + { + "reading": "gjuhë", + "transcriptions": [ + { + "ipa": "/ˈɟuhə/", + "tags": [] + }, + { + "ipa": "[ˈɡjuː(h)]", + "tags": [ + "Gheg", + "Northern" + ] + }, + { + "ipa": "[ˈɡuː(h)]", + "tags": [ + "Gheg", + "Northern" + ] + }, + { + "ipa": "[ˈɡũː]", + "tags": [ + "Kosovo" + ] + }, + { + "ipa": "[ˈɡʎuhə]", + "tags": [ + "Arbëresh", + "Arvanitika" + ] + }, + { + "ipa": "[ˈɡʎuɣə]", + "tags": [ + "Calabria" + ] + } + ] + } ] ] \ No newline at end of file diff --git a/data/test/kaikki/sq-en.json b/data/test/kaikki/sq-en.json index b6169d9..1c2e7cd 100644 --- a/data/test/kaikki/sq-en.json +++ b/data/test/kaikki/sq-en.json @@ -1,2 +1,3 @@ {"pos": "noun", "head_templates": [{"name": "head", "args": {"1": "sq", "2": "noun", "head": "", "sort": "", "g": "m", "cat2": "masculine nouns"}, "expansion": "akull m"}, {"name": "sq-noun", "args": {"1": "m", "2": "akuj"}, "expansion": "akull m (plural akuj)"}], "forms": [{"form": "akuj", "tags": ["plural"]}], "etymology_number": 1, "wikipedia": ["Vladimir Orel"], "etymology_text": "Uncertain. Possibly:\n# A derivation from Proto-Indo-European *keHl- whence also Proto-Celtic *kaletos (“hard”), Proto-Slavic *kaliti (“to temper, harden”), Latin callum (“hardened skin”).\n# Borrowed from Germanic, ultimately from Proto-Germanic *jekulaz (“icicle”).\n# Akin Old Armenian ոյծ (oyc, “cold, frost”), suffixed with -ull, though the two terms are phonologically incompatible.", "etymology_templates": [{"name": "unc", "args": {"1": "sq"}, "expansion": "Uncertain"}, {"name": "der", "args": {"1": "sq", "2": "ine-pro", "3": "", "4": "*keHl-"}, "expansion": "Proto-Indo-European *keHl-"}, {"name": "cog", "args": {"1": "cel-pro", "2": "*kaletos", "t": "hard"}, "expansion": "Proto-Celtic *kaletos (“hard”)"}, {"name": "cog", "args": {"1": "sla-pro", "2": "*kaliti", "t": "to temper, harden"}, "expansion": "Proto-Slavic *kaliti (“to temper, harden”)"}, {"name": "cog", "args": {"1": "la", "2": "callum", "t": "hardened skin"}, "expansion": "Latin callum (“hardened skin”)"}, {"name": "glossary", "args": {"1": "loanword", "2": "Borrowed"}, "expansion": "Borrowed"}, {"name": "bor", "args": {"1": "sq", "2": "gem", "3": "", "4": "", "5": "", "lit": "", "pos": "", "tr": "", "ts": "", "id": "", "sc": "", "g": "", "g2": "", "g3": "", "nocat": "", "sort": ""}, "expansion": "Germanic"}, {"name": "bor+", "args": {"1": "sq", "2": "gem"}, "expansion": "Borrowed from Germanic"}, {"name": "der", "args": {"1": "sq", "2": "gem-pro", "3": "*jekulaz", "t": "icicle"}, "expansion": "Proto-Germanic *jekulaz (“icicle”)"}, {"name": "cog", "args": {"1": "xcl", "2": "ոյծ", "t": "cold, frost"}, "expansion": "Old Armenian ոյծ (oyc, “cold, frost”)"}, {"name": "af", "args": {"1": "sq", "2": "-ull"}, "expansion": "-ull"}], "sounds": [{"ipa": "/ˈakuɫ/"}, {"rhymes": "-akuɫ"}], "word": "akull", "lang": "Albanian", "lang_code": "sq", "senses": [{"links": [["ice", "ice"]], "glosses": ["ice"], "tags": ["masculine"], "id": "akull-sq-noun-TLCyUMYl"}]} -{"pos": "verb", "head_templates": [{"name": "head", "args": {"1": "sq", "2": "verb form"}, "expansion": "ik"}], "word": "ik", "lang": "Albanian", "lang_code": "sq", "senses": [{"links": [["iki", "iki#Albanian"]], "glosses": ["second-person singular imperative of iki"], "tags": ["form-of", "imperative", "second-person", "singular"], "form_of": [{"word": "iki"}], "id": "ik-sq-verb-OAavRVQB", "categories": []}]} \ No newline at end of file +{"pos": "verb", "head_templates": [{"name": "head", "args": {"1": "sq", "2": "verb form"}, "expansion": "ik"}], "word": "ik", "lang": "Albanian", "lang_code": "sq", "senses": [{"links": [["iki", "iki#Albanian"]], "glosses": ["second-person singular imperative of iki"], "tags": ["form-of", "imperative", "second-person", "singular"], "form_of": [{"word": "iki"}], "id": "ik-sq-verb-OAavRVQB", "categories": []}]} +{"pos": "noun", "forms": [{"form": "gjuhë", "tags": ["plural"]}, {"form": "gjuhëra/gjuhëna", "tags": ["dialectal", "plural"]}, {"form": "gjuha", "tags": ["definite"]}, {"form": "no-table-tags", "source": "declension", "tags": ["table-tags"]}, {"form": "sq-noun-f", "source": "declension", "tags": ["inflection-template"]}, {"form": "gjuhë", "tags": ["indefinite", "nominative", "singular"], "source": "declension"}, {"form": "gjuha", "tags": ["definite", "nominative", "singular"], "source": "declension"}, {"form": "gjuhë", "tags": ["indefinite", "nominative", "plural"], "source": "declension"}, {"form": "gjuhët", "tags": ["definite", "nominative", "plural"], "source": "declension"}, {"form": "gjuhë", "tags": ["accusative", "indefinite", "singular"], "source": "declension"}, {"form": "gjuhën", "tags": ["accusative", "definite", "singular"], "source": "declension"}, {"form": "gjuhë", "tags": ["accusative", "indefinite", "plural"], "source": "declension"}, {"form": "gjuhët", "tags": ["accusative", "definite", "plural"], "source": "declension"}, {"form": "gjuhe", "tags": ["dative", "indefinite", "singular"], "source": "declension"}, {"form": "gjuhës", "tags": ["dative", "definite", "singular"], "source": "declension"}, {"form": "gjuhëve", "tags": ["dative", "indefinite", "plural"], "source": "declension"}, {"form": "gjuhëve", "tags": ["dative", "definite", "plural"], "source": "declension"}, {"form": "gjuhe", "tags": ["ablative", "indefinite", "singular"], "source": "declension"}, {"form": "gjuhës", "tags": ["ablative", "definite", "singular"], "source": "declension"}, {"form": "gjuhësh", "tags": ["ablative", "indefinite", "plural"], "source": "declension"}, {"form": "gjuhëve", "tags": ["ablative", "definite", "plural"], "source": "declension"}], "inflection_templates": [{"name": "sq-decl-noun", "args": {"1": "gjuhë", "2": "gjuhë", "3": "gjuha", "4": "gjuhët", "5": "gjuhë", "6": "gjuhë", "7": "gjuhën", "8": "gjuhët", "9": "gjuhe", "10": "gjuhëve", "11": "gjuhës", "12": "gjuhëve", "13": "gjuhësh"}}], "sounds": [{"ipa": "/ˈɟuhə/"}, {"tags": ["Gheg", "Northern"], "ipa": "[ˈɡjuː(h)]"}, {"tags": ["Gheg", "Northern"], "ipa": "[ˈɡuː(h)]"}, {"tags": ["Kosovo"], "ipa": "[ˈɡũː]"}, {"tags": ["Arbëresh", "Arvanitika"], "ipa": "[ˈɡʎuhə]"}, {"note": "Calabria", "ipa": "[ˈɡʎuɣə]"}, {"rhymes": "-uhə"}], "wikipedia": ["Vladimir Orel"], "etymology_text": "Unclear. Akin to Arbëresh glunzë (“voice”). Possibilities include:\n# Inherited from Proto-Indo-European *gol(H)-s-os, via a byform *gl̥(H)-s-ós, whence also Proto-Slavic *golsъ (“voice”), Lithuanian gal̃sas (“voice”), Proto-Germanic *kalz-ōną (“to call”). However the medial -h- instead of expected **-sh- is left unexplained.\n# From a byform *ǵʰnud-sḱ-eh₂, doubly methasised from Proto-Indo-European *dn̥ǵʰwéh₂s ~ *dn̥ǵʰuh₂és (“tongue”). Compare Tocharian B kantwo, also metathised. The outcome gl- (and later gj-) from original *ǵ(ʰ)n- is also attested in gju (“knee”). The usage of the infixed *-sḱ- does not seem have any parallels.\n# A connection with Ancient Greek γλῶσσα (glôssa), itself of unclear origin, cannot be proven.", "etymology_templates": [{"name": "unk", "args": {"1": "sq", "2": "Unclear"}, "expansion": "Unclear"}, {"name": "glossary", "args": {"1": "Inherited"}, "expansion": "Inherited"}, {"name": "inh", "args": {"1": "sq", "2": "ine-pro", "3": "", "4": "*gol(H)-s-os", "5": "", "lit": "", "pos": "", "tr": "", "ts": "", "id": "", "sc": "", "g": "", "g2": "", "g3": "", "nocat": "", "sort": ""}, "expansion": "Proto-Indo-European *gol(H)-s-os"}, {"name": "inh+", "args": {"1": "sq", "2": "ine-pro", "3": "", "4": "*gol(H)-s-os"}, "expansion": "Inherited from Proto-Indo-European *gol(H)-s-os"}, {"name": "cog", "args": {"1": "sla-pro", "2": "*golsъ", "t": "voice"}, "expansion": "Proto-Slavic *golsъ (“voice”)"}, {"name": "cog", "args": {"1": "lt", "2": "gal̃sas", "t": "voice"}, "expansion": "Lithuanian gal̃sas (“voice”)"}, {"name": "cog", "args": {"1": "gem-pro", "2": "*kalzōną", "3": "*kalz-ōną", "t": "to call"}, "expansion": "Proto-Germanic *kalz-ōną (“to call”)"}, {"name": "der", "args": {"1": "sq", "2": "ine-pro", "3": "*dn̥ǵʰwéh₂s", "4": "*dn̥ǵʰwéh₂s ~ *dn̥ǵʰuh₂és", "t": "tongue"}, "expansion": "Proto-Indo-European *dn̥ǵʰwéh₂s ~ *dn̥ǵʰuh₂és (“tongue”)"}, {"name": "cog", "args": {"1": "txb", "2": "kantwo"}, "expansion": "Tocharian B kantwo"}, {"name": "cog", "args": {"1": "grc", "2": "γλῶσσα"}, "expansion": "Ancient Greek γλῶσσα (glôssa)"}], "word": "gjuhë", "lang": "Albanian", "lang_code": "sq", "synonyms": [{"tags": ["obsolete"], "word": "gluhë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "now Cham", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "Arbëresh", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "Arvanitika", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"tags": ["obsolete"], "word": "gjuhu", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "Gheg", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhunë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"tags": ["Gheg"], "word": "gjuhënë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"tags": ["Gheg", "Northern"], "word": "guhë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"tags": ["dialectal"], "word": "gû", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "Kosovo", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gju — Borgo Erizzo", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"english": "Sicily", "word": "gëluhë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"english": "Calabria", "word": "gjufë", "_dis1": "0 0 0 0 0 0 0 0 0"}], "derived": [{"word": "dygjuhësi", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "dygjuhësh", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhcë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhësi", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhësisht", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhësor", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëtar", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëz", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëzoj", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhor", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "shumëgjuhësh", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhë letrare", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhë nëne", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëbilbil", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëbrisk", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëçarë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëdele", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëdreri", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëgjarpër", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëgjatë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëhelm", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëkrijues", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëkuq", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëlashtë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëlëshuar", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëlidhur", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëlopatë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëlopë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëllomkë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëmbajtur", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëmite", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëmpirë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëmprehtë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhënepërkë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhënuse", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëpremë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëprerë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëqen", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhërrënduar", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëshkurtër", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëshkurtuar", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëshpatë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhështhurur", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëtrashë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëtharë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëthikë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhëzënë", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhujëse", "_dis1": "0 0 0 0 0 0 0 0 0"}, {"word": "gjuhustër", "_dis1": "0 0 0 0 0 0 0 0 0"}], "senses": [{"examples": [{"text": "Mbaje gjuhën!", "english": "Hold your tongue!", "type": "example"}, {"text": "E ka gjuhën të gjatë.", "english": "(literally, “She has a long tongue.”)", "type": "example", "roman": "She is very talkative."}], "links": [["tongue", "tongue"], ["speech", "speech"], ["talking", "talking"]], "raw_glosses": ["tongue (organ)", "(figurative) speech, talking"], "glosses": ["tongue (organ)", "speech, talking"], "synonyms": [{"word": "gojë"}], "tags": ["feminine", "figuratively"], "id": "en-gjuhë-sq-noun-4U3OJriL", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}, {"name": "Body parts", "kind": "topical", "parents": ["Body", "Anatomy", "All topics", "Biology", "Medicine", "Fundamental", "Sciences", "Healthcare", "Health"], "source": "w", "orig": "sq:Body parts", "langcode": "sq"}]}, {"links": [["tongue", "tongue"], ["strip", "strip"], ["land", "land"]], "glosses": ["tongue (organ)", "strip of land"], "synonyms": [{"word": "rrip"}], "tags": ["feminine"], "id": "en-gjuhë-sq-noun-Tfx~l-b2", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}, {"name": "Body parts", "kind": "topical", "parents": ["Body", "Anatomy", "All topics", "Biology", "Medicine", "Fundamental", "Sciences", "Healthcare", "Health"], "source": "w", "orig": "sq:Body parts", "langcode": "sq"}]}, {"links": [["tongue", "tongue"], ["bell", "bell"], ["clapper", "clapper"], ["clanger", "clanger"]], "glosses": ["tongue (organ)", "bell clapper, clanger, tongue"], "synonyms": [{"word": "gjuhëz"}], "tags": ["feminine"], "id": "en-gjuhë-sq-noun-zu-bA4a3", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}, {"name": "Body parts", "kind": "topical", "parents": ["Body", "Anatomy", "All topics", "Biology", "Medicine", "Fundamental", "Sciences", "Healthcare", "Health"], "source": "w", "orig": "sq:Body parts", "langcode": "sq"}]}, {"examples": [{"text": "gjuhë lope e zier", "english": "boiled beef tongue", "type": "example"}, {"text": "Dogji gjuhën.", "english": "I burned my tongue.", "type": "example"}], "links": [["tongue", "tongue"]], "glosses": ["tongue (organ)"], "tags": ["feminine"], "id": "en-gjuhë-sq-noun-r4b272FF", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}, {"name": "Body parts", "kind": "topical", "parents": ["Body", "Anatomy", "All topics", "Biology", "Medicine", "Fundamental", "Sciences", "Healthcare", "Health"], "source": "w", "orig": "sq:Body parts", "langcode": "sq"}]}, {"examples": [{"text": "gjuha e fëmijëve", "english": "children speech", "type": "example"}, {"text": "gjuhë e trashë", "english": "foul language", "type": "example"}, {"text": "gjuha e shkrimtarit", "english": "the author's style", "type": "example"}], "links": [["language", "language"], ["tongue", "tongue"], ["register", "register"], ["speech", "speech"], ["style", "style"]], "glosses": ["language, tongue", "register, speech, style"], "synonyms": [{"word": "ligjërim"}, {"word": "stil"}], "tags": ["feminine"], "id": "en-gjuhë-sq-noun--CHs0sns", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}]}, {"examples": [{"text": "gjuha e muzikës", "english": "music's language", "type": "example"}, {"text": "gjuha e bletëve", "english": "bees' language", "type": "example"}], "links": [["language", "language"], ["tongue", "tongue"]], "glosses": ["language, tongue", "language (generally, any form of communication)"], "tags": ["feminine"], "id": "en-gjuhë-sq-noun-nlIefoUV", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}]}, {"links": [["language", "language"], ["tongue", "tongue"], ["dialect", "dialect"]], "raw_glosses": ["language, tongue", "(colloquial) local dialect"], "glosses": ["language, tongue", "local dialect"], "synonyms": [{"word": "e folme"}, {"word": "dialekt"}], "tags": ["colloquial", "feminine"], "id": "en-gjuhë-sq-noun-mWdoYa8o", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}]}, {"links": [["language", "language"], ["tongue", "tongue"], ["Albanian", "Albanian"], ["subject", "subject"]], "raw_glosses": ["language, tongue", "(colloquial) Albanian, as a subject in school"], "glosses": ["language, tongue", "Albanian, as a subject in school"], "tags": ["colloquial", "feminine"], "id": "en-gjuhë-sq-noun-7CKeEbtj", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}, {"name": "Albanian entries with incorrect language header", "kind": "other", "parents": ["Entries with incorrect language header", "Entry maintenance"], "source": "w+disamb", "_dis": "3 3 4 3 16 16 16 23 16"}, {"name": "Pages with 1 entry", "kind": "other", "parents": [], "source": "w+disamb", "_dis": "4 2 7 2 9 10 9 48 9"}, {"name": "Pages with entries", "kind": "other", "parents": [], "source": "w+disamb", "_dis": "4 2 3 2 9 15 9 47 9"}]}, {"examples": [{"text": "gjuha shqipe", "english": "the Albanian language", "type": "example"}], "links": [["language", "language"], ["tongue", "tongue"]], "glosses": ["language, tongue"], "tags": ["feminine"], "id": "en-gjuhë-sq-noun-GSYYUYQQ", "categories": [{"name": "Albanian terms with collocations", "kind": "other", "parents": ["Terms with collocations", "Entry maintenance"], "source": "w"}]}]} \ No newline at end of file diff --git a/data/test/tidy/sq-en-forms-0.json b/data/test/tidy/sq-en-forms-0.json index bde81ba..76f289c 100644 --- a/data/test/tidy/sq-en-forms-0.json +++ b/data/test/tidy/sq-en-forms-0.json @@ -44,6 +44,132 @@ ] ] } + ], + [ + "gjuhë", + { + "_type": "map", + "map": [ + [ + "gjuhëra/gjuhëna", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "plural dialectal" + ] + ] + ] + } + ], + [ + "gjuha", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "definite", + "nominative singular definite" + ] + ] + ] + } + ], + [ + "gjuhët", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "accusative plural definite", + "nominative plural definite" + ] + ] + ] + } + ], + [ + "gjuhën", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "accusative singular definite" + ] + ] + ] + } + ], + [ + "gjuhe", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "singular indefinite ablative", + "dative singular indefinite" + ] + ] + ] + } + ], + [ + "gjuhës", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "singular definite ablative", + "dative singular definite" + ] + ] + ] + } + ], + [ + "gjuhëve", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "plural definite ablative", + "dative plural definite", + "dative plural indefinite" + ] + ] + ] + } + ], + [ + "gjuhësh", + { + "_type": "map", + "map": [ + [ + "noun", + [ + "plural indefinite ablative" + ] + ] + ] + } + ] + ] + } ] ] } \ No newline at end of file diff --git a/data/test/tidy/sq-en-lemmas.json b/data/test/tidy/sq-en-lemmas.json index 1956d77..b8482da 100644 --- a/data/test/tidy/sq-en-lemmas.json +++ b/data/test/tidy/sq-en-lemmas.json @@ -20,5 +20,224 @@ ] } } + }, + "gjuhë": { + "gjuhë": { + "noun": { + "ipa": [ + { + "ipa": "/ˈɟuhə/", + "tags": [] + }, + { + "ipa": "[ˈɡjuː(h)]", + "tags": [ + "Gheg", + "Northern" + ] + }, + { + "ipa": "[ˈɡuː(h)]", + "tags": [ + "Gheg", + "Northern" + ] + }, + { + "ipa": "[ˈɡũː]", + "tags": [ + "Kosovo" + ] + }, + { + "ipa": "[ˈɡʎuhə]", + "tags": [ + "Arbëresh", + "Arvanitika" + ] + }, + { + "ipa": "[ˈɡʎuɣə]", + "tags": [ + "Calabria" + ] + } + ], + "senses": [ + { + "glosses": [ + { + "type": "structured-content", + "content": [ + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": "tongue (organ)" + }, + { + "tag": "div", + "data": { + "listType": "ol" + }, + "style": { + "marginLeft": 2 + }, + "content": [ + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": [ + { + "tag": "span", + "data": { + "listType": "number" + }, + "content": "1. " + }, + "(figurative) speech, talking" + ] + }, + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": [ + { + "tag": "span", + "data": { + "listType": "number" + }, + "content": "2. " + }, + "strip of land" + ] + }, + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": [ + { + "tag": "span", + "data": { + "listType": "number" + }, + "content": "3. " + }, + "bell clapper, clanger, tongue" + ] + } + ] + } + ] + } + ], + "tags": [ + "feminine" + ] + }, + { + "glosses": [ + { + "type": "structured-content", + "content": [ + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": "language, tongue" + }, + { + "tag": "div", + "data": { + "listType": "ol" + }, + "style": { + "marginLeft": 2 + }, + "content": [ + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": [ + { + "tag": "span", + "data": { + "listType": "number" + }, + "content": "1. " + }, + "register, speech, style" + ] + }, + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": [ + { + "tag": "span", + "data": { + "listType": "number" + }, + "content": "2. " + }, + "language (generally, any form of communication)" + ] + }, + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": [ + { + "tag": "span", + "data": { + "listType": "number" + }, + "content": "3. " + }, + "(colloquial) local dialect" + ] + }, + { + "tag": "div", + "data": { + "listType": "li" + }, + "content": [ + { + "tag": "span", + "data": { + "listType": "number" + }, + "content": "4. " + }, + "(colloquial) Albanian, as a subject in school" + ] + } + ] + } + ] + } + ], + "tags": [ + "feminine" + ] + } + ] + } + } } } \ No newline at end of file From da8312eae4450893fd0e01bf47693d58170addd7 Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Wed, 9 Oct 2024 18:12:34 +0200 Subject: [PATCH 02/13] early exit on senses --- 3-tidy-up.js | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/3-tidy-up.js b/3-tidy-up.js index 23c52cf..79aec7b 100644 --- a/3-tidy-up.js +++ b/3-tidy-up.js @@ -170,6 +170,9 @@ function handleLine(line) { formMap.set(pos, similarSort(mergePersonTags(targetIso, Array.from(tagsSet)))); }); } + + const {senses} = parsedLine; + if (!senses) return; const ipa = sounds ? sounds @@ -188,10 +191,6 @@ function handleLine(line) { .filter(ipaObj => ipaObj.ipa) : []; - - const {senses} = parsedLine; - if (!senses) return; - const sensesWithGlosses = senses.filter(sense => sense.glosses || sense.raw_glosses || sense.raw_gloss); sensesWithGlosses.map(sense => { const glosses = sense.raw_glosses || sense.raw_gloss || sense.glosses; From e12baa536ca979010bf950103917a2ff6664e487 Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Wed, 9 Oct 2024 21:32:07 +0200 Subject: [PATCH 03/13] refactor --- 3-tidy-up.js | 60 ++++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/3-tidy-up.js b/3-tidy-up.js index 79aec7b..51a1664 100644 --- a/3-tidy-up.js +++ b/3-tidy-up.js @@ -142,34 +142,8 @@ function handleLine(line) { if(!pos) return; const word = getCanonicalWordForm(parsedLine); if (!word) return; - const readings = getReadings(word, parsedLine); - if (forms) { - forms.forEach((formData) => { - const { form } = formData; - let { tags } = formData; - if(!form) return; - if(!tags) return; - if(form === '-') return; - tags = tags.filter(tag => !redundantTags.includes(tag)); - const isBlacklisted = tags.some(value => blacklistedTags.includes(value)); - if (isBlacklisted) return; - const isIdentity = !tags.some(value => !identityTags.includes(value)); - if (isIdentity) return; - - const wordMap = automatedForms.get(word) || new Map(); - const formMap = wordMap.get(form) || new Map(); - formMap.get(pos) || formMap.set(pos, new Set()); - wordMap.set(form, formMap); - automatedForms.set(word, wordMap); - - const tagsSet = new Set((formMap.get(pos))); - - tagsSet.add(sortTags(targetIso, tags).join(' ')); - - formMap.set(pos, similarSort(mergePersonTags(targetIso, Array.from(tagsSet)))); - }); - } + processForms(forms, word, pos); const {senses} = parsedLine; if (!senses) return; @@ -213,7 +187,8 @@ function handleLine(line) { }); if (sensesWithoutInflectionGlosses.length === 0) return; - + + const readings = getReadings(word, parsedLine); initializeWordResult(word, readings, pos); for (const ipaObj of ipa) { @@ -256,6 +231,35 @@ function handleLine(line) { } } +function processForms(forms, word, pos) { + if (forms) { + forms.forEach((formData) => { + const { form } = formData; + let { tags } = formData; + if (!form) return; + if (!tags) return; + if (form === '-') return; + tags = tags.filter(tag => !redundantTags.includes(tag)); + const isBlacklisted = tags.some(value => blacklistedTags.includes(value)); + if (isBlacklisted) return; + const isIdentity = !tags.some(value => !identityTags.includes(value)); + if (isIdentity) return; + + const wordMap = automatedForms.get(word) || new Map(); + const formMap = wordMap.get(form) || new Map(); + formMap.get(pos) || formMap.set(pos, new Set()); + wordMap.set(form, formMap); + automatedForms.set(word, wordMap); + + const tagsSet = new Set((formMap.get(pos))); + + tagsSet.add(sortTags(targetIso, tags).join(' ')); + + formMap.set(pos, similarSort(mergePersonTags(targetIso, Array.from(tagsSet)))); + }); + } +} + function saveSenseResult(word, readings, pos, currSense) { for (const reading of readings) { lemmaDict[word][reading][pos].senses.push(currSense); From 2bcc6a0f455d29084a259b960c602f034c5beaac Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Fri, 11 Oct 2024 16:28:00 +0200 Subject: [PATCH 04/13] wip --- .gitignore | 2 + .vscode/settings.json | 3 + 3-tidy-up.js | 204 +++++++++++++++++++++++++++++++----------- jsconfig.json | 18 ++++ types/types.ts | 100 +++++++++++++++++++++ 5 files changed, 276 insertions(+), 51 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 jsconfig.json create mode 100644 types/types.ts diff --git a/.gitignore b/.gitignore index aef1c85..e177713 100755 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ *.json *.jsonl +!jsconfig.json +!.vscode/settings.json !tag_bank_term.json !tag_bank_ipa.json !parts_of_speech.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..be944f5 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "javascript.validate.enable": true +} \ No newline at end of file diff --git a/3-tidy-up.js b/3-tidy-up.js index 51a1664..045c663 100644 --- a/3-tidy-up.js +++ b/3-tidy-up.js @@ -7,20 +7,31 @@ const { target_iso: targetIso, kaikki_file: kaikkiFile, tidy_folder: writeFolder -} = process.env; +} = /** @type {TidyEnv} */ (process.env); const { sortTags, similarSort, mergePersonTags, consoleOverwrite, clearConsoleLine, logProgress, mapJsonReplacer } = require('./util/util'); +/** @type {LemmaDict} */ const lemmaDict = {}; + const formsMap = new Map(); const automatedForms = new Map(); +/** + * @param {string} string + * @returns {string} +*/ function escapeRegExp(string) { return string.replace(/[.*+\-?^${}()|[\]\\]/g, '\\$&'); } +/** + * @param {string[]} glosses + * @param {FormOf[]|undefined} formOf + * @returns {boolean} + */ function isInflectionGloss(glosses, formOf) { - glossesString = JSON.stringify(glosses); + const glossesString = JSON.stringify(glosses); switch (targetIso) { case 'de': if (glosses.some(gloss => /des (?:Verbs|Adjektivs|Substantivs|Demonstrativpronomens|Possessivpronomens|Pronomens)/.test(gloss))) return true; @@ -28,6 +39,7 @@ function isInflectionGloss(glosses, formOf) { if (glosses.some(gloss => /.*inflection of.*/.test(gloss))) return true; if(!Array.isArray(formOf)) return false; for (const {word: lemma} of formOf) { + if(!lemma) continue; if (glosses.some(gloss => new RegExp(`of ${escapeRegExp(lemma)}$`).test(gloss))) return true; } @@ -38,12 +50,16 @@ function isInflectionGloss(glosses, formOf) { return false; } - -function handleLevel(nest, level) { +/** + * @param {GlossTree} glossTree + * @param {number} level + * @returns {*} + */ +function handleLevel(glossTree, level) { const nestDefs = []; let defIndex = 0; - for (const [def, children] of nest) { + for (const [def, children] of glossTree) { defIndex += 1; if(children.size > 0) { @@ -51,6 +67,7 @@ function handleLevel(nest, level) { const childDefs = handleLevel(children, nextLevel); const listType = level === 1 ? "li" : "number"; + /** @type {StructuredContent[]} */ const content = level === 1 ? def : [{ "tag": "span", "data": { "listType": "number" }, "content": `${defIndex}. ` }, def]; nestDefs.push([ @@ -65,6 +82,10 @@ function handleLevel(nest, level) { return nestDefs; } +/** + * @param {GlossTree} glossTree + * @param {SenseInfo} sense + */ function handleNest(glossTree, sense) { const nestedGloss = handleLevel(glossTree, 1); @@ -74,7 +95,12 @@ function handleNest(glossTree, sense) { } } } - +/** + * @param {*} form + * @param {string} pos + * @param {*} lemma + * @param {*} inflections + */ function addDeinflections(form, pos, lemma, inflections) { if (targetIso === 'fr') { form = form.replace(/(qu\')?(ils\/elles|il\/elle\/on)\s*/, ''); @@ -132,12 +158,14 @@ lr.on('line', (line) => { if (line) { lineCount += 1; logProgress("Processing lines", lineCount); - handleLine(line); + handleLine(JSON.parse(line)); } }); -function handleLine(line) { - const parsedLine = JSON.parse(line); +/** + * @param {KaikkiLine} parsedLine + */ +function handleLine(parsedLine) { const { pos, sounds, forms } = parsedLine; if(!pos) return; const word = getCanonicalWordForm(parsedLine); @@ -164,9 +192,11 @@ function handleLine(line) { .flatMap(ipaObj => typeof ipaObj.ipa === 'string' ? [ipaObj] : ipaObj.ipa.map(ipa => ({ ipa, tags: ipaObj.tags })) ) .filter(ipaObj => ipaObj.ipa) : []; - - const sensesWithGlosses = senses.filter(sense => sense.glosses || sense.raw_glosses || sense.raw_gloss); - sensesWithGlosses.map(sense => { + + /** @type {TidySense[]} */ + const sensesWithGlosses = senses + .filter(sense => sense.glosses || sense.raw_glosses || sense.raw_gloss) + .map(sense => { const glosses = sense.raw_glosses || sense.raw_gloss || sense.glosses; const glossesArray = Array.isArray(glosses) ? glosses : [glosses]; @@ -175,8 +205,7 @@ function handleLine(line) { tags.push(...sense.raw_tags); } - sense.glossesArray = glossesArray; - sense.tags = tags; + return {...sense, glossesArray, tags}; }); const sensesWithoutInflectionGlosses = sensesWithGlosses.filter(sense => { @@ -195,31 +224,36 @@ function handleLine(line) { saveIpaResult(word, readings, pos, ipaObj); } + /** @type {GlossTree} */ const glossTree = new Map(); for (const sense of sensesWithoutInflectionGlosses) { const { glossesArray, tags } = sense; let temp = glossTree; for (const [levelIndex, levelGloss] of glossesArray.entries()) { - if(!temp.get(levelGloss)) { - temp.set(levelGloss, new Map()); + let curr = temp.get(levelGloss); + if(!curr) { + curr = new Map(); + temp.set(levelGloss, curr); if(levelIndex === 0) { - temp.get(levelGloss).set('_tags', tags); + curr.set('_tags', tags); } } else if (levelIndex === 0) { - temp.get(levelGloss).set('_tags', tags.filter(value => temp.get(levelGloss).get('_tags').includes(value))); + curr.set('_tags', tags.filter(value => curr?.get('_tags')?.includes(value))); } - temp = temp.get(levelGloss); + temp = curr; } } for (const [gloss, children] of glossTree) { - const tags = children.get('_tags'); - children.delete('_tags'); + const tags = children.get('_tags') || []; + children.delete('_tags'); + /** @type {SenseInfo} */ const currSense = { glosses: [], tags }; if(children.size === 0) { currSense.glosses.push(gloss); } else { + /** @type {GlossTree} */ const branch = new Map(); branch.set(gloss, children); handleNest(branch, currSense); @@ -231,41 +265,57 @@ function handleLine(line) { } } +/** + * @param {Form[]|undefined} forms + * @param {string} word + * @param {string} pos + */ function processForms(forms, word, pos) { - if (forms) { - forms.forEach((formData) => { - const { form } = formData; - let { tags } = formData; - if (!form) return; - if (!tags) return; - if (form === '-') return; - tags = tags.filter(tag => !redundantTags.includes(tag)); - const isBlacklisted = tags.some(value => blacklistedTags.includes(value)); - if (isBlacklisted) return; - const isIdentity = !tags.some(value => !identityTags.includes(value)); - if (isIdentity) return; - - const wordMap = automatedForms.get(word) || new Map(); - const formMap = wordMap.get(form) || new Map(); - formMap.get(pos) || formMap.set(pos, new Set()); - wordMap.set(form, formMap); - automatedForms.set(word, wordMap); - - const tagsSet = new Set((formMap.get(pos))); - - tagsSet.add(sortTags(targetIso, tags).join(' ')); - - formMap.set(pos, similarSort(mergePersonTags(targetIso, Array.from(tagsSet)))); - }); - } + if(!forms) return; + forms.forEach((formData) => { + const { form } = formData; + let { tags } = formData; + if (!form) return; + if (!tags) return; + if (form === '-') return; + tags = tags.filter(tag => !redundantTags.includes(tag)); + const isBlacklisted = tags.some(value => blacklistedTags.includes(value)); + if (isBlacklisted) return; + const isIdentity = !tags.some(value => !identityTags.includes(value)); + if (isIdentity) return; + + const wordMap = automatedForms.get(word) || new Map(); + const formMap = wordMap.get(form) || new Map(); + formMap.get(pos) || formMap.set(pos, new Set()); + wordMap.set(form, formMap); + automatedForms.set(word, wordMap); + + const tagsSet = new Set((formMap.get(pos))); + + tagsSet.add(sortTags(targetIso, tags).join(' ')); + + formMap.set(pos, similarSort(mergePersonTags(targetIso, Array.from(tagsSet)))); + }); } +/** + * @param {string} word + * @param {string[]} readings + * @param {string} pos + * @param {SenseInfo} currSense + */ function saveSenseResult(word, readings, pos, currSense) { for (const reading of readings) { lemmaDict[word][reading][pos].senses.push(currSense); } } +/** + * @param {string} word + * @param {string[]} readings + * @param {string} pos + * @param {*} ipaObj + */ function saveIpaResult(word, readings, pos, ipaObj) { for (const reading of readings) { const result = lemmaDict[word][reading][pos]; @@ -275,6 +325,11 @@ function saveIpaResult(word, readings, pos, ipaObj) { } } +/** + * @param {string} word + * @param {string[]} readings + * @param {string} pos + */ function initializeWordResult(word, readings, pos) { for (const reading of readings) { const result = ensureNestedObject(lemmaDict, [word, reading, pos]); @@ -283,6 +338,12 @@ function initializeWordResult(word, readings, pos) { } } +/** + * @param {Glosses|undefined} glosses + * @param {string} word + * @param {string} pos + * @returns + */ function processInflectionGlosses(glosses, word, pos) { switch (targetIso) { case 'de': @@ -290,6 +351,7 @@ function processInflectionGlosses(glosses, word, pos) { case 'en': return processEnglishInflectionGlosses(glosses, word, pos); case 'fr': + if(!glosses) return; let inflection, lemma; const match1 = glosses[0].match(/(.*)du verbe\s+((?:(?!\bdu\b).)*)$/); @@ -315,6 +377,12 @@ function processInflectionGlosses(glosses, word, pos) { } } +/** + * @param {*} glosses + * @param {string} word + * @param {string} pos + * @returns + */ function processGermanInflectionGlosses(glosses, word, pos) { const match1 = glosses[0].match(/(.*)des (?:Verbs|Adjektivs|Substantivs|Demonstrativpronomens|Possessivpronomens|Pronomens) (.*)$/); if (!match1 || match1.length < 3) return; @@ -325,6 +393,11 @@ function processGermanInflectionGlosses(glosses, word, pos) { } } +/** + * @param {NestedObject} obj + * @param {string[]} keys + * @returns {NestedObject} + */ function ensureNestedObject(obj, keys) { for (const key of keys) { obj[key] ??= {}; @@ -333,9 +406,14 @@ function ensureNestedObject(obj, keys) { return obj; } +/** + * @param {Glosses|undefined} glosses + * @param {string} word + * @param {string} pos + */ function processEnglishInflectionGlosses(glosses, word, pos) { - if(!glosses) return; - glossPieces = glosses.flatMap(gloss => gloss.split('##').map(piece => piece.trim())); + if(!glosses || !Array.isArray(glosses)) return; + const glossPieces = glosses.flatMap(gloss => gloss.split('##').map(piece => piece.trim())); const lemmas = new Set(); const inflections = new Set(); for (const piece of glossPieces) { @@ -374,6 +452,10 @@ function processEnglishInflectionGlosses(glosses, word, pos) { } } +/** + * @param {KaikkiLine} line + * @returns {string|undefined} + */ function getCanonicalWordForm({word, forms}) { if(!forms) return word; @@ -392,6 +474,11 @@ function getCanonicalWordForm({word, forms}) { } } +/** + * @param {string|undefined} word + * @param {Form[]} forms + * @returns {string|undefined} + */ function getCanonicalForm(word, forms) { const canonicalForm = forms.find(form => form.tags && form.tags.includes('canonical') @@ -403,7 +490,7 @@ function getCanonicalForm(word, forms) { word = word.replace(/ {{#if:.+/, '').trim(); } - bracketsRegex = /\[.*\]$/; + const bracketsRegex = /\[.*\]$/; if (bracketsRegex.test(word)) { word = word.replace(bracketsRegex, '').trim(); } @@ -411,6 +498,11 @@ function getCanonicalForm(word, forms) { return word; } +/** + * @param {string} word + * @param {KaikkiLine} line + * @returns {string[]} + */ function getReadings(word, line){ switch(sourceIso){ case 'fa': return [getPersianReading(word, line)]; @@ -420,13 +512,23 @@ function getReadings(word, line){ } } +/** + * @param {string} word + * @param {KaikkiLine} line + * @returns {string} + */ function getPersianReading(word, line){ const {forms} = line; if(!forms) return word; const romanization = forms.find(({form, tags}) => tags && tags.includes('romanization') && tags.length === 1 && form); - return romanization ? romanization.form : word; + return romanization?.form || word; } +/** + * @param {string} word + * @param {KaikkiLine} line + * @returns {string[]} + */ function getJapaneseReadings(word, line){ const {head_templates} = line; if(!head_templates) { diff --git a/jsconfig.json b/jsconfig.json new file mode 100644 index 0000000..866a5bb --- /dev/null +++ b/jsconfig.json @@ -0,0 +1,18 @@ +{ + "compilerOptions": { + "module": "ES2022", + "target": "ES2022", + "checkJs": true, + "strict": true, + "strictNullChecks": true, + "noImplicitAny": true, + "strictPropertyInitialization": true, + "suppressImplicitAnyIndexErrors": false + }, + "paths": { + "*": ["./types/*"], + "ext/json-schema": ["./types/ext/json-schema"] + }, + "exclude": ["node_modules", "**/node_modules/*"] +} + \ No newline at end of file diff --git a/types/types.ts b/types/types.ts new file mode 100644 index 0000000..40a33b9 --- /dev/null +++ b/types/types.ts @@ -0,0 +1,100 @@ +declare global { + type TidyEnv = { + source_iso: string, + target_iso: string, + kaikki_file: string, + tidy_folder: string, + } + + type KaikkiLine = { + head_templates?: HeadTemplate[]; + word?: string; + pos?: string; + sounds?: Sound[]; + forms?: Form[]; + senses?: KaikkiSense[]; + } + + type HeadTemplate = { + name?: string; + args?: string[]; + } + + type Sound = { + ipa?: string; + tags?: string[]; + note?: string; + } + + type Form = { + form?: string; + tags?: string[]; + } + + type KaikkiSense = { + glosses?: Glosses; + raw_glosses?: Glosses; + raw_gloss?: Glosses; + tags?: string[]; + raw_tags?: string[]; + form_of?: FormOf[]; + } + + type Glosses = string | string[]; + + type FormOf = { + word?: string; + } + + type GlossTree = Map & { + get(key: '_tags'): string[] | undefined; + set(key: '_tags', value: string[]): GlossTree; + }; + + type TidySense = Omit & { + tags: string[]; + glossesArray: string[]; + } + + type LemmaDict = { + [word: string]: { + [reading: string]: { + [pos: string]: LemmaInfo + } + } + } + + type LemmaInfo = { + ipa: IpaInfo[], + senses: SenseInfo[], + } + + type IpaInfo = { + ipa: string, + tags: string[], + } + + type SenseInfo = { + glosses: YomitanGloss[], + tags: string[], + } + + type YomitanGloss = string | StructuredGloss + + type StructuredGloss = { + type: "structured-content", + content: string | StructuredContent[], + } + + type StructuredContent = { + tag: string, + data: string, + content: StructuredContent, + } + + type NestedObject = { + [key: string]: NestedObject; + } +} + +export {} // This is needed to make this file a module \ No newline at end of file From f074c7695a7d5203cdfe6e04a1537ed735b9bea1 Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Fri, 11 Oct 2024 16:44:38 +0200 Subject: [PATCH 05/13] wip --- 3-tidy-up.js | 25 ++++++++++++++++--------- types/types.ts | 4 ++-- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/3-tidy-up.js b/3-tidy-up.js index 045c663..b9e8396 100644 --- a/3-tidy-up.js +++ b/3-tidy-up.js @@ -96,10 +96,10 @@ function handleNest(glossTree, sense) { } } /** - * @param {*} form + * @param {string} form * @param {string} pos - * @param {*} lemma - * @param {*} inflections + * @param {string} lemma + * @param {string[]} inflections */ function addDeinflections(form, pos, lemma, inflections) { if (targetIso === 'fr') { @@ -176,7 +176,8 @@ function handleLine(parsedLine) { const {senses} = parsedLine; if (!senses) return; - const ipa = sounds + /** @type {IpaInfo[]} */ + const ipa = /** @type {IpaInfo[]} */ (sounds ? sounds .filter(sound => sound && sound.ipa) .map(({ipa, tags, note}) => { @@ -189,9 +190,9 @@ function handleLine(parsedLine) { } return ({ipa, tags}) }) - .flatMap(ipaObj => typeof ipaObj.ipa === 'string' ? [ipaObj] : ipaObj.ipa.map(ipa => ({ ipa, tags: ipaObj.tags })) ) - .filter(ipaObj => ipaObj.ipa) - : []; + .flatMap(ipaObj => typeof ipaObj.ipa === 'string' ? [ipaObj] : ipaObj?.ipa?.map(ipa => ({ ipa, tags: ipaObj.tags })) ) + .filter(ipaObj => ipaObj?.ipa) + : []); /** @type {TidySense[]} */ const sensesWithGlosses = senses @@ -314,7 +315,7 @@ function saveSenseResult(word, readings, pos, currSense) { * @param {string} word * @param {string[]} readings * @param {string} pos - * @param {*} ipaObj + * @param {IpaInfo} ipaObj */ function saveIpaResult(word, readings, pos, ipaObj) { for (const reading of readings) { @@ -352,6 +353,9 @@ function processInflectionGlosses(glosses, word, pos) { return processEnglishInflectionGlosses(glosses, word, pos); case 'fr': if(!glosses) return; + /** + * @type {string|undefined} + */ let inflection, lemma; const match1 = glosses[0].match(/(.*)du verbe\s+((?:(?!\bdu\b).)*)$/); @@ -378,12 +382,13 @@ function processInflectionGlosses(glosses, word, pos) { } /** - * @param {*} glosses + * @param {Glosses|undefined} glosses * @param {string} word * @param {string} pos * @returns */ function processGermanInflectionGlosses(glosses, word, pos) { + if (!glosses || !Array.isArray(glosses)) return; const match1 = glosses[0].match(/(.*)des (?:Verbs|Adjektivs|Substantivs|Demonstrativpronomens|Possessivpronomens|Pronomens) (.*)$/); if (!match1 || match1.length < 3) return; const inflection = match1[1].trim(); @@ -414,7 +419,9 @@ function ensureNestedObject(obj, keys) { function processEnglishInflectionGlosses(glosses, word, pos) { if(!glosses || !Array.isArray(glosses)) return; const glossPieces = glosses.flatMap(gloss => gloss.split('##').map(piece => piece.trim())); + /** @type {Set} */ const lemmas = new Set(); + /** @type {Set} */ const inflections = new Set(); for (const piece of glossPieces) { const lemmaMatch = piece.match(/of ([^\s]+)\s*$/); diff --git a/types/types.ts b/types/types.ts index 40a33b9..e4fc802 100644 --- a/types/types.ts +++ b/types/types.ts @@ -21,7 +21,7 @@ declare global { } type Sound = { - ipa?: string; + ipa?: string|string[]; tags?: string[]; note?: string; } @@ -93,7 +93,7 @@ declare global { } type NestedObject = { - [key: string]: NestedObject; + [key: string]: NestedObject | any; } } From b3a131d3efb470131c425ab9bfa8e38a5f08bde9 Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Fri, 11 Oct 2024 16:45:30 +0200 Subject: [PATCH 06/13] wip --- 3-tidy-up.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/3-tidy-up.js b/3-tidy-up.js index b9e8396..1dbac36 100644 --- a/3-tidy-up.js +++ b/3-tidy-up.js @@ -195,7 +195,7 @@ function handleLine(parsedLine) { : []); /** @type {TidySense[]} */ - const sensesWithGlosses = senses + const sensesWithGlosses = /** @type {TidySense[]} */ (senses .filter(sense => sense.glosses || sense.raw_glosses || sense.raw_gloss) .map(sense => { const glosses = sense.raw_glosses || sense.raw_gloss || sense.glosses; @@ -207,7 +207,7 @@ function handleLine(parsedLine) { } return {...sense, glossesArray, tags}; - }); + })); const sensesWithoutInflectionGlosses = sensesWithGlosses.filter(sense => { const {glossesArray, form_of, glosses} = sense; From b942ea3436777e3638b627aaa14598c334ab6a25 Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Tue, 15 Oct 2024 12:19:49 +0200 Subject: [PATCH 07/13] start adding types --- 3-tidy-up.js | 19 ++++++++++++------- 4-make-yomitan.js | 1 + jsconfig.json | 5 ++++- types/types.ts | 10 ++++++++-- util/util.js | 13 ++++++++++++- 5 files changed, 37 insertions(+), 11 deletions(-) diff --git a/3-tidy-up.js b/3-tidy-up.js index 1dbac36..06fed75 100644 --- a/3-tidy-up.js +++ b/3-tidy-up.js @@ -14,7 +14,10 @@ const { sortTags, similarSort, mergePersonTags, consoleOverwrite, clearConsoleLi /** @type {LemmaDict} */ const lemmaDict = {}; +/** @type {FormsMap} */ const formsMap = new Map(); + +/** @type {AutomatedForms} */ const automatedForms = new Map(); /** @@ -67,7 +70,6 @@ function handleLevel(glossTree, level) { const childDefs = handleLevel(children, nextLevel); const listType = level === 1 ? "li" : "number"; - /** @type {StructuredContent[]} */ const content = level === 1 ? def : [{ "tag": "span", "data": { "listType": "number" }, "content": `${defIndex}. ` }, def]; nestDefs.push([ @@ -99,16 +101,16 @@ function handleNest(glossTree, sense) { * @param {string} form * @param {string} pos * @param {string} lemma - * @param {string[]} inflections + * @param {string[]|Set} inflections */ function addDeinflections(form, pos, lemma, inflections) { if (targetIso === 'fr') { form = form.replace(/(qu\')?(ils\/elles|il\/elle\/on)\s*/, ''); } - const lemmaForms = formsMap.get(lemma) || new Map(); + const lemmaForms = formsMap.get(lemma) || /** @type {Map>} */ (new Map()); formsMap.set(lemma, lemmaForms); - const formPOSs = lemmaForms.get(form) || new Map(); + const formPOSs = lemmaForms.get(form) || /** @type {Map} */ (new Map()); lemmaForms.set(form, formPOSs); formPOSs.get(pos) || formPOSs.set(pos, []); @@ -267,7 +269,7 @@ function handleLine(parsedLine) { } /** - * @param {Form[]|undefined} forms + * @param {FormInfo[]|undefined} forms * @param {string} word * @param {string} pos */ @@ -285,7 +287,9 @@ function processForms(forms, word, pos) { const isIdentity = !tags.some(value => !identityTags.includes(value)); if (isIdentity) return; + /** @type {Map>>} */ const wordMap = automatedForms.get(word) || new Map(); + /** @type {Map|string[]>} */ const formMap = wordMap.get(form) || new Map(); formMap.get(pos) || formMap.set(pos, new Set()); wordMap.set(form, formMap); @@ -483,7 +487,7 @@ function getCanonicalWordForm({word, forms}) { /** * @param {string|undefined} word - * @param {Form[]} forms + * @param {FormInfo[]} forms * @returns {string|undefined} */ function getCanonicalForm(word, forms) { @@ -619,13 +623,14 @@ lr.on('end', () => { const formsFilePath = `${writeFolder}/${sourceIso}-${targetIso}-forms.json`; + /** @type {{[chunkIndex: string]: FormsMap}} */ const mapChunks = Array.from(formsMap.entries()).reduce((acc, [key, value], index) => { logProgress("Chunking form dict", index, formsMap.size); const chunkIndex = Math.floor(index / 10000); acc[chunkIndex] ??= new Map(); acc[chunkIndex].set(key, value); return acc; - }, {}); + }, /** @type {{[chunkIndex: string]: FormsMap}} */ ({})); if(!mapChunks['0']) { mapChunks['0'] = new Map(); diff --git a/4-make-yomitan.js b/4-make-yomitan.js index 32a9e47..5942af4 100644 --- a/4-make-yomitan.js +++ b/4-make-yomitan.js @@ -1,3 +1,4 @@ +//@ts-nocheck const path = require('path'); const { readFileSync, writeFileSync, existsSync, readdirSync, mkdirSync, unlinkSync } = require('fs'); const { sortTags, writeInBatches, consoleOverwrite, diff --git a/jsconfig.json b/jsconfig.json index 866a5bb..609e7cc 100644 --- a/jsconfig.json +++ b/jsconfig.json @@ -13,6 +13,9 @@ "*": ["./types/*"], "ext/json-schema": ["./types/ext/json-schema"] }, - "exclude": ["node_modules", "**/node_modules/*"] + "exclude": [ + "node_modules", + "**/node_modules/*" + ] } \ No newline at end of file diff --git a/types/types.ts b/types/types.ts index e4fc802..ab1e7ab 100644 --- a/types/types.ts +++ b/types/types.ts @@ -11,7 +11,7 @@ declare global { word?: string; pos?: string; sounds?: Sound[]; - forms?: Form[]; + forms?: FormInfo[]; senses?: KaikkiSense[]; } @@ -26,7 +26,7 @@ declare global { note?: string; } - type Form = { + type FormInfo = { form?: string; tags?: string[]; } @@ -92,6 +92,12 @@ declare global { content: StructuredContent, } + type Lemma = string; + type Form = string; + type PoS = string; + type FormsMap = Map>>; + type AutomatedForms = Map|string[]>>>; + type NestedObject = { [key: string]: NestedObject | any; } diff --git a/util/util.js b/util/util.js index 1f3aede..09f3499 100644 --- a/util/util.js +++ b/util/util.js @@ -1,3 +1,4 @@ +//@ts-nocheck const path = require('path'); const { readFileSync, writeFileSync, existsSync } = require('fs'); const date = require('date-and-time'); @@ -35,7 +36,10 @@ function sortTags(targetIso, tags) { } // sorts inflection entries to be nearby similar inflections - +/** + * @param {string[]} tags + * @returns {string[]} + */ function similarSort(tags) { return tags.sort((a, b) => { const aWords = a.split(' '); @@ -62,6 +66,11 @@ function similarSort(tags) { // input: ['first-person singular present', 'third-person singular present'] // output: ['first/third-person singular present'] +/** + * @param {string} targetIso + * @param {string[]} tags + * @returns {string[]} + */ function mergePersonTags(targetIso, tags) { const persons = ["first-person", "second-person", "third-person"]; @@ -70,7 +79,9 @@ function mergePersonTags(targetIso, tags) { return items.sort((a, b) => persons.indexOf(a) - persons.indexOf(b)); } + /** @type {string[]} */ const result = []; + /** @type {Object} */ const mergeObj = {}; for (const item of tags) { From e47cc1deb5d0cc4bb7ea286b471e993a4a7839df Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Tue, 15 Oct 2024 13:43:29 +0200 Subject: [PATCH 08/13] get examples in tidied --- 3-tidy-up.js | 74 +++++++----- data/test/tidy/cs-en-lemmas.json | 31 ++++- data/test/tidy/de-en-lemmas.json | 125 +++++++++++++++++--- data/test/tidy/en-de-lemmas.json | 7 +- data/test/tidy/en-en-lemmas.json | 40 ++++++- data/test/tidy/es-en-lemmas.json | 22 +++- data/test/tidy/fa-en-lemmas.json | 22 +++- data/test/tidy/fr-en-lemmas.json | 192 ++++++++++++++++++++++++++++++- data/test/tidy/fr-fr-lemmas.json | 39 +++++++ data/test/tidy/ja-en-lemmas.json | 133 +++++++++++++++++---- data/test/tidy/la-en-lemmas.json | 152 +++++++++++++++++++++--- data/test/tidy/sq-en-lemmas.json | 27 ++++- data/test/tidy/th-en-lemmas.json | 3 +- types/types.ts | 21 +++- 14 files changed, 789 insertions(+), 99 deletions(-) diff --git a/3-tidy-up.js b/3-tidy-up.js index 06fed75..308fd8a 100644 --- a/3-tidy-up.js +++ b/3-tidy-up.js @@ -54,15 +54,15 @@ function isInflectionGloss(glosses, formOf) { } /** - * @param {GlossTree} glossTree + * @param {GlossTwig} glossTwig * @param {number} level * @returns {*} */ -function handleLevel(glossTree, level) { +function handleLevel(glossTwig, level) { const nestDefs = []; let defIndex = 0; - for (const [def, children] of glossTree) { + for (const [def, children] of glossTwig) { defIndex += 1; if(children.size > 0) { @@ -85,11 +85,11 @@ function handleLevel(glossTree, level) { } /** - * @param {GlossTree} glossTree + * @param {GlossTwig} glossTwig * @param {SenseInfo} sense */ -function handleNest(glossTree, sense) { - const nestedGloss = handleLevel(glossTree, 1); +function handleNest(glossTwig, sense) { + const nestedGloss = handleLevel(glossTwig, 1); if (nestedGloss.length > 0) { for (const entry of nestedGloss) { @@ -227,18 +227,53 @@ function handleLine(parsedLine) { saveIpaResult(word, readings, pos, ipaObj); } - /** @type {GlossTree} */ + const glossTree = getGlossTree(sensesWithoutInflectionGlosses); + + for (const [gloss, branches] of glossTree) { + const tags = branches.get('_tags') || []; + const examples = branches.get('_examples') || []; + branches.delete('_tags'); + branches.delete('_examples'); + + /** @type {SenseInfo} */ + const currSense = { glosses: [], tags, examples }; + if(branches.size === 0) { + currSense.glosses.push(gloss); + } else { + /** @type {GlossBranch} */ + const syntheticBranch = new Map(); + syntheticBranch.set(gloss, branches); + handleNest(syntheticBranch, currSense); + } + + if (currSense.glosses.length > 0) { + saveSenseResult(word, readings, pos, currSense); + } + } +} + +/** + * @param {TidySense[]} sensesWithoutInflectionGlosses + * @returns {GlossTree} + */ +function getGlossTree(sensesWithoutInflectionGlosses) { const glossTree = new Map(); for (const sense of sensesWithoutInflectionGlosses) { const { glossesArray, tags } = sense; + let { examples = [] } = sense; + examples = examples + .filter(({type}) => type !== 'quotation') + .map(({text, english}) => ({text, english})) + let temp = glossTree; for (const [levelIndex, levelGloss] of glossesArray.entries()) { let curr = temp.get(levelGloss); - if(!curr) { + if (!curr) { curr = new Map(); temp.set(levelGloss, curr); - if(levelIndex === 0) { + if (levelIndex === 0) { curr.set('_tags', tags); + curr.set('_examples', examples); } } else if (levelIndex === 0) { curr.set('_tags', tags.filter(value => curr?.get('_tags')?.includes(value))); @@ -246,26 +281,7 @@ function handleLine(parsedLine) { temp = curr; } } - - for (const [gloss, children] of glossTree) { - const tags = children.get('_tags') || []; - children.delete('_tags'); - - /** @type {SenseInfo} */ - const currSense = { glosses: [], tags }; - if(children.size === 0) { - currSense.glosses.push(gloss); - } else { - /** @type {GlossTree} */ - const branch = new Map(); - branch.set(gloss, children); - handleNest(branch, currSense); - } - - if (currSense.glosses.length > 0) { - saveSenseResult(word, readings, pos, currSense); - } - } + return glossTree; } /** diff --git a/data/test/tidy/cs-en-lemmas.json b/data/test/tidy/cs-en-lemmas.json index b609234..34e2591 100644 --- a/data/test/tidy/cs-en-lemmas.json +++ b/data/test/tidy/cs-en-lemmas.json @@ -15,6 +15,16 @@ ], "tags": [ "feminine" + ], + "examples": [ + { + "text": "textová zpráva", + "english": "text message" + }, + { + "text": "Chcete nechat zprávu?", + "english": "Would you like to leave a message?" + } ] }, { @@ -23,6 +33,16 @@ ], "tags": [ "feminine" + ], + "examples": [ + { + "text": "lékařská zpráva", + "english": "medical report" + }, + { + "text": "podat zprávu", + "english": "to file a report" + } ] } ] @@ -43,7 +63,13 @@ "glosses": [ "for" ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "Zabili ho pro peníze.", + "english": "They killed him for his money." + } + ] } ] } @@ -67,7 +93,8 @@ "imperfective", "reflexive", "with se" - ] + ], + "examples": [] } ] } diff --git a/data/test/tidy/de-en-lemmas.json b/data/test/tidy/de-en-lemmas.json index 27e4383..4e118b1 100644 --- a/data/test/tidy/de-en-lemmas.json +++ b/data/test/tidy/de-en-lemmas.json @@ -112,6 +112,16 @@ "class-4", "strong", "transitive" + ], + "examples": [ + { + "text": "jemanden gesund pflegen", + "english": "to nurse someone back to health" + }, + { + "text": "Kranke pflegen", + "english": "to care for the sick" + } ] }, { @@ -188,7 +198,8 @@ "tags": [ "class-4", "strong" - ] + ], + "examples": [] }, { "glosses": [ @@ -251,6 +262,16 @@ "tags": [ "class-4", "strong" + ], + "examples": [ + { + "text": "Umgang pflegen", + "english": "to regularly be in contact" + }, + { + "text": "Geselligkeit pflegen", + "english": "to socialize regularly (literally, “to regularly engage in gregariousness”)" + } ] } ] @@ -274,6 +295,12 @@ "tags": [ "masculine", "strong" + ], + "examples": [ + { + "text": "Fuchs, du hast die Gans gestohlen. Gib sie wieder her!", + "english": "(line from a popular children’s song)" + } ] }, { @@ -284,6 +311,12 @@ "informal", "masculine", "strong" + ], + "examples": [ + { + "text": "Er ist ein ganz schöner Fuchs.", + "english": "He is a really handsome fox." + } ] }, { @@ -294,6 +327,12 @@ "informal", "masculine", "strong" + ], + "examples": [ + { + "text": "Unser Paul ist ja ein kleiner Fuchs.", + "english": "Our Paul is a little redhead." + } ] }, { @@ -303,7 +342,8 @@ "tags": [ "masculine", "strong" - ] + ], + "examples": [] }, { "glosses": [ @@ -313,7 +353,8 @@ "masculine", "slang", "strong" - ] + ], + "examples": [] }, { "glosses": [ @@ -322,6 +363,12 @@ "tags": [ "masculine", "strong" + ], + "examples": [ + { + "text": "Ich hatte nur vier Trümpfe und darunter beide Füchse.", + "english": "I had only four trumps and among them were both aces of diamonds." + } ] }, { @@ -331,7 +378,8 @@ "tags": [ "masculine", "strong" - ] + ], + "examples": [] }, { "glosses": [ @@ -341,7 +389,8 @@ "archaic", "masculine", "strong" - ] + ], + "examples": [] }, { "glosses": [ @@ -350,7 +399,8 @@ "tags": [ "masculine", "strong" - ] + ], + "examples": [] }, { "glosses": [ @@ -360,7 +410,8 @@ "masculine", "obsolete", "strong" - ] + ], + "examples": [] } ] } @@ -392,7 +443,8 @@ "also", "neuter", "rare" - ] + ], + "examples": [] }, { "glosses": [ @@ -402,7 +454,8 @@ "also", "neuter", "rare" - ] + ], + "examples": [] }, { "glosses": [ @@ -412,7 +465,8 @@ "also", "neuter", "rare" - ] + ], + "examples": [] } ] } @@ -437,7 +491,8 @@ "form-of", "masculine", "strong" - ] + ], + "examples": [] } ] } @@ -461,31 +516,69 @@ "glosses": [ "from" ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "Ich fahre von Köln nach Hamburg.", + "english": "I'm travelling from Cologne to Hamburg." + }, + { + "text": "Ich hab’s von meiner Schwester gehört.", + "english": "I heard it from my sister." + } + ] }, { "glosses": [ "of, belonging to (often replacing genitive; see usage note below)" ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "das Auto meines Vaters = das Auto von meinem Vater", + "english": "my father’s car / the car of my father" + } + ] }, { "glosses": [ "by (with passive voice)" ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "Das Hotel wird von der Firma bezahlt.", + "english": "The hotel is paid for by the company." + } + ] }, { "glosses": [ "about, of (a topic)" ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "Er hat von seiner Jugend erzählt.", + "english": "He told about his youth." + } + ] }, { "glosses": [ "on, with (a resource)" ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "Von welchem Geld soll ich als Arbeitsloser in Urlaub fahren?", + "english": "Being unemployed, on what money should I go on holidays?" + }, + { + "text": "Man kann nicht nur von Luft und Liebe leben.", + "english": "You can’t live on air and love alone. (proverb)" + } + ] } ] } diff --git a/data/test/tidy/en-de-lemmas.json b/data/test/tidy/en-de-lemmas.json index 36a8d03..699112a 100644 --- a/data/test/tidy/en-de-lemmas.json +++ b/data/test/tidy/en-de-lemmas.json @@ -13,7 +13,12 @@ "glosses": [ "[1] aussuchen, auswählen, vorziehen, wählen" ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "\"Connolly was chosen for the case by prosecutors because the sheriff and Bristol District Attorney C. Samuel Sutter are “close professional and personal friends,” said Sutter spokesman Gregg Miliote.\"" + } + ] } ] } diff --git a/data/test/tidy/en-en-lemmas.json b/data/test/tidy/en-en-lemmas.json index dae6f58..c1d452e 100644 --- a/data/test/tidy/en-en-lemmas.json +++ b/data/test/tidy/en-en-lemmas.json @@ -16,6 +16,11 @@ "tags": [ "ditransitive", "transitive" + ], + "examples": [ + { + "text": "Waiter, please bring me a single malt whiskey." + } ] }, { @@ -25,6 +30,11 @@ "tags": [ "figuratively", "transitive" + ], + "examples": [ + { + "text": "The new company director brought a fresh perspective on sales and marketing." + } ] }, { @@ -33,6 +43,11 @@ ], "tags": [ "transitive" + ], + "examples": [ + { + "text": "The controversial TV broadcast brought a storm of complaints." + } ] }, { @@ -41,25 +56,37 @@ ], "tags": [ "transitive" - ] + ], + "examples": [] }, { "glosses": [ "To persuade; to induce; to draw; to lead; to guide." ], - "tags": [] + "tags": [], + "examples": [] }, { "glosses": [ "To produce in exchange; to sell for; to fetch." ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "What does coal bring per ton?" + } + ] }, { "glosses": [ "(baseball) To pitch, often referring to a particularly hard thrown fastball." ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "The closer Jones can really bring it." + } + ] } ] } @@ -82,6 +109,11 @@ "tags": [ "archaic", "literary" + ], + "examples": [ + { + "text": "\"The Hay Wain\" is a famous painting by John Constable." + } ] } ] diff --git a/data/test/tidy/es-en-lemmas.json b/data/test/tidy/es-en-lemmas.json index 7b71054..54f9066 100644 --- a/data/test/tidy/es-en-lemmas.json +++ b/data/test/tidy/es-en-lemmas.json @@ -19,7 +19,8 @@ ], "tags": [ "intransitive" - ] + ], + "examples": [] }, { "glosses": [ @@ -27,6 +28,12 @@ ], "tags": [ "intransitive" + ], + "examples": [ + { + "text": "Vive de migas, nada más.", + "english": "He lives on crumbs, nothing more." + } ] }, { @@ -35,6 +42,16 @@ ], "tags": [ "intransitive" + ], + "examples": [ + { + "text": "Vive en la casa roja.", + "english": "She lives in the red house." + }, + { + "text": "La pobrecita vive con dos hermanas crueles.", + "english": "The poor girl lives with two cruel sisters." + } ] }, { @@ -43,7 +60,8 @@ ], "tags": [ "transitive" - ] + ], + "examples": [] } ] } diff --git a/data/test/tidy/fa-en-lemmas.json b/data/test/tidy/fa-en-lemmas.json index 6452fd6..561d56d 100644 --- a/data/test/tidy/fa-en-lemmas.json +++ b/data/test/tidy/fa-en-lemmas.json @@ -31,7 +31,8 @@ ], "tags": [ "Khorasan" - ] + ], + "examples": [] } ] } @@ -80,19 +81,32 @@ "glosses": [ "Persian (the language of modern Iran, Afghanistan and Tajikistan, and widely spoken in Uzbekistan)." ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "بَرادَرِ شُوْهَرِش فارْسی بَلَدِه.", + "english": "Her husband's brother knows Persian." + } + ] }, { "glosses": [ "Persian (the language of Ancient Persia)." ], - "tags": [] + "tags": [], + "examples": [] }, { "glosses": [ "Persian, main ethnic group of Iran." ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "فارْسی هَسْتیم.", + "english": "We are Persian." + } + ] } ] } diff --git a/data/test/tidy/fr-en-lemmas.json b/data/test/tidy/fr-en-lemmas.json index 00d9aa8..a1d7388 100644 --- a/data/test/tidy/fr-en-lemmas.json +++ b/data/test/tidy/fr-en-lemmas.json @@ -15,6 +15,12 @@ ], "tags": [ "transitive" + ], + "examples": [ + { + "text": "prends ma main", + "english": "take my hand" + } ] }, { @@ -23,6 +29,12 @@ ], "tags": [ "transitive" + ], + "examples": [ + { + "text": "elle prend un café", + "english": "she is drinking a coffee" + } ] }, { @@ -31,6 +43,12 @@ ], "tags": [ "transitive" + ], + "examples": [ + { + "text": "Je vais prendre le plat du jour.", + "english": "I'll get the dish of the day." + } ] }, { @@ -39,6 +57,12 @@ ], "tags": [ "transitive" + ], + "examples": [ + { + "text": "prendre quelque chose à quelqu’un", + "english": "to take something from someone" + } ] }, { @@ -47,6 +71,16 @@ ], "tags": [ "transitive" + ], + "examples": [ + { + "text": "prendre une décision", + "english": "to make a decision" + }, + { + "text": "prendre des mesures draconiennes", + "english": "to take draconian measures" + } ] }, { @@ -55,6 +89,24 @@ ], "tags": [ "intransitive" + ], + "examples": [ + { + "text": "le feu ne prend pas", + "english": "the fire won't start" + }, + { + "text": "la sauce ne prend pas", + "english": "the sauce isn't thickening" + }, + { + "text": "ma mayonnaise ne prend pas", + "english": "my mayonnaise isn't setting" + }, + { + "text": "ça ne prend pas avec moi", + "english": "that won't wash with me" + } ] }, { @@ -63,6 +115,16 @@ ], "tags": [ "reflexive" + ], + "examples": [ + { + "text": "je me suis pris la main dans la porte", + "english": "I caught my hand in the door" + }, + { + "text": "je me suis pris la porte dans la figure", + "english": "the door hit me in the face" + } ] }, { @@ -111,6 +173,36 @@ "in various idiomatic expressions", "transitive", "with à" + ], + "examples": [ + { + "text": "Qu’est-ce qui t’a pris ? Qu’est-ce qui t’est passé par la tête ?", + "english": "What were you thinking? What got into you? What came over you?" + }, + { + "text": "Qu’est-ce qui lui a pris ? Quelle mouche l’a piqué ?", + "english": "What was he thinking? What got into him?" + }, + { + "text": "bien lui en prit", + "english": "good for him; it was a good choice (literally, “he took it well”)" + }, + { + "text": "mal lui en prit", + "english": "too bad for him; it was a bad choice (literally, “he took it badly”)" + }, + { + "text": "prendre en aversion", + "english": "take an aversion (to)" + }, + { + "text": "prendre en grippe", + "english": "take a dislike (to)" + }, + { + "text": "prendre en dégoût", + "english": "become disgusted (by)" + } ] }, { @@ -120,6 +212,72 @@ "tags": [ "followed by a partitive", "in various idiomatic expressions" + ], + "examples": [ + { + "text": "prendre de la vitesse", + "english": "to gain speed" + }, + { + "text": "prendre du galon", + "english": "to gain a promotion" + }, + { + "text": "prendre de l’avance", + "english": "to gain ground" + }, + { + "text": "prendre du retard", + "english": "to fall behind schedule, to run late, to drop behind" + }, + { + "text": "prendre de la hauteur", + "english": "to gain some perspective" + }, + { + "text": "prendre du recul", + "english": "to take a step back" + }, + { + "text": "prendre de la bouteille", + "english": "to gain experience" + }, + { + "text": "en prendre de la graine", + "english": "to take away a lesson" + }, + { + "text": "prendre du poids", + "english": "to gain weight" + }, + { + "text": "prendre de la masse", + "english": "to build muscle" + }, + { + "text": "prendre de la brioche, prendre du bide, prendre du ventre", + "english": "to get a paunch" + }, + { + "text": "prendre du bouchon", + "english": "to fail" + }, + { + "text": "prendre de l’élan", + "english": "to gain momentum" + }, + { + "text": "prendre de l’âge", + "english": "to get older" + }, + { + "text": "prendre de la valeur", + "english": "to gain value" + }, + { + "text": "prendre de l’importance", + "english": "to become important" + } ] }, { @@ -129,6 +287,12 @@ "tags": [ "colloquial", "impersonal" + ], + "examples": [ + { + "text": "Ça va me prendre au moins deux heures pour le mettre à jour.", + "english": "It's going to take me at least two hours to update it." + } ] }, { @@ -139,6 +303,12 @@ "broadly", "colloquial", "impersonal" + ], + "examples": [ + { + "text": "Pour finir dans deux heures, ça prend trois personnes.", + "english": "To finish in two hours, it'll take three people." + } ] }, { @@ -147,6 +317,16 @@ ], "tags": [ "impersonal" + ], + "examples": [ + { + "text": "il prend [quelque chose] à [quelqu’un]", + "english": "[something] comes over [someone]" + }, + { + "text": "Il lui prend une fantaisie de mettre le feu à la maison.", + "english": "A fancy comes over him to set fire to the house." + } ] } ] @@ -170,6 +350,12 @@ "tags": [ "impersonal", "intransitive" + ], + "examples": [ + { + "text": "Il semblerait qu’il y ait des différences significatives entre les deux groupes.", + "english": "There would seem to be significant differences between the two groups." + } ] }, { @@ -178,7 +364,8 @@ ], "tags": [ "intransitive" - ] + ], + "examples": [] } ] } @@ -286,7 +473,8 @@ ], "tags": [ "feminine" - ] + ], + "examples": [] } ] } diff --git a/data/test/tidy/fr-fr-lemmas.json b/data/test/tidy/fr-fr-lemmas.json index dda9d75..1f93451 100644 --- a/data/test/tidy/fr-fr-lemmas.json +++ b/data/test/tidy/fr-fr-lemmas.json @@ -15,6 +15,17 @@ ], "tags": [ "Hindouisme" + ], + "examples": [ + { + "text": "Chenguza est beau comme le grand Avatar, (descendant de Bram) et quoique élevé pour le sacerdoce dans les austérités du cloître, il a l'imagination vive, le désir de s'instruire, et un penchant décidé pour la volupté." + }, + { + "text": "Retiré dans les forêts, placé à la tête de nombreux pasteurs , qui ont dû se plaire à grossir sa renommée, et dont les bras l'ont aidé à renverser le tyran, Crichna évidemment dut sortir des Djangles, où il s'était tenu jusque-là dans le mystère , comme un homme environné d'une sorte d'auréole merveilleuse, qui le fit considérer de bonne heure comme un Avatar (incarnation) de la Divinité, caractère religieux et providentiel que l'Inde a toujours accordé à ses grands hommes" + }, + { + "text": "Plusieurs passages des Pouranas peuvent aussi servir à prouver que les eaux ont fait éprouver au globe terrestre une grande révolution , et la plupart des avatars ou métamorphoses et incarnations de Vichenou, paraissent avoir trait à ce cataclysme général." + } ] }, { @@ -23,6 +34,20 @@ ], "tags": [ "figuratively" + ], + "examples": [ + { + "text": "Que d’avatars dans la vie politique de cet homme d’État !" + }, + { + "text": "Batman est l’avatar moderne de Zorro." + }, + { + "text": "L’espéranto est l’avatar moderne du sanskrit." + }, + { + "text": "Plusieurs passages des Pouranas peuvent aussi servir à prouver que les eaux ont fait éprouver au globe terrestre une grande révolution , et la plupart des avatars ou métamorphoses et incarnations de Vichenou, paraissent avoir trait à ce cataclysme général." + } ] }, { @@ -32,6 +57,20 @@ "tags": [ "broadly", "Utilisé à tort" + ], + "examples": [ + { + "text": "Mais la guerre n’en finissait plus et c’est alors que je devins, après bien des démarches, de boulanger, élève-pilote et par la suite pilote aviateur, à ma très grande surprise et sans trop d’avatars." + }, + { + "text": "N'avions-nous pas aidé à la constituer, en courant bien des risques et au prix de quelles avatars ! Alors, lui mort, il ne savait plus qu'en faire…." + }, + { + "text": "Le service social du travail – Avatars d’une fonction, vicissitudes d’un métier" + }, + { + "text": "Compte tenu des divers avatars rencontrés ces dernières années (rouille du mélèze, puceron lanigère, tempêtes…), il n'est plus possible de s’intéresser à la seule productivité, […]." + } ] } ] diff --git a/data/test/tidy/ja-en-lemmas.json b/data/test/tidy/ja-en-lemmas.json index b83aac5..90a1696 100644 --- a/data/test/tidy/ja-en-lemmas.json +++ b/data/test/tidy/ja-en-lemmas.json @@ -13,7 +13,8 @@ "glosses": [ "pleasant, delightful, fun, enjoyable" ], - "tags": [] + "tags": [], + "examples": [] } ] } @@ -41,7 +42,24 @@ "glosses": [ "liked, favorite" ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "好きな食べ物は? アイスクリームです。", + "english": "What's your favorite food? - It's ice cream." + }, + { + "text": "君が好きだからこそこれほど頑張っているんだよ。", + "english": "It's precisely because I like you [because of my fondness for you] that I'm working this hard." + }, + { + "text": "好きです。\nSuki desu.\n“I like you.” → idiomatically used to express: “I love you.”" + }, + { + "text": "どうとも好きにしなさい。", + "english": "Feel free to do as you like." + } + ] } ] } @@ -61,7 +79,13 @@ "glosses": [ "a raccoon dog, Nyctereutes procyonoides" ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "アライグマなら尻尾にシマがある。どう見でもタヌキだ。", + "english": "If you're a raccoon, you'd have stripes on your tail. No matter how you look at it, you're a raccoon dog." + } + ] }, { "glosses": [ @@ -69,6 +93,12 @@ ], "tags": [ "figuratively" + ], + "examples": [ + { + "text": "やいやい、其処な狸め", + "english": "Hey there, you sly dog!" + } ] }, { @@ -78,7 +108,8 @@ "tags": [ "abbreviation", "alt-of" - ] + ], + "examples": [] }, { "glosses": [ @@ -88,6 +119,11 @@ "abbreviation", "alt-of", "rare" + ], + "examples": [ + { + "text": "狸を決め込む ― tanuki o kimekomu ― pretend to be a raccoon dog → feign sleep" + } ] }, { @@ -99,7 +135,8 @@ "alt-of", "obsolete", "rare" - ] + ], + "examples": [] } ] } @@ -173,7 +210,13 @@ ] } ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "車が走っている。", + "english": "A car is running. / Cars are running." + } + ] }, { "glosses": [ @@ -181,49 +224,89 @@ ], "tags": [ "transitive" + ], + "examples": [ + { + "text": "彼はこの道をよく走る。", + "english": "He often runs down this street." + } ] }, { "glosses": [ "to move smoothly; to slide" ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "刀が鞘から走る。", + "english": "The sword slides out of its sheath." + } + ] }, { "glosses": [ "to run away, escape" ], - "tags": [] + "tags": [], + "examples": [] }, { "glosses": [ "to rush, hurry around" ], - "tags": [] + "tags": [], + "examples": [] }, { "glosses": [ "to give over oneself to; to commit oneself to (usually something bad)" ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "彼は敵に走った。", + "english": "He defected to the enemy." + }, + { + "text": "立場を忘れて感情に走ってはいけない。", + "english": "Don't forget your stance and give in to emotions." + } + ] }, { "glosses": [ "to spread out, scatter, splatter, spout" ], - "tags": [] + "tags": [], + "examples": [] }, { "glosses": [ "to lead or extend in a certain direction" ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "山脈が南北に走る。\nSanmyaku ga nanboku ni hashiru.\nThe mountain range runs north–south." + } + ] }, { "glosses": [ "to appear briefly; to flash" ], - "tags": [] + "tags": [], + "examples": [ + { + "text": "稲妻が走る", + "english": "lightning flashes by" + }, + { + "text": "背中に痛みが走った。", + "english": "I felt a brief pain in my back." + } + ] }, { "glosses": [ @@ -231,7 +314,8 @@ ], "tags": [ "used with 胸(むね)が (mune ga)" - ] + ], + "examples": [] }, { "glosses": [ @@ -239,7 +323,8 @@ ], "tags": [ "euphemistic" - ] + ], + "examples": [] }, { "glosses": [ @@ -248,7 +333,8 @@ "tags": [ "alt-of", "alternative" - ] + ], + "examples": [] } ] } @@ -272,13 +358,15 @@ "glosses": [ "five colors (usu. red (赤), blue (青), yellow (黄), white (白) and black (黒))" ], - "tags": [] + "tags": [], + "examples": [] }, { "glosses": [ "melon, gourd" ], - "tags": [] + "tags": [], + "examples": [] } ] } @@ -300,13 +388,15 @@ "glosses": [ "five colors (usu. red (赤), blue (青), yellow (黄), white (白) and black (黒))" ], - "tags": [] + "tags": [], + "examples": [] }, { "glosses": [ "melon, gourd" ], - "tags": [] + "tags": [], + "examples": [] } ] } @@ -326,7 +416,8 @@ "glosses": [ "[I am / someone is] hungry" ], - "tags": [] + "tags": [], + "examples": [] } ] } diff --git a/data/test/tidy/la-en-lemmas.json b/data/test/tidy/la-en-lemmas.json index d420da6..a035e01 100644 --- a/data/test/tidy/la-en-lemmas.json +++ b/data/test/tidy/la-en-lemmas.json @@ -29,7 +29,8 @@ ], "tags": [ "declension-1" - ] + ], + "examples": [] }, { "glosses": [ @@ -37,6 +38,16 @@ ], "tags": [ "declension-1" + ], + "examples": [ + { + "text": "hascine propter rēs maledicās fāmās ferunt.", + "english": "Is it on account of these things that they spread slanderous reports?" + }, + { + "text": "“Oenōtrī coluēre virī; nunc fāma minōrēs", + "english": "“Oenotrian men tilled [the land]; now rumor [has it that their] descendants call the nation ‘Italy’ after the name of its leader, [Italus].”" + } ] }, { @@ -45,6 +56,20 @@ ], "tags": [ "declension-1" + ], + "examples": [ + { + "text": "Dīmīcantī dē fāmā dēesse.", + "english": "To abandon one whose reputation is attacked." + }, + { + "text": "Fāma tamen clāra est; et adhūc sine crīmine vīxī.", + "english": "My good name is nevertheless unstained; and so far I have lived without blame." + }, + { + "text": "Multī fāmam, conscientiam paucī verentur.", + "english": "Many fear their reputation, few their conscience." + } ] }, { @@ -53,6 +78,11 @@ ], "tags": [ "declension-1" + ], + "examples": [ + { + "text": "Extemplō Libyae magnās it Fāma per urbēs —\nFāma, malum quā nōn aliud vēlōcius ūllum.\nStraightaway Rumor runs through the great cities of Libya – Rumor, than whom [there is] not any other evil more swift." + } ] } ] @@ -95,7 +125,8 @@ ], "tags": [ "conjugation-3" - ] + ], + "examples": [] }, { "glosses": [ @@ -103,6 +134,12 @@ ], "tags": [ "conjugation-3" + ], + "examples": [ + { + "text": "[…] dum ne quem militem legeret ex eo numero quibus senatus missionem reditumque in patriam negasset ante belli finem.", + "english": "[…] provided he did not choose any soldier from those to whom the Senate had refused discharge and a return home before the end of the war" + } ] }, { @@ -111,7 +148,8 @@ ], "tags": [ "conjugation-3" - ] + ], + "examples": [] }, { "glosses": [ @@ -119,7 +157,8 @@ ], "tags": [ "conjugation-3" - ] + ], + "examples": [] }, { "glosses": [ @@ -127,7 +166,8 @@ ], "tags": [ "conjugation-3" - ] + ], + "examples": [] }, { "glosses": [ @@ -135,6 +175,24 @@ ], "tags": [ "conjugation-3" + ], + "examples": [ + { + "text": "Librōs lege.", + "english": "Read books." + }, + { + "text": "Lēgistīne hunc librum?", + "english": "Have you read this book?" + }, + { + "text": "Lingua Graeca est; nōn potest legī.", + "english": "It's Greek; it cannot be read." + }, + { + "text": "Exigis, ut nostrōs dōnem tibi, Tucca, libellōs.\nNōn faciam: nam vīs vēndere, nōn legere.\nYou demand that I give our [⇒ my] little books to you, Tucca.", + "english": "I will not do [it]: for you want to sell [them], not to read [them]." + } ] }, { @@ -144,7 +202,8 @@ "tags": [ "Medieval-Latin", "conjugation-3" - ] + ], + "examples": [] } ] } @@ -186,7 +245,8 @@ ], "tags": [ "declension-2" - ] + ], + "examples": [] } ] } @@ -240,7 +300,8 @@ ], "tags": [ "not-comparable" - ] + ], + "examples": [] }, { "glosses": [ @@ -248,7 +309,8 @@ ], "tags": [ "not-comparable" - ] + ], + "examples": [] }, { "glosses": [ @@ -256,6 +318,12 @@ ], "tags": [ "not-comparable" + ], + "examples": [ + { + "text": "ab ōvō ū̆sque ad māla", + "english": "from the beginning to the end\n(literally, “from the egg to the apples”)" + } ] } ] @@ -303,6 +371,12 @@ "participle", "passive", "perfect" + ], + "examples": [ + { + "text": "Quae rectis lineis suos ordines servant", + "english": "Which preserve their order in straight lines" + } ] }, { @@ -317,7 +391,8 @@ "passive", "perfect", "usually" - ] + ], + "examples": [] }, { "glosses": [ @@ -331,6 +406,12 @@ "participle", "passive", "perfect" + ], + "examples": [ + { + "text": "Via stultī rēcta in oculīs eius; quī autem sapiēns est audit cōnsilia.", + "english": "The way of a fool is right in his own eyes: but he that is wise hearkeneth unto counsels. (Douay-Rheims trans., Challoner rev.: 1752 CE)" + } ] } ] @@ -408,6 +489,20 @@ "declension-4", "feminine", "irregular" + ], + "examples": [ + { + "text": "Deō domuīque", + "english": "For God and for home (motto of Methodist Ladies' College, Melbourne)" + }, + { + "text": "Stet fortūna domūs", + "english": "Let the good fortune of the house stand (motto of Harrow School, England)" + }, + { + "text": "nox erat et tōta lūmina nūlla domō", + "english": "It was night, and [there were] no lights in the whole house." + } ] }, { @@ -419,7 +514,8 @@ "declension-4", "feminine", "irregular" - ] + ], + "examples": [] }, { "glosses": [ @@ -468,7 +564,8 @@ "declension-4", "feminine", "irregular" - ] + ], + "examples": [] }, { "glosses": [ @@ -533,7 +630,8 @@ "declension-4", "feminine", "irregular" - ] + ], + "examples": [] }, { "glosses": [ @@ -545,6 +643,24 @@ "feminine", "idiomatic", "irregular" + ], + "examples": [ + { + "text": "domum trahere", + "english": "to drag into one's pocket" + }, + { + "text": "Domī versūra fit.", + "english": "One is one's own creditor. (proverb)" + }, + { + "text": "domō afferre", + "english": "to conceive on one's own" + }, + { + "text": "id nunc experior domō", + "english": "Now I'm learning this first-hand." + } ] }, { @@ -558,6 +674,16 @@ "idiomatic", "irregular", "in locative case in phrases" + ], + "examples": [ + { + "text": "bellī domīque; bellō domīque; vel bellī vel domī; domī bellōque; domī mīlitiaeque", + "english": "in war and peace" + }, + { + "text": "ut non quietior populus domi esset quam militiae", + "english": "so that the people should not become lazier in the time of peace than that of war" + } ] } ] diff --git a/data/test/tidy/sq-en-lemmas.json b/data/test/tidy/sq-en-lemmas.json index b8482da..6436081 100644 --- a/data/test/tidy/sq-en-lemmas.json +++ b/data/test/tidy/sq-en-lemmas.json @@ -15,7 +15,8 @@ ], "tags": [ "masculine" - ] + ], + "examples": [] } ] } @@ -140,6 +141,16 @@ ], "tags": [ "feminine" + ], + "examples": [ + { + "text": "Mbaje gjuhën!", + "english": "Hold your tongue!" + }, + { + "text": "E ka gjuhën të gjatë.", + "english": "(literally, “She has a long tongue.”)" + } ] }, { @@ -234,6 +245,20 @@ ], "tags": [ "feminine" + ], + "examples": [ + { + "text": "gjuha e fëmijëve", + "english": "children speech" + }, + { + "text": "gjuhë e trashë", + "english": "foul language" + }, + { + "text": "gjuha e shkrimtarit", + "english": "the author's style" + } ] } ] diff --git a/data/test/tidy/th-en-lemmas.json b/data/test/tidy/th-en-lemmas.json index 8d1eea4..51bab06 100644 --- a/data/test/tidy/th-en-lemmas.json +++ b/data/test/tidy/th-en-lemmas.json @@ -10,7 +10,8 @@ ], "tags": [ "letter" - ] + ], + "examples": [] } ] } diff --git a/types/types.ts b/types/types.ts index ab1e7ab..44b087d 100644 --- a/types/types.ts +++ b/types/types.ts @@ -32,6 +32,7 @@ declare global { } type KaikkiSense = { + examples?: Example[]; glosses?: Glosses; raw_glosses?: Glosses; raw_gloss?: Glosses; @@ -40,16 +41,29 @@ declare global { form_of?: FormOf[]; } + type Example = { + text?: string; + type?: "example" | "quotation"; + english?: string; + roman?: string; + } + type Glosses = string | string[]; type FormOf = { word?: string; } - type GlossTree = Map & { + type GlossTree = Map ; + + type GlossBranch = Map & { get(key: '_tags'): string[] | undefined; - set(key: '_tags', value: string[]): GlossTree; - }; + set(key: '_tags', value: string[]): GlossBranch; + get(key: '_examples'): Example[] | undefined; + set(key: '_examples', value: Example[]): GlossBranch; + } ; + + type GlossTwig = Map; type TidySense = Omit & { tags: string[]; @@ -77,6 +91,7 @@ declare global { type SenseInfo = { glosses: YomitanGloss[], tags: string[], + examples: Example[], } type YomitanGloss = string | StructuredGloss From 5df03cb48d2d98eb07336dc0ce36f17db592cfe9 Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Tue, 15 Oct 2024 14:07:27 +0200 Subject: [PATCH 09/13] wip --- 4-make-yomitan.js | 76 ++++++++++++++++++++++++++++++++++++++++------- types/types.ts | 10 +++++++ 2 files changed, 76 insertions(+), 10 deletions(-) diff --git a/4-make-yomitan.js b/4-make-yomitan.js index 5942af4..187a1b5 100644 --- a/4-make-yomitan.js +++ b/4-make-yomitan.js @@ -1,4 +1,3 @@ -//@ts-nocheck const path = require('path'); const { readFileSync, writeFileSync, existsSync, readdirSync, mkdirSync, unlinkSync } = require('fs'); const { sortTags, writeInBatches, consoleOverwrite, @@ -32,7 +31,9 @@ if (!existsSync(`data/language/${source_iso}/${target_iso}`)) { mkdirSync(`data/language/${source_iso}/${target_iso}`, {recursive: true}); } +/** @type {WhitelistedTag[]} */ const targetLanguageTermTags = loadJsonArray(`data/language/target-language-tags/${target_iso}/tag_bank_term.json`); +/** @type {WhitelistedTag[]} */ const languageTermTags = loadJsonArray(`data/language/${source_iso}/${target_iso}/tag_bank_term.json`); const termTags = [...targetLanguageTermTags, ...languageTermTags]; @@ -44,7 +45,7 @@ const partsOfSpeech = loadJsonArray(`data/language/target-language-tags/${target const multiwordInflections = loadJsonArray(`data/language/${source_iso}/${target_iso}/multiword_inflections.json`); const tagStylesFile = `data/language/target-language-tags/${target_iso}/tag_styles.json`; -const tagStyles = existsSync(tagStylesFile) ? JSON.parse(readFileSync(tagStylesFile)) : {}; +const tagStyles = existsSync(tagStylesFile) ? JSON.parse(readFileSync(tagStylesFile, 'utf8')) : {}; const tagModifiers = [ ['chiefly', 'chief'], @@ -56,6 +57,11 @@ const tagModifiers = [ ['slightly', 'sli'], ] +/** + * @param {WhitelistedTag[]} tags + * @param {string} tag + * @returns + */ function findTag(tags, tag) { const fullTag = tags.find((x) => { if (typeof x[3] === 'string') { @@ -75,12 +81,16 @@ function findTag(tags, tag) { return result; } +/** + * @param {*} tag + * @returns + */ function findModifiedTag(tag){ let modifiedTag = null; tagModifiers.forEach((modifier) => { const regex = new RegExp(`^${modifier[0]} `); if (regex.test(tag)){ - fullTag = findTag(termTags, tag.replace(regex, '')); + const fullTag = findTag(termTags, tag.replace(regex, '')); if (fullTag){ modifiedTag = [ `${modifier[1]}-${fullTag[0]}`, @@ -118,12 +128,13 @@ let lastTermBankIndex = 0; consoleOverwrite(`4-make-yomitan.js: reading lemmas...`); const lemmasFile = `${readFolder}/${source_iso}-${target_iso}-lemmas.json`; - const lemmaDict = JSON.parse(readFileSync(path.resolve(__dirname, lemmasFile))); + /** @type {LemmaDict} */ + const lemmaDict = JSON.parse(readFileSync(path.resolve(__dirname, lemmasFile), 'utf8')); consoleOverwrite('4-make-yomitan.js: processing lemmas...'); for (const [lemma, readings] of Object.entries(lemmaDict)) { for (const [reading, partsOfSpeechOfWord] of Object.entries(readings)) { - normalizedLemma = normalizeOrthography(lemma); + const normalizedLemma = normalizeOrthography(lemma); let term = normalizedLemma; if(lemma !== normalizedLemma && lemma !== reading){ @@ -140,7 +151,10 @@ let lastTermBankIndex = 0; anyForms.push(message); } } - + + /** + * @param {any} word + */ function debug(word) { if (normalizedLemma === DEBUG_WORD) { console.log('-------------------'); @@ -153,7 +167,7 @@ let lastTermBankIndex = 0; for (const [pos, info] of Object.entries(partsOfSpeechOfWord)) { const {senses} = info; - const lemmaTags = [pos, ...(info.tags || [])]; + const lemmaTags = [pos]; ipa.push(...info.ipa); const entries = {}; @@ -165,6 +179,9 @@ let lastTermBankIndex = 0; glosses.forEach((gloss) => { debug(gloss); + /** + * @param {string} joinedTags + */ function addGlossToEntries(joinedTags) { if(!gloss) return; if (entries[joinedTags]) { @@ -274,7 +291,7 @@ let lastTermBankIndex = 0; consoleOverwrite('4-make-yomitan.js: Processing forms...'); const formsFiles = readdirSync(readFolder).filter((file) => file.startsWith(`${source_iso}-${target_iso}-forms-`)); for (const file of formsFiles) { - const formsPart = JSON.parse(readFileSync(path.resolve(__dirname, readFolder, file)), mapJsonReviver); + const formsPart = JSON.parse(readFileSync(path.resolve(__dirname, readFolder, file), 'utf8'), mapJsonReviver); for (const [lemma, forms] of formsPart.entries()) { formsMap.set(lemma, forms); } @@ -394,6 +411,10 @@ writeFileSync(`data/language/${source_iso}/${target_iso}/skippedPartsOfSpeech.js console.log('4-make-yomitan.js: Done!') +/** + * @param {*} ymtFormData + * @returns + */ function writeYmtFormData(ymtFormData) { const ymtForms = ymtFormData.map((form, index) => { const [term, reading, definitions] = form; @@ -414,6 +435,12 @@ function writeYmtFormData(ymtFormData) { return ymtFormData; } +/** + * @param {*} folder + * @param {*} data + * @param {*} bankIndex + * @returns + */ function writeBanks(folder, data, bankIndex = 0) { if(folder === 'form') folder = 'dict'; @@ -428,14 +455,25 @@ function writeBanks(folder, data, bankIndex = 0) { return writeInBatches(writeFolder, data, `${folder}/${filename}`, 25000, bankIndex); } +/** + * @param {*} folder + */ function writeTags(folder) { writeFileSync(`${writeFolder}/${folder}/tag_bank_1.json`, JSON.stringify(Object.values(ymtTags[folder]))); } +/** + * @param {*} folder + * @param {*} tagStyles + */ function writeStyles(folder, tagStyles){ writeFileSync(`${writeFolder}/${folder}/styles.css`, tagStyles); } +/** + * @param {*} folder + * @returns + */ function getTagStyles(folder){ let styles = ""; for (const fullTag of Object.values(ymtTags[folder])) { @@ -447,6 +485,9 @@ function getTagStyles(folder){ return styles; } +/** + * @param {*} folder + */ function writeIndex(folder) { const title = `${DICT_NAME}-${source_iso}-${target_iso}` + (folder === 'dict' ? '' : '-ipa'); writeFileSync(`${writeFolder}/${folder}/index.json`, JSON.stringify({ @@ -458,13 +499,20 @@ function writeIndex(folder) { })); } +/** + * @param {*} lemmaTags + * @param {*} senseTags + * @param {*} parenthesesTags + * @param {*} pos + * @returns + */ function processTags(lemmaTags, senseTags, parenthesesTags, pos) { let recognizedTags = []; const allEntryTags = [...new Set([...lemmaTags, ...senseTags, ...parenthesesTags])]; termTagCount += allEntryTags.length; - unrecognizedTags = allEntryTags + const unrecognizedTags = allEntryTags .map((tag) => { const fullTag = findTag(termTags, tag); @@ -487,16 +535,24 @@ function processTags(lemmaTags, senseTags, parenthesesTags, pos) { }) .filter(Boolean); - leftoverTags = unrecognizedTags.length ? `(${unrecognizedTags.join(', ')}) ` : ''; + const leftoverTags = unrecognizedTags.length ? `(${unrecognizedTags.join(', ')}) ` : ''; recognizedTags = [...new Set(recognizedTags)]; return { leftoverTags, recognizedTags }; } +/** + * @param {*} obj + * @returns + */ function sortBreakdown(obj){ return Object.fromEntries(Object.entries(obj).sort((a, b) => b[1] - a[1])); } +/** + * @param {string} term + * @returns {string} + */ function normalizeOrthography(term) { switch (source_iso) { case 'ar': diff --git a/types/types.ts b/types/types.ts index 44b087d..039487f 100644 --- a/types/types.ts +++ b/types/types.ts @@ -116,6 +116,16 @@ declare global { type NestedObject = { [key: string]: NestedObject | any; } + + // 4-make-yomitan.js types: + + type WhitelistedTag = [ + shortTag: string, + category: string, + sortOrder: number, + longTag: string | string[], // if array, first element will be used, others are aliases + popularityScore: number, + ] } export {} // This is needed to make this file a module \ No newline at end of file From f2177d39c10f29f2f3caccb698734818327c0cd1 Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Tue, 15 Oct 2024 15:49:20 +0200 Subject: [PATCH 10/13] wip --- 4-make-yomitan.js | 61 ++++++++++------ jsconfig.json | 6 +- package-lock.json | 143 ++++++++++++++++++++++++++++++++++++- package.json | 3 +- types/types.ts => types.ts | 37 ++++++---- util/util.js | 3 +- 6 files changed, 209 insertions(+), 44 deletions(-) rename types/types.ts => types.ts (78%) diff --git a/4-make-yomitan.js b/4-make-yomitan.js index 187a1b5..b8e6c52 100644 --- a/4-make-yomitan.js +++ b/4-make-yomitan.js @@ -11,7 +11,7 @@ const { DICT_NAME, tidy_folder: readFolder, temp_folder: writeFolder -} = process.env; +} = /** @type {MakeYomitanEnv} */(process.env); const latestDownloadLink = 'https://github.com/yomidevs/kaikki-to-yomitan/releases/latest/download/'; @@ -60,7 +60,7 @@ const tagModifiers = [ /** * @param {WhitelistedTag[]} tags * @param {string} tag - * @returns + * @returns {null|import('types').TagBank.TagInformation} */ function findTag(tags, tag) { const fullTag = tags.find((x) => { @@ -72,13 +72,15 @@ function findTag(tags, tag) { return false; }); - const result = fullTag ? [...fullTag] : null; + if(!fullTag) return null; + + const result = [...fullTag]; - if(result && Array.isArray(result[3])){ - result[3] = result[3][0]; + if(Array.isArray(result[3])){ + result[3] = result[3][0]; // this makes it fit the yomitan tag format } - return result; + return /** @type {import('types').TagBank.TagInformation}*/ (result); } /** @@ -106,8 +108,12 @@ function findModifiedTag(tag){ return modifiedTag; } +/** @type {FormsMap} */ const formsMap = new Map(); +/** + * @type {{ipa: Object, dict: Object}} + */ const ymtTags = { ipa: {}, dict: {} @@ -124,6 +130,7 @@ let lastTermBankIndex = 0; { const ymtLemmas = []; + /** @type {import('types').TermBankMeta.TermPhoneticTranscription[]} */ const ymtIpa = []; consoleOverwrite(`4-make-yomitan.js: reading lemmas...`); @@ -169,6 +176,8 @@ let lastTermBankIndex = 0; const lemmaTags = [pos]; ipa.push(...info.ipa); + + /** @type {Object} */ const entries = {}; for (const sense of senses) { @@ -228,6 +237,7 @@ let lastTermBankIndex = 0; } } + /** @type {{ ipa: string; tags?: string[]; }[]} */ const mergedIpas = ipa.reduce((result, item) => { ipaCount++; item.tags = item.tags @@ -245,12 +255,16 @@ let lastTermBankIndex = 0; const existingIpa = result.find((x) => x.ipa === item.ipa); if (existingIpa) { - existingIpa.tags = [...new Set([...existingIpa.tags, ...item.tags])]; + existingIpa.tags = [ + ...new Set([ + ...(existingIpa.tags || []), + ...item.tags]) + ]; } else { result.push(item); } return result; - }, []); + }, /** @type {{ ipa: string; tags?: string[]; }[]} */ ([])); if (mergedIpas.length) { ymtIpa.push([ @@ -285,12 +299,14 @@ let lastTermBankIndex = 0; } { + /** @type {CondensedFormEntries} */ let ymtFormData = []; let formCounter = 0; consoleOverwrite('4-make-yomitan.js: Processing forms...'); const formsFiles = readdirSync(readFolder).filter((file) => file.startsWith(`${source_iso}-${target_iso}-forms-`)); for (const file of formsFiles) { + /** @type {FormsMap} */ const formsPart = JSON.parse(readFileSync(path.resolve(__dirname, readFolder, file), 'utf8'), mapJsonReviver); for (const [lemma, forms] of formsPart.entries()) { formsMap.set(lemma, forms); @@ -321,8 +337,9 @@ let lastTermBankIndex = 0; // TODO: generalize this if(target_iso === 'en'){ - hypotheses = gloss.split(' and ') - hypotheses = hypotheses.map((hypothesis) => hypothesis.split(' ')); + hypotheses = gloss + .split(' and ') + .map((hypothesis) => hypothesis.split(' ')); } if(target_iso === 'fr'){ @@ -348,6 +365,7 @@ let lastTermBankIndex = 0; return hypotheses; }); + /** @type {string[][]} */ const uniqueHypotheses = []; for (const hypothesis of inflectionHypotheses) { @@ -358,6 +376,7 @@ let lastTermBankIndex = 0; } } + /** @type {[ uninflectedTerm: string, inflectionRules: string[]][]} */ const deinflectionDefinitions = uniqueHypotheses.map((hypothesis) => [ lemma, hypothesis @@ -412,10 +431,11 @@ writeFileSync(`data/language/${source_iso}/${target_iso}/skippedPartsOfSpeech.js console.log('4-make-yomitan.js: Done!') /** - * @param {*} ymtFormData - * @returns + * @param {CondensedFormEntries} ymtFormData + * @returns {CondensedFormEntries} */ function writeYmtFormData(ymtFormData) { + /** @type {import('types').TermBank.TermInformation[]} */ const ymtForms = ymtFormData.map((form, index) => { const [term, reading, definitions] = form; return [ @@ -436,9 +456,9 @@ function writeYmtFormData(ymtFormData) { } /** - * @param {*} folder - * @param {*} data - * @param {*} bankIndex + * @param {string} folder + * @param {import('types').TermBank.DictionaryTermBankV3 | import('types').TermBankMeta.DictionaryTermMetaBankV3} data + * @param {number} bankIndex * @returns */ function writeBanks(folder, data, bankIndex = 0) { @@ -456,23 +476,23 @@ function writeBanks(folder, data, bankIndex = 0) { } /** - * @param {*} folder + * @param {'dict'|'ipa'} folder */ function writeTags(folder) { writeFileSync(`${writeFolder}/${folder}/tag_bank_1.json`, JSON.stringify(Object.values(ymtTags[folder]))); } /** - * @param {*} folder - * @param {*} tagStyles + * @param {'dict'|'ipa'} folder + * @param {string} tagStyles */ function writeStyles(folder, tagStyles){ writeFileSync(`${writeFolder}/${folder}/styles.css`, tagStyles); } /** - * @param {*} folder - * @returns + * @param {'dict'|'ipa'} folder + * @returns {string} */ function getTagStyles(folder){ let styles = ""; @@ -507,6 +527,7 @@ function writeIndex(folder) { * @returns */ function processTags(lemmaTags, senseTags, parenthesesTags, pos) { + /** @type {string[]} */ let recognizedTags = []; const allEntryTags = [...new Set([...lemmaTags, ...senseTags, ...parenthesesTags])]; diff --git a/jsconfig.json b/jsconfig.json index 609e7cc..06c0272 100644 --- a/jsconfig.json +++ b/jsconfig.json @@ -7,11 +7,7 @@ "strictNullChecks": true, "noImplicitAny": true, "strictPropertyInitialization": true, - "suppressImplicitAnyIndexErrors": false - }, - "paths": { - "*": ["./types/*"], - "ext/json-schema": ["./types/ext/json-schema"] + "suppressImplicitAnyIndexErrors": false, }, "exclude": [ "node_modules", diff --git a/package-lock.json b/package-lock.json index 97025fd..8f0c213 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,7 +12,8 @@ "archiver": "^6.0.1", "date-and-time": "^2.4.2", "line-by-line": "^0.1.6", - "node-stream-zip": "^1.15.0" + "node-stream-zip": "^1.15.0", + "yomichan-dict-builder": "^2.9.1" }, "devDependencies": { "jest": "^29.7.0" @@ -2041,6 +2042,11 @@ "node": ">=10.17.0" } }, + "node_modules/immediate": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz", + "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==" + }, "node_modules/import-local": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/import-local/-/import-local-3.1.0.tgz", @@ -2971,6 +2977,44 @@ "node": ">=6" } }, + "node_modules/jszip": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz", + "integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==", + "dependencies": { + "lie": "~3.3.0", + "pako": "~1.0.2", + "readable-stream": "~2.3.6", + "setimmediate": "^1.0.5" + } + }, + "node_modules/jszip/node_modules/readable-stream": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", + "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", + "dependencies": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "node_modules/jszip/node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" + }, + "node_modules/jszip/node_modules/string_decoder": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "dependencies": { + "safe-buffer": "~5.1.0" + } + }, "node_modules/kleur": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/kleur/-/kleur-3.0.3.tgz", @@ -3027,6 +3071,14 @@ "node": ">=6" } }, + "node_modules/lie": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz", + "integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==", + "dependencies": { + "immediate": "~3.0.5" + } + }, "node_modules/line-by-line": { "version": "0.1.6", "resolved": "https://registry.npmjs.org/line-by-line/-/line-by-line-0.1.6.tgz", @@ -3293,6 +3345,11 @@ "node": ">=6" } }, + "node_modules/pako": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", + "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==" + }, "node_modules/parse-json": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", @@ -3559,6 +3616,11 @@ "semver": "bin/semver.js" } }, + "node_modules/setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==" + }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", @@ -4030,6 +4092,14 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/yomichan-dict-builder": { + "version": "2.9.1", + "resolved": "https://registry.npmjs.org/yomichan-dict-builder/-/yomichan-dict-builder-2.9.1.tgz", + "integrity": "sha512-21fDggyfjPfGUy6ghdDwyqysn/VlKTEA/C9QiiJibN/sO5rg9LLOAkhU/otP0Owp8i4nBZNgRDa74d9cHba5Jg==", + "dependencies": { + "jszip": "^3.10.1" + } + }, "node_modules/zip-stream": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/zip-stream/-/zip-stream-5.0.1.tgz", @@ -5587,6 +5657,11 @@ "integrity": "sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==", "dev": true }, + "immediate": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz", + "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==" + }, "import-local": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/import-local/-/import-local-3.1.0.tgz", @@ -6300,6 +6375,46 @@ "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true }, + "jszip": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz", + "integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==", + "requires": { + "lie": "~3.3.0", + "pako": "~1.0.2", + "readable-stream": "~2.3.6", + "setimmediate": "^1.0.5" + }, + "dependencies": { + "readable-stream": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", + "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", + "requires": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" + }, + "string_decoder": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "requires": { + "safe-buffer": "~5.1.0" + } + } + } + }, "kleur": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/kleur/-/kleur-3.0.3.tgz", @@ -6349,6 +6464,14 @@ "integrity": "sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==", "dev": true }, + "lie": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz", + "integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==", + "requires": { + "immediate": "~3.0.5" + } + }, "line-by-line": { "version": "0.1.6", "resolved": "https://registry.npmjs.org/line-by-line/-/line-by-line-0.1.6.tgz", @@ -6552,6 +6675,11 @@ "integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==", "dev": true }, + "pako": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", + "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==" + }, "parse-json": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", @@ -6733,6 +6861,11 @@ "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", "dev": true }, + "setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==" + }, "shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", @@ -7081,6 +7214,14 @@ "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", "dev": true }, + "yomichan-dict-builder": { + "version": "2.9.1", + "resolved": "https://registry.npmjs.org/yomichan-dict-builder/-/yomichan-dict-builder-2.9.1.tgz", + "integrity": "sha512-21fDggyfjPfGUy6ghdDwyqysn/VlKTEA/C9QiiJibN/sO5rg9LLOAkhU/otP0Owp8i4nBZNgRDa74d9cHba5Jg==", + "requires": { + "jszip": "^3.10.1" + } + }, "zip-stream": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/zip-stream/-/zip-stream-5.0.1.tgz", diff --git a/package.json b/package.json index 5d0ca29..15639f2 100755 --- a/package.json +++ b/package.json @@ -12,7 +12,8 @@ "archiver": "^6.0.1", "date-and-time": "^2.4.2", "line-by-line": "^0.1.6", - "node-stream-zip": "^1.15.0" + "node-stream-zip": "^1.15.0", + "yomichan-dict-builder": "^2.9.1" }, "description": "Converts Kaikki JSON to Yomitan compatible dictionary.", "devDependencies": { diff --git a/types/types.ts b/types.ts similarity index 78% rename from types/types.ts rename to types.ts index 039487f..5e4440c 100644 --- a/types/types.ts +++ b/types.ts @@ -1,4 +1,10 @@ +import * as TermBank from './node_modules/yomichan-dict-builder/src/types/yomitan/termbank'; +import * as TagBank from './node_modules/yomichan-dict-builder/src/types/yomitan/tagbank'; +import * as TermBankMeta from './node_modules/yomichan-dict-builder/src/types/yomitan/termbankmeta'; + declare global { + // 3-tidy-up.js types: + type TidyEnv = { source_iso: string, target_iso: string, @@ -89,24 +95,11 @@ declare global { } type SenseInfo = { - glosses: YomitanGloss[], + glosses: TermBank.DetailedDefinition[], tags: string[], examples: Example[], } - - type YomitanGloss = string | StructuredGloss - type StructuredGloss = { - type: "structured-content", - content: string | StructuredContent[], - } - - type StructuredContent = { - tag: string, - data: string, - content: StructuredContent, - } - type Lemma = string; type Form = string; type PoS = string; @@ -118,6 +111,16 @@ declare global { } // 4-make-yomitan.js types: + type MakeYomitanEnv = { + source_iso: string, + target_iso: string, + DEBUG_WORD?: string, + DICT_NAME: string, + tidy_folder: string, + temp_folder: string, + } + + type CondensedFormEntries = [string, string, [string, string[]][]][]; type WhitelistedTag = [ shortTag: string, @@ -128,4 +131,8 @@ declare global { ] } -export {} // This is needed to make this file a module \ No newline at end of file +export { + TermBank, + TagBank, + TermBankMeta +} \ No newline at end of file diff --git a/util/util.js b/util/util.js index 09f3499..523d908 100644 --- a/util/util.js +++ b/util/util.js @@ -3,7 +3,7 @@ const path = require('path'); const { readFileSync, writeFileSync, existsSync } = require('fs'); const date = require('date-and-time'); -const tagOrder = JSON.parse(readFileSync(path.resolve(__dirname, '../data/language/tag_order.json'))); +const tagOrder = JSON.parse(readFileSync(path.resolve(__dirname, '../data/language/tag_order.json'), 'utf-8')); const tagOrderAll = []; @@ -111,7 +111,6 @@ function mergePersonTags(targetIso, tags) { } else return tags; } - function writeInBatches(tempPath, inputArray, filenamePrefix, batchSize = 100000, bankIndex = 0) { consoleOverwrite(`Writing ${inputArray.length.toLocaleString()} entries of ${filenamePrefix}...`); From d3fa1d8ea7b873ed5729c0102d9c689ea6daf70a Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Tue, 15 Oct 2024 15:51:22 +0200 Subject: [PATCH 11/13] wip --- 4-make-yomitan.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/4-make-yomitan.js b/4-make-yomitan.js index b8e6c52..319dc06 100644 --- a/4-make-yomitan.js +++ b/4-make-yomitan.js @@ -84,8 +84,8 @@ function findTag(tags, tag) { } /** - * @param {*} tag - * @returns + * @param {string} tag + * @returns {null|import('types').TagBank.TagInformation} */ function findModifiedTag(tag){ let modifiedTag = null; @@ -506,7 +506,7 @@ function getTagStyles(folder){ } /** - * @param {*} folder + * @param {'dict'|'ipa'} folder */ function writeIndex(folder) { const title = `${DICT_NAME}-${source_iso}-${target_iso}` + (folder === 'dict' ? '' : '-ipa'); @@ -520,10 +520,10 @@ function writeIndex(folder) { } /** - * @param {*} lemmaTags - * @param {*} senseTags - * @param {*} parenthesesTags - * @param {*} pos + * @param {string[]} lemmaTags + * @param {string[]} senseTags + * @param {string[]} parenthesesTags + * @param {string} pos * @returns */ function processTags(lemmaTags, senseTags, parenthesesTags, pos) { From 88f25638f44bfd246768180458128d5772fcb645 Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Wed, 16 Oct 2024 00:12:00 +0200 Subject: [PATCH 12/13] wip --- .gitignore | 1 + 3-tidy-up.js | 54 +- 4-make-yomitan.js | 18 +- data/styles.css | 20 + data/test/dict/cs/en/term_bank_1.json | 167 +++- data/test/dict/de/en/tag_bank_1.json | 14 +- data/test/dict/de/en/term_bank_1.json | 400 ++++++++- data/test/dict/en/de/term_bank_1.json | 36 +- data/test/dict/en/en/term_bank_1.json | 216 ++++- data/test/dict/es/en/term_bank_1.json | 102 ++- data/test/dict/fa/en/term_bank_1.json | 74 +- data/test/dict/fr/en/term_bank_1.json | 1098 +++++++++++++++++++++++- data/test/dict/fr/fr/term_bank_1.json | 324 ++++++- data/test/dict/ja/en/tag_bank_1.json | 7 - data/test/dict/ja/en/term_bank_1.json | 470 ++++++++++- data/test/dict/la/en/term_bank_1.json | 649 +++++++++++++- data/test/tidy/cs-en-lemmas.json | 167 +++- data/test/tidy/de-en-lemmas.json | 389 ++++++++- data/test/tidy/en-de-lemmas.json | 36 +- data/test/tidy/en-en-lemmas.json | 216 ++++- data/test/tidy/es-en-lemmas.json | 102 ++- data/test/tidy/fa-en-lemmas.json | 74 +- data/test/tidy/fr-en-lemmas.json | 1128 +++++++++++++++++++++++-- data/test/tidy/fr-fr-lemmas.json | 324 ++++++- data/test/tidy/ja-en-lemmas.json | 470 ++++++++++- data/test/tidy/la-en-lemmas.json | 649 +++++++++++++- package-lock.json | 14 +- package.json | 2 +- types.ts | 2 +- 29 files changed, 6984 insertions(+), 239 deletions(-) create mode 100644 data/styles.css diff --git a/.gitignore b/.gitignore index e177713..a19c938 100755 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ *.zip data/**/*.css +!data/styles.css *.txt !instructions.txt diff --git a/3-tidy-up.js b/3-tidy-up.js index 308fd8a..58ed4cb 100644 --- a/3-tidy-up.js +++ b/3-tidy-up.js @@ -56,9 +56,10 @@ function isInflectionGloss(glosses, formOf) { /** * @param {GlossTwig} glossTwig * @param {number} level - * @returns {*} + * @returns {import('types').TermBank.StructuredContent[]} */ function handleLevel(glossTwig, level) { + /** @type {import('types').TermBank.StructuredContent[]} */ const nestDefs = []; let defIndex = 0; @@ -70,6 +71,7 @@ function handleLevel(glossTwig, level) { const childDefs = handleLevel(children, nextLevel); const listType = level === 1 ? "li" : "number"; + /** @type {import('types').TermBank.StructuredContent} */ const content = level === 1 ? def : [{ "tag": "span", "data": { "listType": "number" }, "content": `${defIndex}. ` }, def]; nestDefs.push([ @@ -238,7 +240,18 @@ function handleLine(parsedLine) { /** @type {SenseInfo} */ const currSense = { glosses: [], tags, examples }; if(branches.size === 0) { - currSense.glosses.push(gloss); + if(examples.length > 0) { + currSense.glosses.push({ + "type": "structured-content", + "content": [ + gloss, + getStructuredExamples(examples) + ] + }); + } else { + currSense.glosses.push(gloss); + } + } else { /** @type {GlossBranch} */ const syntheticBranch = new Map(); @@ -252,6 +265,41 @@ function handleLine(parsedLine) { } } +/** + * @param {Example[]} examples + * @returns {import('types').TermBank.StructuredContent[]} + */ +function getStructuredExamples(examples) { + return examples.map(({text, english}) => { + return { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag":"div", + "data": { + "content": "example-sentence" + }, + "content":[{ + "tag": "div", + "data": { + "content": "example-sentence-a", + }, + "content": text + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": english + } + ]} + } + }); +} + /** * @param {TidySense[]} sensesWithoutInflectionGlosses * @returns {GlossTree} @@ -262,7 +310,7 @@ function getGlossTree(sensesWithoutInflectionGlosses) { const { glossesArray, tags } = sense; let { examples = [] } = sense; examples = examples - .filter(({type}) => type !== 'quotation') + .filter(({type}) => !["quotation", "quote"].includes(type || '')) .map(({text, english}) => ({text, english})) let temp = glossTree; diff --git a/4-make-yomitan.js b/4-make-yomitan.js index 319dc06..18014dd 100644 --- a/4-make-yomitan.js +++ b/4-make-yomitan.js @@ -31,6 +31,8 @@ if (!existsSync(`data/language/${source_iso}/${target_iso}`)) { mkdirSync(`data/language/${source_iso}/${target_iso}`, {recursive: true}); } +const termDictStyles = readFileSync('data/styles.css', 'utf8'); + /** @type {WhitelistedTag[]} */ const targetLanguageTermTags = loadJsonArray(`data/language/target-language-tags/${target_iso}/tag_bank_term.json`); /** @type {WhitelistedTag[]} */ @@ -182,7 +184,7 @@ let lastTermBankIndex = 0; for (const sense of senses) { - const {glosses, tags} = sense; + const {glosses, tags, examples} = sense; const senseTags = [...lemmaTags, ...tags] glosses.forEach((gloss) => { @@ -284,9 +286,7 @@ let lastTermBankIndex = 0; writeIndex('dict'); writeTags('dict'); const dictTagStyles = getTagStyles('dict'); - if(dictTagStyles){ - writeStyles('dict', dictTagStyles); - } + writeStyles('dict', dictTagStyles); lastTermBankIndex = writeBanks('dict', ymtLemmas, lastTermBankIndex); writeIndex('ipa'); writeTags('ipa'); @@ -484,10 +484,14 @@ function writeTags(folder) { /** * @param {'dict'|'ipa'} folder - * @param {string} tagStyles + * @param {string} styles */ -function writeStyles(folder, tagStyles){ - writeFileSync(`${writeFolder}/${folder}/styles.css`, tagStyles); +function writeStyles(folder, styles){ + if(folder === 'dict') { + styles = styles + '\n' + termDictStyles; + } + if(!styles) return; + writeFileSync(`${writeFolder}/${folder}/styles.css`, styles); } /** diff --git a/data/styles.css b/data/styles.css new file mode 100644 index 0000000..6c080b7 --- /dev/null +++ b/data/styles.css @@ -0,0 +1,20 @@ +div[data-sc-content="extra-info"] { + margin-left: 0.5em; +} +div[data-sc-content="example-sentence"] { + background-color: color-mix(in srgb, var(--text-color, var(--fg, #333)) 5%, transparent); + border-color: var(--text-color, var(--fg, #333)); + border-style: none none none solid; + border-radius: 0.4rem; + border-width: calc(3em / var(--font-size-no-units, 14)); + margin-top: 0.5rem; + margin-bottom: 0.5rem; + padding: 0.1rem 0.5rem; +} +div[data-sc-content="example-sentence-a"] { + font-size: 1.1em; + font-style: italic; +} +div[data-sc-content="example-sentence-b"] { + font-size: 0.8em; +} \ No newline at end of file diff --git a/data/test/dict/cs/en/term_bank_1.json b/data/test/dict/cs/en/term_bank_1.json index 3bba80f..0f81e90 100644 --- a/data/test/dict/cs/en/term_bank_1.json +++ b/data/test/dict/cs/en/term_bank_1.json @@ -6,8 +6,134 @@ "n", 0, [ - "message", - "report" + { + "type": "structured-content", + "content": [ + "message", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "textová zpráva" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "text message" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Chcete nechat zprávu?" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Would you like to leave a message?" + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "report", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "lékařská zpráva" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "medical report" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "podat zprávu" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to file a report" + } + ] + } + } + ] + ] + } ], 0, "" @@ -19,7 +145,42 @@ "prep", 0, [ - "for" + { + "type": "structured-content", + "content": [ + "for", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Zabili ho pro peníze." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "They killed him for his money." + } + ] + } + } + ] + ] + } ], 0, "" diff --git a/data/test/dict/de/en/tag_bank_1.json b/data/test/dict/de/en/tag_bank_1.json index 721d192..4204bc4 100644 --- a/data/test/dict/de/en/tag_bank_1.json +++ b/data/test/dict/de/en/tag_bank_1.json @@ -62,13 +62,6 @@ "military", 0 ], - [ - "cards", - "", - 0, - "card games", - 0 - ], [ "arch", "archaism", @@ -97,6 +90,13 @@ "rare", -1 ], + [ + "cards", + "", + 0, + "card games", + 0 + ], [ "prep", "partOfSpeech", diff --git a/data/test/dict/de/en/term_bank_1.json b/data/test/dict/de/en/term_bank_1.json index 78d4cd3..e28d685 100644 --- a/data/test/dict/de/en/term_bank_1.json +++ b/data/test/dict/de/en/term_bank_1.json @@ -235,8 +235,79 @@ "n", 0, [ - "fox (animal)", + { + "type": "structured-content", + "content": [ + "fox (animal)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Fuchs, du hast die Gans gestohlen. Gib sie wieder her!" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "(line from a popular children’s song)" + } + ] + } + } + ] + ] + }, "pledge (prospective member of a fraternity)", + { + "type": "structured-content", + "content": [ + "(card games) In Doppelkopf, the ace of diamonds, which earns a side of players an extra point if they win it from the other side", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Ich hatte nur vier Trümpfe und darunter beide Füchse." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "I had only four trumps and among them were both aces of diamonds." + } + ] + } + } + ] + ] + }, "a fox in radiosport foxhunt" ], 0, @@ -249,8 +320,78 @@ "n", 0, [ - "a clever or cunning person", - "a red-haired person or horse." + { + "type": "structured-content", + "content": [ + "(informal) a clever or cunning person", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Er ist ein ganz schöner Fuchs." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "He is a really handsome fox." + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "(informal) a red-haired person or horse.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Unser Paul ist ja ein kleiner Fuchs." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Our Paul is a little redhead." + } + ] + } + } + ] + ] + } ], 0, "" @@ -267,18 +408,6 @@ 0, "" ], - [ - "Fuchs", - "", - "n masc strong cards", - "n", - 0, - [ - "In Doppelkopf, the ace of diamonds, which earns a side of players an extra point if they win it from the other side" - ], - 0, - "" - ], [ "Fuchs", "", @@ -359,11 +488,242 @@ "prep", 0, [ - "from", - "of, belonging to (often replacing genitive; see usage note below)", - "by (with passive voice)", - "about, of (a topic)", - "on, with (a resource)" + { + "type": "structured-content", + "content": [ + "from", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Ich fahre von Köln nach Hamburg." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "I'm travelling from Cologne to Hamburg." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Ich hab’s von meiner Schwester gehört." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "I heard it from my sister." + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "of, belonging to (often replacing genitive; see usage note below)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "das Auto meines Vaters = das Auto von meinem Vater" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "my father’s car / the car of my father" + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "by (with passive voice)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Das Hotel wird von der Firma bezahlt." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "The hotel is paid for by the company." + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "about, of (a topic)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Er hat von seiner Jugend erzählt." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "He told about his youth." + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "on, with (a resource)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Von welchem Geld soll ich als Arbeitsloser in Urlaub fahren?" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Being unemployed, on what money should I go on holidays?" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Man kann nicht nur von Luft und Liebe leben." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "You can’t live on air and love alone. (proverb)" + } + ] + } + } + ] + ] + } ], 0, "" diff --git a/data/test/dict/en/de/term_bank_1.json b/data/test/dict/en/de/term_bank_1.json index ec78976..185c7f5 100644 --- a/data/test/dict/en/de/term_bank_1.json +++ b/data/test/dict/en/de/term_bank_1.json @@ -6,7 +6,41 @@ "verb", 0, [ - "[1] aussuchen, auswählen, vorziehen, wählen" + { + "type": "structured-content", + "content": [ + "[1] aussuchen, auswählen, vorziehen, wählen", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "\"Connolly was chosen for the case by prosecutors because the sheriff and Bristol District Attorney C. Samuel Sutter are “close professional and personal friends,” said Sutter spokesman Gregg Miliote.\"" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], 0, "" diff --git a/data/test/dict/en/en/term_bank_1.json b/data/test/dict/en/en/term_bank_1.json index d452b34..163253b 100644 --- a/data/test/dict/en/en/term_bank_1.json +++ b/data/test/dict/en/en/term_bank_1.json @@ -6,7 +6,41 @@ "v", 0, [ - "To transport toward somebody/somewhere." + { + "type": "structured-content", + "content": [ + "(transitive, ditransitive) To transport toward somebody/somewhere.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Waiter, please bring me a single malt whiskey." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], 0, "" @@ -18,7 +52,41 @@ "v", 0, [ - "To supply or contribute." + { + "type": "structured-content", + "content": [ + "(transitive, figuratively) To supply or contribute.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "The new company director brought a fresh perspective on sales and marketing." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], 0, "" @@ -30,7 +98,41 @@ "v", 0, [ - "To occasion or bring about.", + { + "type": "structured-content", + "content": [ + "(transitive) To occasion or bring about.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "The controversial TV broadcast brought a storm of complaints." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + }, "To raise (a lawsuit, charges, etc.) against somebody." ], 0, @@ -44,8 +146,76 @@ 0, [ "To persuade; to induce; to draw; to lead; to guide.", - "To produce in exchange; to sell for; to fetch.", - "(baseball) To pitch, often referring to a particularly hard thrown fastball." + { + "type": "structured-content", + "content": [ + "To produce in exchange; to sell for; to fetch.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "What does coal bring per ton?" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "(baseball) To pitch, often referring to a particularly hard thrown fastball.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "The closer Jones can really bring it." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], 0, "" @@ -57,7 +227,41 @@ "n", 0, [ - "A wagon; a four-wheeled cart for hauling loads, usually pulled by horses or oxen." + { + "type": "structured-content", + "content": [ + "(archaic or literary) A wagon; a four-wheeled cart for hauling loads, usually pulled by horses or oxen.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "\"The Hay Wain\" is a famous painting by John Constable." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], 0, "" diff --git a/data/test/dict/es/en/term_bank_1.json b/data/test/dict/es/en/term_bank_1.json index e6514c8..4cccb1d 100644 --- a/data/test/dict/es/en/term_bank_1.json +++ b/data/test/dict/es/en/term_bank_1.json @@ -7,8 +7,106 @@ 0, [ "to live; to be alive", - "to make a living, to live on", - "to live in, reside, inhabit" + { + "type": "structured-content", + "content": [ + "(intransitive) to make a living, to live on", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Vive de migas, nada más." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "He lives on crumbs, nothing more." + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "(intransitive) to live in, reside, inhabit", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Vive en la casa roja." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "She lives in the red house." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "La pobrecita vive con dos hermanas crueles." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "The poor girl lives with two cruel sisters." + } + ] + } + } + ] + ] + } ], 0, "" diff --git a/data/test/dict/fa/en/term_bank_1.json b/data/test/dict/fa/en/term_bank_1.json index 9e1f608..6accc6f 100644 --- a/data/test/dict/fa/en/term_bank_1.json +++ b/data/test/dict/fa/en/term_bank_1.json @@ -18,9 +18,79 @@ "n", 0, [ - "Persian (the language of modern Iran, Afghanistan and Tajikistan, and widely spoken in Uzbekistan).", + { + "type": "structured-content", + "content": [ + "Persian (the language of modern Iran, Afghanistan and Tajikistan, and widely spoken in Uzbekistan).", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "بَرادَرِ شُوْهَرِش فارْسی بَلَدِه." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Her husband's brother knows Persian." + } + ] + } + } + ] + ] + }, "Persian (the language of Ancient Persia).", - "Persian, main ethnic group of Iran." + { + "type": "structured-content", + "content": [ + "Persian, main ethnic group of Iran.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "فارْسی هَسْتیم." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "We are Persian." + } + ] + } + } + ] + ] + } ], 0, "" diff --git a/data/test/dict/fr/en/term_bank_1.json b/data/test/dict/fr/en/term_bank_1.json index 9b77185..457ac10 100644 --- a/data/test/dict/fr/en/term_bank_1.json +++ b/data/test/dict/fr/en/term_bank_1.json @@ -6,11 +6,214 @@ "v", 0, [ - "to take", - "to eat; to drink", - "to get; to buy", - "to rob; to deprive", - "to make", + { + "type": "structured-content", + "content": [ + "(transitive) to take", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prends ma main" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "take my hand" + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "(transitive) to eat; to drink", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "elle prend un café" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "she is drinking a coffee" + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "(transitive) to get; to buy", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Je vais prendre le plat du jour." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "I'll get the dish of the day." + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "(transitive) to rob; to deprive", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre quelque chose à quelqu’un" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to take something from someone" + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "(transitive) to make", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre une décision" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to make a decision" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre des mesures draconiennes" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to take draconian measures" + } + ] + } + } + ] + ] + }, { "type": "structured-content", "content": [ @@ -61,7 +264,126 @@ "v", 0, [ - "to catch, to work, to start" + { + "type": "structured-content", + "content": [ + "(intransitive) to catch, to work, to start", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "le feu ne prend pas" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "the fire won't start" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "la sauce ne prend pas" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "the sauce isn't thickening" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "ma mayonnaise ne prend pas" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "my mayonnaise isn't setting" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "ça ne prend pas avec moi" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "that won't wash with me" + } + ] + } + } + ] + ] + } ], 0, "" @@ -73,32 +395,70 @@ "v", 0, [ - "to get (something) caught (in), to jam" - ], - 0, - "" - ], - [ - "prendre", - "", - "v", - "v", - 0, - [ - "(followed by a partitive, in various idiomatic expressions) to gain" - ], - 0, - "" - ], - [ - "prendre", - "", - "v col impers", - "v", - 0, - [ - "(colloquial; impersonal) to take (a certain amount of time)", - "(colloquial; impersonal; by extension) to take (a certain number or amount of)" + { + "type": "structured-content", + "content": [ + "(reflexive) to get (something) caught (in), to jam", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "je me suis pris la main dans la porte" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "I caught my hand in the door" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "je me suis pris la porte dans la figure" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "the door hit me in the face" + } + ] + } + } + ] + ] + } ], 0, "" @@ -106,23 +466,671 @@ [ "prendre", "", - "v impers", "v", - 0, - [ - "to come over (to arise in and gain some control over one's thoughts and/or actions)" - ], - 0, - "" - ], - [ - "sembler", - "", - "v impers vi", "v", 0, [ - "to seem, to resemble" + { + "type": "structured-content", + "content": [ + "(followed by a partitive, in various idiomatic expressions) to gain", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de la vitesse" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to gain speed" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre du galon" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to gain a promotion" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de l’avance" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to gain ground" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre du retard" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to fall behind schedule, to run late, to drop behind" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de la hauteur" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to gain some perspective" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre du recul" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to take a step back" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de la bouteille" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to gain experience" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "en prendre de la graine" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to take away a lesson" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre du poids" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to gain weight" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de la masse" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to build muscle" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de la brioche, prendre du bide, prendre du ventre" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to get a paunch" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre du bouchon" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to fail" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de l’élan" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to gain momentum" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de l’âge" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to get older" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de la valeur" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to gain value" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de l’importance" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to become important" + } + ] + } + } + ] + ] + } + ], + 0, + "" + ], + [ + "prendre", + "", + "v col impers", + "v", + 0, + [ + { + "type": "structured-content", + "content": [ + "(colloquial; impersonal) to take (a certain amount of time)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Ça va me prendre au moins deux heures pour le mettre à jour." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "It's going to take me at least two hours to update it." + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "(colloquial; impersonal; by extension) to take (a certain number or amount of)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Pour finir dans deux heures, ça prend trois personnes." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "To finish in two hours, it'll take three people." + } + ] + } + } + ] + ] + } + ], + 0, + "" + ], + [ + "prendre", + "", + "v impers", + "v", + 0, + [ + { + "type": "structured-content", + "content": [ + "(impersonal) to come over (to arise in and gain some control over one's thoughts and/or actions)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "il prend [quelque chose] à [quelqu’un]" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "[something] comes over [someone]" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Il lui prend une fantaisie de mettre le feu à la maison." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "A fancy comes over him to set fire to the house." + } + ] + } + } + ] + ] + } + ], + 0, + "" + ], + [ + "sembler", + "", + "v impers vi", + "v", + 0, + [ + { + "type": "structured-content", + "content": [ + "(intransitive, impersonal) to seem, to resemble", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Il semblerait qu’il y ait des différences significatives entre les deux groupes." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "There would seem to be significant differences between the two groups." + } + ] + } + } + ] + ] + } ], 0, "" diff --git a/data/test/dict/fr/fr/term_bank_1.json b/data/test/dict/fr/fr/term_bank_1.json index 004b4c6..aaf02f0 100644 --- a/data/test/dict/fr/fr/term_bank_1.json +++ b/data/test/dict/fr/fr/term_bank_1.json @@ -6,8 +6,211 @@ "noun", 0, [ - "Dans la religion hindouiste, chacune des incarnations du dieu Vishnou.", - "Métamorphose, transformation d’un objet ou d’un individu qui en a déjà subi plusieurs." + { + "type": "structured-content", + "content": [ + "Dans la religion hindouiste, chacune des incarnations du dieu Vishnou.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Chenguza est beau comme le grand Avatar, (descendant de Bram) et quoique élevé pour le sacerdoce dans les austérités du cloître, il a l'imagination vive, le désir de s'instruire, et un penchant décidé pour la volupté." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Retiré dans les forêts, placé à la tête de nombreux pasteurs , qui ont dû se plaire à grossir sa renommée, et dont les bras l'ont aidé à renverser le tyran, Crichna évidemment dut sortir des Djangles, où il s'était tenu jusque-là dans le mystère , comme un homme environné d'une sorte d'auréole merveilleuse, qui le fit considérer de bonne heure comme un Avatar (incarnation) de la Divinité, caractère religieux et providentiel que l'Inde a toujours accordé à ses grands hommes" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Plusieurs passages des Pouranas peuvent aussi servir à prouver que les eaux ont fait éprouver au globe terrestre une grande révolution , et la plupart des avatars ou métamorphoses et incarnations de Vichenou, paraissent avoir trait à ce cataclysme général." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "Métamorphose, transformation d’un objet ou d’un individu qui en a déjà subi plusieurs.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Que d’avatars dans la vie politique de cet homme d’État !" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Batman est l’avatar moderne de Zorro." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "L’espéranto est l’avatar moderne du sanskrit." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Plusieurs passages des Pouranas peuvent aussi servir à prouver que les eaux ont fait éprouver au globe terrestre une grande révolution , et la plupart des avatars ou métamorphoses et incarnations de Vichenou, paraissent avoir trait à ce cataclysme général." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], 0, "" @@ -19,7 +222,122 @@ "noun", 0, [ - "Mésaventure, malheur." + { + "type": "structured-content", + "content": [ + "Mésaventure, malheur.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Mais la guerre n’en finissait plus et c’est alors que je devins, après bien des démarches, de boulanger, élève-pilote et par la suite pilote aviateur, à ma très grande surprise et sans trop d’avatars." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "N'avions-nous pas aidé à la constituer, en courant bien des risques et au prix de quelles avatars ! Alors, lui mort, il ne savait plus qu'en faire…." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Le service social du travail – Avatars d’une fonction, vicissitudes d’un métier" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Compte tenu des divers avatars rencontrés ces dernières années (rouille du mélèze, puceron lanigère, tempêtes…), il n'est plus possible de s’intéresser à la seule productivité, […]." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], 0, "" diff --git a/data/test/dict/ja/en/tag_bank_1.json b/data/test/dict/ja/en/tag_bank_1.json index 5f2c6d3..434986b 100644 --- a/data/test/dict/ja/en/tag_bank_1.json +++ b/data/test/dict/ja/en/tag_bank_1.json @@ -20,13 +20,6 @@ "figuratively", 0 ], - [ - "fig", - "", - 0, - "figurative", - 0 - ], [ "abbv", "", diff --git a/data/test/dict/ja/en/term_bank_1.json b/data/test/dict/ja/en/term_bank_1.json index bacb81e..60271e5 100644 --- a/data/test/dict/ja/en/term_bank_1.json +++ b/data/test/dict/ja/en/term_bank_1.json @@ -18,7 +18,125 @@ "adj", 0, [ - "liked, favorite" + { + "type": "structured-content", + "content": [ + "liked, favorite", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "好きな食べ物は? アイスクリームです。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "What's your favorite food? - It's ice cream." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "君が好きだからこそこれほど頑張っているんだよ。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "It's precisely because I like you [because of my fondness for you] that I'm working this hard." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "好きです。\nSuki desu.\n“I like you.” → idiomatically used to express: “I love you.”" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "どうとも好きにしなさい。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Feel free to do as you like." + } + ] + } + } + ] + ] + } ], 0, "" @@ -30,7 +148,42 @@ "n", 0, [ - "a raccoon dog, Nyctereutes procyonoides" + { + "type": "structured-content", + "content": [ + "a raccoon dog, Nyctereutes procyonoides", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "アライグマなら尻尾にシマがある。どう見でもタヌキだ。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "If you're a raccoon, you'd have stripes on your tail. No matter how you look at it, you're a raccoon dog." + } + ] + } + } + ] + ] + } ], 0, "" @@ -42,7 +195,42 @@ "n", 0, [ - "a person who pretends to be good but in fact is cunning (compare English sly fox)" + { + "type": "structured-content", + "content": [ + "(figurative) a person who pretends to be good but in fact is cunning (compare English sly fox)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "やいやい、其処な狸め" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Hey there, you sly dog!" + } + ] + } + } + ] + ] + } ], 0, "" @@ -66,7 +254,41 @@ "n", 0, [ - "Short for 狸寝入り (tanuki neiri): pretending to be asleep" + { + "type": "structured-content", + "content": [ + "(rare) Short for 狸寝入り (tanuki neiri): pretending to be asleep", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "狸を決め込む ― tanuki o kimekomu ― pretend to be a raccoon dog → feign sleep" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], 0, "" @@ -145,13 +367,208 @@ } ] }, - "to move smoothly; to slide", + { + "type": "structured-content", + "content": [ + "to move smoothly; to slide", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "刀が鞘から走る。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "The sword slides out of its sheath." + } + ] + } + } + ] + ] + }, "to run away, escape", "to rush, hurry around", - "to give over oneself to; to commit oneself to (usually something bad)", + { + "type": "structured-content", + "content": [ + "to give over oneself to; to commit oneself to (usually something bad)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "彼は敵に走った。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "He defected to the enemy." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "立場を忘れて感情に走ってはいけない。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Don't forget your stance and give in to emotions." + } + ] + } + } + ] + ] + }, "to spread out, scatter, splatter, spout", - "to lead or extend in a certain direction", - "to appear briefly; to flash", + { + "type": "structured-content", + "content": [ + "to lead or extend in a certain direction", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "山脈が南北に走る。\nSanmyaku ga nanboku ni hashiru.\nThe mountain range runs north–south." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "to appear briefly; to flash", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "稲妻が走る" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "lightning flashes by" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "背中に痛みが走った。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "I felt a brief pain in my back." + } + ] + } + } + ] + ] + }, "(used with 胸(むね)が (mune ga)) to feel palpitations; to have a sense of unease" ], 0, @@ -164,7 +581,42 @@ "v", 0, [ - "to run through some kind of place" + { + "type": "structured-content", + "content": [ + "(transitive) to run through some kind of place", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "彼はこの道をよく走る。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "He often runs down this street." + } + ] + } + } + ] + ] + } ], 0, "" diff --git a/data/test/dict/la/en/term_bank_1.json b/data/test/dict/la/en/term_bank_1.json index 294da2f..69b7f0b 100644 --- a/data/test/dict/la/en/term_bank_1.json +++ b/data/test/dict/la/en/term_bank_1.json @@ -7,9 +7,197 @@ 0, [ "fame", - "rumour, talk, opinion, report", - "reputation", - "Fama, personified as a fast-moving, malicious goddess, the daughter of Terra. From the Greek φήμη, Pheme. Typically translated from the Latin as “Rumor.”" + { + "type": "structured-content", + "content": [ + "rumour, talk, opinion, report", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "hascine propter rēs maledicās fāmās ferunt." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Is it on account of these things that they spread slanderous reports?" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "“Oenōtrī coluēre virī; nunc fāma minōrēs" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "“Oenotrian men tilled [the land]; now rumor [has it that their] descendants call the nation ‘Italy’ after the name of its leader, [Italus].”" + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "reputation", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Dīmīcantī dē fāmā dēesse." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "To abandon one whose reputation is attacked." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Fāma tamen clāra est; et adhūc sine crīmine vīxī." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "My good name is nevertheless unstained; and so far I have lived without blame." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Multī fāmam, conscientiam paucī verentur." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Many fear their reputation, few their conscience." + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "Fama, personified as a fast-moving, malicious goddess, the daughter of Terra. From the Greek φήμη, Pheme. Typically translated from the Latin as “Rumor.”", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Extemplō Libyae magnās it Fāma per urbēs —\nFāma, malum quā nōn aliud vēlōcius ūllum.\nStraightaway Rumor runs through the great cities of Libya – Rumor, than whom [there is] not any other evil more swift." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], 0, "" @@ -22,11 +210,165 @@ 0, [ "to choose, select", - "to appoint", + { + "type": "structured-content", + "content": [ + "to appoint", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "[…] dum ne quem militem legeret ex eo numero quibus senatus missionem reditumque in patriam negasset ante belli finem." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "[…] provided he did not choose any soldier from those to whom the Senate had refused discharge and a return home before the end of the war" + } + ] + } + } + ] + ] + }, "to collect, gather, bring together", "to take out, pick out, extract, remove", "to take to one's self unjustly, carry off, steal, purloin, plunder, abstract", - "to read" + { + "type": "structured-content", + "content": [ + "to read", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Librōs lege." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Read books." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Lēgistīne hunc librum?" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Have you read this book?" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Lingua Graeca est; nōn potest legī." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "It's Greek; it cannot be read." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Exigis, ut nostrōs dōnem tibi, Tucca, libellōs.\nNōn faciam: nam vīs vēndere, nōn legere.\nYou demand that I give our [⇒ my] little books to you, Tucca." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "I will not do [it]: for you want to sell [them], not to read [them]." + } + ] + } + } + ] + ] + } ], 0, "" @@ -64,7 +406,42 @@ [ "all the way", "until, up to (sometimes with \"ad\")", - "constantly, continuously" + { + "type": "structured-content", + "content": [ + "constantly, continuously", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "ab ōvō ū̆sque ad māla" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "from the beginning to the end\n(literally, “from the egg to the apples”)" + } + ] + } + } + ] + ] + } ], 0, "" @@ -76,9 +453,79 @@ "v", 0, [ - "led straight along, drawn in a straight line, straight, upright.", + { + "type": "structured-content", + "content": [ + "led straight along, drawn in a straight line, straight, upright.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Quae rectis lineis suos ordines servant" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Which preserve their order in straight lines" + } + ] + } + } + ] + ] + }, "(in general) right, correct, proper, appropriate, befitting.", - "(in particular) morally right, correct, lawful, just, virtuous, noble, good, proper, honest." + { + "type": "structured-content", + "content": [ + "(in particular) morally right, correct, lawful, just, virtuous, noble, good, proper, honest.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Via stultī rēcta in oculīs eius; quī autem sapiēns est audit cōnsilia." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "The way of a fool is right in his own eyes: but he that is wise hearkeneth unto counsels. (Douay-Rheims trans., Challoner rev.: 1752 CE)" + } + ] + } + } + ] + ] + } ], 0, "" @@ -235,8 +682,190 @@ "n", 0, [ - "one's own possessions or resources", - "(in locative case in phrases) peace" + { + "type": "structured-content", + "content": [ + "(idiomatic) one's own possessions or resources", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "domum trahere" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to drag into one's pocket" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Domī versūra fit." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "One is one's own creditor. (proverb)" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "domō afferre" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to conceive on one's own" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "id nunc experior domō" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Now I'm learning this first-hand." + } + ] + } + } + ] + ] + }, + { + "type": "structured-content", + "content": [ + "(in locative case in phrases, idiomatic) peace", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "bellī domīque; bellō domīque; vel bellī vel domī; domī bellōque; domī mīlitiaeque" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "in war and peace" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "ut non quietior populus domi esset quam militiae" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "so that the people should not become lazier in the time of peace than that of war" + } + ] + } + } + ] + ] + } ], 0, "" diff --git a/data/test/tidy/cs-en-lemmas.json b/data/test/tidy/cs-en-lemmas.json index 34e2591..cc12ce4 100644 --- a/data/test/tidy/cs-en-lemmas.json +++ b/data/test/tidy/cs-en-lemmas.json @@ -11,7 +11,70 @@ "senses": [ { "glosses": [ - "message" + { + "type": "structured-content", + "content": [ + "message", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "textová zpráva" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "text message" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Chcete nechat zprávu?" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Would you like to leave a message?" + } + ] + } + } + ] + ] + } ], "tags": [ "feminine" @@ -29,7 +92,70 @@ }, { "glosses": [ - "report" + { + "type": "structured-content", + "content": [ + "report", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "lékařská zpráva" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "medical report" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "podat zprávu" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to file a report" + } + ] + } + } + ] + ] + } ], "tags": [ "feminine" @@ -61,7 +187,42 @@ "senses": [ { "glosses": [ - "for" + { + "type": "structured-content", + "content": [ + "for", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Zabili ho pro peníze." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "They killed him for his money." + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ diff --git a/data/test/tidy/de-en-lemmas.json b/data/test/tidy/de-en-lemmas.json index 4e118b1..f628526 100644 --- a/data/test/tidy/de-en-lemmas.json +++ b/data/test/tidy/de-en-lemmas.json @@ -290,7 +290,42 @@ "senses": [ { "glosses": [ - "fox (animal)" + { + "type": "structured-content", + "content": [ + "fox (animal)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Fuchs, du hast die Gans gestohlen. Gib sie wieder her!" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "(line from a popular children’s song)" + } + ] + } + } + ] + ] + } ], "tags": [ "masculine", @@ -305,7 +340,42 @@ }, { "glosses": [ - "(informal) a clever or cunning person" + { + "type": "structured-content", + "content": [ + "(informal) a clever or cunning person", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Er ist ein ganz schöner Fuchs." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "He is a really handsome fox." + } + ] + } + } + ] + ] + } ], "tags": [ "informal", @@ -321,7 +391,42 @@ }, { "glosses": [ - "(informal) a red-haired person or horse." + { + "type": "structured-content", + "content": [ + "(informal) a red-haired person or horse.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Unser Paul ist ja ein kleiner Fuchs." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Our Paul is a little redhead." + } + ] + } + } + ] + ] + } ], "tags": [ "informal", @@ -358,7 +463,42 @@ }, { "glosses": [ - "(card games) In Doppelkopf, the ace of diamonds, which earns a side of players an extra point if they win it from the other side" + { + "type": "structured-content", + "content": [ + "(card games) In Doppelkopf, the ace of diamonds, which earns a side of players an extra point if they win it from the other side", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Ich hatte nur vier Trümpfe und darunter beide Füchse." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "I had only four trumps and among them were both aces of diamonds." + } + ] + } + } + ] + ] + } ], "tags": [ "masculine", @@ -514,7 +654,70 @@ "senses": [ { "glosses": [ - "from" + { + "type": "structured-content", + "content": [ + "from", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Ich fahre von Köln nach Hamburg." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "I'm travelling from Cologne to Hamburg." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Ich hab’s von meiner Schwester gehört." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "I heard it from my sister." + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ @@ -530,7 +733,42 @@ }, { "glosses": [ - "of, belonging to (often replacing genitive; see usage note below)" + { + "type": "structured-content", + "content": [ + "of, belonging to (often replacing genitive; see usage note below)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "das Auto meines Vaters = das Auto von meinem Vater" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "my father’s car / the car of my father" + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ @@ -542,7 +780,42 @@ }, { "glosses": [ - "by (with passive voice)" + { + "type": "structured-content", + "content": [ + "by (with passive voice)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Das Hotel wird von der Firma bezahlt." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "The hotel is paid for by the company." + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ @@ -554,7 +827,42 @@ }, { "glosses": [ - "about, of (a topic)" + { + "type": "structured-content", + "content": [ + "about, of (a topic)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Er hat von seiner Jugend erzählt." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "He told about his youth." + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ @@ -566,7 +874,70 @@ }, { "glosses": [ - "on, with (a resource)" + { + "type": "structured-content", + "content": [ + "on, with (a resource)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Von welchem Geld soll ich als Arbeitsloser in Urlaub fahren?" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Being unemployed, on what money should I go on holidays?" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Man kann nicht nur von Luft und Liebe leben." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "You can’t live on air and love alone. (proverb)" + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ diff --git a/data/test/tidy/en-de-lemmas.json b/data/test/tidy/en-de-lemmas.json index 699112a..e76851d 100644 --- a/data/test/tidy/en-de-lemmas.json +++ b/data/test/tidy/en-de-lemmas.json @@ -11,7 +11,41 @@ "senses": [ { "glosses": [ - "[1] aussuchen, auswählen, vorziehen, wählen" + { + "type": "structured-content", + "content": [ + "[1] aussuchen, auswählen, vorziehen, wählen", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "\"Connolly was chosen for the case by prosecutors because the sheriff and Bristol District Attorney C. Samuel Sutter are “close professional and personal friends,” said Sutter spokesman Gregg Miliote.\"" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ diff --git a/data/test/tidy/en-en-lemmas.json b/data/test/tidy/en-en-lemmas.json index c1d452e..60001da 100644 --- a/data/test/tidy/en-en-lemmas.json +++ b/data/test/tidy/en-en-lemmas.json @@ -11,7 +11,41 @@ "senses": [ { "glosses": [ - "(transitive, ditransitive) To transport toward somebody/somewhere." + { + "type": "structured-content", + "content": [ + "(transitive, ditransitive) To transport toward somebody/somewhere.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Waiter, please bring me a single malt whiskey." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], "tags": [ "ditransitive", @@ -25,7 +59,41 @@ }, { "glosses": [ - "(transitive, figuratively) To supply or contribute." + { + "type": "structured-content", + "content": [ + "(transitive, figuratively) To supply or contribute.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "The new company director brought a fresh perspective on sales and marketing." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], "tags": [ "figuratively", @@ -39,7 +107,41 @@ }, { "glosses": [ - "(transitive) To occasion or bring about." + { + "type": "structured-content", + "content": [ + "(transitive) To occasion or bring about.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "The controversial TV broadcast brought a storm of complaints." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], "tags": [ "transitive" @@ -68,7 +170,41 @@ }, { "glosses": [ - "To produce in exchange; to sell for; to fetch." + { + "type": "structured-content", + "content": [ + "To produce in exchange; to sell for; to fetch.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "What does coal bring per ton?" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ @@ -79,7 +215,41 @@ }, { "glosses": [ - "(baseball) To pitch, often referring to a particularly hard thrown fastball." + { + "type": "structured-content", + "content": [ + "(baseball) To pitch, often referring to a particularly hard thrown fastball.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "The closer Jones can really bring it." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ @@ -104,7 +274,41 @@ "senses": [ { "glosses": [ - "(archaic or literary) A wagon; a four-wheeled cart for hauling loads, usually pulled by horses or oxen." + { + "type": "structured-content", + "content": [ + "(archaic or literary) A wagon; a four-wheeled cart for hauling loads, usually pulled by horses or oxen.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "\"The Hay Wain\" is a famous painting by John Constable." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], "tags": [ "archaic", diff --git a/data/test/tidy/es-en-lemmas.json b/data/test/tidy/es-en-lemmas.json index 54f9066..9db675b 100644 --- a/data/test/tidy/es-en-lemmas.json +++ b/data/test/tidy/es-en-lemmas.json @@ -24,7 +24,42 @@ }, { "glosses": [ - "(intransitive) to make a living, to live on" + { + "type": "structured-content", + "content": [ + "(intransitive) to make a living, to live on", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Vive de migas, nada más." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "He lives on crumbs, nothing more." + } + ] + } + } + ] + ] + } ], "tags": [ "intransitive" @@ -38,7 +73,70 @@ }, { "glosses": [ - "(intransitive) to live in, reside, inhabit" + { + "type": "structured-content", + "content": [ + "(intransitive) to live in, reside, inhabit", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Vive en la casa roja." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "She lives in the red house." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "La pobrecita vive con dos hermanas crueles." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "The poor girl lives with two cruel sisters." + } + ] + } + } + ] + ] + } ], "tags": [ "intransitive" diff --git a/data/test/tidy/fa-en-lemmas.json b/data/test/tidy/fa-en-lemmas.json index 561d56d..2fb8f34 100644 --- a/data/test/tidy/fa-en-lemmas.json +++ b/data/test/tidy/fa-en-lemmas.json @@ -79,7 +79,42 @@ "senses": [ { "glosses": [ - "Persian (the language of modern Iran, Afghanistan and Tajikistan, and widely spoken in Uzbekistan)." + { + "type": "structured-content", + "content": [ + "Persian (the language of modern Iran, Afghanistan and Tajikistan, and widely spoken in Uzbekistan).", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "بَرادَرِ شُوْهَرِش فارْسی بَلَدِه." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Her husband's brother knows Persian." + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ @@ -98,7 +133,42 @@ }, { "glosses": [ - "Persian, main ethnic group of Iran." + { + "type": "structured-content", + "content": [ + "Persian, main ethnic group of Iran.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "فارْسی هَسْتیم." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "We are Persian." + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ diff --git a/data/test/tidy/fr-en-lemmas.json b/data/test/tidy/fr-en-lemmas.json index a1d7388..c9ef428 100644 --- a/data/test/tidy/fr-en-lemmas.json +++ b/data/test/tidy/fr-en-lemmas.json @@ -11,7 +11,42 @@ "senses": [ { "glosses": [ - "(transitive) to take" + { + "type": "structured-content", + "content": [ + "(transitive) to take", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prends ma main" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "take my hand" + } + ] + } + } + ] + ] + } ], "tags": [ "transitive" @@ -25,7 +60,42 @@ }, { "glosses": [ - "(transitive) to eat; to drink" + { + "type": "structured-content", + "content": [ + "(transitive) to eat; to drink", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "elle prend un café" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "she is drinking a coffee" + } + ] + } + } + ] + ] + } ], "tags": [ "transitive" @@ -39,7 +109,42 @@ }, { "glosses": [ - "(transitive) to get; to buy" + { + "type": "structured-content", + "content": [ + "(transitive) to get; to buy", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Je vais prendre le plat du jour." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "I'll get the dish of the day." + } + ] + } + } + ] + ] + } ], "tags": [ "transitive" @@ -53,7 +158,42 @@ }, { "glosses": [ - "(transitive) to rob; to deprive" + { + "type": "structured-content", + "content": [ + "(transitive) to rob; to deprive", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre quelque chose à quelqu’un" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to take something from someone" + } + ] + } + } + ] + ] + } ], "tags": [ "transitive" @@ -67,7 +207,70 @@ }, { "glosses": [ - "(transitive) to make" + { + "type": "structured-content", + "content": [ + "(transitive) to make", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre une décision" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to make a decision" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre des mesures draconiennes" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to take draconian measures" + } + ] + } + } + ] + ] + } ], "tags": [ "transitive" @@ -85,7 +288,126 @@ }, { "glosses": [ - "(intransitive) to catch, to work, to start" + { + "type": "structured-content", + "content": [ + "(intransitive) to catch, to work, to start", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "le feu ne prend pas" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "the fire won't start" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "la sauce ne prend pas" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "the sauce isn't thickening" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "ma mayonnaise ne prend pas" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "my mayonnaise isn't setting" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "ça ne prend pas avec moi" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "that won't wash with me" + } + ] + } + } + ] + ] + } ], "tags": [ "intransitive" @@ -111,7 +433,70 @@ }, { "glosses": [ - "(reflexive) to get (something) caught (in), to jam" + { + "type": "structured-content", + "content": [ + "(reflexive) to get (something) caught (in), to jam", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "je me suis pris la main dans la porte" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "I caught my hand in the door" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "je me suis pris la porte dans la figure" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "the door hit me in the face" + } + ] + } + } + ] + ] + } ], "tags": [ "reflexive" @@ -207,56 +592,511 @@ }, { "glosses": [ - "(followed by a partitive, in various idiomatic expressions) to gain" - ], - "tags": [ - "followed by a partitive", - "in various idiomatic expressions" - ], - "examples": [ - { - "text": "prendre de la vitesse", - "english": "to gain speed" - }, - { - "text": "prendre du galon", - "english": "to gain a promotion" - }, - { - "text": "prendre de l’avance", - "english": "to gain ground" - }, - { - "text": "prendre du retard", - "english": "to fall behind schedule, to run late, to drop behind" - }, - { - "text": "prendre de la hauteur", - "english": "to gain some perspective" - }, - { - "text": "prendre du recul", - "english": "to take a step back" - }, - { - "text": "prendre de la bouteille", - "english": "to gain experience" - }, - { - "text": "en prendre de la graine", - "english": "to take away a lesson" - }, - { - "text": "prendre du poids", - "english": "to gain weight" - }, - { - "text": "prendre de la masse", - "english": "to build muscle" - }, { - "text": "prendre de la brioche, prendre du bide, prendre du ventre", - "english": "to get a paunch" + "type": "structured-content", + "content": [ + "(followed by a partitive, in various idiomatic expressions) to gain", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de la vitesse" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to gain speed" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre du galon" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to gain a promotion" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de l’avance" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to gain ground" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre du retard" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to fall behind schedule, to run late, to drop behind" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de la hauteur" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to gain some perspective" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre du recul" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to take a step back" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de la bouteille" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to gain experience" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "en prendre de la graine" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to take away a lesson" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre du poids" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to gain weight" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de la masse" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to build muscle" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de la brioche, prendre du bide, prendre du ventre" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to get a paunch" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre du bouchon" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to fail" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de l’élan" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to gain momentum" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de l’âge" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to get older" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de la valeur" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to gain value" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "prendre de l’importance" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to become important" + } + ] + } + } + ] + ] + } + ], + "tags": [ + "followed by a partitive", + "in various idiomatic expressions" + ], + "examples": [ + { + "text": "prendre de la vitesse", + "english": "to gain speed" + }, + { + "text": "prendre du galon", + "english": "to gain a promotion" + }, + { + "text": "prendre de l’avance", + "english": "to gain ground" + }, + { + "text": "prendre du retard", + "english": "to fall behind schedule, to run late, to drop behind" + }, + { + "text": "prendre de la hauteur", + "english": "to gain some perspective" + }, + { + "text": "prendre du recul", + "english": "to take a step back" + }, + { + "text": "prendre de la bouteille", + "english": "to gain experience" + }, + { + "text": "en prendre de la graine", + "english": "to take away a lesson" + }, + { + "text": "prendre du poids", + "english": "to gain weight" + }, + { + "text": "prendre de la masse", + "english": "to build muscle" + }, + { + "text": "prendre de la brioche, prendre du bide, prendre du ventre", + "english": "to get a paunch" }, { "text": "prendre du bouchon", @@ -282,7 +1122,42 @@ }, { "glosses": [ - "(colloquial; impersonal) to take (a certain amount of time)" + { + "type": "structured-content", + "content": [ + "(colloquial; impersonal) to take (a certain amount of time)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Ça va me prendre au moins deux heures pour le mettre à jour." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "It's going to take me at least two hours to update it." + } + ] + } + } + ] + ] + } ], "tags": [ "colloquial", @@ -297,7 +1172,42 @@ }, { "glosses": [ - "(colloquial; impersonal; by extension) to take (a certain number or amount of)" + { + "type": "structured-content", + "content": [ + "(colloquial; impersonal; by extension) to take (a certain number or amount of)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Pour finir dans deux heures, ça prend trois personnes." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "To finish in two hours, it'll take three people." + } + ] + } + } + ] + ] + } ], "tags": [ "broadly", @@ -313,7 +1223,70 @@ }, { "glosses": [ - "(impersonal) to come over (to arise in and gain some control over one's thoughts and/or actions)" + { + "type": "structured-content", + "content": [ + "(impersonal) to come over (to arise in and gain some control over one's thoughts and/or actions)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "il prend [quelque chose] à [quelqu’un]" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "[something] comes over [someone]" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Il lui prend une fantaisie de mettre le feu à la maison." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "A fancy comes over him to set fire to the house." + } + ] + } + } + ] + ] + } ], "tags": [ "impersonal" @@ -345,7 +1318,42 @@ "senses": [ { "glosses": [ - "(intransitive, impersonal) to seem, to resemble" + { + "type": "structured-content", + "content": [ + "(intransitive, impersonal) to seem, to resemble", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Il semblerait qu’il y ait des différences significatives entre les deux groupes." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "There would seem to be significant differences between the two groups." + } + ] + } + } + ] + ] + } ], "tags": [ "impersonal", diff --git a/data/test/tidy/fr-fr-lemmas.json b/data/test/tidy/fr-fr-lemmas.json index 1f93451..3aa2aeb 100644 --- a/data/test/tidy/fr-fr-lemmas.json +++ b/data/test/tidy/fr-fr-lemmas.json @@ -11,7 +11,95 @@ "senses": [ { "glosses": [ - "Dans la religion hindouiste, chacune des incarnations du dieu Vishnou." + { + "type": "structured-content", + "content": [ + "Dans la religion hindouiste, chacune des incarnations du dieu Vishnou.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Chenguza est beau comme le grand Avatar, (descendant de Bram) et quoique élevé pour le sacerdoce dans les austérités du cloître, il a l'imagination vive, le désir de s'instruire, et un penchant décidé pour la volupté." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Retiré dans les forêts, placé à la tête de nombreux pasteurs , qui ont dû se plaire à grossir sa renommée, et dont les bras l'ont aidé à renverser le tyran, Crichna évidemment dut sortir des Djangles, où il s'était tenu jusque-là dans le mystère , comme un homme environné d'une sorte d'auréole merveilleuse, qui le fit considérer de bonne heure comme un Avatar (incarnation) de la Divinité, caractère religieux et providentiel que l'Inde a toujours accordé à ses grands hommes" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Plusieurs passages des Pouranas peuvent aussi servir à prouver que les eaux ont fait éprouver au globe terrestre une grande révolution , et la plupart des avatars ou métamorphoses et incarnations de Vichenou, paraissent avoir trait à ce cataclysme général." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], "tags": [ "Hindouisme" @@ -30,7 +118,122 @@ }, { "glosses": [ - "Métamorphose, transformation d’un objet ou d’un individu qui en a déjà subi plusieurs." + { + "type": "structured-content", + "content": [ + "Métamorphose, transformation d’un objet ou d’un individu qui en a déjà subi plusieurs.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Que d’avatars dans la vie politique de cet homme d’État !" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Batman est l’avatar moderne de Zorro." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "L’espéranto est l’avatar moderne du sanskrit." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Plusieurs passages des Pouranas peuvent aussi servir à prouver que les eaux ont fait éprouver au globe terrestre une grande révolution , et la plupart des avatars ou métamorphoses et incarnations de Vichenou, paraissent avoir trait à ce cataclysme général." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], "tags": [ "figuratively" @@ -52,7 +255,122 @@ }, { "glosses": [ - "Mésaventure, malheur." + { + "type": "structured-content", + "content": [ + "Mésaventure, malheur.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Mais la guerre n’en finissait plus et c’est alors que je devins, après bien des démarches, de boulanger, élève-pilote et par la suite pilote aviateur, à ma très grande surprise et sans trop d’avatars." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "N'avions-nous pas aidé à la constituer, en courant bien des risques et au prix de quelles avatars ! Alors, lui mort, il ne savait plus qu'en faire…." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Le service social du travail – Avatars d’une fonction, vicissitudes d’un métier" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Compte tenu des divers avatars rencontrés ces dernières années (rouille du mélèze, puceron lanigère, tempêtes…), il n'est plus possible de s’intéresser à la seule productivité, […]." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], "tags": [ "broadly", diff --git a/data/test/tidy/ja-en-lemmas.json b/data/test/tidy/ja-en-lemmas.json index 90a1696..5e5f466 100644 --- a/data/test/tidy/ja-en-lemmas.json +++ b/data/test/tidy/ja-en-lemmas.json @@ -40,7 +40,125 @@ "senses": [ { "glosses": [ - "liked, favorite" + { + "type": "structured-content", + "content": [ + "liked, favorite", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "好きな食べ物は? アイスクリームです。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "What's your favorite food? - It's ice cream." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "君が好きだからこそこれほど頑張っているんだよ。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "It's precisely because I like you [because of my fondness for you] that I'm working this hard." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "好きです。\nSuki desu.\n“I like you.” → idiomatically used to express: “I love you.”" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "どうとも好きにしなさい。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Feel free to do as you like." + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ @@ -77,7 +195,42 @@ "senses": [ { "glosses": [ - "a raccoon dog, Nyctereutes procyonoides" + { + "type": "structured-content", + "content": [ + "a raccoon dog, Nyctereutes procyonoides", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "アライグマなら尻尾にシマがある。どう見でもタヌキだ。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "If you're a raccoon, you'd have stripes on your tail. No matter how you look at it, you're a raccoon dog." + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ @@ -89,7 +242,42 @@ }, { "glosses": [ - "(figurative) a person who pretends to be good but in fact is cunning (compare English sly fox)" + { + "type": "structured-content", + "content": [ + "(figurative) a person who pretends to be good but in fact is cunning (compare English sly fox)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "やいやい、其処な狸め" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Hey there, you sly dog!" + } + ] + } + } + ] + ] + } ], "tags": [ "figuratively" @@ -113,7 +301,41 @@ }, { "glosses": [ - "(rare) Short for 狸寝入り (tanuki neiri): pretending to be asleep" + { + "type": "structured-content", + "content": [ + "(rare) Short for 狸寝入り (tanuki neiri): pretending to be asleep", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "狸を決め込む ― tanuki o kimekomu ― pretend to be a raccoon dog → feign sleep" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], "tags": [ "abbreviation", @@ -220,7 +442,42 @@ }, { "glosses": [ - "(transitive) to run through some kind of place" + { + "type": "structured-content", + "content": [ + "(transitive) to run through some kind of place", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "彼はこの道をよく走る。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "He often runs down this street." + } + ] + } + } + ] + ] + } ], "tags": [ "transitive" @@ -234,7 +491,42 @@ }, { "glosses": [ - "to move smoothly; to slide" + { + "type": "structured-content", + "content": [ + "to move smoothly; to slide", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "刀が鞘から走る。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "The sword slides out of its sheath." + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ @@ -260,7 +552,70 @@ }, { "glosses": [ - "to give over oneself to; to commit oneself to (usually something bad)" + { + "type": "structured-content", + "content": [ + "to give over oneself to; to commit oneself to (usually something bad)", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "彼は敵に走った。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "He defected to the enemy." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "立場を忘れて感情に走ってはいけない。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Don't forget your stance and give in to emotions." + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ @@ -283,7 +638,41 @@ }, { "glosses": [ - "to lead or extend in a certain direction" + { + "type": "structured-content", + "content": [ + "to lead or extend in a certain direction", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "山脈が南北に走る。\nSanmyaku ga nanboku ni hashiru.\nThe mountain range runs north–south." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ @@ -294,7 +683,70 @@ }, { "glosses": [ - "to appear briefly; to flash" + { + "type": "structured-content", + "content": [ + "to appear briefly; to flash", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "稲妻が走る" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "lightning flashes by" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "背中に痛みが走った。" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "I felt a brief pain in my back." + } + ] + } + } + ] + ] + } ], "tags": [], "examples": [ diff --git a/data/test/tidy/la-en-lemmas.json b/data/test/tidy/la-en-lemmas.json index a035e01..870b685 100644 --- a/data/test/tidy/la-en-lemmas.json +++ b/data/test/tidy/la-en-lemmas.json @@ -34,7 +34,70 @@ }, { "glosses": [ - "rumour, talk, opinion, report" + { + "type": "structured-content", + "content": [ + "rumour, talk, opinion, report", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "hascine propter rēs maledicās fāmās ferunt." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Is it on account of these things that they spread slanderous reports?" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "“Oenōtrī coluēre virī; nunc fāma minōrēs" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "“Oenotrian men tilled [the land]; now rumor [has it that their] descendants call the nation ‘Italy’ after the name of its leader, [Italus].”" + } + ] + } + } + ] + ] + } ], "tags": [ "declension-1" @@ -52,7 +115,98 @@ }, { "glosses": [ - "reputation" + { + "type": "structured-content", + "content": [ + "reputation", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Dīmīcantī dē fāmā dēesse." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "To abandon one whose reputation is attacked." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Fāma tamen clāra est; et adhūc sine crīmine vīxī." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "My good name is nevertheless unstained; and so far I have lived without blame." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Multī fāmam, conscientiam paucī verentur." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Many fear their reputation, few their conscience." + } + ] + } + } + ] + ] + } ], "tags": [ "declension-1" @@ -74,7 +228,41 @@ }, { "glosses": [ - "Fama, personified as a fast-moving, malicious goddess, the daughter of Terra. From the Greek φήμη, Pheme. Typically translated from the Latin as “Rumor.”" + { + "type": "structured-content", + "content": [ + "Fama, personified as a fast-moving, malicious goddess, the daughter of Terra. From the Greek φήμη, Pheme. Typically translated from the Latin as “Rumor.”", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Extemplō Libyae magnās it Fāma per urbēs —\nFāma, malum quā nōn aliud vēlōcius ūllum.\nStraightaway Rumor runs through the great cities of Libya – Rumor, than whom [there is] not any other evil more swift." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + } + } + ] + } + } + ] + ] + } ], "tags": [ "declension-1" @@ -130,7 +318,42 @@ }, { "glosses": [ - "to appoint" + { + "type": "structured-content", + "content": [ + "to appoint", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "[…] dum ne quem militem legeret ex eo numero quibus senatus missionem reditumque in patriam negasset ante belli finem." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "[…] provided he did not choose any soldier from those to whom the Senate had refused discharge and a return home before the end of the war" + } + ] + } + } + ] + ] + } ], "tags": [ "conjugation-3" @@ -171,7 +394,126 @@ }, { "glosses": [ - "to read" + { + "type": "structured-content", + "content": [ + "to read", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Librōs lege." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Read books." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Lēgistīne hunc librum?" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Have you read this book?" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Lingua Graeca est; nōn potest legī." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "It's Greek; it cannot be read." + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Exigis, ut nostrōs dōnem tibi, Tucca, libellōs.\nNōn faciam: nam vīs vēndere, nōn legere.\nYou demand that I give our [⇒ my] little books to you, Tucca." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "I will not do [it]: for you want to sell [them], not to read [them]." + } + ] + } + } + ] + ] + } ], "tags": [ "conjugation-3" @@ -314,7 +656,42 @@ }, { "glosses": [ - "constantly, continuously" + { + "type": "structured-content", + "content": [ + "constantly, continuously", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "ab ōvō ū̆sque ad māla" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "from the beginning to the end\n(literally, “from the egg to the apples”)" + } + ] + } + } + ] + ] + } ], "tags": [ "not-comparable" @@ -362,7 +739,42 @@ "senses": [ { "glosses": [ - "led straight along, drawn in a straight line, straight, upright." + { + "type": "structured-content", + "content": [ + "led straight along, drawn in a straight line, straight, upright.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Quae rectis lineis suos ordines servant" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Which preserve their order in straight lines" + } + ] + } + } + ] + ] + } ], "tags": [ "declension-1", @@ -396,7 +808,42 @@ }, { "glosses": [ - "(in particular) morally right, correct, lawful, just, virtuous, noble, good, proper, honest." + { + "type": "structured-content", + "content": [ + "(in particular) morally right, correct, lawful, just, virtuous, noble, good, proper, honest.", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Via stultī rēcta in oculīs eius; quī autem sapiēns est audit cōnsilia." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "The way of a fool is right in his own eyes: but he that is wise hearkeneth unto counsels. (Douay-Rheims trans., Challoner rev.: 1752 CE)" + } + ] + } + } + ] + ] + } ], "tags": [ "declension-1", @@ -635,7 +1082,126 @@ }, { "glosses": [ - "(idiomatic) one's own possessions or resources" + { + "type": "structured-content", + "content": [ + "(idiomatic) one's own possessions or resources", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "domum trahere" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to drag into one's pocket" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Domī versūra fit." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "One is one's own creditor. (proverb)" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "domō afferre" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "to conceive on one's own" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "id nunc experior domō" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "Now I'm learning this first-hand." + } + ] + } + } + ] + ] + } ], "tags": [ "declension-2", @@ -665,7 +1231,70 @@ }, { "glosses": [ - "(in locative case in phrases, idiomatic) peace" + { + "type": "structured-content", + "content": [ + "(in locative case in phrases, idiomatic) peace", + [ + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "bellī domīque; bellō domīque; vel bellī vel domī; domī bellōque; domī mīlitiaeque" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "in war and peace" + } + ] + } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "ut non quietior populus domi esset quam militiae" + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "so that the people should not become lazier in the time of peace than that of war" + } + ] + } + } + ] + ] + } ], "tags": [ "declension-2", diff --git a/package-lock.json b/package-lock.json index 8f0c213..cbaa146 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,7 +13,7 @@ "date-and-time": "^2.4.2", "line-by-line": "^0.1.6", "node-stream-zip": "^1.15.0", - "yomichan-dict-builder": "^2.9.1" + "yomichan-dict-builder": "^2.9.2" }, "devDependencies": { "jest": "^29.7.0" @@ -4093,9 +4093,9 @@ } }, "node_modules/yomichan-dict-builder": { - "version": "2.9.1", - "resolved": "https://registry.npmjs.org/yomichan-dict-builder/-/yomichan-dict-builder-2.9.1.tgz", - "integrity": "sha512-21fDggyfjPfGUy6ghdDwyqysn/VlKTEA/C9QiiJibN/sO5rg9LLOAkhU/otP0Owp8i4nBZNgRDa74d9cHba5Jg==", + "version": "2.9.2", + "resolved": "https://registry.npmjs.org/yomichan-dict-builder/-/yomichan-dict-builder-2.9.2.tgz", + "integrity": "sha512-bhRDXjVLc7K+mW/u+i+KY2PuO2fjeudZo8LJZDbgZCNTpQGAmM81ByvReBHByIMgeO8whCmXpOBG8FhSexMWpQ==", "dependencies": { "jszip": "^3.10.1" } @@ -7215,9 +7215,9 @@ "dev": true }, "yomichan-dict-builder": { - "version": "2.9.1", - "resolved": "https://registry.npmjs.org/yomichan-dict-builder/-/yomichan-dict-builder-2.9.1.tgz", - "integrity": "sha512-21fDggyfjPfGUy6ghdDwyqysn/VlKTEA/C9QiiJibN/sO5rg9LLOAkhU/otP0Owp8i4nBZNgRDa74d9cHba5Jg==", + "version": "2.9.2", + "resolved": "https://registry.npmjs.org/yomichan-dict-builder/-/yomichan-dict-builder-2.9.2.tgz", + "integrity": "sha512-bhRDXjVLc7K+mW/u+i+KY2PuO2fjeudZo8LJZDbgZCNTpQGAmM81ByvReBHByIMgeO8whCmXpOBG8FhSexMWpQ==", "requires": { "jszip": "^3.10.1" } diff --git a/package.json b/package.json index 15639f2..098b29c 100755 --- a/package.json +++ b/package.json @@ -13,7 +13,7 @@ "date-and-time": "^2.4.2", "line-by-line": "^0.1.6", "node-stream-zip": "^1.15.0", - "yomichan-dict-builder": "^2.9.1" + "yomichan-dict-builder": "^2.9.2" }, "description": "Converts Kaikki JSON to Yomitan compatible dictionary.", "devDependencies": { diff --git a/types.ts b/types.ts index 5e4440c..9aa2bce 100644 --- a/types.ts +++ b/types.ts @@ -49,7 +49,7 @@ declare global { type Example = { text?: string; - type?: "example" | "quotation"; + type?: "example" | "quotation" | "quote"; english?: string; roman?: string; } From c3a4c426e1796f6166b4e98554780f529a05acca Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Wed, 16 Oct 2024 12:40:34 +0200 Subject: [PATCH 13/13] use only 2 examples of appropriate length --- 3-tidy-up.js | 13 +- data/test/dict/de/en/term_bank_1.json | 28 ++ data/test/dict/en/de/term_bank_1.json | 36 +- data/test/dict/fr/en/term_bank_1.json | 485 +--------------------- data/test/dict/fr/fr/term_bank_1.json | 225 +--------- data/test/dict/ja/en/term_bank_1.json | 55 --- data/test/dict/la/en/term_bank_1.json | 241 +---------- data/test/tidy/de-en-lemmas.json | 32 ++ data/test/tidy/en-de-lemmas.json | 42 +- data/test/tidy/fr-en-lemmas.json | 576 +------------------------- data/test/tidy/fr-fr-lemmas.json | 252 +---------- data/test/tidy/ja-en-lemmas.json | 62 --- data/test/tidy/la-en-lemmas.json | 282 +------------ data/test/tidy/sq-en-lemmas.json | 4 - 14 files changed, 86 insertions(+), 2247 deletions(-) diff --git a/3-tidy-up.js b/3-tidy-up.js index 58ed4cb..2441604 100644 --- a/3-tidy-up.js +++ b/3-tidy-up.js @@ -309,9 +309,18 @@ function getGlossTree(sensesWithoutInflectionGlosses) { for (const sense of sensesWithoutInflectionGlosses) { const { glossesArray, tags } = sense; let { examples = [] } = sense; + examples = examples - .filter(({type}) => !["quotation", "quote"].includes(type || '')) - .map(({text, english}) => ({text, english})) + .filter(({text, english}) => text && (text.length <= 70 || text.length <= 90 && !english)) // Filter out verbose examples + .map((example, index) => ({ ...example, originalIndex: index })) // Step 1: Decorate with original index + .sort(({ english: englishA, originalIndex: indexA }, { english: englishB, originalIndex: indexB }) => { + if (englishA && !englishB) return -1; // English items first + if (!englishA && englishB) return 1; // Non-English items last + return indexA - indexB; // Step 2: Stable sort by original index if equal + }) + .map(({text, english}) => ({text, english})) // Step 3: Pick only properties that will be used + .slice(0, 2); + let temp = glossTree; for (const [levelIndex, levelGloss] of glossesArray.entries()) { diff --git a/data/test/dict/de/en/term_bank_1.json b/data/test/dict/de/en/term_bank_1.json index e28d685..35a95c0 100644 --- a/data/test/dict/de/en/term_bank_1.json +++ b/data/test/dict/de/en/term_bank_1.json @@ -656,6 +656,34 @@ } ] } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Von dem Nomine Substantivo, oder dem Hauptworte." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "About the substantive noun, or the [alternative term]. (headline)" + } + ] + } } ] ] diff --git a/data/test/dict/en/de/term_bank_1.json b/data/test/dict/en/de/term_bank_1.json index 185c7f5..ec78976 100644 --- a/data/test/dict/en/de/term_bank_1.json +++ b/data/test/dict/en/de/term_bank_1.json @@ -6,41 +6,7 @@ "verb", 0, [ - { - "type": "structured-content", - "content": [ - "[1] aussuchen, auswählen, vorziehen, wählen", - [ - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "\"Connolly was chosen for the case by prosecutors because the sheriff and Bristol District Attorney C. Samuel Sutter are “close professional and personal friends,” said Sutter spokesman Gregg Miliote.\"" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - } - ] - ] - } + "[1] aussuchen, auswählen, vorziehen, wählen" ], 0, "" diff --git a/data/test/dict/fr/en/term_bank_1.json b/data/test/dict/fr/en/term_bank_1.json index 457ac10..cb1374f 100644 --- a/data/test/dict/fr/en/term_bank_1.json +++ b/data/test/dict/fr/en/term_bank_1.json @@ -324,62 +324,6 @@ } ] } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "ma mayonnaise ne prend pas" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "my mayonnaise isn't setting" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "ça ne prend pas avec moi" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "that won't wash with me" - } - ] - } } ] ] @@ -530,398 +474,6 @@ } ] } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de l’avance" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to gain ground" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre du retard" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to fall behind schedule, to run late, to drop behind" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de la hauteur" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to gain some perspective" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre du recul" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to take a step back" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de la bouteille" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to gain experience" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "en prendre de la graine" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to take away a lesson" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre du poids" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to gain weight" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de la masse" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to build muscle" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de la brioche, prendre du bide, prendre du ventre" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to get a paunch" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre du bouchon" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to fail" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de l’élan" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to gain momentum" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de l’âge" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to get older" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de la valeur" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to gain value" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de l’importance" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to become important" - } - ] - } } ] ] @@ -1095,42 +647,7 @@ "v", 0, [ - { - "type": "structured-content", - "content": [ - "(intransitive, impersonal) to seem, to resemble", - [ - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Il semblerait qu’il y ait des différences significatives entre les deux groupes." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "There would seem to be significant differences between the two groups." - } - ] - } - } - ] - ] - } + "to seem, to resemble" ], 0, "" diff --git a/data/test/dict/fr/fr/term_bank_1.json b/data/test/dict/fr/fr/term_bank_1.json index aaf02f0..32c6fd3 100644 --- a/data/test/dict/fr/fr/term_bank_1.json +++ b/data/test/dict/fr/fr/term_bank_1.json @@ -6,95 +6,7 @@ "noun", 0, [ - { - "type": "structured-content", - "content": [ - "Dans la religion hindouiste, chacune des incarnations du dieu Vishnou.", - [ - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Chenguza est beau comme le grand Avatar, (descendant de Bram) et quoique élevé pour le sacerdoce dans les austérités du cloître, il a l'imagination vive, le désir de s'instruire, et un penchant décidé pour la volupté." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Retiré dans les forêts, placé à la tête de nombreux pasteurs , qui ont dû se plaire à grossir sa renommée, et dont les bras l'ont aidé à renverser le tyran, Crichna évidemment dut sortir des Djangles, où il s'était tenu jusque-là dans le mystère , comme un homme environné d'une sorte d'auréole merveilleuse, qui le fit considérer de bonne heure comme un Avatar (incarnation) de la Divinité, caractère religieux et providentiel que l'Inde a toujours accordé à ses grands hommes" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Plusieurs passages des Pouranas peuvent aussi servir à prouver que les eaux ont fait éprouver au globe terrestre une grande révolution , et la plupart des avatars ou métamorphoses et incarnations de Vichenou, paraissent avoir trait à ce cataclysme général." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - } - ] - ] - }, + "Dans la religion hindouiste, chacune des incarnations du dieu Vishnou.", { "type": "structured-content", "content": [ @@ -153,60 +65,6 @@ } ] } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "L’espéranto est l’avatar moderne du sanskrit." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Plusieurs passages des Pouranas peuvent aussi servir à prouver que les eaux ont fait éprouver au globe terrestre une grande révolution , et la plupart des avatars ou métamorphoses et incarnations de Vichenou, paraissent avoir trait à ce cataclysme général." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } } ] ] @@ -227,60 +85,6 @@ "content": [ "Mésaventure, malheur.", [ - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Mais la guerre n’en finissait plus et c’est alors que je devins, après bien des démarches, de boulanger, élève-pilote et par la suite pilote aviateur, à ma très grande surprise et sans trop d’avatars." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "N'avions-nous pas aidé à la constituer, en courant bien des risques et au prix de quelles avatars ! Alors, lui mort, il ne savait plus qu'en faire…." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - }, { "tag": "div", "data": { @@ -307,33 +111,6 @@ } ] } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Compte tenu des divers avatars rencontrés ces dernières années (rouille du mélèze, puceron lanigère, tempêtes…), il n'est plus possible de s’intéresser à la seule productivité, […]." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } } ] ] diff --git a/data/test/dict/ja/en/term_bank_1.json b/data/test/dict/ja/en/term_bank_1.json index 60271e5..40cbfcd 100644 --- a/data/test/dict/ja/en/term_bank_1.json +++ b/data/test/dict/ja/en/term_bank_1.json @@ -78,61 +78,6 @@ } ] } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "好きです。\nSuki desu.\n“I like you.” → idiomatically used to express: “I love you.”" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "どうとも好きにしなさい。" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "Feel free to do as you like." - } - ] - } } ] ] diff --git a/data/test/dict/la/en/term_bank_1.json b/data/test/dict/la/en/term_bank_1.json index 69b7f0b..20efd49 100644 --- a/data/test/dict/la/en/term_bank_1.json +++ b/data/test/dict/la/en/term_bank_1.json @@ -131,73 +131,11 @@ } ] } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Multī fāmam, conscientiam paucī verentur." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "Many fear their reputation, few their conscience." - } - ] - } } ] ] }, - { - "type": "structured-content", - "content": [ - "Fama, personified as a fast-moving, malicious goddess, the daughter of Terra. From the Greek φήμη, Pheme. Typically translated from the Latin as “Rumor.”", - [ - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Extemplō Libyae magnās it Fāma per urbēs —\nFāma, malum quā nōn aliud vēlōcius ūllum.\nStraightaway Rumor runs through the great cities of Libya – Rumor, than whom [there is] not any other evil more swift." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - } - ] - ] - } + "Fama, personified as a fast-moving, malicious goddess, the daughter of Terra. From the Greek φήμη, Pheme. Typically translated from the Latin as “Rumor.”" ], 0, "" @@ -210,42 +148,7 @@ 0, [ "to choose, select", - { - "type": "structured-content", - "content": [ - "to appoint", - [ - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "[…] dum ne quem militem legeret ex eo numero quibus senatus missionem reditumque in patriam negasset ante belli finem." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "[…] provided he did not choose any soldier from those to whom the Senate had refused discharge and a return home before the end of the war" - } - ] - } - } - ] - ] - }, + "to appoint", "to collect, gather, bring together", "to take out, pick out, extract, remove", "to take to one's self unjustly, carry off, steal, purloin, plunder, abstract", @@ -309,62 +212,6 @@ } ] } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Lingua Graeca est; nōn potest legī." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "It's Greek; it cannot be read." - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Exigis, ut nostrōs dōnem tibi, Tucca, libellōs.\nNōn faciam: nam vīs vēndere, nōn legere.\nYou demand that I give our [⇒ my] little books to you, Tucca." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "I will not do [it]: for you want to sell [them], not to read [them]." - } - ] - } } ] ] @@ -742,62 +589,6 @@ } ] } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "domō afferre" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to conceive on one's own" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "id nunc experior domō" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "Now I'm learning this first-hand." - } - ] - } } ] ] @@ -807,34 +598,6 @@ "content": [ "(in locative case in phrases, idiomatic) peace", [ - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "bellī domīque; bellō domīque; vel bellī vel domī; domī bellōque; domī mīlitiaeque" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "in war and peace" - } - ] - } - }, { "tag": "div", "data": { diff --git a/data/test/tidy/de-en-lemmas.json b/data/test/tidy/de-en-lemmas.json index f628526..f3cc615 100644 --- a/data/test/tidy/de-en-lemmas.json +++ b/data/test/tidy/de-en-lemmas.json @@ -859,6 +859,34 @@ } ] } + }, + { + "tag": "div", + "data": { + "content": "extra-info" + }, + "content": { + "tag": "div", + "data": { + "content": "example-sentence" + }, + "content": [ + { + "tag": "div", + "data": { + "content": "example-sentence-a" + }, + "content": "Von dem Nomine Substantivo, oder dem Hauptworte." + }, + { + "tag": "div", + "data": { + "content": "example-sentence-b" + }, + "content": "About the substantive noun, or the [alternative term]. (headline)" + } + ] + } } ] ] @@ -869,6 +897,10 @@ { "text": "Er hat von seiner Jugend erzählt.", "english": "He told about his youth." + }, + { + "text": "Von dem Nomine Substantivo, oder dem Hauptworte.", + "english": "About the substantive noun, or the [alternative term]. (headline)" } ] }, diff --git a/data/test/tidy/en-de-lemmas.json b/data/test/tidy/en-de-lemmas.json index e76851d..b2eb29b 100644 --- a/data/test/tidy/en-de-lemmas.json +++ b/data/test/tidy/en-de-lemmas.json @@ -11,48 +11,10 @@ "senses": [ { "glosses": [ - { - "type": "structured-content", - "content": [ - "[1] aussuchen, auswählen, vorziehen, wählen", - [ - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "\"Connolly was chosen for the case by prosecutors because the sheriff and Bristol District Attorney C. Samuel Sutter are “close professional and personal friends,” said Sutter spokesman Gregg Miliote.\"" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - } - ] - ] - } + "[1] aussuchen, auswählen, vorziehen, wählen" ], "tags": [], - "examples": [ - { - "text": "\"Connolly was chosen for the case by prosecutors because the sheriff and Bristol District Attorney C. Samuel Sutter are “close professional and personal friends,” said Sutter spokesman Gregg Miliote.\"" - } - ] + "examples": [] } ] } diff --git a/data/test/tidy/fr-en-lemmas.json b/data/test/tidy/fr-en-lemmas.json index c9ef428..fc91d37 100644 --- a/data/test/tidy/fr-en-lemmas.json +++ b/data/test/tidy/fr-en-lemmas.json @@ -348,62 +348,6 @@ } ] } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "ma mayonnaise ne prend pas" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "my mayonnaise isn't setting" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "ça ne prend pas avec moi" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "that won't wash with me" - } - ] - } } ] ] @@ -420,14 +364,6 @@ { "text": "la sauce ne prend pas", "english": "the sauce isn't thickening" - }, - { - "text": "ma mayonnaise ne prend pas", - "english": "my mayonnaise isn't setting" - }, - { - "text": "ça ne prend pas avec moi", - "english": "that won't wash with me" } ] }, @@ -567,26 +503,6 @@ { "text": "Qu’est-ce qui lui a pris ? Quelle mouche l’a piqué ?", "english": "What was he thinking? What got into him?" - }, - { - "text": "bien lui en prit", - "english": "good for him; it was a good choice (literally, “he took it well”)" - }, - { - "text": "mal lui en prit", - "english": "too bad for him; it was a bad choice (literally, “he took it badly”)" - }, - { - "text": "prendre en aversion", - "english": "take an aversion (to)" - }, - { - "text": "prendre en grippe", - "english": "take a dislike (to)" - }, - { - "text": "prendre en dégoût", - "english": "become disgusted (by)" } ] }, @@ -652,398 +568,6 @@ } ] } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de l’avance" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to gain ground" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre du retard" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to fall behind schedule, to run late, to drop behind" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de la hauteur" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to gain some perspective" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre du recul" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to take a step back" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de la bouteille" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to gain experience" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "en prendre de la graine" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to take away a lesson" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre du poids" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to gain weight" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de la masse" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to build muscle" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de la brioche, prendre du bide, prendre du ventre" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to get a paunch" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre du bouchon" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to fail" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de l’élan" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to gain momentum" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de l’âge" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to get older" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de la valeur" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to gain value" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "prendre de l’importance" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to become important" - } - ] - } } ] ] @@ -1061,62 +585,6 @@ { "text": "prendre du galon", "english": "to gain a promotion" - }, - { - "text": "prendre de l’avance", - "english": "to gain ground" - }, - { - "text": "prendre du retard", - "english": "to fall behind schedule, to run late, to drop behind" - }, - { - "text": "prendre de la hauteur", - "english": "to gain some perspective" - }, - { - "text": "prendre du recul", - "english": "to take a step back" - }, - { - "text": "prendre de la bouteille", - "english": "to gain experience" - }, - { - "text": "en prendre de la graine", - "english": "to take away a lesson" - }, - { - "text": "prendre du poids", - "english": "to gain weight" - }, - { - "text": "prendre de la masse", - "english": "to build muscle" - }, - { - "text": "prendre de la brioche, prendre du bide, prendre du ventre", - "english": "to get a paunch" - }, - { - "text": "prendre du bouchon", - "english": "to fail" - }, - { - "text": "prendre de l’élan", - "english": "to gain momentum" - }, - { - "text": "prendre de l’âge", - "english": "to get older" - }, - { - "text": "prendre de la valeur", - "english": "to gain value" - }, - { - "text": "prendre de l’importance", - "english": "to become important" } ] }, @@ -1318,53 +786,13 @@ "senses": [ { "glosses": [ - { - "type": "structured-content", - "content": [ - "(intransitive, impersonal) to seem, to resemble", - [ - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Il semblerait qu’il y ait des différences significatives entre les deux groupes." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "There would seem to be significant differences between the two groups." - } - ] - } - } - ] - ] - } + "(intransitive, impersonal) to seem, to resemble" ], "tags": [ "impersonal", "intransitive" ], - "examples": [ - { - "text": "Il semblerait qu’il y ait des différences significatives entre les deux groupes.", - "english": "There would seem to be significant differences between the two groups." - } - ] + "examples": [] }, { "glosses": [ diff --git a/data/test/tidy/fr-fr-lemmas.json b/data/test/tidy/fr-fr-lemmas.json index 3aa2aeb..e3b72fc 100644 --- a/data/test/tidy/fr-fr-lemmas.json +++ b/data/test/tidy/fr-fr-lemmas.json @@ -11,110 +11,12 @@ "senses": [ { "glosses": [ - { - "type": "structured-content", - "content": [ - "Dans la religion hindouiste, chacune des incarnations du dieu Vishnou.", - [ - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Chenguza est beau comme le grand Avatar, (descendant de Bram) et quoique élevé pour le sacerdoce dans les austérités du cloître, il a l'imagination vive, le désir de s'instruire, et un penchant décidé pour la volupté." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Retiré dans les forêts, placé à la tête de nombreux pasteurs , qui ont dû se plaire à grossir sa renommée, et dont les bras l'ont aidé à renverser le tyran, Crichna évidemment dut sortir des Djangles, où il s'était tenu jusque-là dans le mystère , comme un homme environné d'une sorte d'auréole merveilleuse, qui le fit considérer de bonne heure comme un Avatar (incarnation) de la Divinité, caractère religieux et providentiel que l'Inde a toujours accordé à ses grands hommes" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Plusieurs passages des Pouranas peuvent aussi servir à prouver que les eaux ont fait éprouver au globe terrestre une grande révolution , et la plupart des avatars ou métamorphoses et incarnations de Vichenou, paraissent avoir trait à ce cataclysme général." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - } - ] - ] - } + "Dans la religion hindouiste, chacune des incarnations du dieu Vishnou." ], "tags": [ "Hindouisme" ], - "examples": [ - { - "text": "Chenguza est beau comme le grand Avatar, (descendant de Bram) et quoique élevé pour le sacerdoce dans les austérités du cloître, il a l'imagination vive, le désir de s'instruire, et un penchant décidé pour la volupté." - }, - { - "text": "Retiré dans les forêts, placé à la tête de nombreux pasteurs , qui ont dû se plaire à grossir sa renommée, et dont les bras l'ont aidé à renverser le tyran, Crichna évidemment dut sortir des Djangles, où il s'était tenu jusque-là dans le mystère , comme un homme environné d'une sorte d'auréole merveilleuse, qui le fit considérer de bonne heure comme un Avatar (incarnation) de la Divinité, caractère religieux et providentiel que l'Inde a toujours accordé à ses grands hommes" - }, - { - "text": "Plusieurs passages des Pouranas peuvent aussi servir à prouver que les eaux ont fait éprouver au globe terrestre une grande révolution , et la plupart des avatars ou métamorphoses et incarnations de Vichenou, paraissent avoir trait à ce cataclysme général." - } - ] + "examples": [] }, { "glosses": [ @@ -176,60 +78,6 @@ } ] } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "L’espéranto est l’avatar moderne du sanskrit." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Plusieurs passages des Pouranas peuvent aussi servir à prouver que les eaux ont fait éprouver au globe terrestre une grande révolution , et la plupart des avatars ou métamorphoses et incarnations de Vichenou, paraissent avoir trait à ce cataclysme général." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } } ] ] @@ -244,12 +92,6 @@ }, { "text": "Batman est l’avatar moderne de Zorro." - }, - { - "text": "L’espéranto est l’avatar moderne du sanskrit." - }, - { - "text": "Plusieurs passages des Pouranas peuvent aussi servir à prouver que les eaux ont fait éprouver au globe terrestre une grande révolution , et la plupart des avatars ou métamorphoses et incarnations de Vichenou, paraissent avoir trait à ce cataclysme général." } ] }, @@ -260,60 +102,6 @@ "content": [ "Mésaventure, malheur.", [ - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Mais la guerre n’en finissait plus et c’est alors que je devins, après bien des démarches, de boulanger, élève-pilote et par la suite pilote aviateur, à ma très grande surprise et sans trop d’avatars." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "N'avions-nous pas aidé à la constituer, en courant bien des risques et au prix de quelles avatars ! Alors, lui mort, il ne savait plus qu'en faire…." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - }, { "tag": "div", "data": { @@ -340,33 +128,6 @@ } ] } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Compte tenu des divers avatars rencontrés ces dernières années (rouille du mélèze, puceron lanigère, tempêtes…), il n'est plus possible de s’intéresser à la seule productivité, […]." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } } ] ] @@ -377,17 +138,8 @@ "Utilisé à tort" ], "examples": [ - { - "text": "Mais la guerre n’en finissait plus et c’est alors que je devins, après bien des démarches, de boulanger, élève-pilote et par la suite pilote aviateur, à ma très grande surprise et sans trop d’avatars." - }, - { - "text": "N'avions-nous pas aidé à la constituer, en courant bien des risques et au prix de quelles avatars ! Alors, lui mort, il ne savait plus qu'en faire…." - }, { "text": "Le service social du travail – Avatars d’une fonction, vicissitudes d’un métier" - }, - { - "text": "Compte tenu des divers avatars rencontrés ces dernières années (rouille du mélèze, puceron lanigère, tempêtes…), il n'est plus possible de s’intéresser à la seule productivité, […]." } ] } diff --git a/data/test/tidy/ja-en-lemmas.json b/data/test/tidy/ja-en-lemmas.json index 5e5f466..88f5ba9 100644 --- a/data/test/tidy/ja-en-lemmas.json +++ b/data/test/tidy/ja-en-lemmas.json @@ -100,61 +100,6 @@ } ] } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "好きです。\nSuki desu.\n“I like you.” → idiomatically used to express: “I love you.”" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "どうとも好きにしなさい。" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "Feel free to do as you like." - } - ] - } } ] ] @@ -169,13 +114,6 @@ { "text": "君が好きだからこそこれほど頑張っているんだよ。", "english": "It's precisely because I like you [because of my fondness for you] that I'm working this hard." - }, - { - "text": "好きです。\nSuki desu.\n“I like you.” → idiomatically used to express: “I love you.”" - }, - { - "text": "どうとも好きにしなさい。", - "english": "Feel free to do as you like." } ] } diff --git a/data/test/tidy/la-en-lemmas.json b/data/test/tidy/la-en-lemmas.json index 870b685..2566c0f 100644 --- a/data/test/tidy/la-en-lemmas.json +++ b/data/test/tidy/la-en-lemmas.json @@ -175,34 +175,6 @@ } ] } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Multī fāmam, conscientiam paucī verentur." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "Many fear their reputation, few their conscience." - } - ] - } } ] ] @@ -219,59 +191,17 @@ { "text": "Fāma tamen clāra est; et adhūc sine crīmine vīxī.", "english": "My good name is nevertheless unstained; and so far I have lived without blame." - }, - { - "text": "Multī fāmam, conscientiam paucī verentur.", - "english": "Many fear their reputation, few their conscience." } ] }, { "glosses": [ - { - "type": "structured-content", - "content": [ - "Fama, personified as a fast-moving, malicious goddess, the daughter of Terra. From the Greek φήμη, Pheme. Typically translated from the Latin as “Rumor.”", - [ - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Extemplō Libyae magnās it Fāma per urbēs —\nFāma, malum quā nōn aliud vēlōcius ūllum.\nStraightaway Rumor runs through the great cities of Libya – Rumor, than whom [there is] not any other evil more swift." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - } - } - ] - } - } - ] - ] - } + "Fama, personified as a fast-moving, malicious goddess, the daughter of Terra. From the Greek φήμη, Pheme. Typically translated from the Latin as “Rumor.”" ], "tags": [ "declension-1" ], - "examples": [ - { - "text": "Extemplō Libyae magnās it Fāma per urbēs —\nFāma, malum quā nōn aliud vēlōcius ūllum.\nStraightaway Rumor runs through the great cities of Libya – Rumor, than whom [there is] not any other evil more swift." - } - ] + "examples": [] } ] } @@ -318,52 +248,12 @@ }, { "glosses": [ - { - "type": "structured-content", - "content": [ - "to appoint", - [ - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "[…] dum ne quem militem legeret ex eo numero quibus senatus missionem reditumque in patriam negasset ante belli finem." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "[…] provided he did not choose any soldier from those to whom the Senate had refused discharge and a return home before the end of the war" - } - ] - } - } - ] - ] - } + "to appoint" ], "tags": [ "conjugation-3" ], - "examples": [ - { - "text": "[…] dum ne quem militem legeret ex eo numero quibus senatus missionem reditumque in patriam negasset ante belli finem.", - "english": "[…] provided he did not choose any soldier from those to whom the Senate had refused discharge and a return home before the end of the war" - } - ] + "examples": [] }, { "glosses": [ @@ -454,62 +344,6 @@ } ] } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Lingua Graeca est; nōn potest legī." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "It's Greek; it cannot be read." - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "Exigis, ut nostrōs dōnem tibi, Tucca, libellōs.\nNōn faciam: nam vīs vēndere, nōn legere.\nYou demand that I give our [⇒ my] little books to you, Tucca." - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "I will not do [it]: for you want to sell [them], not to read [them]." - } - ] - } } ] ] @@ -526,14 +360,6 @@ { "text": "Lēgistīne hunc librum?", "english": "Have you read this book?" - }, - { - "text": "Lingua Graeca est; nōn potest legī.", - "english": "It's Greek; it cannot be read." - }, - { - "text": "Exigis, ut nostrōs dōnem tibi, Tucca, libellōs.\nNōn faciam: nam vīs vēndere, nōn legere.\nYou demand that I give our [⇒ my] little books to you, Tucca.", - "english": "I will not do [it]: for you want to sell [them], not to read [them]." } ] }, @@ -945,10 +771,6 @@ { "text": "Stet fortūna domūs", "english": "Let the good fortune of the house stand (motto of Harrow School, England)" - }, - { - "text": "nox erat et tōta lūmina nūlla domō", - "english": "It was night, and [there were] no lights in the whole house." } ] }, @@ -1142,62 +964,6 @@ } ] } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "domō afferre" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "to conceive on one's own" - } - ] - } - }, - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "id nunc experior domō" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "Now I'm learning this first-hand." - } - ] - } } ] ] @@ -1218,14 +984,6 @@ { "text": "Domī versūra fit.", "english": "One is one's own creditor. (proverb)" - }, - { - "text": "domō afferre", - "english": "to conceive on one's own" - }, - { - "text": "id nunc experior domō", - "english": "Now I'm learning this first-hand." } ] }, @@ -1236,34 +994,6 @@ "content": [ "(in locative case in phrases, idiomatic) peace", [ - { - "tag": "div", - "data": { - "content": "extra-info" - }, - "content": { - "tag": "div", - "data": { - "content": "example-sentence" - }, - "content": [ - { - "tag": "div", - "data": { - "content": "example-sentence-a" - }, - "content": "bellī domīque; bellō domīque; vel bellī vel domī; domī bellōque; domī mīlitiaeque" - }, - { - "tag": "div", - "data": { - "content": "example-sentence-b" - }, - "content": "in war and peace" - } - ] - } - }, { "tag": "div", "data": { @@ -1305,10 +1035,6 @@ "in locative case in phrases" ], "examples": [ - { - "text": "bellī domīque; bellō domīque; vel bellī vel domī; domī bellōque; domī mīlitiaeque", - "english": "in war and peace" - }, { "text": "ut non quietior populus domi esset quam militiae", "english": "so that the people should not become lazier in the time of peace than that of war" diff --git a/data/test/tidy/sq-en-lemmas.json b/data/test/tidy/sq-en-lemmas.json index 6436081..a17281f 100644 --- a/data/test/tidy/sq-en-lemmas.json +++ b/data/test/tidy/sq-en-lemmas.json @@ -254,10 +254,6 @@ { "text": "gjuhë e trashë", "english": "foul language" - }, - { - "text": "gjuha e shkrimtarit", - "english": "the author's style" } ] }