From 56b26f2bc019855fba98c250a75ee12f53b2c58a Mon Sep 17 00:00:00 2001 From: Stefan Vukovic Date: Wed, 7 Aug 2024 17:13:25 +0200 Subject: [PATCH] [de-en] fix missing prepositions --- 3-tidy-up.js | 11 ++++-- data/test/dict/de/en/tag_bank_1.json | 7 ++++ data/test/dict/de/en/term_bank_1.json | 16 ++++++++ data/test/ipa/de/en/term_meta_bank_1.json | 17 ++++++++ data/test/kaikki/de-en.json | 1 + data/test/tidy/de-en-lemmas.json | 48 +++++++++++++++++++++++ 6 files changed, 97 insertions(+), 3 deletions(-) diff --git a/3-tidy-up.js b/3-tidy-up.js index 61fa023..d5f53ea 100644 --- a/3-tidy-up.js +++ b/3-tidy-up.js @@ -353,11 +353,16 @@ function getCanonicalForm({word, forms}) { form.tags && form.tags.includes('canonical') ); - if (canonicalForm) { + if (canonicalForm && canonicalForm.form) { word = canonicalForm.form; - if (word && word.includes('{{#ifexist:Wiktionary')) { // TODO: remove once fixed in kaikki - word = word.replace(/ {{#if:.+/, ''); + if (word.includes('{{#ifexist:Wiktionary')) { // TODO: remove once fixed in kaikki + word = word.replace(/ {{#if:.+/, '').trim(); + } + + bracketsRegex = /\[.*\]$/; + if (bracketsRegex.test(word)) { + word = word.replace(bracketsRegex, '').trim(); } } return word; diff --git a/data/test/dict/de/en/tag_bank_1.json b/data/test/dict/de/en/tag_bank_1.json index fbf423d..721d192 100644 --- a/data/test/dict/de/en/tag_bank_1.json +++ b/data/test/dict/de/en/tag_bank_1.json @@ -96,5 +96,12 @@ 1, "rare", -1 + ], + [ + "prep", + "partOfSpeech", + -1, + "preposition", + 1 ] ] \ No newline at end of file diff --git a/data/test/dict/de/en/term_bank_1.json b/data/test/dict/de/en/term_bank_1.json index c523025..78d4cd3 100644 --- a/data/test/dict/de/en/term_bank_1.json +++ b/data/test/dict/de/en/term_bank_1.json @@ -351,5 +351,21 @@ ], 0, "" + ], + [ + "von", + "", + "prep", + "prep", + 0, + [ + "from", + "of, belonging to (often replacing genitive; see usage note below)", + "by (with passive voice)", + "about, of (a topic)", + "on, with 
(a resource)" + ], + 0, + "" ] ] \ No newline at end of file diff --git a/data/test/ipa/de/en/term_meta_bank_1.json b/data/test/ipa/de/en/term_meta_bank_1.json index e10987f..2349348 100644 --- a/data/test/ipa/de/en/term_meta_bank_1.json +++ b/data/test/ipa/de/en/term_meta_bank_1.json @@ -70,5 +70,22 @@ } ] } + ], + [ + "von", + "ipa", + { + "reading": "von", + "transcriptions": [ + { + "ipa": "/fɔn/", + "tags": [] + }, + { + "ipa": "[fɔn]", + "tags": [] + } + ] + } ] ] \ No newline at end of file diff --git a/data/test/kaikki/de-en.json b/data/test/kaikki/de-en.json index cbb2878..fa8041f 100644 --- a/data/test/kaikki/de-en.json +++ b/data/test/kaikki/de-en.json @@ -2,3 +2,4 @@ {"pos": "noun", "head_templates": [{"name": "de-noun", "args": {"1": "m,,^e", "dim": "^lein,^chen", "f": "^in"}, "expansion": "Fuchs m (strong, genitive Fuchses, plural Füchse, diminutive Füchslein n or Füchschen n, feminine Füchsin)"}], "forms": [{"form": "Fuchses", "tags": ["genitive"]}, {"form": "Füchse", "tags": ["plural"]}, {"form": "Füchslein", "tags": ["diminutive", "neuter"]}, {"form": "Füchschen", "tags": ["diminutive", "neuter"]}, {"form": "Füchsin", "tags": ["feminine"]}, {"form": "strong", "source": "declension", "tags": ["table-tags"]}, {"form": "de-ndecl", "source": "declension", "tags": ["inflection-template"]}, {"form": "Fuchs", "tags": ["nominative", "singular"], "source": "declension"}, {"form": "Füchse", "tags": ["definite", "nominative", "plural"], "source": "declension"}, {"form": "Fuchses", "tags": ["genitive", "singular"], "source": "declension"}, {"form": "Füchse", "tags": ["definite", "genitive", "plural"], "source": "declension"}, {"form": "Fuchs", "tags": ["dative", "singular"], "source": "declension"}, {"form": "Fuchse", "tags": ["dative", "singular"], "source": "declension"}, {"form": "Füchsen", "tags": ["dative", "definite", "plural"], "source": "declension"}, {"form": "Fuchs", "tags": ["accusative", "singular"], "source": "declension"}, {"form": "Füchse", 
"tags": ["accusative", "definite", "plural"], "source": "declension"}], "inflection_templates": [{"name": "de-ndecl", "args": {"1": "m,,^e"}}], "etymology_number": 1, "etymology_text": "From Middle High German vuhs, from Old High German fuhs, from Proto-West Germanic *fuhs, from Proto-Germanic *fuhsaz, from Proto-Indo-European *púḱsos (“the tailed one”), from *puḱ- (“tail”). Cognate with English fox, Sanskrit पुच्छ (púccha).", "etymology_templates": [{"name": "inh", "args": {"1": "de", "2": "gmh", "3": "vuhs"}, "expansion": "Middle High German vuhs"}, {"name": "inh", "args": {"1": "de", "2": "goh", "3": "fuhs"}, "expansion": "Old High German fuhs"}, {"name": "inh", "args": {"1": "de", "2": "gmw-pro", "3": "*fuhs"}, "expansion": "Proto-West Germanic *fuhs"}, {"name": "inh", "args": {"1": "de", "2": "gem-pro", "3": "*fuhsaz"}, "expansion": "Proto-Germanic *fuhsaz"}, {"name": "inh", "args": {"1": "de", "2": "ine-pro", "3": "*púḱsos", "4": "", "5": "the tailed one"}, "expansion": "Proto-Indo-European *púḱsos (“the tailed one”)"}, {"name": "m", "args": {"1": "ine-pro", "2": "*puḱ-", "3": "", "4": "tail"}, "expansion": "*puḱ- (“tail”)"}, {"name": "cog", "args": {"1": "en", "2": "fox"}, "expansion": "English fox"}, {"name": "cog", "args": {"1": "sa", "2": "पुच्छ", "tr": "púccha"}, "expansion": "Sanskrit पुच्छ (púccha)"}], "word": "Fuchs", "lang": "German", "lang_code": "de", "sounds": [{"ipa": "/fʊks/"}, {"rhymes": "-ʊks"}, {"audio": "De-Fuchs.ogg", "text": "audio", "ogg_url": "https://upload.wikimedia.org/wikipedia/commons/1/11/De-Fuchs.ogg", "mp3_url": "https://upload.wikimedia.org/wikipedia/commons/transcoded/1/11/De-Fuchs.ogg/De-Fuchs.ogg.mp3"}, {"audio": "De-Fuchs2.ogg", "text": "audio", "ogg_url": "https://upload.wikimedia.org/wikipedia/commons/e/e0/De-Fuchs2.ogg", "mp3_url": "https://upload.wikimedia.org/wikipedia/commons/transcoded/e/e0/De-Fuchs2.ogg/De-Fuchs2.ogg.mp3"}], "categories": [], "senses": [{"examples": [{"text": "Fuchs, du hast die Gans gestohlen. 
Gib sie wieder her!", "english": "(line from a popular children’s song)", "type": "example", "roman": "Fox, you stole the goose. Bring it back again!"}], "links": [["fox", "fox"]], "glosses": ["fox (animal)"], "tags": ["masculine", "strong"], "id": "Fuchs-de-noun-yo8s1zGs", "categories": []}, {"examples": [{"text": "Er ist ein ganz schöner Fuchs.", "english": "He is a really handsome fox.", "type": "example"}], "links": [["clever", "clever"], ["cunning", "cunning"]], "raw_glosses": ["(informal) a clever or cunning person"], "glosses": ["a clever or cunning person"], "tags": ["informal", "masculine", "strong"], "id": "Fuchs-de-noun-4ynxxigR", "categories": []}, {"examples": [{"text": "Unser Paul ist ja ein kleiner Fuchs.", "english": "Our Paul is a little redhead.", "type": "example"}], "links": [["red-haired", "red-haired"], ["horse", "horse"]], "raw_glosses": ["(informal) a red-haired person or horse."], "glosses": ["a red-haired person or horse."], "tags": ["informal", "masculine", "strong"], "id": "Fuchs-de-noun-~KVMNOJb", "categories": []}, {"links": [["pledge", "pledge"]], "glosses": ["pledge (prospective member of a fraternity)"], "tags": ["masculine", "strong"], "id": "Fuchs-de-noun-JWK5SYEL"}, {"links": [["military", "military"], ["recruit", "recruit"]], "raw_glosses": ["(military, slang) A new recruit."], "topics": ["government", "military", "politics", "war"], "glosses": ["A new recruit."], "tags": ["masculine", "slang", "strong"], "id": "Fuchs-de-noun-5Hi11wxz", "categories": [{"name": "Military", "kind": "topical", "parents": ["Society", "All topics", "Fundamental"], "source": "w", "orig": "de:Military", "langcode": "de"}]}, {"examples": [{"text": "Ich hatte nur vier Trümpfe und darunter beide Füchse.", "english": "I had only four trumps and among them were both aces of diamonds.", "type": "example"}], "links": [["card game", "card game"], ["Doppelkopf", "Doppelkopf#German"]], "raw_glosses": ["(card games) In Doppelkopf, the ace of diamonds, which earns 
a side of players an extra point if they win it from the other side"], "topics": ["card-games", "games"], "glosses": ["In Doppelkopf, the ace of diamonds, which earns a side of players an extra point if they win it from the other side"], "tags": ["masculine", "strong"], "id": "Fuchs-de-noun-~QUh3ViV", "categories": [{"name": "Card games", "kind": "topical", "parents": ["Games", "Recreation", "Human activity", "Human behaviour", "Human", "All topics", "Fundamental"], "source": "w", "orig": "de:Card games", "langcode": "de"}]}, {"links": [["military", "military"]], "raw_glosses": ["(military) a tank Transportpanzer Fuchs"], "topics": ["government", "military", "politics", "war"], "glosses": ["a tank Transportpanzer Fuchs"], "tags": ["masculine", "strong"], "id": "Fuchs-de-noun-y6KoNKLo", "categories": [{"name": "Military", "kind": "topical", "parents": ["Society", "All topics", "Fundamental"], "source": "w", "orig": "de:Military", "langcode": "de"}, {"name": "Military vehicles", "kind": "topical", "parents": ["Military", "Vehicles", "Society", "Machines", "Transport", "All topics", "Technology", "Fundamental"], "source": "w", "orig": "de:Military vehicles", "langcode": "de"}], "hyponyms": [{"word": "Afghanfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Andenfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Azarafuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Bastard-Fuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Bengalfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Blassfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Blaufuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Brandfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Canafuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Chama-Fuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Culpeofuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Darwin-Fuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Eisfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Falklandfuchs", 
"_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Festland-Graufuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Feuerfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Feuerlandfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Graufuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Großohr-Kitfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Großohrfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Insel-Graufuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Jungfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Kama-Fuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Kapfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Kitfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Kurzohrfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Löffelfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Magellanfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Mauerfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Obstfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Pampasfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Polarfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Rotfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Rüppellfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Sandfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Savannenfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Schlaufuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Schneefuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Sechurafuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Sechuran-Fuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Seefuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Silberfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Silberrückenfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Sparfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Spefuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Steinfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Steppenfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": 
"Swift-Fuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Swiftfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Taktikfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Tibetfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Waldfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Weißfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Wüstenfuchs", "_dis1": "0 0 0 0 0 0 100 0 0 0"}], "derived": [{"word": "fuchsig", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "fuchsartig", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "fuchsen", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "fuchsteufelswild", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Füchsin", "_dis1": "0 0 0 0 0 0 100 0 0 0"}], "related": [{"word": "Fuchsammer", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Fuchsart", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Fuchsauge", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Fuchsbaby", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Fuchsbandwurm", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Fuchsbau", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Fuchsjagd", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Fuchsjunges", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Fuchsmännchen", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Fuchspelz", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "fuchsrot", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Fuchsrüde", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Fuchsschwanz", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Fuchsstute", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Fuchsweibchen", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Fuchswelpe", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Fuchswelpin", "_dis1": "0 0 0 0 0 0 100 0 0 0"}], "coordinate_terms": [{"word": "Kojote", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Hund", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Schakal", "_dis1": "0 0 0 0 0 0 100 0 0 0"}, {"word": "Wolf", "_dis1": "0 0 0 0 0 0 100 0 0 0"}]}, {"links": [["sunscald", "sunscald"], ["hop", "hop"]], 
"synonyms": [{"word": "Röte"}, {"word": "rote Lohe"}, {"word": "Sommerbrand"}], "raw_glosses": ["(archaic) A form of sunscald on hops."], "glosses": ["A form of sunscald on hops."], "tags": ["archaic", "masculine", "strong"], "id": "Fuchs-de-noun-5UnS0Uxp", "categories": [{"name": "Plant diseases", "kind": "topical", "parents": ["Diseases", "Phytopathology", "Disease", "Botany", "Pathology", "Health", "Biology", "Medicine", "Body", "Sciences", "Human", "All topics", "Fundamental"], "source": "w", "orig": "de:Plant diseases", "langcode": "de"}]}, {"links": [["fox", "fox"], ["radiosport", "radiosport"], ["foxhunt", "foxhunt"]], "glosses": ["a fox in radiosport foxhunt"], "tags": ["masculine", "strong"], "id": "Fuchs-de-noun-g64kC0Gx"}, {"links": [["gold", "gold"], ["coin", "coin"]], "raw_glosses": ["(obsolete) any gold coin"], "glosses": ["any gold coin"], "tags": ["masculine", "obsolete", "strong"], "id": "Fuchs-de-noun-cpfceUVA", "categories": [{"name": "Coins", "kind": "topical", "parents": ["Money", "Business", "Economics", "Society", "Social sciences", "All topics", "Sciences", "Fundamental"], "source": "w", "orig": "de:Coins", "langcode": "de"}]}]} {"pos": "noun", "head_templates": [{"name": "de-noun", "args": {"1": "n,ens:es􂀿very rare􂁀.weak.dat:en:-", "dim": "chen,lein,ken􂀿(also) Ruhrpöttisch􂁀"}, "expansion": "Herz n (weak, genitive Herzens or (very rare) Herzes, plural Herzen, diminutive Herzchen n or Herzlein n or ((also) Ruhrpöttisch) Herzken n)"}], "forms": [{"form": "Ruhrpöttisch", "tags": ["also"]}, {"form": "weak", "source": "declension", "tags": ["table-tags"]}, {"form": "de-ndecl", "source": "declension", "tags": ["inflection-template"]}, {"form": "Herz", "tags": ["nominative", "singular"], "source": "declension"}, {"form": "Herzen", "tags": ["definite", "nominative", "plural"], "source": "declension"}, {"form": "Herzens", "tags": ["genitive", "singular"], "source": "declension"}, {"form": "Herzes", "tags": ["genitive", "singular"], "source": 
"declension"}, {"form": "Herzen", "tags": ["definite", "genitive", "plural"], "source": "declension"}, {"form": "Herzen", "tags": ["dative", "singular"], "source": "declension"}, {"form": "Herz", "tags": ["dative", "singular"], "source": "declension"}, {"form": "Herzen", "tags": ["dative", "definite", "plural"], "source": "declension"}, {"form": "Herz", "tags": ["accusative", "singular"], "source": "declension"}, {"form": "Herzen", "tags": ["accusative", "definite", "plural"], "source": "declension"}], "inflection_templates": [{"name": "de-ndecl", "args": {"1": "n,ens:es􂀿very rare􂁀.weak.dat:en:-􂀿as a card suit or term of endearment, otherwise informal􂁀"}}], "descendants": [{"depth": 1, "templates": [{"name": "desc", "args": {"1": "mk", "2": "херц", "bor": "1", "g": "m"}, "expansion": "→ Macedonian: херц m (herc)"}], "text": "→ Macedonian: херц m (herc)"}, {"depth": 1, "templates": [{"name": "desc", "args": {"1": "sh", "2": "-", "bor": "1"}, "expansion": "→ Serbo-Croatian:"}], "text": "→ Serbo-Croatian:"}, {"depth": 1, "templates": [{"name": "desc", "args": {"1": "sh", "2": "хе̏рц", "sclb": "1", "g": "m"}, "expansion": "Cyrillic script: хе̏рц m"}], "text": "Cyrillic script: хе̏рц m"}, {"depth": 1, "templates": [{"name": "desc", "args": {"1": "sh", "2": "hȅrc", "sclb": "1", "g": "m"}, "expansion": "Latin script: hȅrc m"}], "text": "Latin script: hȅrc m"}], "sounds": [{"ipa": "/hɛrts/"}, {"ipa": "[hɛʁt͡s]"}, {"ipa": "[hɛɐ̯t͡s]"}, {"homophone": "Hertz"}, {"audio": "De-Herz.ogg", "ogg_url": "https://upload.wikimedia.org/wikipedia/commons/5/56/De-Herz.ogg", "mp3_url": "https://upload.wikimedia.org/wikipedia/commons/transcoded/5/56/De-Herz.ogg/De-Herz.ogg.mp3"}, {"audio": "De-at-Herz.ogg", "ogg_url": "https://upload.wikimedia.org/wikipedia/commons/a/ae/De-at-Herz.ogg", "mp3_url": "https://upload.wikimedia.org/wikipedia/commons/transcoded/a/ae/De-at-Herz.ogg/De-at-Herz.ogg.mp3"}], "etymology_text": "From Middle High German herze, from Old High German herza, from Proto-West 
Germanic *hertā, from Proto-Germanic *hertô (“heart”), from Proto-Indo-European *ḱḗr (“heart”).\nCognate with Dutch hart, English heart, Danish hjerte, Gothic 𐌷𐌰𐌹𐍂𐍄𐍉 (hairtō).", "etymology_templates": [{"name": "inh", "args": {"1": "de", "2": "gmh", "3": "herze"}, "expansion": "Middle High German herze"}, {"name": "inh", "args": {"1": "de", "2": "goh", "3": "herza"}, "expansion": "Old High German herza"}, {"name": "inh", "args": {"1": "de", "2": "gmw-pro", "3": "*hertā"}, "expansion": "Proto-West Germanic *hertā"}, {"name": "inh", "args": {"1": "de", "2": "gem-pro", "3": "*hertô", "4": "", "5": "heart"}, "expansion": "Proto-Germanic *hertô (“heart”)"}, {"name": "der", "args": {"1": "de", "2": "ine-pro", "3": "*ḱḗr", "4": "", "5": "heart"}, "expansion": "Proto-Indo-European *ḱḗr (“heart”)"}, {"name": "cog", "args": {"1": "nl", "2": "hart"}, "expansion": "Dutch hart"}, {"name": "cog", "args": {"1": "en", "2": "heart"}, "expansion": "English heart"}, {"name": "cog", "args": {"1": "da", "2": "hjerte"}, "expansion": "Danish hjerte"}, {"name": "cog", "args": {"1": "got", "2": "𐌷𐌰𐌹𐍂𐍄𐍉"}, "expansion": "Gothic 𐌷𐌰𐌹𐍂𐍄𐍉 (hairtō)"}], "word": "Herz", "lang": "German", "lang_code": "de", "synonyms": [{"word": "Herze", "_dis1": "0 0 0"}, {"tags": ["obsolete"], "word": "Hertz", "_dis1": "0 0 0"}], "derived": [{"word": "jemandem am Herzen liegen", "_dis1": "0 0 0"}, {"word": "Athletenherz", "_dis1": "0 0 0"}, {"word": "beherzt", "_dis1": "0 0 0"}, {"word": "Bruderherz", "_dis1": "0 0 0"}, {"word": "Hand aufs Herz", "_dis1": "0 0 0"}, {"word": "Herz-Jesu-Bild", "_dis1": "0 0 0"}, {"word": "herzallerliebst", "_dis1": "0 0 0"}, {"word": "herzbewegend", "_dis1": "0 0 0"}, {"word": "Herzblatt", "_dis1": "0 0 0"}, {"word": "Herzblut", "_dis1": "0 0 0"}, {"word": "Herzbube", "_dis1": "0 0 0"}, {"word": "Herzchen", "_dis1": "0 0 0"}, {"word": "Herzchirurgie", "_dis1": "0 0 0"}, {"word": "Herzdame", "_dis1": "0 0 0"}, {"word": "herzhaft", "_dis1": "0 0 0"}, {"word": "herzig", "_dis1": "0 0 
0"}, {"tags": ["masculine"], "english": "heart attack", "word": "Herzinfarkt", "_dis1": "0 0 0"}, {"word": "Herzinsuffizienz", "_dis1": "0 0 0"}, {"word": "Herzkammer", "_dis1": "0 0 0"}, {"word": "Herzklabaster", "_dis1": "0 0 0"}, {"word": "Herzklaps", "_dis1": "0 0 0"}, {"word": "Herzkönig", "_dis1": "0 0 0"}, {"word": "Herzkrankheit", "_dis1": "0 0 0"}, {"word": "herzlich", "_dis1": "0 0 0"}, {"word": "herzlos", "_dis1": "0 0 0"}, {"word": "Herzrhythmusstörung", "_dis1": "0 0 0"}, {"word": "Herzschild", "_dis1": "0 0 0"}, {"word": "Herzschlag", "_dis1": "0 0 0"}, {"word": "Herzstillstand", "_dis1": "0 0 0"}, {"word": "Herzton", "_dis1": "0 0 0"}, {"word": "Herzversagen", "_dis1": "0 0 0"}, {"word": "kaltherzig", "_dis1": "0 0 0"}, {"word": "Schwesterherz", "_dis1": "0 0 0"}, {"word": "Sportherz", "_dis1": "0 0 0"}, {"word": "Sportlerherz", "_dis1": "0 0 0"}, {"english": "wholeheartedly", "word": "von ganzem Herzen", "_dis1": "0 0 0"}, {"english": "to take to heart", "word": "zu Herzen nehmen", "_dis1": "0 0 0"}], "related": [{"word": "Kardia", "_dis1": "0 0 0"}, {"word": "Karo", "_dis1": "0 0 0"}, {"word": "Pik", "_dis1": "0 0 0"}, {"word": "Schippe", "_dis1": "0 0 0"}, {"word": "Kreuz", "_dis1": "0 0 0"}, {"word": "Treff", "_dis1": "0 0 0"}, {"word": "Rot", "_dis1": "0 0 0"}, {"word": "Schellen", "_dis1": "0 0 0"}, {"word": "Laub", "_dis1": "0 0 0"}, {"word": "Grün", "_dis1": "0 0 0"}, {"word": "Eichel", "_dis1": "0 0 0"}, {"word": "Eckern", "_dis1": "0 0 0"}], "senses": [{"links": [["heart", "heart"]], "glosses": ["heart"], "tags": ["also", "neuter", "rare"], "id": "en-Herz-de-noun-PLloqYII"}, {"links": [["card game", "card game"], ["hearts", "hearts"]], "raw_glosses": ["(card games) hearts"], "topics": ["card-games", "games"], "glosses": ["hearts"], "tags": ["also", "neuter", "rare"], "id": "en-Herz-de-noun-ybX83RAD", "categories": [{"name": "Card games", "kind": "topical", "parents": ["Games", "Recreation", "Human activity", "Human behaviour", "Human", "All 
topics", "Fundamental"], "source": "w", "orig": "de:Card games", "langcode": "de"}, {"name": "German entries with incorrect language header", "kind": "other", "parents": ["Entries with incorrect language header", "Entry maintenance"], "source": "w+disamb", "_dis": "5 82 13"}, {"name": "Card games", "kind": "topical", "parents": ["Games", "Recreation", "Human activity", "Human behaviour", "Human", "All topics", "Fundamental"], "source": "w+disamb", "orig": "de:Card games", "langcode": "de", "_dis": "2 93 5"}, {"name": "Organs", "kind": "topical", "parents": ["Body parts", "Body", "Anatomy", "Human", "Biology", "Medicine", "All topics", "Sciences", "Fundamental"], "source": "w+disamb", "orig": "de:Organs", "langcode": "de", "_dis": "5 70 25"}]}, {"links": [["sweetheart", "sweetheart"], ["darling", "darling"]], "glosses": ["sweetheart, darling"], "tags": ["also", "neuter", "rare"], "id": "en-Herz-de-noun-oOGg0s0R"}]} {"pos": "noun", "head_templates": [{"name": "de-noun", "args": {"1": "m", "f": "in"}, "expansion": "Fahrer m (strong, genitive Fahrers, plural Fahrer, feminine Fahrerin)"}], "forms": [{"form": "Fahrers", "tags": ["genitive"]}, {"form": "Fahrer", "tags": ["plural"]}, {"form": "Fahrerin", "tags": ["feminine"]}, {"form": "strong", "source": "declension", "tags": ["table-tags"]}, {"form": "de-ndecl", "source": "declension", "tags": ["inflection-template"]}, {"form": "Fahrer", "tags": ["nominative", "singular"], "source": "declension"}, {"form": "Fahrer", "tags": ["definite", "nominative", "plural"], "source": "declension"}, {"form": "Fahrers", "tags": ["genitive", "singular"], "source": "declension"}, {"form": "Fahrer", "tags": ["definite", "genitive", "plural"], "source": "declension"}, {"form": "Fahrer", "tags": ["dative", "singular"], "source": "declension"}, {"form": "Fahrern", "tags": ["dative", "definite", "plural"], "source": "declension"}, {"form": "Fahrer", "tags": ["accusative", "singular"], "source": "declension"}, {"form": "Fahrer", "tags": 
["accusative", "definite", "plural"], "source": "declension"}], "inflection_templates": [{"name": "de-ndecl", "args": {"1": "m"}}], "sounds": [{"ipa": "/ˈfaːʁɐ/"}, {"rhymes": "-aːʁɐ"}, {"audio": "De-at-Fahrer.ogg", "ogg_url": "https://upload.wikimedia.org/wikipedia/commons/a/aa/De-at-Fahrer.ogg", "mp3_url": "https://upload.wikimedia.org/wikipedia/commons/transcoded/a/aa/De-at-Fahrer.ogg/De-at-Fahrer.ogg.mp3"}, {"audio": "De-Fahrer.ogg", "ogg_url": "https://upload.wikimedia.org/wikipedia/commons/8/8a/De-Fahrer.ogg", "mp3_url": "https://upload.wikimedia.org/wikipedia/commons/transcoded/8/8a/De-Fahrer.ogg/De-Fahrer.ogg.mp3"}], "etymology_text": "fahren (“to drive”) + -er", "etymology_templates": [{"name": "suffix", "args": {"1": "de", "2": "fahren", "3": "er", "gloss1": "to drive"}, "expansion": "fahren (“to drive”) + -er"}], "word": "Fahrer", "lang": "German", "lang_code": "de", "senses": [{"links": [["fahren", "fahren#German"], ["driver", "driver"]], "glosses": ["agent noun of fahren; driver (person)"], "tags": ["agent", "form-of", "masculine", "strong"], "form_of": [{"word": "fahren", "extra": "driver (person)"}], "id": "en-Fahrer-de-noun-yy2lWCGM", "categories": [{"name": "German entries with incorrect language header", "kind": "other", "parents": ["Entries with incorrect language header", "Entry maintenance"], "source": "w"}, {"name": "German terms suffixed with -er", "kind": "other", "parents": [], "source": "w"}], "derived": [{"word": "Autofahrer"}, {"word": "Bahnfahrer"}, {"word": "Busfahrer"}, {"word": "Mondfahrer"}, {"word": "Raumfahrer"}, {"word": "Sonntagsfahrer"}], "related": [{"word": "fahren"}]}]} +{"pos": "prep", "head_templates": [{"name": "head", "args": {"1": "de", "2": "preposition"}, "expansion": "von"}], "forms": [{"form": "von [with dative]", "tags": ["canonical"]}], "sounds": [{"ipa": "/fɔn/"}, {"ipa": "[fɔn]"}, {"audio": "De-von.ogg", "ogg_url": "https://upload.wikimedia.org/wikipedia/commons/b/ba/De-von.ogg", "mp3_url": 
"https://upload.wikimedia.org/wikipedia/commons/transcoded/b/ba/De-von.ogg/De-von.ogg.mp3"}], "etymology_text": "From Middle High German von (“from”), from Old High German fon, fona (“from”), from Proto-Germanic *afanē, *fanē, *funē (“from”), compound of *afa (from Proto-Indo-European *h₂epó (“from, off”)) + *ana (from Proto-Indo-European *h₂en- (“on”)). Cognate with Old Saxon fana, fan (“from”), Dutch van (“from; of”), Old Frisian fon (“from”).", "etymology_templates": [{"name": "inh", "args": {"1": "de", "2": "gmh", "3": "von", "4": "", "5": "from"}, "expansion": "Middle High German von (“from”)"}, {"name": "inh", "args": {"1": "de", "2": "goh", "3": "fon"}, "expansion": "Old High German fon"}, {"name": "inh", "args": {"1": "de", "2": "gem-pro", "3": "*afanē"}, "expansion": "Proto-Germanic *afanē"}, {"name": "der", "args": {"1": "de", "2": "ine-pro", "3": "*h₂epó", "4": "", "5": "from, off"}, "expansion": "Proto-Indo-European *h₂epó (“from, off”)"}, {"name": "der", "args": {"1": "de", "2": "ine-pro", "3": "*h₂en-", "4": "", "5": "on"}, "expansion": "Proto-Indo-European *h₂en- (“on”)"}, {"name": "cog", "args": {"1": "osx", "2": "fana"}, "expansion": "Old Saxon fana"}, {"name": "cog", "args": {"1": "nl", "2": "van", "3": "", "4": "from; of"}, "expansion": "Dutch van (“from; of”)"}, {"name": "cog", "args": {"1": "ofs", "2": "fon", "3": "", "4": "from"}, "expansion": "Old Frisian fon (“from”)"}], "word": "von", "lang": "German", "lang_code": "de", "synonyms": [{"tags": ["abbreviation"], "word": "v.", "_dis1": "0 0 0 0 0"}], "derived": [{"word": "vom", "_dis1": "0 0 0 0 0"}, {"word": "wovon", "_dis1": "0 0 0 0 0"}, {"word": "davon", "_dis1": "0 0 0 0 0"}, {"word": "hiervon", "_dis1": "0 0 0 0 0"}, {"word": "voneinander", "_dis1": "0 0 0 0 0"}], "related": [{"word": "-s", "_dis1": "0 0 0 0 0"}], "senses": [{"examples": [{"text": "Ich fahre von Köln nach Hamburg.", "english": "I'm travelling from Cologne to Hamburg.", "type": "example"}, {"text": "Ich hab’s von meiner 
Schwester gehört.", "english": "I heard it from my sister.", "type": "example"}], "links": [["from", "from"]], "glosses": ["from"], "id": "en-von-de-prep-dYV6RYmZ", "categories": []}, {"examples": [{"text": "das Auto meines Vaters = das Auto von meinem Vater", "english": "my father’s car / the car of my father", "type": "example"}], "links": [["of", "of"]], "glosses": ["of, belonging to (often replacing genitive; see usage note below)"], "id": "en-von-de-prep-dhi3s2Ke", "categories": [{"name": "German usage examples with omitted translation", "kind": "other", "parents": ["Usage examples with omitted translation", "Entry maintenance"], "source": "w"}, {"name": "German entries with incorrect language header", "kind": "other", "parents": ["Entries with incorrect language header", "Entry maintenance"], "source": "w+disamb", "_dis": "13 67 0 4 16"}, {"name": "German prepositions", "kind": "other", "parents": [], "source": "w+disamb", "_dis": "19 30 14 16 20"}]}, {"examples": [{"text": "Das Hotel wird von der Firma bezahlt.", "english": "The hotel is paid for by the company.", "type": "example"}], "links": [["by", "by"]], "synonyms": [{"word": "durch"}], "glosses": ["by (with passive voice)"], "id": "en-von-de-prep-CQir5ilW", "categories": []}, {"examples": [{"text": "Er hat von seiner Jugend erzählt.", "english": "He told about his youth.", "type": "example"}, {"text": "Von dem Nomine Substantivo, oder dem Hauptworte.", "ref": "1796, Abraham Sahlstedt, Schwedische Grammatik nach dem Sprachgebrauch unserer Zeiten, Lübeck & Leipzig, page 259", "english": "About the substantive noun, or the [alternative term]. 
(headline)", "type": "quotation"}], "links": [["about", "about"], ["of", "of"]], "glosses": ["about, of (a topic)"], "id": "en-von-de-prep-s-HPCjrD", "categories": []}, {"examples": [{"text": "Von welchem Geld soll ich als Arbeitsloser in Urlaub fahren?", "english": "Being unemployed, on what money should I go on holidays?", "type": "example"}, {"text": "Man kann nicht nur von Luft und Liebe leben.", "english": "You can’t live on air and love alone. (proverb)", "type": "example"}], "links": [["on", "on"], ["with", "with"]], "glosses": ["on, with (a resource)"], "id": "en-von-de-prep-QAf~aDYH", "categories": []}]} diff --git a/data/test/tidy/de-en-lemmas.json b/data/test/tidy/de-en-lemmas.json index 9c53bb8..27e4383 100644 --- a/data/test/tidy/de-en-lemmas.json +++ b/data/test/tidy/de-en-lemmas.json @@ -442,5 +442,53 @@ ] } } + }, + "von": { + "von": { + "prep": { + "ipa": [ + { + "ipa": "/fɔn/", + "tags": [] + }, + { + "ipa": "[fɔn]", + "tags": [] + } + ], + "senses": [ + { + "glosses": [ + "from" + ], + "tags": [] + }, + { + "glosses": [ + "of, belonging to (often replacing genitive; see usage note below)" + ], + "tags": [] + }, + { + "glosses": [ + "by (with passive voice)" + ], + "tags": [] + }, + { + "glosses": [ + "about, of (a topic)" + ], + "tags": [] + }, + { + "glosses": [ + "on, with (a resource)" + ], + "tags": [] + } + ] + } + } } } \ No newline at end of file