diff --git a/src/wiktextract/extractor/de/linkage.py b/src/wiktextract/extractor/de/linkage.py
index 5805c35e..40d1d924 100644
--- a/src/wiktextract/extractor/de/linkage.py
+++ b/src/wiktextract/extractor/de/linkage.py
@@ -2,7 +2,8 @@
from wikitextprocessor import NodeKind, WikiNode
from wikitextprocessor.parser import LevelNode
-from wiktextract.extractor.de.models import WordEntry
+
+from wiktextract.extractor.de.models import Linkage, WordEntry
from wiktextract.extractor.share import split_senseids
from wiktextract.page import clean_node
from wiktextract.wxr_context import WiktextractContext
@@ -25,7 +26,7 @@ def extract_linkages(
)
# Extract links
- linkages: list[str] = []
+ linkages: list[Linkage] = []
if linkage_type == "expressions":
for child in list_item.children:
if isinstance(child, str) and contains_dash(child):
@@ -90,12 +91,12 @@ def extract_linkages(
def process_link(
- wxr: WiktextractContext, semantic_links: list[str], link: WikiNode
+ wxr: WiktextractContext, semantic_links: list[Linkage], link: WikiNode
):
clean_link = clean_node(wxr, {}, link)
if clean_link.startswith("Verzeichnis:"):
return
- semantic_links.append(clean_link)
+ semantic_links.append(Linkage(word=clean_link))
def contains_dash(text: str):
diff --git a/src/wiktextract/extractor/de/models.py b/src/wiktextract/extractor/de/models.py
index 137442b3..f6ecaa6b 100644
--- a/src/wiktextract/extractor/de/models.py
+++ b/src/wiktextract/extractor/de/models.py
@@ -7,6 +7,10 @@ class BaseModelWrap(BaseModel):
model_config = ConfigDict(validate_assignment=True, extra="forbid")
+class Linkage(BaseModelWrap):
+ word: str
+
+
class Translation(BaseModelWrap):
sense: Optional[str] = Field(
default=None, description="A gloss of the sense being translated"
@@ -16,7 +20,7 @@ class Translation(BaseModelWrap):
default=None,
description="Wiktionary language code of the translation term",
)
- lang_name: Optional[str] = Field(
+ lang: Optional[str] = Field(
default=None, description="Localized language name"
)
uncertain: Optional[bool] = Field(
@@ -120,15 +124,15 @@ class Sense(BaseModelWrap):
default=None, description="Sense number used in Wiktionary"
)
translations: Optional[list[Translation]] = []
- antonyms: Optional[list[str]] = []
- derived: Optional[list[str]] = []
- hyponyms: Optional[list[str]] = []
- hypernyms: Optional[list[str]] = []
- holonyms: Optional[list[str]] = []
- expressions: Optional[list[str]] = []
- coordinate_terms: Optional[list[str]] = []
- proverbs: Optional[list[str]] = []
- synonyms: Optional[list[str]] = []
+ antonyms: Optional[list[Linkage]] = []
+ derived: Optional[list[Linkage]] = []
+ hyponyms: Optional[list[Linkage]] = []
+ hypernyms: Optional[list[Linkage]] = []
+ holonyms: Optional[list[Linkage]] = []
+ expressions: Optional[list[Linkage]] = []
+ coordinate_terms: Optional[list[Linkage]] = []
+ proverbs: Optional[list[Linkage]] = []
+ synonyms: Optional[list[Linkage]] = []
class Sound(BaseModelWrap):
@@ -147,9 +151,7 @@ class Sound(BaseModelWrap):
lang_code: list[str] = Field(
default=[], description="Wiktionary language code"
)
- lang_name: list[str] = Field(
- default=[], description="Localized language name"
- )
+ lang: list[str] = Field(default=[], description="Localized language name")
# roman: list[str] = Field(
# default=[], description="Translitaration to Roman characters"
# )
@@ -175,7 +177,7 @@ class WordEntry(BaseModelWrap):
lang_code: str = Field(
description="Wiktionary language code", examples=["es"]
)
- lang_name: str = Field(
+ lang: str = Field(
description="Localized language name of the word", examples=["español"]
)
senses: Optional[list[Sense]] = []
@@ -185,12 +187,12 @@ class WordEntry(BaseModelWrap):
# )
translations: Optional[list[Translation]] = []
sounds: Optional[list[Sound]] = []
- antonyms: Optional[list[str]] = []
- derived: Optional[list[str]] = []
- hyponyms: Optional[list[str]] = []
- hypernyms: Optional[list[str]] = []
- holonyms: Optional[list[str]] = []
- expressions: Optional[list[str]] = []
- coordinate_terms: Optional[list[str]] = []
- proverbs: Optional[list[str]] = []
- synonyms: Optional[list[str]] = []
+ antonyms: Optional[list[Linkage]] = []
+ derived: Optional[list[Linkage]] = []
+ hyponyms: Optional[list[Linkage]] = []
+ hypernyms: Optional[list[Linkage]] = []
+ holonyms: Optional[list[Linkage]] = []
+ expressions: Optional[list[Linkage]] = []
+ coordinate_terms: Optional[list[Linkage]] = []
+ proverbs: Optional[list[Linkage]] = []
+ synonyms: Optional[list[Linkage]] = []
diff --git a/src/wiktextract/extractor/de/page.py b/src/wiktextract/extractor/de/page.py
index 52993e1c..eda76540 100644
--- a/src/wiktextract/extractor/de/page.py
+++ b/src/wiktextract/extractor/de/page.py
@@ -272,15 +272,15 @@ def parse_page(
for level2_node in tree.find_child(NodeKind.LEVEL2):
for subtitle_template in level2_node.find_content(NodeKind.TEMPLATE):
# The language sections are marked with
- # == <title> ({{Sprache|<lang name>}}) ==
- # where <title> is the title of the page and <lang name> is the
+ # == <title> ({{Sprache|<lang>}}) ==
+ # where <title> is the title of the page and <lang> is the
# German name of the language of the section.
if subtitle_template.template_name == "Sprache":
- lang_name = subtitle_template.template_parameters.get(1)
- lang_code = name_to_code(lang_name, "de")
+ lang = subtitle_template.template_parameters.get(1)
+ lang_code = name_to_code(lang, "de")
if lang_code == "":
wxr.wtp.warning(
- f"Unknown language: {lang_name}",
+ f"Unknown language: {lang}",
sortid="extractor/de/page/parse_page/76",
)
if (
@@ -290,7 +290,7 @@ def parse_page(
continue
base_data = WordEntry(
- lang_name=lang_name, lang_code=lang_code, word=wxr.wtp.title
+ lang=lang, lang_code=lang_code, word=wxr.wtp.title
)
parse_section(wxr, page_data, base_data, level2_node.children)
diff --git a/src/wiktextract/extractor/de/pronunciation.py b/src/wiktextract/extractor/de/pronunciation.py
index 545d016f..43563fa2 100644
--- a/src/wiktextract/extractor/de/pronunciation.py
+++ b/src/wiktextract/extractor/de/pronunciation.py
@@ -99,14 +99,14 @@ def process_lautschrift_template(
lang_code = template_parameters.get("spr")
if lang_code:
- lang_name = code_to_name(lang_code, "de")
+ lang = code_to_name(lang_code, "de")
add_sound_data_without_appending_to_existing_properties(
wxr,
sound_data,
{
"ipa": [ipa],
"lang_code": lang_code,
- "lang_name": lang_name,
+ "lang": lang,
},
)
else:
diff --git a/src/wiktextract/extractor/de/translation.py b/src/wiktextract/extractor/de/translation.py
index 4f9c744b..f2dcfc43 100644
--- a/src/wiktextract/extractor/de/translation.py
+++ b/src/wiktextract/extractor/de/translation.py
@@ -103,10 +103,10 @@ def process_translation_list(
lang_code = node.template_parameters.get(1)
translation_data.lang_code = lang_code
- translation_data.lang_name = code_to_name(lang_code, "de")
- if translation_data.lang_name == "":
+ translation_data.lang = code_to_name(lang_code, "de")
+ if translation_data.lang == "":
wxr.wtp.debug(
- f"Unknown language code: {translation_data.lang_name}",
+ f"Unknown language code: {lang_code}",
sortid="extractor/de/translation/process_translation_list/70",
)
if node.template_name[-1] == "?":
diff --git a/src/wiktextract/extractor/es/gloss.py b/src/wiktextract/extractor/es/gloss.py
index 47924a61..b8bb567c 100644
--- a/src/wiktextract/extractor/es/gloss.py
+++ b/src/wiktextract/extractor/es/gloss.py
@@ -2,6 +2,7 @@
from wikitextprocessor import NodeKind, WikiNode
from wikitextprocessor.parser import WikiNodeChildrenList
+
from wiktextract.extractor.es.models import Sense, WordEntry
from wiktextract.extractor.es.sense_data import process_sense_data_list
from wiktextract.page import clean_node
@@ -38,7 +39,7 @@ def extract_gloss(
match = re.match(r"^(\d+)", gloss_note)
if match:
- gloss_data.senseid = int(match.group(1))
+ gloss_data.senseid = match.group(1)
tag_string = gloss_note[len(match.group(1)) :].strip()
else:
tag_string = gloss_note.strip()
diff --git a/src/wiktextract/extractor/es/models.py b/src/wiktextract/extractor/es/models.py
index 54c7e261..77ed1026 100644
--- a/src/wiktextract/extractor/es/models.py
+++ b/src/wiktextract/extractor/es/models.py
@@ -93,7 +93,7 @@ class Sense(BaseModelWrap):
# subsenses: list["Sense"] = Field(
# default=[], description="List of subsenses"
# )
- senseid: Optional[int] = Field(
+ senseid: Optional[str] = Field(
default=None, description="Sense number used in Wiktionary"
)
antonyms: Optional[list[Linkage]] = []
@@ -156,7 +156,7 @@ class WordEntry(BaseModelWrap):
lang_code: str = Field(
description="Wiktionary language code", examples=["es"]
)
- lang_name: str = Field(
+ lang: str = Field(
description="Localized language name of the word", examples=["español"]
)
senses: Optional[list[Sense]] = []
diff --git a/src/wiktextract/extractor/es/page.py b/src/wiktextract/extractor/es/page.py
index 14d4a4f9..6602cc38 100644
--- a/src/wiktextract/extractor/es/page.py
+++ b/src/wiktextract/extractor/es/page.py
@@ -4,6 +4,7 @@
from wikitextprocessor import NodeKind, WikiNode
from wikitextprocessor.parser import WikiNodeChildrenList
+
from wiktextract.extractor.es.etymology import process_etymology_block
from wiktextract.extractor.es.example import extract_example
from wiktextract.extractor.es.gloss import extract_gloss
@@ -368,10 +369,10 @@ def parse_page(
):
continue
- lang_name = clean_node(wxr, categories, subtitle_template)
- wxr.wtp.start_section(lang_name)
+ lang = clean_node(wxr, categories, subtitle_template)
+ wxr.wtp.start_section(lang)
base_data = WordEntry(
- lang_name=lang_name, lang_code=lang_code, word=wxr.wtp.title
+ lang=lang, lang_code=lang_code, word=wxr.wtp.title
)
base_data.categories.extend(categories["categories"])
parse_entries(wxr, page_data, base_data, level2_node)
diff --git a/src/wiktextract/extractor/ru/models.py b/src/wiktextract/extractor/ru/models.py
index 07e7aab7..0f431fdd 100644
--- a/src/wiktextract/extractor/ru/models.py
+++ b/src/wiktextract/extractor/ru/models.py
@@ -12,7 +12,7 @@ class Translation(BaseModelWrap):
lang_code: str = Field(
description="Wiktionary language code of the translation term"
)
- lang_name: str = Field(
+ lang: str = Field(
description="Localized language name of the translation term"
)
sense: Optional[str] = Field(
@@ -112,7 +112,7 @@ class WordEntry(BaseModelWrap):
lang_code: str = Field(
description="Wiktionary language code", examples=["ru"]
)
- lang_name: str = Field(
+ lang: str = Field(
description="Localized language name of the word", examples=["Русский"]
)
categories: list[str] = Field(
diff --git a/src/wiktextract/extractor/ru/page.py b/src/wiktextract/extractor/ru/page.py
index c63567c1..de225dda 100644
--- a/src/wiktextract/extractor/ru/page.py
+++ b/src/wiktextract/extractor/ru/page.py
@@ -202,11 +202,11 @@ def parse_page(
categories = {"categories": []}
- lang_name = clean_node(wxr, categories, subtitle_template)
- wxr.wtp.start_section(lang_name)
+ lang = clean_node(wxr, categories, subtitle_template)
+ wxr.wtp.start_section(lang)
base_data = WordEntry(
- lang_name=lang_name, lang_code=lang_code, word=wxr.wtp.title
+ lang=lang, lang_code=lang_code, word=wxr.wtp.title
)
base_data.categories.extend(categories["categories"])
diff --git a/src/wiktextract/extractor/ru/translation.py b/src/wiktextract/extractor/ru/translation.py
index e1f1a0b0..15c9c2b7 100644
--- a/src/wiktextract/extractor/ru/translation.py
+++ b/src/wiktextract/extractor/ru/translation.py
@@ -20,7 +20,7 @@ def extract_translations(
for key, raw_value in template_node.template_parameters.items():
if isinstance(key, str):
lang_code = key
- lang_name = code_to_name(lang_code, "ru")
+ lang = code_to_name(lang_code, "ru")
for value_node in (
raw_value
@@ -36,7 +36,7 @@ def extract_translations(
word_entry.translations.append(
Translation(
lang_code=lang_code,
- lang_name=lang_name,
+ lang=lang,
word=word,
sense=sense if sense else None,
)
diff --git a/tests/test_de_example.py b/tests/test_de_example.py
index 29a903f6..a95667f0 100644
--- a/tests/test_de_example.py
+++ b/tests/test_de_example.py
@@ -20,7 +20,7 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()
def get_default_page_data(self) -> list[WordEntry]:
- return [WordEntry(word="Beispiel", lang_code="de", lang_name="Deutsch")]
+ return [WordEntry(word="Beispiel", lang_code="de", lang="Deutsch")]
def test_de_extract_examples(self):
self.wxr.wtp.start_page("")
diff --git a/tests/test_de_gloss.py b/tests/test_de_gloss.py
index a79f8ee9..6eeb264f 100644
--- a/tests/test_de_gloss.py
+++ b/tests/test_de_gloss.py
@@ -28,7 +28,7 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()
def get_default_word_entry(self):
- return WordEntry(lang_code="de", lang_name="Deutsch", word="Beispiel")
+ return WordEntry(lang_code="de", lang="Deutsch", word="Beispiel")
def test_de_extract_glosses(self):
self.wxr.wtp.start_page("")
diff --git a/tests/test_de_linkages.py b/tests/test_de_linkages.py
index 70a73a31..7de21683 100644
--- a/tests/test_de_linkages.py
+++ b/tests/test_de_linkages.py
@@ -20,7 +20,7 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()
def get_default_word_entry(self) -> WordEntry:
- return WordEntry(word="Beispiel", lang_code="de", lang_name="Deutsch")
+ return WordEntry(word="Beispiel", lang_code="de", lang="Deutsch")
def test_de_extract_linkages(self):
test_cases = [
@@ -33,11 +33,17 @@ def test_de_extract_linkages(self):
"senses": [
{
"senseid": "1",
- "coordinate_terms": ["Beleg", "Exempel"],
+ "coordinate_terms": [
+ {"word": "Beleg"},
+ {"word": "Exempel"},
+ ],
},
{
"senseid": "2",
- "coordinate_terms": ["Muster", "Vorbild"],
+ "coordinate_terms": [
+ {"word": "Muster"},
+ {"word": "Vorbild"},
+ ],
},
]
},
@@ -50,7 +56,9 @@ def test_de_extract_linkages(self):
"expected": {
"senses": [
{
- "expressions": ["ein gutes Beispiel geben"],
+ "expressions": [
+ {"word": "ein gutes Beispiel geben"}
+ ],
}
]
},
@@ -60,7 +68,9 @@ def test_de_extract_linkages(self):
"input": "====Synonyme====\n:[[Synonym1]]",
"senses": [Sense(senseid="1")],
"expected": {
- "senses": [{"senseid": "1", "synonyms": ["Synonym1"]}],
+ "senses": [
+ {"senseid": "1", "synonyms": [{"word": "Synonym1"}]}
+ ],
},
},
# https://de.wiktionary.org/wiki/Kokospalme
@@ -73,8 +83,8 @@ def test_de_extract_linkages(self):
{
"senseid": "1",
"synonyms": [
- "Kokosnusspalme",
- "Cocos nucifera",
+ {"word": "Kokosnusspalme"},
+ {"word": "Cocos nucifera"},
],
}
],
@@ -95,7 +105,7 @@ def test_de_extract_linkages(self):
self.assertEqual(
word_entry.model_dump(
exclude_defaults=True,
- exclude={"word", "lang_code", "lang_name"},
+ exclude={"word", "lang_code", "lang"},
),
case["expected"],
)
diff --git a/tests/test_de_page.py b/tests/test_de_page.py
index 33f0e64f..77e11e7a 100644
--- a/tests/test_de_page.py
+++ b/tests/test_de_page.py
@@ -29,7 +29,7 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()
def get_default_base_data(self):
- return WordEntry(lang_code="de", lang_name="Deutsch", word="Beispiel")
+ return WordEntry(lang_code="de", lang="Deutsch", word="Beispiel")
def test_de_parse_page(self):
self.wxr.wtp.add_page("Vorlage:Sprache", 10, "")
@@ -45,7 +45,7 @@ def test_de_parse_page(self):
lst,
[
{
- "lang_name": "Deutsch",
+ "lang": "Deutsch",
"lang_code": "de",
"word": "Beispiel",
"pos": "noun",
@@ -71,7 +71,7 @@ def test_de_parse_page_skipping_head_templates(self):
lst,
[
{
- "lang_name": "Deutsch",
+ "lang": "Deutsch",
"lang_code": "de",
"word": "Beispiel",
"pos": "noun",
@@ -104,7 +104,7 @@ def test_de_parse_section(self):
{
"word": "Beispiel",
"lang_code": "de",
- "lang_name": "Deutsch",
+ "lang": "Deutsch",
"pos": "adj",
"senses": [
{
@@ -118,7 +118,7 @@ def test_de_parse_section(self):
"word": "Beispiel",
"lang_code": "de",
"pos": "adv",
- "lang_name": "Deutsch",
+ "lang": "Deutsch",
"senses": [
{
"glosses": ["gloss1"],
@@ -131,7 +131,7 @@ def test_de_parse_section(self):
"word": "Beispiel",
"lang_code": "de",
"pos": "verb",
- "lang_name": "Deutsch",
+ "lang": "Deutsch",
"senses": [
{
"glosses": ["gloss2"],
@@ -144,7 +144,7 @@ def test_de_parse_section(self):
"word": "Beispiel",
"lang_code": "de",
"pos": "noun",
- "lang_name": "Deutsch",
+ "lang": "Deutsch",
"senses": [
{
"glosses": ["gloss3"],
diff --git a/tests/test_de_pronunciation.py b/tests/test_de_pronunciation.py
index ccf288de..19047dc8 100644
--- a/tests/test_de_pronunciation.py
+++ b/tests/test_de_pronunciation.py
@@ -4,8 +4,10 @@
from wiktextract.config import WiktionaryConfig
from wiktextract.extractor.de.models import Sound
-from wiktextract.extractor.de.pronunciation import (process_hoerbeispiele,
- process_ipa)
+from wiktextract.extractor.de.pronunciation import (
+ process_hoerbeispiele,
+ process_ipa,
+)
from wiktextract.wxr_context import WiktextractContext
@@ -35,7 +37,7 @@ def test_de_process_ipa(self):
"expected": [
{
"ipa": ["ipa1"],
- "lang_name": ["Deutsch"],
+ "lang": ["Deutsch"],
"lang_code": ["de"],
}
],
@@ -46,7 +48,7 @@ def test_de_process_ipa(self):
{"ipa": ["ipa1", "ipa2"]},
{
"ipa": ["ipa3"],
- "lang_name": ["Deutsch"],
+ "lang": ["Deutsch"],
"lang_code": ["de"],
},
],
diff --git a/tests/test_de_translation.py b/tests/test_de_translation.py
index 5bd65ed3..21e20493 100644
--- a/tests/test_de_translation.py
+++ b/tests/test_de_translation.py
@@ -4,8 +4,10 @@
from wiktextract.config import WiktionaryConfig
from wiktextract.extractor.de.models import Sense, Translation, WordEntry
-from wiktextract.extractor.de.translation import (extract_translation,
- process_translation_list)
+from wiktextract.extractor.de.translation import (
+ extract_translation,
+ process_translation_list,
+)
from wiktextract.wxr_context import WiktextractContext
@@ -21,7 +23,7 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()
def get_default_word_entry(self):
- return WordEntry(word="Beispiel", lang_code="de", lang_name="Deutsch")
+ return WordEntry(word="Beispiel", lang_code="de", lang="Deutsch")
def test_de_extract_translation(self):
test_cases = [
@@ -37,7 +39,7 @@ def test_de_extract_translation(self):
{
"sense": "Beispiel",
"lang_code": "en",
- "lang_name": "Englisch",
+ "lang": "Englisch",
"word": "example",
}
],
@@ -59,7 +61,7 @@ def test_de_extract_translation(self):
{
"sense": "Beispiel",
"lang_code": "en",
- "lang_name": "Englisch",
+ "lang": "Englisch",
"word": "example",
}
],
@@ -79,7 +81,7 @@ def test_de_extract_translation(self):
{
"sense": "Beispiel",
"lang_code": "en",
- "lang_name": "Englisch",
+ "lang": "Englisch",
"word": "example",
}
],
@@ -100,7 +102,7 @@ def test_de_extract_translation(self):
self.assertEqual(
word_entry.model_dump(
exclude_defaults=True,
- exclude={"word", "lang_code", "lang_name"},
+ exclude={"word", "lang_code", "lang"},
),
case["expected"],
)
@@ -114,7 +116,7 @@ def test_de_process_translation_list(self):
"expected_sense_translations": [
{
"lang_code": "en",
- "lang_name": "Englisch",
+ "lang": "Englisch",
"word": "example",
}
],
@@ -126,7 +128,7 @@ def test_de_process_translation_list(self):
"expected_sense_translations": [
{
"lang_code": "hy",
- "lang_name": "Armenisch",
+ "lang": "Armenisch",
"word": "օրինակ",
"roman": "orinak",
}
@@ -140,7 +142,7 @@ def test_de_process_translation_list(self):
"expected_sense_translations": [
{
"lang_code": "ru",
- "lang_name": "Russisch",
+ "lang": "Russisch",
"word": "пример",
"roman": "primer",
}
@@ -154,7 +156,7 @@ def test_de_process_translation_list(self):
"expected_sense_translations": [
{
"lang_code": "ar",
- "lang_name": "Arabisch",
+ "lang": "Arabisch",
"word": "عريضة",
"uncertain": True,
}
@@ -201,12 +203,12 @@ def test_de_process_translation_list_with_modifiers(self):
"expected_sense_translations": [
{
"lang_code": "en",
- "lang_name": "Englisch",
+ "lang": "Englisch",
"word": "instance",
},
{
"lang_code": "en",
- "lang_name": "Englisch",
+ "lang": "Englisch",
"word": "model",
"tags": ["Vorbild"],
},
@@ -220,7 +222,7 @@ def test_de_process_translation_list_with_modifiers(self):
"expected_sense_translations": [
{
"lang_code": "fr",
- "lang_name": "Französisch",
+ "lang": "Französisch",
"word": "exemple",
"tags": ["m"],
}
@@ -234,19 +236,19 @@ def test_de_process_translation_list_with_modifiers(self):
"expected_sense_translations": [
{
"lang_code": "la",
- "lang_name": "Latein",
+ "lang": "Latein",
"word": "crus",
"tags": ["f"],
},
{
"lang_code": "la",
- "lang_name": "Latein",
+ "lang": "Latein",
"word": "camba",
"tags": ["vulgärlateinisch", "f"],
},
{
"lang_code": "la",
- "lang_name": "Latein",
+ "lang": "Latein",
"word": "gamba",
"tags": ["vulgärlateinisch", "f"],
},
@@ -262,30 +264,30 @@ def test_de_process_translation_list_with_modifiers(self):
"expected_sense_translations": [
{
"lang_code": "en",
- "lang_name": "Englisch",
+ "lang": "Englisch",
"word": "subscription",
"tags": ["[1a]"],
},
{
"lang_code": "en",
- "lang_name": "Englisch",
+ "lang": "Englisch",
"word": "dues",
},
{
"lang_code": "en",
- "lang_name": "Englisch",
+ "lang": "Englisch",
"word": "membership fee",
"tags": ["[1", "2]"],
},
{
"lang_code": "en",
- "lang_name": "Englisch",
+ "lang": "Englisch",
"word": "contribution",
"tags": ["[3]"],
},
{
"lang_code": "en",
- "lang_name": "Englisch",
+ "lang": "Englisch",
"word": "article",
},
],
diff --git a/tests/test_es_etymology.py b/tests/test_es_etymology.py
index fccd3484..dbe69b37 100644
--- a/tests/test_es_etymology.py
+++ b/tests/test_es_etymology.py
@@ -1,6 +1,7 @@
import unittest
from wikitextprocessor import Wtp
+
from wiktextract.config import WiktionaryConfig
from wiktextract.extractor.es.etymology import process_etymology_block
from wiktextract.extractor.es.models import WordEntry
@@ -84,15 +85,13 @@ def test_es_extract_etymology(self):
with self.subTest(case=case):
self.wxr.wtp.start_page("")
root = self.wxr.wtp.parse(case["input"])
- data = WordEntry(
- word="test", lang_code="es", lang_name="Español"
- )
+ data = WordEntry(word="test", lang_code="es", lang="Español")
process_etymology_block(self.wxr, data, root)
case["expected"].update(
{
"word": "test",
"lang_code": "es",
- "lang_name": "Español",
+ "lang": "Español",
}
)
self.assertEqual(
diff --git a/tests/test_es_gloss.py b/tests/test_es_gloss.py
index aed14433..5105821a 100644
--- a/tests/test_es_gloss.py
+++ b/tests/test_es_gloss.py
@@ -2,6 +2,7 @@
from typing import List
from wikitextprocessor import Wtp
+
from wiktextract.config import WiktionaryConfig
from wiktextract.extractor.es.gloss import extract_gloss
from wiktextract.extractor.es.models import WordEntry
@@ -19,7 +20,7 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()
def get_default_page_data(self) -> List[WordEntry]:
- return [WordEntry(word="test", lang_code="es", lang_name="Language")]
+ return [WordEntry(word="test", lang_code="es", lang="Language")]
def test_es_extract_glosses(self):
# https://es.wiktionary.org/wiki/ayudar
@@ -43,13 +44,13 @@ def test_es_extract_glosses(self):
"glosses": [
"Contribuir esfuerzo o recursos para la realización de algo."
],
- "senseid": 1,
+ "senseid": "1",
},
{
"glosses": [
"Por antonomasia, cooperar a que alguno salga de una situación dificultosa"
],
- "senseid": 2,
+ "senseid": "2",
},
],
)
@@ -79,7 +80,7 @@ def test_es_extract_gloss_categories(self):
"glosses": [
"Sentimiento afectivo de atracción, unión y afinidad que se experimenta hacia una persona, animal o cosa"
],
- "senseid": 1,
+ "senseid": "1",
"tags": ["Humanidades"],
"categories": ["ES:Sentimientos"],
}
diff --git a/tests/test_es_page.py b/tests/test_es_page.py
index aeda2e44..9e98a043 100644
--- a/tests/test_es_page.py
+++ b/tests/test_es_page.py
@@ -1,6 +1,7 @@
import unittest
from wikitextprocessor import Wtp
+
from wiktextract.config import WiktionaryConfig
from wiktextract.extractor.es.models import WordEntry
from wiktextract.extractor.es.page import parse_entries
@@ -18,7 +19,7 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()
def get_default_page_data(self) -> list[WordEntry]:
- return [WordEntry(word="test", lang_code="es", lang_name="Language")]
+ return [WordEntry(word="test", lang_code="es", lang="Language")]
def test_es_parse_entries(self):
"""
diff --git a/tests/test_es_pronunciation.py b/tests/test_es_pronunciation.py
index 7a57f54c..6cc44fbe 100644
--- a/tests/test_es_pronunciation.py
+++ b/tests/test_es_pronunciation.py
@@ -1,6 +1,7 @@
import unittest
from wikitextprocessor import Wtp
+
from wiktextract.config import WiktionaryConfig
from wiktextract.extractor.es.models import WordEntry
from wiktextract.extractor.es.pronunciation import (
@@ -22,7 +23,7 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()
def get_default_page_data(self) -> list[WordEntry]:
- return [WordEntry(word="test", lang_code="es", lang_name="Language")]
+ return [WordEntry(word="test", lang_code="es", lang="Language")]
def test_es_extract_pronunciation(self):
# Test cases taken from https://es.wiktionary.org/wiki/Plantilla:pron-graf
diff --git a/tests/test_es_translation.py b/tests/test_es_translation.py
index caa0fb5c..a902e95d 100644
--- a/tests/test_es_translation.py
+++ b/tests/test_es_translation.py
@@ -1,6 +1,7 @@
import unittest
from wikitextprocessor import Wtp
+
from wiktextract.config import WiktionaryConfig
from wiktextract.extractor.es.models import WordEntry
from wiktextract.extractor.es.translation import extract_translation
@@ -22,7 +23,7 @@ def get_default_page_data(self) -> list[WordEntry]:
WordEntry(
word="test",
lang_code="es",
- lang_name="Language",
+ lang="Language",
)
]
diff --git a/tests/test_ru_gloss.py b/tests/test_ru_gloss.py
index 7b708371..f7b8f7f6 100644
--- a/tests/test_ru_gloss.py
+++ b/tests/test_ru_gloss.py
@@ -19,7 +19,7 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()
def get_default_page_data(self) -> list[WordEntry]:
- return [WordEntry(word="пример", lang_code="ru", lang_name="Русский")]
+ return [WordEntry(word="пример", lang_code="ru", lang="Русский")]
def test_ru_extract_gloss(self):
# https://ru.wiktionary.org/wiki/овощ
diff --git a/tests/test_ru_page.py b/tests/test_ru_page.py
index 9d34857c..94df766d 100644
--- a/tests/test_ru_page.py
+++ b/tests/test_ru_page.py
@@ -20,7 +20,7 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()
# def get_default_page_data(self) -> list[WordEntry]:
- # return [WordEntry(word="test", lang_code="es", lang_name="Language")]
+ # return [WordEntry(word="test", lang_code="es", lang="Language")]
def test_ru_parse_page_1(self):
# Navigates homonyms/homographs
diff --git a/tests/test_ru_pronunciation.py b/tests/test_ru_pronunciation.py
index 9a29ff4f..e6422c09 100644
--- a/tests/test_ru_pronunciation.py
+++ b/tests/test_ru_pronunciation.py
@@ -27,7 +27,7 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()
def get_default_word_entry(self) -> WordEntry:
- return WordEntry(word="тест", lang_code="ru", lang_name="русский")
+ return WordEntry(word="тест", lang_code="ru", lang="русский")
def process_template_and_assert(
self,
diff --git a/tests/test_ru_translation.py b/tests/test_ru_translation.py
index 7fdd5595..acf9fa70 100644
--- a/tests/test_ru_translation.py
+++ b/tests/test_ru_translation.py
@@ -19,7 +19,7 @@ def tearDown(self) -> None:
self.wxr.wtp.close_db_conn()
def get_default_word_entry(self) -> WordEntry:
- return WordEntry(word="test", lang_code="ru", lang_name="русский")
+ return WordEntry(word="test", lang_code="ru", lang="русский")
def test_ru_extract_gloss(self):
# Test cases adapted from: https://ru.wiktionary.org/wiki/дом
@@ -41,9 +41,9 @@ def test_ru_extract_gloss(self):
{
"word": "house",
"lang_code": "en",
- "lang_name": "английский",
+ "lang": "английский",
},
- {"word": "بيت", "lang_code": "ar", "lang_name": "арабский"},
+ {"word": "بيت", "lang_code": "ar", "lang": "арабский"},
],
},
{
@@ -53,13 +53,13 @@ def test_ru_extract_gloss(self):
{
"word": "house",
"lang_code": "en",
- "lang_name": "английский",
+ "lang": "английский",
"sense": "сооружение",
},
{
"word": "بيت",
"lang_code": "ar",
- "lang_name": "арабский",
+ "lang": "арабский",
"sense": "сооружение",
},
],
@@ -70,22 +70,22 @@ def test_ru_extract_gloss(self):
{
"word": "ti",
"lang_code": "br",
- "lang_name": "бретонский",
+ "lang": "бретонский",
},
{
"word": "αὐλή",
"lang_code": "grc",
- "lang_name": "древнегреческий",
+ "lang": "древнегреческий",
},
{
"word": "δόμος",
"lang_code": "grc",
- "lang_name": "древнегреческий",
+ "lang": "древнегреческий",
},
{
"word": "δῶμα",
"lang_code": "grc",
- "lang_name": "древнегреческий",
+ "lang": "древнегреческий",
},
],
},