-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdeclension.py
144 lines (128 loc) · 3.47 KB
/
declension.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# SPDX-FileCopyrightText: 2021 Tuomas Siipola
# SPDX-License-Identifier: MIT
import logging
from wikibaseintegrator import wbi_core, wbi_datatype, wbi_functions
from common import create_login_instance
# (singular suffix, plural suffix) pairs that classify() assigns to class 5
# even though the two forms differ.
FIFTH_DECLENSION_EXCEPTIONS = [
    ("mus", "möss"),
    ("gås", "gäss"),
    ("man", "män"),
]


def classify(singular, plural):
    """Guess a noun's declension class from its singular and plural forms.

    The checks run in order and the first match wins:

    * identical forms, or a suffix pair listed in
      ``FIFTH_DECLENSION_EXCEPTIONS`` -> 5
    * plural ends in "or" and the singular does not end in "o" -> 1
    * plural ends in "ar" and the singular does not end in "a" -> 2
    * any other plural ending in "r" -> 3
    * plural ending in "n" -> 4

    Returns ``None`` when none of the rules match.

    >>> classify('flicka', 'flickor')
    1
    >>> classify('våg', 'vågor')
    1
    >>> classify('ros', 'rosor')
    1
    >>> classify('finger', 'fingrar')
    2
    >>> classify('arm', 'armar')
    2
    >>> classify('hund', 'hundar')
    2
    >>> classify('sjö', 'sjöar')
    2
    >>> classify('pojke', 'pojkar')
    2
    >>> classify('sjukdom', 'sjukdomar')
    2
    >>> classify('främling', 'främlingar')
    2
    >>> classify('afton', 'aftnar')
    2
    >>> classify('sommar', 'somrar')
    2
    >>> classify('moder', 'mödrar')
    2
    >>> classify('mor', 'mödrar')
    2
    >>> classify('park', 'parker')
    3
    >>> classify('museum', 'museer')
    3
    >>> classify('sko', 'skor')
    3
    >>> classify('fiende', 'fiender')
    3
    >>> classify('hand', 'händer')
    3
    >>> classify('land', 'länder')
    3
    >>> classify('bok', 'böcker')
    3
    >>> classify('nöt', 'nötter')
    3
    >>> classify('bi', 'bin')
    4
    >>> classify('äpple', 'äpplen')
    4
    >>> classify('öga', 'ögon')
    4
    >>> classify('öra', 'öron')
    4
    >>> classify('barn', 'barn')
    5
    >>> classify('djur', 'djur')
    5
    >>> classify('lärare', 'lärare')
    5
    >>> classify('mus', 'möss')
    5
    >>> classify('gås', 'gäss')
    5
    >>> classify('man', 'män')
    5
    """
    # An unchanged plural is always class 5.
    if plural == singular:
        return 5

    # Suffix matching (not equality) so longer words ending in a listed pair
    # are caught as well.
    is_listed_exception = any(
        singular.endswith(singular_tail) and plural.endswith(plural_tail)
        for singular_tail, plural_tail in FIFTH_DECLENSION_EXCEPTIONS
    )
    if is_listed_exception:
        return 5

    # The singular's final vowel decides whether "-or"/"-ar" is a real
    # class 1/2 ending or just "-r" appended to the stem (e.g. sko -> skor).
    if plural.endswith("or") and not singular.endswith("o"):
        return 1
    if plural.endswith("ar") and not singular.endswith("a"):
        return 2
    if plural.endswith("r"):
        return 3

    return 4 if plural.endswith("n") else None
# Item IDs written as the P5911 value, keyed by the class number that
# classify() returns; the tuple is ordered so position N holds class N.
DECLENSION_ID = dict(
    enumerate(
        (
            "Q106602496",
            "Q106602498",
            "Q106602499",
            "Q106602501",
            "Q106602503",
        ),
        start=1,
    )
)
def main():
    """Add a P5911 statement to every queried lexeme that can be classified.

    Runs a SPARQL query (capped at 1000 rows) for lexemes that have no P5911
    statement yet, together with their lemma and one plural form, classifies
    each singular/plural pair with ``classify`` and writes the matching
    ``DECLENSION_ID`` entry as the P5911 value. Every row is logged; rows that
    ``classify`` cannot place are logged and left untouched.
    """
    logging.basicConfig(level=logging.INFO)
    login_instance = create_login_instance()

    # The query text stays flush-left on purpose: any indentation added here
    # would become part of the string that is sent.
    query = """
SELECT ?lexeme ?singular ?plural WHERE {
?lexeme dct:language wd:Q9027;
wikibase:lexicalCategory wd:Q1084;
wikibase:lemma ?singular;
ontolex:lexicalForm ?form.
?form wikibase:grammaticalFeature wd:Q131105, wd:Q146786, wd:Q53997857;
ontolex:representation ?plural.
FILTER(NOT EXISTS { ?lexeme wdt:P5911 []. })
}
LIMIT 1000
"""
    response = wbi_functions.execute_sparql_query(query)

    # The query returns full entity URIs; only the bare ID is passed on.
    entity_prefix = "http://www.wikidata.org/entity/"

    for binding in response["results"]["bindings"]:
        lexeme = binding["lexeme"]["value"].removeprefix(entity_prefix)
        singular = binding["singular"]["value"]
        plural = binding["plural"]["value"]
        klass = classify(singular, plural)
        logging.info(
            f"lexeme={lexeme} singular={singular} plural={plural} class={klass}"
        )
        # classify() returns None when no rule matched; nothing is written then.
        if klass:
            claim = wbi_datatype.ItemID(value=DECLENSION_ID[klass], prop_nr="P5911")
            engine = wbi_core.ItemEngine(item_id=lexeme, data=[claim])
            engine.write(
                login_instance,
                edit_summary="add declension [[User:Kriobot#Task_1|#task1]]",
            )
# Run main() only when this file is executed as a script, so importing the
# module (e.g. to run the classify doctests) performs no queries or writes.
if __name__ == "__main__":
    main()