-
Notifications
You must be signed in to change notification settings - Fork 0
/
die.py
119 lines (107 loc) · 5.35 KB
/
die.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python
"""A module applying semantic, morphological, and phonological criteria to explain the feminine gender assignment"""
import deepl
from hypernyms import taxonomy
from rules import fem_category_dict, fem_classes, fem_suffixes
# DeepL API key. The "#" value is only a placeholder.
license_key = "#" # replace with your own DeepL licence key
# Module-level DeepL client, created once at import time from the key above.
# fem_evaluate() uses it to translate a parsed German base noun into English.
deepl_translator = deepl.Translator(license_key)
def fem_rule1(hypernyms: list) -> tuple:
    """Match a noun's hypernyms against the feminine semantic categories.

    Args:
        hypernyms: Hypernyms generated by the input noun across all of its
            synsets, or ``None`` (taxonomy() occasionally yields ``None``).

    Returns:
        A ``(general, granular)`` pair of sets. ``granular`` contains the
        hypernyms that are listed in ``fem_classes``. ``general`` contains
        every key of ``fem_category_dict`` whose value contains at least one
        of those granular categories, i.e. the same classification expressed
        in broader terms. Both sets are empty when nothing matches or when
        ``hypernyms`` is ``None``.
    """
    # Always return a two-item tuple so callers can unpack the result
    # unconditionally, including for the None case.
    if hypernyms is None:
        return set(), set()

    granular_categories = {
        category for category in hypernyms if category in fem_classes
    }
    general_categories = {
        key
        for category in granular_categories
        for key, value in fem_category_dict.items()
        if category in value
    }
    return general_categories, granular_categories
def fem_rule2(token: str) -> "str | None":
    """Find the longest feminine suffix that the input noun ends with.

    Every entry of ``fem_suffixes`` is tested against the end of ``token``.
    Only suffixes are checked: there are no predominantly feminine prefixes.

    Args:
        token: The noun to inspect.

    Returns:
        The longest matching suffix prefixed with a hyphen (``"-" + suffix``),
        or ``None`` when the noun ends with none of the known suffixes.
    """
    matches = ("-" + suffix for suffix in fem_suffixes if token.endswith(suffix))
    # In case of nested suffixes, only the longest one is returned.
    return max(matches, key=len, default=None)
def fem_evaluate(lemmatized: str, hypernyms: list, parsed_base: str) -> None:
    """Print an explanation of why a noun is grammatically feminine.

    The explanation is built in two steps:

    1. Semantics: the noun's hypernyms are matched against the feminine
       semantic categories via fem_rule1(). When the noun has no hypernyms
       (an empty collection and ``None`` are treated alike), its parsed base
       noun is translated from German to English with DeepL, and the
       hypernyms returned by taxonomy() for that translation are used instead.
    2. Morphology: the noun is checked for feminine suffixes via fem_rule2().

    If neither step yields an explanation, a closing message says that the
    gender cannot be explained with the available rules.

    Args:
        lemmatized: The noun being explained.
        hypernyms: Hypernyms generated over all synsets of the noun; may be
            empty or ``None``.
        parsed_base: The base noun produced by the parser; falsy when the
            noun could not be parsed.

    Returns:
        None. All results are written to standard output.
    """
    explained = False
    print(f"The noun '{lemmatized}' is feminine.")

    # check the semantic taxonomy
    if hypernyms:
        semantic_general, semantic_granular = fem_rule1(hypernyms)
        if semantic_granular:
            # Sets have no stable iteration order; sort so the printed
            # category lists are deterministic.
            granular_text = ", ".join(sorted(semantic_granular))
            general_text = ", ".join(sorted(semantic_general))
            print(
                f"It belongs to the following predominantly feminine semantic categories: {granular_text}"
            )
            print(
                f"The above classification can be expressed in terms of the following general semantic categories: {general_text}"
            )
            explained = True
        else:
            print(
                "Grammatical gender assignment could not be determined based on the semantic category alone."
            )
    elif parsed_base:
        # The noun itself produced no hypernyms (empty or None), so fall back
        # to the hypernyms of its translated base noun.
        parsed_translation = deepl_translator.translate_text(
            parsed_base, source_lang="DE", target_lang="EN-US"
        )
        translated_base = parsed_translation.text.casefold()
        base_hypernyms = taxonomy(translated_base)
        if base_hypernyms:
            base_semantic_general, base_semantic_granular = fem_rule1(
                base_hypernyms
            )
            if base_semantic_granular:
                base_granular_text = ", ".join(sorted(base_semantic_granular))
                base_general_text = ", ".join(sorted(base_semantic_general))
                print(f"Couldn't find any semantic categories for '{lemmatized}'.")
                print(
                    f"The base noun '{parsed_base}' belongs to the following predominantly feminine semantic categories: {base_granular_text}"
                )
                print(
                    f"The above classification can be expressed in terms of the following general semantic categories: {base_general_text}"
                )
                explained = True
            else:
                print(
                    f"Couldn't find any semantic categories for '{lemmatized}'. There don't seem to be any predomiantly feminine semantic categories to which the base noun '{parsed_base}' blelongs."
                )
        else:
            print(f"Couldn't generate any semantic categories for '{parsed_base}'.")
    else:
        # No hypernyms and the parser returned no base noun either.
        print(f"Couldn't parse '{lemmatized}'.")

    # check the morphology
    morphological = fem_rule2(lemmatized)
    if parsed_base:
        print(f"'{lemmatized}' has the following feminine base noun: '{parsed_base}'.")
    if morphological:
        print(f"The noun has the following feminine suffixes: {morphological}")
        explained = True
    else:
        print(
            "Grammatical gender assignment cannot be determined based on the noun's suffixes alone."
        )

    # print this if none of the above applies
    if not explained:
        print(
            f"The grammatical gender of '{lemmatized}' cannot be explained with the available rules."
        )
        print("For better or worse, it has to be memorized")