Skip to content

Commit

Permalink
add missing inflections that include 'nominative', remove redundant inflections (#63)
Browse files Browse the repository at this point in the history

* add identity tags

* add infinitive to identity tags

* add redundant tags
  • Loading branch information
StefanVukovic99 authored Jun 11, 2024
1 parent f33d0b1 commit 5774fae
Show file tree
Hide file tree
Showing 12 changed files with 1,239 additions and 583 deletions.
52 changes: 34 additions & 18 deletions 3-tidy-up.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,16 +99,25 @@ function addDeinflections(form, pos, lemma, inflections) {
const blacklistedTags = [
'inflection-template',
'table-tags',
'nominative',
'canonical',
'class',
'error-unknown-tag',
'error-unrecognized-form',
'infinitive',
'includes-article',
'obsolete',
'archaic',
'used-in-the-form'
'used-in-the-form',
];

// Tags that, on their own, do not distinguish a form from its lemma entry.
// A form whose tags all appear in this list is skipped rather than recorded
// as an inflection.
const identityTags = [
    'nominative',
    'singular',
    'infinitive',
];

// Tags filtered out of a form's tag list before the blacklist and identity
// checks are applied to it.
const redundantTags = ['multiword-construction', 'combined-form'];

let lineCount = 0;
Expand All @@ -133,21 +142,28 @@ function handleLine(line) {
if (word && pos && senses) {
if (forms) {
forms.forEach((formData) => {
const { form, tags } = formData;

if (form && tags && !tags.some(value => blacklistedTags.includes(value)) && form !== '-') {
const wordMap = automatedForms.get(word) || new Map();
const formMap = wordMap.get(form) || new Map();
formMap.get(pos) || formMap.set(pos, new Set());
wordMap.set(form, formMap);
automatedForms.set(word, wordMap);

const tagsSet = new Set((formMap.get(pos)));

tagsSet.add(sortTags(targetIso, tags).join(' '));

formMap.set(pos, similarSort(mergePersonTags(targetIso, Array.from(tagsSet))));
}
const { form } = formData;
let { tags } = formData;
if(!form) return;
if(!tags) return;
if(form === '-') return;
tags = tags.filter(tag => !redundantTags.includes(tag));
const isBlacklisted = tags.some(value => blacklistedTags.includes(value));
if (isBlacklisted) return;
const isIdentity = !tags.some(value => !identityTags.includes(value));
if (isIdentity) return;

const wordMap = automatedForms.get(word) || new Map();
const formMap = wordMap.get(form) || new Map();
formMap.get(pos) || formMap.set(pos, new Set());
wordMap.set(form, formMap);
automatedForms.set(word, wordMap);

const tagsSet = new Set((formMap.get(pos)));

tagsSet.add(sortTags(targetIso, tags).join(' '));

formMap.set(pos, similarSort(mergePersonTags(targetIso, Array.from(tagsSet))));
});
}

Expand Down
10 changes: 3 additions & 7 deletions 4-make-yomitan.js
Original file line number Diff line number Diff line change
Expand Up @@ -290,14 +290,10 @@ let lastTermBankIndex = 0;
if (!gloss) { return []; }

gloss = gloss
.replace(/-automated- /g, '')
if(target_iso === 'en'){
gloss = gloss
.replace(/multiword-construction /g, '')
.replace(/multiword-construction /g, '')

for (const multiwordInflection of multiwordInflections) {
gloss = gloss.replace(new RegExp(multiwordInflection), multiwordInflection.replace(/ /g, '\u00A0'));
}
for (const multiwordInflection of multiwordInflections) {
gloss = gloss.replace(new RegExp(multiwordInflection), multiwordInflection.replace(/ /g, '\u00A0'));
}

// TODO: decide on format for de-de
Expand Down
7 changes: 7 additions & 0 deletions data/test/dict/cs/en/term_bank_2.json
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,13 @@
"plural"
]
],
[
"zpráva",
[
"nominative",
"plural"
]
],
[
"zpráva",
[
Expand Down
Loading

0 comments on commit 5774fae

Please sign in to comment.