Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix missing meanings caused by multiple etymologies #170

Merged
merged 1 commit into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions 3-tidy-up.js
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ lr.on('line', (line) => {
* @param {KaikkiLine} parsedLine
*/
function handleLine(parsedLine) {
const { pos, sounds, forms } = parsedLine;
const { pos, sounds, forms, etymology_number = 0 } = parsedLine;
if(!pos) return;
const word = getCanonicalWordForm(parsedLine);
if (!word) return;
Expand Down Expand Up @@ -177,16 +177,16 @@ function handleLine(parsedLine) {
if (sensesWithoutInflectionGlosses.length === 0) return;

const readings = getReadings(word, parsedLine);
initializeWordResult(word, readings, pos);
initializeWordResult(word, readings, pos, String(etymology_number));

for (const ipaObj of ipa) {
saveIpaResult(word, readings, pos, ipaObj);
saveIpaResult(word, readings, pos, String(etymology_number), ipaObj);
}

const glossTree = getGlossTree(sensesWithoutInflectionGlosses);

for (const reading of readings) {
lemmaDict[word][reading][pos].glossTree = glossTree;
lemmaDict[word][reading][pos][String(etymology_number)].glossTree = glossTree;
}
}

Expand Down Expand Up @@ -273,11 +273,12 @@ function processForms(forms, word, pos) {
* @param {string} word
* @param {string[]} readings
* @param {string} pos
* @param {string} etymology_number
* @param {IpaInfo} ipaObj
*/
function saveIpaResult(word, readings, pos, ipaObj) {
function saveIpaResult(word, readings, pos, etymology_number, ipaObj) {
for (const reading of readings) {
const result = lemmaDict[word][reading][pos];
const result = lemmaDict[word][reading][pos][etymology_number];
const existingIpa = result.ipa.find(obj => obj.ipa === ipaObj.ipa);
if (!existingIpa) {
result.ipa.push(ipaObj);
Expand All @@ -291,10 +292,11 @@ function saveIpaResult(word, readings, pos, ipaObj) {
* @param {string} word
* @param {string[]} readings
* @param {string} pos
* @param {string} etymology_number
*/
function initializeWordResult(word, readings, pos) {
function initializeWordResult(word, readings, pos, etymology_number) {
for (const reading of readings) {
const result = ensureNestedObject(lemmaDict, [word, reading, pos]);
const result = ensureNestedObject(lemmaDict, [word, reading, pos, etymology_number]);
result.ipa ??= [];
result.glossTree ??= new Map();
}
Expand Down
86 changes: 44 additions & 42 deletions 4-make-yomitan.js
Original file line number Diff line number Diff line change
Expand Up @@ -271,50 +271,52 @@ let lastTermBankIndex = 0;

const ipa = [];

for (const [pos, info] of Object.entries(partsOfSpeechOfWord)) {
const foundPos = findPartOfSpeech(pos, partsOfSpeech, skippedPartsOfSpeech);
const {glossTree} = info;

const lemmaTags = [pos];
ipa.push(...info.ipa);

/** @type {Object<string, import('types').TermBank.TermInformation>} */
const entries = {};

for (const [gloss, branches] of glossTree.entries()) {
const tags = branches.get('_tags') || [];
branches.delete('_tags');

const senseTags = [...tags, ...lemmaTags];

/** @type {GlossBranch} */
const syntheticBranch = new Map();
syntheticBranch.set(gloss, branches);
const {glosses, recognizedTags} = handleNest(syntheticBranch, senseTags, pos);
const joinedTags = recognizedTags.join(' ');

if(!glosses || !glosses.length) continue;

if (entries[joinedTags]) {
// entries[joinedTags][5].push(gloss);
entries[joinedTags][5].push(...glosses);
} else {
entries[joinedTags] = [
term, // term
reading !== normalizedLemma ? reading : '', // reading
joinedTags, // definition_tags
foundPos, // rules
0, // frequency
glosses, // definitions
0, // sequence
'', // term_tags
];
for (const [pos, etyms] of Object.entries(partsOfSpeechOfWord)) {
for (const [etym_number, info] of Object.entries(etyms)) {
const foundPos = findPartOfSpeech(pos, partsOfSpeech, skippedPartsOfSpeech);
const {glossTree} = info;

const lemmaTags = [pos];
ipa.push(...info.ipa);

/** @type {Object<string, import('types').TermBank.TermInformation>} */
const entries = {};

for (const [gloss, branches] of glossTree.entries()) {
const tags = branches.get('_tags') || [];
branches.delete('_tags');

const senseTags = [...tags, ...lemmaTags];

/** @type {GlossBranch} */
const syntheticBranch = new Map();
syntheticBranch.set(gloss, branches);
const {glosses, recognizedTags} = handleNest(syntheticBranch, senseTags, pos);
const joinedTags = recognizedTags.join(' ');

if(!glosses || !glosses.length) continue;

if (entries[joinedTags]) {
// entries[joinedTags][5].push(gloss);
entries[joinedTags][5].push(...glosses);
} else {
entries[joinedTags] = [
term, // term
reading !== normalizedLemma ? reading : '', // reading
joinedTags, // definition_tags
foundPos, // rules
0, // frequency
glosses, // definitions
0, // sequence
'', // term_tags
];
}
}
}

debug(entries);
for (const [tags, entry] of Object.entries(entries)) {
ymtLemmas.push(entry);
debug(entries);
for (const [tags, entry] of Object.entries(entries)) {
ymtLemmas.push(entry);
}
}
}

Expand Down
14 changes: 14 additions & 0 deletions data/test/dict/de/en/tag_bank_1.json
Original file line number Diff line number Diff line change
Expand Up @@ -103,5 +103,19 @@
-1,
"preposition",
1
],
[
"fem",
"",
-1,
"feminine",
1
],
[
"chem",
"",
0,
"chemistry",
0
]
]
66 changes: 66 additions & 0 deletions data/test/dict/de/en/term_bank_1.json
Original file line number Diff line number Diff line change
Expand Up @@ -1122,5 +1122,71 @@
],
0,
""
],
[
"Base",
"",
"arch fem n",
"n",
0,
[
{
"type": "structured-content",
"content": [
{
"tag": "div",
"content": [
"A female cousin."
]
}
]
}
],
0,
""
],
[
"Base",
"",
"fem obs n",
"n",
0,
[
{
"type": "structured-content",
"content": [
{
"tag": "div",
"content": [
"paternal aunt"
]
}
]
}
],
0,
""
],
[
"Base",
"",
"fem n chem",
"n",
0,
[
{
"type": "structured-content",
"content": [
{
"tag": "div",
"content": [
"base (compound that will neutralize an acid)"
]
}
]
}
],
0,
""
]
]
49 changes: 49 additions & 0 deletions data/test/dict/de/en/term_bank_2.json
Original file line number Diff line number Diff line change
Expand Up @@ -2317,5 +2317,54 @@
],
0,
""
],
[
"Basen",
"",
"non-lemma",
"",
0,
[
[
"Base",
[
"plural"
]
],
[
"Base",
[
"accusative",
"plural",
"definite"
]
],
[
"Base",
[
"dative",
"plural",
"definite"
]
],
[
"Base",
[
"genitive",
"plural",
"definite"
]
],
[
"Base",
[
"nominative",
"plural",
"definite"
]
]
],
0,
""
]
]
13 changes: 13 additions & 0 deletions data/test/ipa/de/en/term_meta_bank_1.json
Original file line number Diff line number Diff line change
Expand Up @@ -87,5 +87,18 @@
}
]
}
],
[
"Base",
"ipa",
{
"reading": "Base",
"transcriptions": [
{
"ipa": "/ˈbaːzə/",
"tags": []
}
]
}
]
]
Loading
Loading