yomidevs · StefanVukovic99 · Nov 19, 2024 · Nov 19, 2024
diff --git a/3-tidy-up.js b/3-tidy-up.js
@@ -124,7 +124,7 @@ lr.on('line', (line) => {
  * @param {KaikkiLine} parsedLine 
  */
 function handleLine(parsedLine) {
-    const { pos, sounds, forms } = parsedLine;
+    const { pos, sounds, forms, etymology_number = 0 } = parsedLine;
     if(!pos) return;
     const word = getCanonicalWordForm(parsedLine);
     if (!word) return;
@@ -177,16 +177,16 @@ function handleLine(parsedLine) {
     if (sensesWithoutInflectionGlosses.length === 0) return;
 
     const readings = getReadings(word, parsedLine);
-    initializeWordResult(word, readings, pos);
+    initializeWordResult(word, readings, pos, String(etymology_number));
 
     for (const ipaObj of ipa) {
-        saveIpaResult(word, readings, pos, ipaObj);
+        saveIpaResult(word, readings, pos, String(etymology_number), ipaObj);
     }
 
     const glossTree = getGlossTree(sensesWithoutInflectionGlosses);
 
     for (const reading of readings) {
-        lemmaDict[word][reading][pos].glossTree = glossTree;
+        lemmaDict[word][reading][pos][String(etymology_number)].glossTree = glossTree;
     }
 }
 
@@ -273,11 +273,12 @@ function processForms(forms, word, pos) {
  * @param {string} word 
  * @param {string[]} readings 
  * @param {string} pos 
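+ * @param {string} etymology_number - Etymology number as a string; key under lemmaDict[word][reading][pos] whose entry receives the IPA.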
  * @param {IpaInfo} ipaObj 
  */
-function saveIpaResult(word, readings, pos, ipaObj) {
+function saveIpaResult(word, readings, pos, etymology_number, ipaObj) {
     for (const reading of readings) {
-        const result = lemmaDict[word][reading][pos];
+        const result = lemmaDict[word][reading][pos][etymology_number];
         const existingIpa = result.ipa.find(obj => obj.ipa === ipaObj.ipa);
         if (!existingIpa) {
             result.ipa.push(ipaObj);
@@ -291,10 +292,11 @@ function saveIpaResult(word, readings, pos, ipaObj) {
  * @param {string} word 
  * @param {string[]} readings 
  * @param {string} pos 
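+ * @param {string} etymology_number - Etymology number as a string; last key of the nested lemmaDict path (word, reading, pos, etymology_number) that is ensured.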
  */
-function initializeWordResult(word, readings, pos) {
+function initializeWordResult(word, readings, pos, etymology_number) {
     for (const reading of readings) {
-        const result = ensureNestedObject(lemmaDict, [word, reading, pos]);
+        const result = ensureNestedObject(lemmaDict, [word, reading, pos, etymology_number]);
         result.ipa ??= [];
         result.glossTree ??= new Map();
     }

diff --git a/4-make-yomitan.js b/4-make-yomitan.js
@@ -271,50 +271,52 @@ let lastTermBankIndex = 0;
 
             const ipa = [];
 
-            for (const [pos, info] of Object.entries(partsOfSpeechOfWord)) {
-                const foundPos = findPartOfSpeech(pos, partsOfSpeech, skippedPartsOfSpeech);
-                const {glossTree} = info;
-
-                const lemmaTags = [pos];
-                ipa.push(...info.ipa);
-
-                /** @type {Object<string, import('types').TermBank.TermInformation>} */
-                const entries = {};
-
-                for (const [gloss, branches] of glossTree.entries()) {
-                    const tags = branches.get('_tags') || [];
-                    branches.delete('_tags');
-
-                    const senseTags = [...tags, ...lemmaTags];
-
-                    /** @type {GlossBranch} */
-                    const syntheticBranch = new Map();
-                    syntheticBranch.set(gloss, branches);
-                    const {glosses, recognizedTags} = handleNest(syntheticBranch, senseTags, pos);
-                    const joinedTags = recognizedTags.join(' ');
-
-                    if(!glosses || !glosses.length) continue;
-
-                    if (entries[joinedTags]) {
-                        // entries[joinedTags][5].push(gloss);
-                        entries[joinedTags][5].push(...glosses);
-                    } else {
-                        entries[joinedTags] = [
-                            term, // term
-                            reading !== normalizedLemma ? reading : '', // reading
-                            joinedTags, // definition_tags
-                            foundPos, // rules
-                            0, // frequency
-                            glosses, // definitions
-                            0, // sequence
-                            '', // term_tags
-                        ];
+            for (const [pos, etyms] of Object.entries(partsOfSpeechOfWord)) {
+                for (const [etym_number, info] of Object.entries(etyms)) {
+                    const foundPos = findPartOfSpeech(pos, partsOfSpeech, skippedPartsOfSpeech);
+                    const {glossTree} = info;
+
+                    const lemmaTags = [pos];
+                    ipa.push(...info.ipa);
+
+                    /** @type {Object<string, import('types').TermBank.TermInformation>} */
+                    const entries = {};
+
+                    for (const [gloss, branches] of glossTree.entries()) {
+                        const tags = branches.get('_tags') || [];
+                        branches.delete('_tags');
+
+                        const senseTags = [...tags, ...lemmaTags];
+
+                        /** @type {GlossBranch} */
+                        const syntheticBranch = new Map();
+                        syntheticBranch.set(gloss, branches);
+                        const {glosses, recognizedTags} = handleNest(syntheticBranch, senseTags, pos);
+                        const joinedTags = recognizedTags.join(' ');
+
+                        if(!glosses || !glosses.length) continue;
+
+                        if (entries[joinedTags]) {
+                            // entries[joinedTags][5].push(gloss);
+                            entries[joinedTags][5].push(...glosses);
+                        } else {
+                            entries[joinedTags] = [
+                                term, // term
+                                reading !== normalizedLemma ? reading : '', // reading
+                                joinedTags, // definition_tags
+                                foundPos, // rules
+                                0, // frequency
+                                glosses, // definitions
+                                0, // sequence
+                                '', // term_tags
+                            ];
+                        }
                     }
-                }
 
-                debug(entries);
-                for (const [tags, entry] of Object.entries(entries)) {
-                    ymtLemmas.push(entry);
+                    debug(entries);
+                    for (const [tags, entry] of Object.entries(entries)) {
+                        ymtLemmas.push(entry);
+                    }
                 }
             }
 

diff --git a/data/test/dict/de/en/tag_bank_1.json b/data/test/dict/de/en/tag_bank_1.json
@@ -103,5 +103,19 @@
     -1,
     "preposition",
     1
+  ],
+  [
+    "fem",
+    "",
+    -1,
+    "feminine",
+    1
+  ],
+  [
+    "chem",
+    "",
+    0,
+    "chemistry",
+    0
   ]
 ]
diff --git a/data/test/dict/de/en/term_bank_1.json b/data/test/dict/de/en/term_bank_1.json
@@ -1122,5 +1122,71 @@
     ],
     0,
     ""
+  ],
+  [
+    "Base",
+    "",
+    "arch fem n",
+    "n",
+    0,
+    [
+      {
+        "type": "structured-content",
+        "content": [
+          {
+            "tag": "div",
+            "content": [
+              "A female cousin."
+            ]
+          }
+        ]
+      }
+    ],
+    0,
+    ""
+  ],
+  [
+    "Base",
+    "",
+    "fem obs n",
+    "n",
+    0,
+    [
+      {
+        "type": "structured-content",
+        "content": [
+          {
+            "tag": "div",
+            "content": [
+              "paternal aunt"
+            ]
+          }
+        ]
+      }
+    ],
+    0,
+    ""
+  ],
+  [
+    "Base",
+    "",
+    "fem n chem",
+    "n",
+    0,
+    [
+      {
+        "type": "structured-content",
+        "content": [
+          {
+            "tag": "div",
+            "content": [
+              "base (compound that will neutralize an acid)"
+            ]
+          }
+        ]
+      }
+    ],
+    0,
+    ""
   ]
 ]
diff --git a/data/test/dict/de/en/term_bank_2.json b/data/test/dict/de/en/term_bank_2.json
@@ -2317,5 +2317,54 @@
     ],
     0,
     ""
+  ],
+  [
+    "Basen",
+    "",
+    "non-lemma",
+    "",
+    0,
+    [
+      [
+        "Base",
+        [
+          "plural"
+        ]
+      ],
+      [
+        "Base",
+        [
+          "accusative",
+          "plural",
+          "definite"
+        ]
+      ],
+      [
+        "Base",
+        [
+          "dative",
+          "plural",
+          "definite"
+        ]
+      ],
+      [
+        "Base",
+        [
+          "genitive",
+          "plural",
+          "definite"
+        ]
+      ],
+      [
+        "Base",
+        [
+          "nominative",
+          "plural",
+          "definite"
+        ]
+      ]
+    ],
+    0,
+    ""
   ]
 ]
diff --git a/data/test/ipa/de/en/term_meta_bank_1.json b/data/test/ipa/de/en/term_meta_bank_1.json
@@ -87,5 +87,18 @@
         }
       ]
     }
+  ],
+  [
+    "Base",
+    "ipa",
+    {
+      "reading": "Base",
+      "transcriptions": [
+        {
+          "ipa": "/ˈbaːzə/",
+          "tags": []
+        }
+      ]
+    }
   ]
 ]