From 3f2b37f264acc110ec905f0ffc24709dc5f8f1f4 Mon Sep 17 00:00:00 2001 From: MarvNC Date: Sat, 20 Jan 2024 18:34:55 -0800 Subject: [PATCH 1/4] Refactor headwords to support multiple readings --- src/test/parseEntry.test.js | 16 ++++++++-------- src/types.d.ts | 7 ++++++- src/util/csv/parseEntryToJson.js | 6 +++--- src/util/yomitan/convertEntryToYomitanTerm.js | 10 ++++++---- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/test/parseEntry.test.js b/src/test/parseEntry.test.js index 63efc6d..c4ad9c8 100644 --- a/src/test/parseEntry.test.js +++ b/src/test/parseEntry.test.js @@ -11,7 +11,7 @@ const expectedEntries = [ headwords: [ { text: '大電', - reading: 'daai6 din6', + readings: ['daai6 din6'], }, ], tags: [ @@ -35,7 +35,7 @@ const expectedEntries = [ headwords: [ { text: '發電廠', - reading: 'faat3 din6 cong2', + readings: ['faat3 din6 cong2'], }, ], tags: [ @@ -59,7 +59,7 @@ const expectedEntries = [ headwords: [ { text: '排污', - reading: 'paai4 wu1', + readings: ['paai4 wu1'], }, ], tags: [ @@ -114,7 +114,7 @@ const expectedEntries = [ headwords: [ { text: '揀選', - reading: 'gaan2 syun2', + readings: ['gaan2 syun2'], }, ], tags: [ @@ -163,7 +163,7 @@ const expectedEntries = [ headwords: [ { text: '背景', - reading: 'bui3 ging2', + readings: ['bui3 ging2'], }, ], tags: [ @@ -220,7 +220,7 @@ const expectedEntries = [ headwords: [ { text: '天干地支', - reading: 'tin1 gon1 dei6 zi1', + readings: ['tin1 gon1 dei6 zi1'], }, ], tags: [ @@ -279,11 +279,11 @@ const expectedEntries = [ headwords: [ { text: '着', - reading: 'zoek6', + readings: ['zoek6'], }, { text: '著', - reading: 'zoek6', + readings: ['zoek6'], }, ], tags: [ diff --git a/src/types.d.ts b/src/types.d.ts index 3907e31..8cead02 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -34,6 +34,11 @@ type TextReadingPair = { reading: string; }; +type Headword = { + text: string; + readings: string[]; +}; + type Tag = { name: string; value: string; @@ -50,7 +55,7 @@ type LanguageData = { type DictionaryEntry 
= { id: number; - headwords: TextReadingPair[]; + headwords: Headword[]; tags: Tag[]; senses: Sense[]; }; diff --git a/src/util/csv/parseEntryToJson.js b/src/util/csv/parseEntryToJson.js index c9d1b0b..fc4e208 100644 --- a/src/util/csv/parseEntryToJson.js +++ b/src/util/csv/parseEntryToJson.js @@ -46,13 +46,13 @@ function parseEntry(entry) { */ function parseHeadwords(headwordString) { return headwordString.split(',').map((headword) => { - const [text, reading] = headword.split(':'); - if (!text || !reading) { + const [text, ...readings] = headword.split(':'); + if (!text || readings.length === 0 || readings.some((reading) => !reading)) { throw new Error(`Invalid headword: ${headword}`); } return { text, - reading, + readings, }; }); } diff --git a/src/util/yomitan/convertEntryToYomitanTerm.js b/src/util/yomitan/convertEntryToYomitanTerm.js index d8634a6..5cca320 100644 --- a/src/util/yomitan/convertEntryToYomitanTerm.js +++ b/src/util/yomitan/convertEntryToYomitanTerm.js @@ -14,10 +14,12 @@ function convertEntryToYomitanTerms(entry) { const detailedDefinition = convertEntryToDetailedDefinition(entry); for (const headword of entry.headwords) { - const termEntry = new TermEntry(headword.text) - .setReading(headword.reading) - .addDetailedDefinition(detailedDefinition); - yomitanTerms.push(termEntry.build()); + for (const reading of headword.readings) { + const termEntry = new TermEntry(headword.text) + .setReading(reading) + .addDetailedDefinition(detailedDefinition); + yomitanTerms.push(termEntry.build()); + } } return yomitanTerms; From fd3732358fc5f82a4b8a4ba6f19f39a1435068fb Mon Sep 17 00:00:00 2001 From: MarvNC Date: Sat, 20 Jan 2024 18:37:48 -0800 Subject: [PATCH 2/4] Add test for multiple readings --- src/test/parseEntry.test.js | 37 +++++++++++++++++++++++++++++++++++++ src/test/testdata.csv | 7 +++++++ 2 files changed, 44 insertions(+) diff --git a/src/test/parseEntry.test.js b/src/test/parseEntry.test.js index c4ad9c8..49cbba7 100644 --- a/src/test/parseEntry.test.js +++ b/src/test/parseEntry.test.js @@ 
-371,6 +371,43 @@ const expectedEntries = [ }, ], }, + { + id: 93305, + headwords: [ + { + text: '揸正嚟做', + readings: ['zaa1 zeng3 lai4 zou6', 'zaa1 zeng3 lei4 zou6'], + }, + ], + tags: [ + { + name: 'pos', + value: '動詞', + }, + { + name: 'sim', + value: '揸正', + }, + ], + senses: [ + { + explanation: { + yue: ['嚴格依照規矩,不留餘地,冇人情講'], + eng: [ + 'to follow the rules strictly; to "go by the book"; to leave no room for discretion', + ], + }, + egs: [ + { + yue: [ + '唔好怪我揸正嚟做。 (m4 hou2 gwaai3 ngo5 zaa1 zeng3 lei4 zou6.)', + ], + eng: ["Don't blame me for following the rules too strictly."], + }, + ], + }, + ], + }, ]; /** diff --git a/src/test/testdata.csv b/src/test/testdata.csv index 6ecbc02..750392e 100644 --- a/src/test/testdata.csv +++ b/src/test/testdata.csv @@ -123,3 +123,10 @@ zho:你可好生給我應付着。 (nei5 ho2 hou2 sang1 kap1 ngo5 jing3 fu6 zoek yue:你好好哋同我應付下。 (nei5 hou2 hou2 dei2 tung4 ngo5 jing3 fu6 haa5.) yue:你小心啲同我應付下。 (nei5 siu2 sam1 di1 tung4 ngo5 jing3 fu6 haa5.) eng:Handle this well (for me).",,OK,已公開 +93305,揸正嚟做:zaa1 zeng3 lai4 zou6:zaa1 zeng3 lei4 zou6,"(pos:動詞)(sim:揸正) + +yue:嚴格依照規矩,不留餘地,冇人情講 +eng:to follow the rules strictly; to ""go by the book""; to leave no room for discretion + +yue:唔好怪我揸正嚟做。 (m4 hou2 gwaai3 ngo5 zaa1 zeng3 lei4 zou6.) 
+eng:Don't blame me for following the rules too strictly.",,OK,已公開 From 7cbacffbd8ec64d9ce0fbbd4cf383e2517956cd8 Mon Sep 17 00:00:00 2001 From: MarvNC Date: Sat, 20 Jan 2024 19:02:55 -0800 Subject: [PATCH 3/4] Refactor headword conversion for multiple readings --- src/util/yomitan/convertHeadwordsToSC.js | 23 +++++++++++++++++------ src/util/yomitan/parseTextToSC.js | 20 +++++++++++++++----- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/util/yomitan/convertHeadwordsToSC.js b/src/util/yomitan/convertHeadwordsToSC.js index 760f934..f8fbaca 100644 --- a/src/util/yomitan/convertHeadwordsToSC.js +++ b/src/util/yomitan/convertHeadwordsToSC.js @@ -2,10 +2,10 @@ import { convertReadingToRubySC } from './parseTextToSC.js'; /** * Converts headword(s) to structured content. - * @param {TextReadingPair[]} headwords + * @param {Headword[]} headwords */ function convertHeadwordsToSC(headwords) { - const headwordsSCList = headwords.map(headwordToSC); + const headwordsSCList = headwordsToSC(headwords); const separator = '・'; /** * @type {import('yomichan-dict-builder/dist/types/yomitan/termbank').StructuredContent[]} @@ -36,11 +36,22 @@ function convertHeadwordsToSC(headwords) { /** * Converts a headword to structured content. 
- * @param {TextReadingPair} headword - * @returns {import('yomichan-dict-builder/dist/types/yomitan/termbank').StructuredContent} + * @param {Headword[]} headwords + * @returns {import('yomichan-dict-builder/dist/types/yomitan/termbank').StructuredContent[]} */ -function headwordToSC(headword) { - return convertReadingToRubySC(headword); +function headwordsToSC(headwords) { + /** + * @type {import('yomichan-dict-builder/dist/types/yomitan/termbank').StructuredContent[]} + */ + const headwordsSCList = []; + for (const headword of headwords) { + headwordsSCList.push( + ...headword.readings.map((reading) => + convertReadingToRubySC(headword.text, reading) + ) + ); + } + return headwordsSCList; } export { convertHeadwordsToSC }; diff --git a/src/util/yomitan/parseTextToSC.js b/src/util/yomitan/parseTextToSC.js index d33da67..7b34f6f 100644 --- a/src/util/yomitan/parseTextToSC.js +++ b/src/util/yomitan/parseTextToSC.js @@ -28,7 +28,9 @@ function convertTextToSC(rawText, languageCode) { try { const readings = parseCantoneseReadings(phrase, reading); - return readings.map(convertReadingToRubySC); + return readings.map(({ text, reading }) => + convertReadingToRubySC(text, reading) + ); } catch (error) { return cleanedText; } @@ -44,17 +46,25 @@ function cleanRawText(rawText) { /** * Parses a text string into a structured content object with ruby text for readings - * @param {TextReadingPair} reading + * @param {string} text + * @param {string} reading * @returns {import("yomichan-dict-builder/dist/types/yomitan/termbank").StructuredContent} */ -function convertReadingToRubySC(reading) { +function convertReadingToRubySC(text, reading) { + // Check that both text and reading are type string, if not then cast to string + if (typeof text !== 'string') { + text = String(text); + } + if (typeof reading !== 'string') { + reading = String(reading); + } return { tag: 'ruby', content: [ - reading.text, + text, { tag: 'rt', - content: reading.reading, + content: reading, }, ], }; 
From da76074f286887a3192572b4d273bdc9a501a47f Mon Sep 17 00:00:00 2001 From: MarvNC Date: Sat, 20 Jan 2024 19:07:05 -0800 Subject: [PATCH 4/4] Fix zho/lzh not getting ruby text --- src/util/yomitan/convertSenseToSC.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/yomitan/convertSenseToSC.js b/src/util/yomitan/convertSenseToSC.js index 5a0d7e6..064b99d 100644 --- a/src/util/yomitan/convertSenseToSC.js +++ b/src/util/yomitan/convertSenseToSC.js @@ -186,7 +186,7 @@ function convertLanguageEntryToDiv(language, languageTexts) { data: { wordshk: 'langtext', }, - content: convertTextToSC(languageText, languageInfo.langCode), + content: convertTextToSC(languageText, language), }; // Change text size for selected languages const cjkLangs = ['yue', 'zho', 'jpn', 'kor', 'lzh'];