Skip to content

Commit

Permalink
Merge pull request #19 from MarvNC:parse-multiple-readings
Browse files Browse the repository at this point in the history
Parse Multiple Readings for the Same Entry
  • Loading branch information
MarvNC authored Jan 21, 2024
2 parents 95d44f4 + da76074 commit 824919c
Show file tree
Hide file tree
Showing 8 changed files with 100 additions and 28 deletions.
53 changes: 45 additions & 8 deletions src/test/parseEntry.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ const expectedEntries = [
headwords: [
{
text: '大電',
reading: 'daai6 din6',
readings: ['daai6 din6'],
},
],
tags: [
Expand All @@ -35,7 +35,7 @@ const expectedEntries = [
headwords: [
{
text: '發電廠',
reading: 'faat3 din6 cong2',
readings: ['faat3 din6 cong2'],
},
],
tags: [
Expand All @@ -59,7 +59,7 @@ const expectedEntries = [
headwords: [
{
text: '排污',
reading: 'paai4 wu1',
readings: ['paai4 wu1'],
},
],
tags: [
Expand Down Expand Up @@ -114,7 +114,7 @@ const expectedEntries = [
headwords: [
{
text: '揀選',
reading: 'gaan2 syun2',
readings: ['gaan2 syun2'],
},
],
tags: [
Expand Down Expand Up @@ -163,7 +163,7 @@ const expectedEntries = [
headwords: [
{
text: '背景',
reading: 'bui3 ging2',
readings: ['bui3 ging2'],
},
],
tags: [
Expand Down Expand Up @@ -220,7 +220,7 @@ const expectedEntries = [
headwords: [
{
text: '天干地支',
reading: 'tin1 gon1 dei6 zi1',
readings: ['tin1 gon1 dei6 zi1'],
},
],
tags: [
Expand Down Expand Up @@ -279,11 +279,11 @@ const expectedEntries = [
headwords: [
{
text: '着',
reading: 'zoek6',
readings: ['zoek6'],
},
{
text: '著',
reading: 'zoek6',
readings: ['zoek6'],
},
],
tags: [
Expand Down Expand Up @@ -371,6 +371,43 @@ const expectedEntries = [
},
],
},
{
id: 93305,
headwords: [
{
text: '揸正嚟做',
readings: ['zaa1 zeng3 lai4 zou6', 'zaa1 zeng3 lei4 zou6'],
},
],
tags: [
{
name: 'pos',
value: '動詞',
},
{
name: 'sim',
value: '揸正',
},
],
senses: [
{
explanation: {
yue: ['嚴格依照規矩,不留餘地,冇人情講'],
eng: [
'to follow the rules strictly; to "go by the book"; to leave no room for discretion',
],
},
egs: [
{
yue: [
'唔好怪我揸正嚟做。 (m4 hou2 gwaai3 ngo5 zaa1 zeng3 lei4 zou6.)',
],
eng: ["Don't blame me for following the rules too strictly."],
},
],
},
],
},
];

/**
Expand Down
7 changes: 7 additions & 0 deletions src/test/testdata.csv
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,10 @@ zho:你可好生給我應付着。 (nei5 ho2 hou2 sang1 kap1 ngo5 jing3 fu6 zoek
yue:你好好哋同我應付下。 (nei5 hou2 hou2 dei2 tung4 ngo5 jing3 fu6 haa5.)
yue:你小心啲同我應付下。 (nei5 siu2 sam1 di1 tung4 ngo5 jing3 fu6 haa5.)
eng:Handle this well (for me).",,OK,已公開
93305,揸正嚟做:zaa1 zeng3 lai4 zou6:zaa1 zeng3 lei4 zou6,"(pos:動詞)(sim:揸正)
<explanation>
yue:嚴格依照規矩,不留餘地,冇人情講
eng:to follow the rules strictly; to ""go by the book""; to leave no room for discretion
<eg>
yue:唔好怪我揸正嚟做。 (m4 hou2 gwaai3 ngo5 zaa1 zeng3 lei4 zou6.)
eng:Don't blame me for following the rules too strictly.",,OK,已公開
7 changes: 6 additions & 1 deletion src/types.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ type TextReadingPair = {
reading: string;
};

/**
 * A dictionary headword together with every reading listed for it.
 * Replaces the single-reading shape (`reading: string`) used by TextReadingPair
 * for entry headwords.
 */
type Headword = {
// The written form of the headword, e.g. '揸正嚟做'.
text: string;
// All readings given for this headword; an entry may list more than one,
// e.g. ['zaa1 zeng3 lai4 zou6', 'zaa1 zeng3 lei4 zou6'].
readings: string[];
};

type Tag = {
name: string;
value: string;
Expand All @@ -50,7 +55,7 @@ type LanguageData = {

type DictionaryEntry = {
id: number;
headwords: TextReadingPair[];
headwords: Headword[];
tags: Tag[];
senses: Sense[];
};
6 changes: 3 additions & 3 deletions src/util/csv/parseEntryToJson.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ function parseEntry(entry) {
*/
function parseHeadwords(headwordString) {
  // Input format: comma-separated headwords, each written as
  // `text:reading[:reading...]`, e.g. '着:zoek6,著:zoek6' or
  // '揸正嚟做:zaa1 zeng3 lai4 zou6:zaa1 zeng3 lei4 zou6'.
  // Returns one `{ text, readings }` object per headword, in input order.
  // Throws an Error when a headword has no text, no reading at all, or an
  // empty reading segment.
  return headwordString.split(',').map((headword) => {
    const [text, ...readings] = headword.split(':');
    // A rest element is always an array (hence truthy), so a missing reading
    // must be detected by length/emptiness rather than with `!readings`.
    const hasInvalidReading =
      readings.length === 0 || readings.some((reading) => !reading);
    if (!text || hasInvalidReading) {
      throw new Error(`Invalid headword: ${headword}`);
    }
    return {
      text,
      readings,
    };
  });
}
Expand Down
10 changes: 6 additions & 4 deletions src/util/yomitan/convertEntryToYomitanTerm.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ function convertEntryToYomitanTerms(entry) {

const detailedDefinition = convertEntryToDetailedDefinition(entry);
for (const headword of entry.headwords) {
const termEntry = new TermEntry(headword.text)
.setReading(headword.reading)
.addDetailedDefinition(detailedDefinition);
yomitanTerms.push(termEntry.build());
for (const reading of headword.readings) {
const termEntry = new TermEntry(headword.text)
.setReading(reading)
.addDetailedDefinition(detailedDefinition);
yomitanTerms.push(termEntry.build());
}
}

return yomitanTerms;
Expand Down
23 changes: 17 additions & 6 deletions src/util/yomitan/convertHeadwordsToSC.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ import { convertReadingToRubySC } from './parseTextToSC.js';

/**
* Converts headword(s) to structured content.
* @param {TextReadingPair[]} headwords
* @param {Headword[]} headwords
*/
function convertHeadwordsToSC(headwords) {
const headwordsSCList = headwords.map(headwordToSC);
const headwordsSCList = headwordsToSC(headwords);
const separator = '・';
/**
* @type {import('yomichan-dict-builder/dist/types/yomitan/termbank').StructuredContent[]}
Expand Down Expand Up @@ -36,11 +36,22 @@ function convertHeadwordsToSC(headwords) {

/**
 * Converts headwords to structured content, producing one ruby element per
 * (headword text, reading) combination. Output keeps the input order:
 * headwords in the order given, and each headword's readings in their own order.
 * @param {Headword[]} headwords
 * @returns {import('yomichan-dict-builder/dist/types/yomitan/termbank').StructuredContent[]}
 */
function headwordsToSC(headwords) {
  // flatMap flattens the per-headword arrays of ruby elements into one list.
  return headwords.flatMap(({ text, readings }) =>
    readings.map((reading) => convertReadingToRubySC(text, reading))
  );
}

export { convertHeadwordsToSC };
2 changes: 1 addition & 1 deletion src/util/yomitan/convertSenseToSC.js
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ function convertLanguageEntryToDiv(language, languageTexts) {
data: {
wordshk: 'langtext',
},
content: convertTextToSC(languageText, languageInfo.langCode),
content: convertTextToSC(languageText, language),
};
// Change text size for selected languages
const cjkLangs = ['yue', 'zho', 'jpn', 'kor', 'lzh'];
Expand Down
20 changes: 15 additions & 5 deletions src/util/yomitan/parseTextToSC.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ function convertTextToSC(rawText, languageCode) {

try {
const readings = parseCantoneseReadings(phrase, reading);
return readings.map(convertReadingToRubySC);
return readings.map(({ text, reading }) =>
convertReadingToRubySC(text, reading)
);
} catch (error) {
return cleanedText;
}
Expand All @@ -44,17 +46,25 @@ function cleanRawText(rawText) {

/**
* Builds a ruby structured content object that annotates the given text with its reading
* @param {TextReadingPair} reading
* @param {string} text
* @param {string} reading
* @returns {import("yomichan-dict-builder/dist/types/yomitan/termbank").StructuredContent}
*/
function convertReadingToRubySC(reading) {
function convertReadingToRubySC(text, reading) {
// Check that both text and reading are type string, if not then cast to string
if (typeof text !== 'string') {
text = String(text);
}
if (typeof reading !== 'string') {
reading = String(reading);
}
return {
tag: 'ruby',
content: [
reading.text,
text,
{
tag: 'rt',
content: reading.reading,
content: reading,
},
],
};
Expand Down

0 comments on commit 824919c

Please sign in to comment.