Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse Multiple Readings for the Same Entry #19

Merged
merged 4 commits into from
Jan 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 45 additions & 8 deletions src/test/parseEntry.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ const expectedEntries = [
headwords: [
{
text: '大電',
reading: 'daai6 din6',
readings: ['daai6 din6'],
},
],
tags: [
Expand All @@ -35,7 +35,7 @@ const expectedEntries = [
headwords: [
{
text: '發電廠',
reading: 'faat3 din6 cong2',
readings: ['faat3 din6 cong2'],
},
],
tags: [
Expand All @@ -59,7 +59,7 @@ const expectedEntries = [
headwords: [
{
text: '排污',
reading: 'paai4 wu1',
readings: ['paai4 wu1'],
},
],
tags: [
Expand Down Expand Up @@ -114,7 +114,7 @@ const expectedEntries = [
headwords: [
{
text: '揀選',
reading: 'gaan2 syun2',
readings: ['gaan2 syun2'],
},
],
tags: [
Expand Down Expand Up @@ -163,7 +163,7 @@ const expectedEntries = [
headwords: [
{
text: '背景',
reading: 'bui3 ging2',
readings: ['bui3 ging2'],
},
],
tags: [
Expand Down Expand Up @@ -220,7 +220,7 @@ const expectedEntries = [
headwords: [
{
text: '天干地支',
reading: 'tin1 gon1 dei6 zi1',
readings: ['tin1 gon1 dei6 zi1'],
},
],
tags: [
Expand Down Expand Up @@ -279,11 +279,11 @@ const expectedEntries = [
headwords: [
{
text: '着',
reading: 'zoek6',
readings: ['zoek6'],
},
{
text: '著',
reading: 'zoek6',
readings: ['zoek6'],
},
],
tags: [
Expand Down Expand Up @@ -371,6 +371,43 @@ const expectedEntries = [
},
],
},
{
id: 93305,
headwords: [
{
text: '揸正嚟做',
readings: ['zaa1 zeng3 lai4 zou6', 'zaa1 zeng3 lei4 zou6'],
},
],
tags: [
{
name: 'pos',
value: '動詞',
},
{
name: 'sim',
value: '揸正',
},
],
senses: [
{
explanation: {
yue: ['嚴格依照規矩,不留餘地,冇人情講'],
eng: [
'to follow the rules strictly; to "go by the book"; to leave no room for discretion',
],
},
egs: [
{
yue: [
'唔好怪我揸正嚟做。 (m4 hou2 gwaai3 ngo5 zaa1 zeng3 lei4 zou6.)',
],
eng: ["Don't blame me for following the rules too strictly."],
},
],
},
],
},
];

/**
Expand Down
7 changes: 7 additions & 0 deletions src/test/testdata.csv
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,10 @@ zho:你可好生給我應付着。 (nei5 ho2 hou2 sang1 kap1 ngo5 jing3 fu6 zoek
yue:你好好哋同我應付下。 (nei5 hou2 hou2 dei2 tung4 ngo5 jing3 fu6 haa5.)
yue:你小心啲同我應付下。 (nei5 siu2 sam1 di1 tung4 ngo5 jing3 fu6 haa5.)
eng:Handle this well (for me).",,OK,已公開
93305,揸正嚟做:zaa1 zeng3 lai4 zou6:zaa1 zeng3 lei4 zou6,"(pos:動詞)(sim:揸正)
<explanation>
yue:嚴格依照規矩,不留餘地,冇人情講
eng:to follow the rules strictly; to ""go by the book""; to leave no room for discretion
<eg>
yue:唔好怪我揸正嚟做。 (m4 hou2 gwaai3 ngo5 zaa1 zeng3 lei4 zou6.)
eng:Don't blame me for following the rules too strictly.",,OK,已公開
7 changes: 6 additions & 1 deletion src/types.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ type TextReadingPair = {
reading: string;
};

/**
 * A dictionary headword: one written form paired with every reading
 * listed for it.
 */
type Headword = {
  /** Written form of the headword, e.g. '揸正嚟做'. */
  text: string;
  /**
   * Readings given for `text`. A headword may have more than one, e.g.
   * ['zaa1 zeng3 lai4 zou6', 'zaa1 zeng3 lei4 zou6'].
   */
  readings: string[];
};

type Tag = {
name: string;
value: string;
Expand All @@ -50,7 +55,7 @@ type LanguageData = {

/**
 * A single parsed dictionary entry.
 */
type DictionaryEntry = {
  /** Numeric id of the entry (e.g. 93305). */
  id: number;
  /**
   * Headwords of the entry. Each headword carries its own list of readings,
   * so `Headword` is used here rather than the single-reading
   * `TextReadingPair`.
   */
  headwords: Headword[];
  /** Name/value tags attached to the entry (e.g. `pos`, `sim`). */
  tags: Tag[];
  /** Senses of the entry: explanations with optional examples. */
  senses: Sense[];
};
6 changes: 3 additions & 3 deletions src/util/csv/parseEntryToJson.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ function parseEntry(entry) {
*/
function parseHeadwords(headwordString) {
  // Headwords are comma-separated; each one has the form
  // `text:reading[:reading...]`, so everything after the first colon-separated
  // field is a reading of that headword.
  return headwordString.split(',').map((headword) => {
    const [text, ...readings] = headword.split(':');
    // A rest-destructured array is always truthy, even when empty, so a plain
    // `!readings` check can never fire. Validate the contents instead: there
    // must be at least one reading and none of them may be an empty string.
    const hasValidReadings =
      readings.length > 0 && readings.every((reading) => reading !== '');
    if (!text || !hasValidReadings) {
      throw new Error(`Invalid headword: ${headword}`);
    }
    return {
      text,
      readings,
    };
  });
}
Expand Down
10 changes: 6 additions & 4 deletions src/util/yomitan/convertEntryToYomitanTerm.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@ function convertEntryToYomitanTerms(entry) {

const detailedDefinition = convertEntryToDetailedDefinition(entry);
for (const headword of entry.headwords) {
const termEntry = new TermEntry(headword.text)
.setReading(headword.reading)
.addDetailedDefinition(detailedDefinition);
yomitanTerms.push(termEntry.build());
for (const reading of headword.readings) {
const termEntry = new TermEntry(headword.text)
.setReading(reading)
.addDetailedDefinition(detailedDefinition);
yomitanTerms.push(termEntry.build());
}
}

return yomitanTerms;
Expand Down
23 changes: 17 additions & 6 deletions src/util/yomitan/convertHeadwordsToSC.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ import { convertReadingToRubySC } from './parseTextToSC.js';

/**
* Converts headword(s) to structured content.
* @param {TextReadingPair[]} headwords
* @param {Headword[]} headwords
*/
function convertHeadwordsToSC(headwords) {
const headwordsSCList = headwords.map(headwordToSC);
const headwordsSCList = headwordsToSC(headwords);
const separator = '・';
/**
* @type {import('yomichan-dict-builder/dist/types/yomitan/termbank').StructuredContent[]}
Expand Down Expand Up @@ -36,11 +36,22 @@ function convertHeadwordsToSC(headwords) {

/**
* Converts a headword to structured content.
* @param {TextReadingPair} headword
* @returns {import('yomichan-dict-builder/dist/types/yomitan/termbank').StructuredContent}
* @param {Headword[]} headwords
* @returns {import('yomichan-dict-builder/dist/types/yomitan/termbank').StructuredContent[]}
*/
function headwordToSC(headword) {
return convertReadingToRubySC(headword);
function headwordsToSC(headwords) {
  // Produce one ruby node per (headword text, reading) pair. Output order is
  // headword order, then reading order within each headword. A headword with
  // no readings contributes nothing.
  return headwords.flatMap(({ text, readings }) =>
    readings.map((reading) => convertReadingToRubySC(text, reading))
  );
}

export { convertHeadwordsToSC };
2 changes: 1 addition & 1 deletion src/util/yomitan/convertSenseToSC.js
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ function convertLanguageEntryToDiv(language, languageTexts) {
data: {
wordshk: 'langtext',
},
content: convertTextToSC(languageText, languageInfo.langCode),
content: convertTextToSC(languageText, language),
};
// Change text size for selected languages
const cjkLangs = ['yue', 'zho', 'jpn', 'kor', 'lzh'];
Expand Down
20 changes: 15 additions & 5 deletions src/util/yomitan/parseTextToSC.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ function convertTextToSC(rawText, languageCode) {

try {
const readings = parseCantoneseReadings(phrase, reading);
return readings.map(convertReadingToRubySC);
return readings.map(({ text, reading }) =>
convertReadingToRubySC(text, reading)
);
} catch (error) {
return cleanedText;
}
Expand All @@ -44,17 +46,25 @@ function cleanRawText(rawText) {

/**
* Parses a text string into a structured content object with ruby text for readings
* @param {TextReadingPair} reading
* @param {string} text
* @param {string} reading
* @returns {import("yomichan-dict-builder/dist/types/yomitan/termbank").StructuredContent}
*/
function convertReadingToRubySC(reading) {
function convertReadingToRubySC(text, reading) {
// Check that both text and reading are type string, if not then cast to string
if (typeof text !== 'string') {
text = String(text);
}
if (typeof reading !== 'string') {
reading = String(reading);
}
return {
tag: 'ruby',
content: [
reading.text,
text,
{
tag: 'rt',
content: reading.reading,
content: reading,
},
],
};
Expand Down