Skip to content

Commit

Permalink
Fix line ending stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
MarvNC committed Jan 19, 2024
1 parent a70b810 commit f0fb81e
Show file tree
Hide file tree
Showing 3 changed files with 250 additions and 18 deletions.
212 changes: 199 additions & 13 deletions src/test/parseEntry.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,8 @@ import { parseCSVEntries } from '../util/parseCsvEntries.js';

const testCsvFile = 'src/test/testdata.csv';

/**
* @type {DictionaryEntry[]}
*/
let entries;

const testCases = {
101613: {
const expectedEntries = [
{
id: 101613,
headwords: [
{
Expand All @@ -31,23 +26,214 @@ const testCases = {
yue: ['D電池(量詞:粒)'],
eng: ['D cells battery'],
},
examples: [],
egs: [],
},
],
},
{
id: 92456,
headwords: [
{
text: '發電廠',
reading: 'faat3 din6 cong2',
},
],
tags: [
{
name: 'pos',
value: '名詞',
},
],
glosses: [
{
explanation: {
yue: ['產生#電力 嘅大型#建築物(量詞:間/座)'],
eng: ['power plant'],
},
egs: [],
},
],
},
{
id: 82131,
headwords: [
{
text: '排污',
reading: 'paai4 wu1',
},
],
tags: [
{
name: 'pos',
value: '動詞',
},
],
glosses: [
{
explanation: {
yue: ['排走#污水'],
eng: ['to drain away sewage'],
},
egs: [
{
yue: ['排污費 (paai4 wu1)'],
eng: ['sewerage charge'],
},
{
yue: ['排污系統 (paai4 wu1 hai6 tung2)'],
eng: ['sewage system'],
},
{
yue: ['排污設施 (paai4 wu1 cit3 si1)'],
eng: ['sewage works'],
},
{
yue: ['公共排污服務 (gung1 gung6 paai4 wu1 fuk6 mou6)'],
eng: ['public sewage services'],
},
{
yue: [
'排污設備改善計劃 (paai4 wu1 cit3 bei6 goi2 sin6 gai3 waak6)',
],
eng: ['sewerage improvement programme'],
},
{
yue: [
'呢啲市區河道嘅設計以防洪及有效排污為主。 (ni1 di1 si5 keoi1 ho4 dou6 ge3 cit3 gai3 ji5 fong4 hung4 kap6 jau5 haau6 paai4 wu1 wai4 zyu2.)',
],
eng: [
'These urban channels were designed for flood prevention and effective drainage.',
],
},
],
},
],
},
{
id: 72252,
headwords: [
{
text: '揀選',
reading: 'gaan2 syun2',
},
],
tags: [
{
name: 'pos',
value: '動詞',
},
{
name: 'sim',
value: '挑選',
},
{
name: 'sim',
value: '揀',
},
{
name: 'sim',
value: '選',
},
{
name: 'sim',
value: '選擇',
},
],
glosses: [
{
explanation: {
yue: ['根據你嘅取向,喺兩樣嘢或以上當中,抽取一樣'],
eng: ['to select; to choose'],
},
egs: [
{
yue: [
'一個蠢,一個鈍,噉樣邊叫有得揀選? (jat1 go3 ceon2, jat1 go3 deon6, gam2 joeng2 bin1 giu3 jau5 dak1 gaan2 syun2?)',
],
eng: [
'This candidate is stupid and that is dumb. How can I choose among them?',
],
},
],
},
],
},
{
id: 66987,
headwords: [
{
text: '背景',
reading: 'bui3 ging2',
},
],
tags: [
{
name: 'pos',
value: '名詞',
},
],
glosses: [
{
explanation: {
yue: ['喺舞台或者現實襯托主體嘅景物、佈景、環境'],
eng: ['background; setting'],
},
egs: [
{
yue: [
'呢張相嘅背景係一啲椰樹。 (ni1 zoeng1 soeng2 ge3 bui3 ging2 hai6 jat1 di1 je4 syu6.)',
],
eng: ['The coconut trees form a background to this picture.'],
},
{
yue: [
'段片嘅背景音樂叫咩名? (dyun6 pin2 ge3 bui3 ging2 jam1 ngok6 giu3 me1 meng2?)',
],
eng: ['What is the title of the background music in the video?'],
},
],
},
{
explanation: {
yue: [
'人嘅來歷或經歷,例如家庭、教育、工作等等,亦可以指佢哋所倚靠嘅人物或者勢力',
],
eng: [
'the "background" of a person, especially their educational background, occupation, social/family connections, etc.',
],
},
egs: [
{
yue: [
'不如揾人查下佢個背景,我覺得佢好有可疑。 (bat1 jyu4 wan2 jan4 caa4 haa5 keoi5 go3 bui3 ging2, ngo5 gok3 dak1 keoi5 hou2 jau5 ho2 ji4.)',
],
eng: [
'Shall we find someone to look into his background? I think he is so suspicious.',
],
},
],
},
],
},
};
];

/**
* Entries parsed from the CSV fixture at `testCsvFile`. Left unassigned at
* declaration; populated by the `test.before` hook below and then read by
* the tests that follow.
* @type {DictionaryEntry[]}
*/
let entries;

// Parse the fixture a single time and keep the result in the shared
// `entries` variable, so individual tests only look entries up instead of
// re-reading the CSV. NOTE(review): presumably an AVA `before` hook, i.e. it
// runs ahead of the tests in this file — confirm against the test import.
test.before(async () => {
entries = await parseCSVEntries(testCsvFile);
});

test('entries is defined', (t) => {
test('CSV successfully parsed', (t) => {
t.not(entries, undefined);
});

for (const [id, expected] of Object.entries(testCases)) {
test(`Entry ${id} matches expected`, (t) => {
for (const expectedEntry of expectedEntries) {
const id = expectedEntry.id;
test(`Entry ${id}`, (t) => {
const entry = entries.find((entry) => entry.id === Number(id));
t.deepEqual(entry, expected);
t.deepEqual(entry, expectedEntry);
});
}
48 changes: 47 additions & 1 deletion src/test/testdata.csv
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,50 @@ yue:D電池(量詞:粒)
eng:D cells battery",,OK,未公開
92456,發電廠:faat3 din6 cong2,"(pos:名詞)
yue:產生#電力 嘅大型#建築物(量詞:間/座)
eng:power plant",,OK,已公開
eng:power plant",,OK,已公開
82131,排污:paai4 wu1,"(pos:動詞)
<explanation>
yue:排走#污水
eng:to drain away sewage
<eg>
yue:排污費 (paai4 wu1)
eng:sewerage charge
<eg>
yue:排污系統 (paai4 wu1 hai6 tung2)
eng:sewage system
<eg>
yue:排污設施 (paai4 wu1 cit3 si1)
eng:sewage works
<eg>
yue:公共排污服務 (gung1 gung6 paai4 wu1 fuk6 mou6)
eng:public sewage services
<eg>
yue:排污設備改善計劃 (paai4 wu1 cit3 bei6 goi2 sin6 gai3 waak6)
eng:sewerage improvement programme
<eg>
yue:呢啲市區河道嘅設計以防洪及有效排污為主。 (ni1 di1 si5 keoi1 ho4 dou6 ge3 cit3 gai3 ji5 fong4 hung4 kap6 jau5 haau6 paai4 wu1 wai4 zyu2.)
eng:These urban channels were designed for flood prevention and effective drainage.",排汙,OK,已公開
72252,揀選:gaan2 syun2,"(pos:動詞)(sim:挑選)(sim:揀)(sim:選)(sim:選擇)
<explanation>
yue:根據你嘅取向,喺兩樣嘢或以上當中,抽取一樣
eng:to select; to choose
<eg>
yue:一個蠢,一個鈍,噉樣邊叫有得揀選? (jat1 go3 ceon2, jat1 go3 deon6, gam2 joeng2 bin1 giu3 jau5 dak1 gaan2 syun2?)
eng:This candidate is stupid and that is dumb. How can I choose among them?",㨂選,OK,已公開
66987,背景:bui3 ging2,"(pos:名詞)
<explanation>
yue:喺舞台或者現實襯托主體嘅景物、佈景、環境
eng:background; setting
<eg>
yue:呢張相嘅背景係一啲椰樹。 (ni1 zoeng1 soeng2 ge3 bui3 ging2 hai6 jat1 di1 je4 syu6.)
eng:The coconut trees form a background to this picture.
<eg>
yue:段片嘅背景音樂叫咩名? (dyun6 pin2 ge3 bui3 ging2 jam1 ngok6 giu3 me1 meng2?)
eng:What is the title of the background music in the video?
----
<explanation>
yue:人嘅來歷或經歷,例如家庭、教育、工作等等,亦可以指佢哋所倚靠嘅人物或者勢力
eng:the ""background"" of a person, especially their educational background, occupation, social/family connections, etc.
<eg>
yue:不如揾人查下佢個背景,我覺得佢好有可疑。 (bat1 jyu4 wan2 jan4 caa4 haa5 keoi5 go3 bui3 ging2, ngo5 gok3 dak1 keoi5 hou2 jau5 ho2 ji4.)
eng:Shall we find someone to look into his background? I think he is so suspicious.",,OK,已公開
8 changes: 4 additions & 4 deletions src/util/parseEntry.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ function parseEntry(entry) {
const tags = parseTags(entryLines);

const explanationsText = entryLines.join('\n');
const explanationsTexts = explanationsText.split('\n----\n').map((text) => {
const explanationsTexts = explanationsText.split('\r\n----\r\n').map((text) => {
return text;
});

Expand Down Expand Up @@ -101,8 +101,8 @@ function parseTags(entryLines) {
*/
function parseGloss(entryText) {
// Remove first line explanations
entryText = entryText.replace('<explanation>\n', '');
const [explanationText, ...examplesTexts] = entryText.split('\n<eg>\n');
entryText = entryText.replace('<explanation>\r\n', '');
const [explanationText, ...examplesTexts] = entryText.split('\r\n<eg>\r\n');

/**
* @type {LanguageData}
Expand Down Expand Up @@ -131,7 +131,7 @@ function parseLanguageData(text) {
* @type {LanguageData}
*/
const languageData = {};
const lines = text.split('\n');
const lines = text.split('\r\n');
let currentLang = '';
let currentLangData = '';
for (const line of lines) {
Expand Down

0 comments on commit f0fb81e

Please sign in to comment.