From 5dc9dbf50fdb9cf490c5638d502a9335a290875c Mon Sep 17 00:00:00 2001 From: MarvNC Date: Sun, 21 Jan 2024 16:32:20 -0800 Subject: [PATCH] Fix issue with tags splitting naively at colons --- src/test/parseEntry.test.js | 38 +++++++++++++++++++++++++ src/test/testdata.csv | 3 ++ src/util/entryParse/parseEntryToJson.js | 6 ++-- 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/src/test/parseEntry.test.js b/src/test/parseEntry.test.js index 49cbba7..5e20230 100644 --- a/src/test/parseEntry.test.js +++ b/src/test/parseEntry.test.js @@ -408,6 +408,44 @@ const expectedEntries = [ }, ], }, + { + id: 96792, + headwords: [ + { + text: '牛河博士', + readings: ['ngau4 ho2 bok3 si6'], + }, + ], + tags: [ + { + name: 'pos', + value: '名詞', + }, + { + name: 'label', + value: '專名', + }, + { + name: 'label', + value: '潮語', + }, + { + name: 'ref', + value: 'https://evchk.fandom.com/zh/wiki/曹宏威', + }, + ], + senses: [ + { + explanation: { + yue: [ + '香港#學者 曹宏威喺#網民 之間嘅叫法,佢因為#乾炒牛河 而一舉成名', + ], + eng: ['Wung-wai Tso, literally "Doctor Beef Chow-fun"'], + }, + egs: [], + }, + ], + }, ]; /** diff --git a/src/test/testdata.csv b/src/test/testdata.csv index 750392e..8fdfc9a 100644 --- a/src/test/testdata.csv +++ b/src/test/testdata.csv @@ -130,3 +130,6 @@ eng:to follow the rules strictly; to ""go by the book""; to leave no room for di yue:唔好怪我揸正嚟做。 (m4 hou2 gwaai3 ngo5 zaa1 zeng3 lei4 zou6.) eng:Don't blame me for following the rules too strictly.",,OK,已公開 +96792,牛河博士:ngau4 ho2 bok3 si6,"(pos:名詞)(label:專名)(label:潮語)(ref:https://evchk.fandom.com/zh/wiki/曹宏威) +yue:香港#學者 曹宏威喺#網民 之間嘅叫法,佢因為#乾炒牛河 而一舉成名 +eng:Wung-wai Tso, literally ""Doctor Beef Chow-fun""",,OK,未公開 \ No newline at end of file diff --git a/src/util/entryParse/parseEntryToJson.js b/src/util/entryParse/parseEntryToJson.js index fc4e208..f0eb6b4 100644 --- a/src/util/entryParse/parseEntryToJson.js +++ b/src/util/entryParse/parseEntryToJson.js @@ -72,9 +72,9 @@ function parseTags(entryLines) { } const tags = firstLine.split(')(').map((tag) => { tag = tag.replace(/[()]/g, ''); - let [name, value] = tag.split(':'); - name = name.trim(); - value = value.trim(); + let colonIndex = tag.indexOf(':'); + const name = tag.slice(0, colonIndex).trim(); + const value = tag.slice(colonIndex + 1).trim(); return { name, value,