From f39a14237e7bf528b185e5dc29eebee701ab39ae Mon Sep 17 00:00:00 2001 From: Ray Lillywhite Date: Fri, 27 Jan 2017 23:51:03 -0800 Subject: [PATCH 1/4] Check for simplified version first while calculating frequency --- lib/dictionary.js | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/dictionary.js b/lib/dictionary.js index a2fe0c7..15034e7 100644 --- a/lib/dictionary.js +++ b/lib/dictionary.js @@ -379,16 +379,16 @@ function determinePhoneticRegularity(decomposition){ } function getCharacterFrequency(character){ - if('undefined' != typeof charfreq[character]) return charfreq[character]; - else { - var traditional_character = definitionLookup(character); - if (traditional_character && traditional_character[0]) + var traditional_character = definitionLookup(character); + if (traditional_character && traditional_character[0]) + { + if('undefined' != typeof charfreq[traditional_character[0].simplified]) { - if('undefined' != typeof charfreq[traditional_character[0].simplified]) - { - return charfreq[traditional_character[0].simplified]; - } + return charfreq[traditional_character[0].simplified]; } + } else if('undefined' != typeof charfreq[character]) { + return charfreq[character]; + } else { return 'Character not found'; } } From 6c77d4685d25663377407a49300daa787382d4c1 Mon Sep 17 00:00:00 2001 From: Niel de la Rouviere Date: Fri, 30 Jun 2017 12:05:54 +0200 Subject: [PATCH 2/4] Add more tests to catch additional cases with traditional characters using simplified characters for frequency data --- test/all.js | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/test/all.js b/test/all.js index 6e306f1..a533e74 100644 --- a/test/all.js +++ b/test/all.js @@ -22,7 +22,7 @@ describe('hanzidecomposer', function(){ assert(hanzi.getRadicalMeaning('氵'), "water"); }); - it("gets character frequency data", function() { + it("gets character frequency data for simplified character", function() { assert.deepEqual(hanzi.getCharacterFrequency('热'), { number: '606', character: '热', count: '67051', @@ -31,6 +31,33 @@ describe('hanzidecomposer', function(){ meaning: 'heat/to heat up/fervent/hot (of weather)/warm up' }); }); + it("gets character frequency data for traditional character", function() { + assert.deepEqual(hanzi.getCharacterFrequency('熱'), { number: '606', + character: '热', + count: '67051', + percentage: '79.8453694124', + pinyin: 're4', + meaning: 'heat/to heat up/fervent/hot (of weather)/warm up' }); + }); + + it("gets character frequency data for simplified character with a previously different traditional frequency count", function() { + assert.deepEqual(hanzi.getCharacterFrequency('认'), { number: '213', + character: '认', + count: '191866', + percentage: '57.0890429779', + pinyin: 'ren4', + meaning: 'to recognize/to know/to admit' }); + }); + + it("gets character frequency data for traditional character with a previously different traditional frequency count", function() { + assert.deepEqual(hanzi.getCharacterFrequency('認'), { number: '213', + character: '认', + count: '191866', + percentage: '57.0890429779', + pinyin: 'ren4', + meaning: 'to recognize/to know/to admit' }); + }); + it("gets all characters with a given component", function(){ assert.deepEqual(hanzi.getCharactersWithComponent('囗'), [ '国','因','西','回','口','四','团','图','围','困','恩','固','烟','园','窗','圆','惯','圈','贯','衰','菌','傻','姻','咽','嗯','囚','捆','茵','粤','瑙','圃','囱','涸','媲','锢','胭','泗','蓑','囤','囿','泅','摁','囡','帼','氤','蝈','邋','蓖','崮','囫','囟','掼','圜','囵','驷','阃','鬣','囹','痼','圄','卣','掴','腦','榱','篦','硇','涠','洇','總','鱲','囝','貔','圉','溷','缞','鲴','悃','铟','腘','骢','躐','謴','璁','蒽','骃','鯝','镴','硱','鬛','逌','睏','秵','絪','駰','麕','螕','裀','稛','縕','糰','箇','膕','綑','臘','箘','聰','蔥','驄','薀','祻','繌','圊','罆','謥','貫','鏆','銦','蒕','簑','碅','薗','釦','稇','蜠','蠟','醞' ]) }); From 21d6686bbee7e62f984e20e0faa580ad20d17b53 Mon Sep 17 00:00:00 2001 From: Niel de la Rouviere Date: Fri, 30 Jun 2017 12:06:51 +0200 Subject: [PATCH 3/4] Bump to v0.5.2 --- changelog.txt | 3 ++- package.json | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/changelog.txt b/changelog.txt index 89fc954..050d4b4 100644 --- a/changelog.txt +++ b/changelog.txt @@ -19,4 +19,5 @@ v0.4.0 - Added new function: getCharactersWithComponent v0.4.1 - Bug fix to way the characterswithcomponent object was compiled. v0.4.2 - Fix 耂 encoding error v0.5.0 - Add Longest Match segment function thanks to nikdvp! Browserify builds now also function properly using brfs. -v0.5.1 - Add licence information to package.json. Thanks zurawiki! \ No newline at end of file +v0.5.1 - Add licence information to package.json. Thanks zurawiki! +v0.5.2 - Fix frequency info for some traditional characters. Traditional characters now default to using the simplified list. Thanks raylillywhite! \ No newline at end of file diff --git a/package.json b/package.json index e904f9f..c01a898 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "hanzi", "author": "Niel de la Rouviere", "description": "HanziJS is a Chinese character and NLP module for Chinese language processing for Node.js", - "version": "0.5.1", + "version": "0.5.2", "license": "MIT", "main": "index.js", "browserify": { "transform": [ "brfs" ] }, From ab578ba4aadce586a4685ea061480d335e39be6d Mon Sep 17 00:00:00 2001 From: Niel de la Rouviere Date: Fri, 30 Jun 2017 12:11:52 +0200 Subject: [PATCH 4/4] Bump to v0.6.0 instead. As this might be minor/benign breaking change from previous versions --- changelog.txt | 2 +- package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/changelog.txt b/changelog.txt index 050d4b4..323b7ff 100644 --- a/changelog.txt +++ b/changelog.txt @@ -20,4 +20,4 @@ v0.4.1 - Bug fix to way the characterswithcomponent object was compiled. v0.4.2 - Fix 耂 encoding error v0.5.0 - Add Longest Match segment function thanks to nikdvp! Browserify builds now also function properly using brfs. v0.5.1 - Add licence information to package.json. Thanks zurawiki! -v0.5.2 - Fix frequency info for some traditional characters. Traditional characters now default to using the simplified list. Thanks raylillywhite! \ No newline at end of file +v0.6.0 - Fix frequency info for some traditional characters. Traditional characters now default to using the simplified list. Thanks raylillywhite! \ No newline at end of file diff --git a/package.json b/package.json index c01a898..2d58134 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "hanzi", "author": "Niel de la Rouviere", "description": "HanziJS is a Chinese character and NLP module for Chinese language processing for Node.js", - "version": "0.5.2", + "version": "0.6.0", "license": "MIT", "main": "index.js", "browserify": { "transform": [ "brfs" ] },