Skip to content

Commit

Permalink
Merge pull request #28 from nieldlr/frequency-traditional-fix
Browse files Browse the repository at this point in the history
Frequency traditional fix
  • Loading branch information
nieldlr authored Jun 30, 2017
2 parents c44cbf8 + ab578ba commit 05b2d71
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 11 deletions.
3 changes: 2 additions & 1 deletion changelog.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ v0.4.0 - Added new function: getCharactersWithComponent
v0.4.1 - Bug fix to the way the characterswithcomponent object was compiled.
v0.4.2 - Fix 耂 encoding error
v0.5.0 - Add Longest Match segment function thanks to nikdvp! Browserify builds now also function properly using brfs.
v0.5.1 - Add licence information to package.json. Thanks zurawiki!
v0.5.1 - Add licence information to package.json. Thanks zurawiki!
v0.6.0 - Fix frequency info for some traditional characters. Traditional characters now default to using the simplified list. Thanks raylillywhite!
16 changes: 8 additions & 8 deletions lib/dictionary.js
Original file line number Diff line number Diff line change
Expand Up @@ -379,16 +379,16 @@ function determinePhoneticRegularity(decomposition){
}

function getCharacterFrequency(character){
if('undefined' != typeof charfreq[character]) return charfreq[character];
else {
var traditional_character = definitionLookup(character);
if (traditional_character && traditional_character[0])
var traditional_character = definitionLookup(character);
if (traditional_character && traditional_character[0])
{
if('undefined' != typeof charfreq[traditional_character[0].simplified])
{
if('undefined' != typeof charfreq[traditional_character[0].simplified])
{
return charfreq[traditional_character[0].simplified];
}
return charfreq[traditional_character[0].simplified];
}
} else if('undefined' != typeof charfreq[character]) {
return charfreq[character];
} else {
return 'Character not found';
}
}
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "hanzi",
"author": "Niel de la Rouviere",
"description": "HanziJS is a Chinese character and NLP module for Chinese language processing for Node.js",
"version": "0.5.1",
"version": "0.6.0",
"license": "MIT",
"main": "index.js",
"browserify": { "transform": [ "brfs" ] },
Expand Down
29 changes: 28 additions & 1 deletion test/all.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ describe('hanzidecomposer', function(){
assert(hanzi.getRadicalMeaning('氵'), "water");
});

it("gets character frequency data", function() {
it("gets character frequency data for simplified character", function() {
assert.deepEqual(hanzi.getCharacterFrequency('热'), { number: '606',
character: '热',
count: '67051',
Expand All @@ -31,6 +31,33 @@ describe('hanzidecomposer', function(){
meaning: 'heat/to heat up/fervent/hot (of weather)/warm up' });
});

it("gets character frequency data for traditional character", function() {
assert.deepEqual(hanzi.getCharacterFrequency('熱'), { number: '606',
character: '热',
count: '67051',
percentage: '79.8453694124',
pinyin: 're4',
meaning: 'heat/to heat up/fervent/hot (of weather)/warm up' });
});

it("gets character frequency data for simplified character with a previously different traditional frequency count", function() {
assert.deepEqual(hanzi.getCharacterFrequency('认'), { number: '213',
character: '认',
count: '191866',
percentage: '57.0890429779',
pinyin: 'ren4',
meaning: 'to recognize/to know/to admit' });
});

it("gets character frequency data for traditional character with a previously different traditional frequency count", function() {
assert.deepEqual(hanzi.getCharacterFrequency('認'), { number: '213',
character: '认',
count: '191866',
percentage: '57.0890429779',
pinyin: 'ren4',
meaning: 'to recognize/to know/to admit' });
});

it("gets all characters with a given component", function(){
assert.deepEqual(hanzi.getCharactersWithComponent('囗'), [ '国','因','西','回','口','四','团','图','围','困','恩','固','烟','园','窗','圆','惯','圈','贯','衰','菌','傻','姻','咽','嗯','囚','捆','茵','粤','瑙','圃','囱','涸','媲','锢','胭','泗','蓑','囤','囿','泅','摁','囡','帼','氤','蝈','邋','蓖','崮','囫','囟','掼','圜','囵','驷','阃','鬣','囹','痼','圄','卣','掴','腦','榱','篦','硇','涠','洇','總','鱲','囝','貔','圉','溷','缞','鲴','悃','铟','腘','骢','躐','謴','璁','蒽','骃','鯝','镴','硱','鬛','逌','睏','秵','絪','駰','麕','螕','裀','稛','縕','糰','箇','膕','綑','臘','箘','聰','蔥','驄','薀','祻','繌','圊','罆','謥','貫','鏆','銦','蒕','簑','碅','薗','釦','稇','蜠','蠟','醞' ])
});
Expand Down

0 comments on commit 05b2d71

Please sign in to comment.