From 074a93c469914581f55ead80530d2a75696277ab Mon Sep 17 00:00:00 2001 From: Konrad Rieck Date: Tue, 12 Apr 2016 11:44:14 +0200 Subject: [PATCH] fixed incorrect implementation of bag distance (reported by r. feldt) --- src/measures/dist_bag.c | 12 +++++++----- tests/check_measures.txt | 32 ++++++++++++++++---------------- tests/dist_bag.c | 27 ++++++++++++++++----------- 3 files changed, 39 insertions(+), 32 deletions(-) diff --git a/src/measures/dist_bag.c b/src/measures/dist_bag.c index 39edfd9..1cdd2f1 100644 --- a/src/measures/dist_bag.c +++ b/src/measures/dist_bag.c @@ -103,7 +103,7 @@ static void bag_destroy(bag_t * xh) */ float dist_bag_compare(hstring_t x, hstring_t y) { - float d = 0; + float xd = 0, yd = 0; bag_t *xh, *yh, *xb, *yb; xh = bag_create(x); @@ -113,18 +113,20 @@ float dist_bag_compare(hstring_t x, hstring_t y) for (xb = xh; xb != NULL; xb = xb->hh.next) { HASH_FIND(hh, yh, &(xb->sym), sizeof(sym_t), yb); if (!yb) { - d += xb->cnt; + xd += xb->cnt; } else { - d += fabs(xb->cnt - yb->cnt); + float diff = xb->cnt - yb->cnt; + xd += fmax(+diff, 0); + yd += fmax(-diff, 0); missing -= yb->cnt; } } - d += missing; + yd += missing; bag_destroy(xh); bag_destroy(yh); - return lnorm(n, d, x, y); + return lnorm(n, fmax(xd, yd), x, y); } /** @} */ diff --git a/tests/check_measures.txt b/tests/check_measures.txt index e6fdcdd..589ecae 100644 --- a/tests/check_measures.txt +++ b/tests/check_measures.txt @@ -1,12 +1,12 @@ dist_bag -0,12,19,15,27,30,18,15 -12,0,19,15,23,30,18,19 -19,19,0,18,26,23,23,18 -15,15,18,0,22,31,19,16 -27,23,26,22,0,37,27,28 -30,30,23,31,37,0,32,29 -18,18,23,19,27,32,0,21 -15,19,18,16,28,29,21,0 +0,9,11,12,16,24,9,10 +9,0,14,9,17,27,12,15 +11,14,0,15,14,19,13,10 +12,9,15,0,18,29,14,15 +16,17,14,18,0,25,16,14 +24,27,19,29,25,0,25,21 +9,12,13,14,16,25,0,13 +10,15,10,15,14,21,13,0 dist_compression 0.12,0.68,0.5893,0.68,0.7143,0.8049,0.68,0.6833 0.68,0.1053,0.7143,0.5789,0.7143,0.8049,0.68,0.7333 @@ -89,14 +89,14 @@ dist_osa 16,14,17,16,19,29,0,19 18,16,17,15,21,24,19,0 kern_distance -289,133,164,64,22,307,127,274 -133,121,80,-20,38,223,43,122 -164,80,400,70,104,548,80,280 -64,-20,70,64,32,164,-4,146 -22,38,104,32,484,170,22,92 -307,223,548,164,170,1225,245,434 -127,43,80,-4,22,245,289,166 -274,122,280,146,92,434,166,484 +289,164.5,284,104.5,258.5,469,248.5,336.5 +164.5,121,162.5,52,158,308.5,133,190 +284,162.5,400,119.5,344,632,260,392 +104.5,52,119.5,64,112,224,78.5,161.5 +258.5,158,344,112,484,542,258.5,386 +469,308.5,632,224,542,1225,444.5,634 +248.5,133,260,78.5,258.5,444.5,289,302 +336.5,190,392,161.5,386,634,302,484 kern_spectrum 15,0,3,0,0,0,0,1 0,9,0,0,0,0,0,0 diff --git a/tests/dist_bag.c b/tests/dist_bag.c index 7e2dba0..62c47c4 100644 --- a/tests/dist_bag.c +++ b/tests/dist_bag.c @@ -39,26 +39,31 @@ struct hstring_test tests[] = { {"a", "a", "", 0}, {"ab", "ba", "", 0}, {"bab", "ba", "", 1}, - {"abba", "babb", "", 2}, - {"a.b", "a.c", "", 2}, - {".a.b.", "a..c.", "", 2}, + {"abba", "babb", "", 1}, + {"a.b", "a.c", "", 1}, + {".a.b.", "a..c.", "", 1}, /* Comparison using tokens */ {"", "", ".", 0}, {"a", "", ".", 1}, {"", "a", ".", 1}, {"a", "a", ".", 0}, - {"ab", "ba", ".", 2}, - {"bab", "ba", ".", 2}, - {"abba", "babb", ".", 2}, - {"a.b", "a.c", ".", 2}, - {".a.b.", "a..c.", ".", 2}, + {"ab", "ba", ".", 1}, + {"bab", "ba", ".", 1}, + {"abba", "babb", ".", 1}, + {"a.b", "a.c", ".", 1}, + {".a.b.", "a..c.", ".", 1}, /* Further test cases */ - {"abcd", "axcy", "", 4}, - {"abc", "axcy", "", 3}, - {"abcd", "xcy", "", 5}, + {"abcd", "axcy", "", 2}, + {"abc", "axcy", "", 2}, + {"abcd", "xcy", "", 3}, {".x.y.", ".x.y.", ".", 0}, {"x...y..", "...x..y", ".", 0}, {".x.y", "x.y.", ".", 0}, + /* Examples from paper by Bartolini et al. */ + {"spire", "fare", "", 3}, + {"fare", "spire", "", 3}, + {"spire", "paris", "", 1}, + {"paris", "spire", "", 1}, {NULL} };