Skip to content

Commit

Permalink
Fixed incorrect implementation of bag distance (reported by R. Feldt)
Browse files Browse the repository at this point in the history
  • Loading branch information
Konrad Rieck committed Apr 12, 2016
1 parent 298c951 commit 074a93c
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 32 deletions.
12 changes: 7 additions & 5 deletions src/measures/dist_bag.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ static void bag_destroy(bag_t * xh)
*/
float dist_bag_compare(hstring_t x, hstring_t y)
{
float d = 0;
float xd = 0, yd = 0;
bag_t *xh, *yh, *xb, *yb;

xh = bag_create(x);
Expand All @@ -113,18 +113,20 @@ float dist_bag_compare(hstring_t x, hstring_t y)
for (xb = xh; xb != NULL; xb = xb->hh.next) {
HASH_FIND(hh, yh, &(xb->sym), sizeof(sym_t), yb);
if (!yb) {
d += xb->cnt;
xd += xb->cnt;
} else {
d += fabs(xb->cnt - yb->cnt);
float diff = xb->cnt - yb->cnt;
xd += fmax(+diff, 0);
yd += fmax(-diff, 0);
missing -= yb->cnt;
}
}
d += missing;
yd += missing;

bag_destroy(xh);
bag_destroy(yh);

return lnorm(n, d, x, y);
return lnorm(n, fmax(xd, yd), x, y);
}

/** @} */
32 changes: 16 additions & 16 deletions tests/check_measures.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
dist_bag
0,12,19,15,27,30,18,15
12,0,19,15,23,30,18,19
19,19,0,18,26,23,23,18
15,15,18,0,22,31,19,16
27,23,26,22,0,37,27,28
30,30,23,31,37,0,32,29
18,18,23,19,27,32,0,21
15,19,18,16,28,29,21,0
0,9,11,12,16,24,9,10
9,0,14,9,17,27,12,15
11,14,0,15,14,19,13,10
12,9,15,0,18,29,14,15
16,17,14,18,0,25,16,14
24,27,19,29,25,0,25,21
9,12,13,14,16,25,0,13
10,15,10,15,14,21,13,0
dist_compression
0.12,0.68,0.5893,0.68,0.7143,0.8049,0.68,0.6833
0.68,0.1053,0.7143,0.5789,0.7143,0.8049,0.68,0.7333
Expand Down Expand Up @@ -89,14 +89,14 @@ dist_osa
16,14,17,16,19,29,0,19
18,16,17,15,21,24,19,0
kern_distance
289,133,164,64,22,307,127,274
133,121,80,-20,38,223,43,122
164,80,400,70,104,548,80,280
64,-20,70,64,32,164,-4,146
22,38,104,32,484,170,22,92
307,223,548,164,170,1225,245,434
127,43,80,-4,22,245,289,166
274,122,280,146,92,434,166,484
289,164.5,284,104.5,258.5,469,248.5,336.5
164.5,121,162.5,52,158,308.5,133,190
284,162.5,400,119.5,344,632,260,392
104.5,52,119.5,64,112,224,78.5,161.5
258.5,158,344,112,484,542,258.5,386
469,308.5,632,224,542,1225,444.5,634
248.5,133,260,78.5,258.5,444.5,289,302
336.5,190,392,161.5,386,634,302,484
kern_spectrum
15,0,3,0,0,0,0,1
0,9,0,0,0,0,0,0
Expand Down
27 changes: 16 additions & 11 deletions tests/dist_bag.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,26 +39,31 @@ struct hstring_test tests[] = {
{"a", "a", "", 0},
{"ab", "ba", "", 0},
{"bab", "ba", "", 1},
{"abba", "babb", "", 2},
{"a.b", "a.c", "", 2},
{".a.b.", "a..c.", "", 2},
{"abba", "babb", "", 1},
{"a.b", "a.c", "", 1},
{".a.b.", "a..c.", "", 1},
/* Comparison using tokens */
{"", "", ".", 0},
{"a", "", ".", 1},
{"", "a", ".", 1},
{"a", "a", ".", 0},
{"ab", "ba", ".", 2},
{"bab", "ba", ".", 2},
{"abba", "babb", ".", 2},
{"a.b", "a.c", ".", 2},
{".a.b.", "a..c.", ".", 2},
{"ab", "ba", ".", 1},
{"bab", "ba", ".", 1},
{"abba", "babb", ".", 1},
{"a.b", "a.c", ".", 1},
{".a.b.", "a..c.", ".", 1},
/* Further test cases */
{"abcd", "axcy", "", 4},
{"abc", "axcy", "", 3},
{"abcd", "xcy", "", 5},
{"abcd", "axcy", "", 2},
{"abc", "axcy", "", 2},
{"abcd", "xcy", "", 3},
{".x.y.", ".x.y.", ".", 0},
{"x...y..", "...x..y", ".", 0},
{".x.y", "x.y.", ".", 0},
/* Examples from paper by Bartolini et al. */
{"spire", "fare", "", 3},
{"fare", "spire", "", 3},
{"spire", "paris", "", 1},
{"paris", "spire", "", 1},
{NULL}
};

Expand Down

0 comments on commit 074a93c

Please sign in to comment.