-
Notifications
You must be signed in to change notification settings - Fork 1
/
bf_cms_hll_membership_testing.R
34 lines (28 loc) · 1.03 KB
/
bf_cms_hll_membership_testing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# We compare the false positive rates of
# - Bloom filters
# - Count-Min sketches
# - HyperLogLogs
# for membership testing.
#
# https://github.com/echen/streaming-simulations/wiki/Membership-Testing%3A-Bloom-Filter-vs.-Count-Min-Sketch-vs.-HyperLogLog
library(ggplot2)
library(scales)
#######################################
# STEP 1. Read in the simulated data. #
#######################################
# Each Bloom filter is of size 4096 bits, and uses 200 trials to estimate the
# FP rate.
d = read.csv("data/bf_cms_hll_membership.tsv",
sep = "\t",
header = F,
col.names = c("cardinality", "fp_rate", "Structure"))
############################
# STEP 2. Plot everything. #
############################
base =
qplot(cardinality, fp_rate, data = d, geom = "line",
colour = Structure,
xlab = "Cardinality", ylab = "False Positive Rate",
main = "Membership Testing under Three Streaming Algorithms")
colors = scale_colour_manual(values = c("#377EB8", "#E41A1C", "#4DAF4A"))
base + colors + scale_y_continuous(labels = percent)