Skip to content

Commit

Permalink
aging bloom filter (#170)
Browse files Browse the repository at this point in the history
* aging bloom filter

Use proto enumeration for BloomFilter type

* BloomWindow replaces DigestWindow.

Remove DigestWindow.

* add doc
  • Loading branch information
Hellblazer authored Nov 25, 2023
1 parent 8383ead commit 7dad046
Show file tree
Hide file tree
Showing 7 changed files with 337 additions and 280 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,143 @@
*/
package com.salesforce.apollo.bloomFilters;

import java.util.BitSet;
import java.util.function.Consumer;

import org.joou.ULong;

import com.salesfoce.apollo.cryptography.proto.Biff;
import com.salesforce.apollo.crypto.Digest;
import org.joou.ULong;

import java.util.BitSet;

import static com.salesfoce.apollo.cryptography.proto.Biff.Type.*;

/**
* Simplified Bloom filter for multiple types, with settable seeds and other
* parameters.
*
* @author hal.hildebrand
* Simplified Bloom filter for multiple types, with settable seeds and other parameters.
*
* @author hal.hildebrand
*/
abstract public class BloomFilter<T> {
// Bit vector backing the filter; the positions read and written are the indices produced by h.
private final BitSet bits;
// Hashing strategy: yields the bit indices for an element and exposes the seed, m and k parameters.
private final Hash<T> h;

/**
 * Creates a filter backed by a fresh {@link BitSet} allocated with an initial size of
 * {@code h.getM()} bits.
 *
 * @param h the hash that supplies bit indices and the filter parameters
 */
private BloomFilter(Hash<T> h) {
this(h, new BitSet(h.getM()));
}

/**
 * Creates a filter over an existing bit set. The bit set is stored as given (no copy is made).
 *
 * @param h    the hash that supplies bit indices and the filter parameters
 * @param bits the bit set to use as this filter's state
 */
private BloomFilter(Hash<T> h, BitSet bits) {
    this.bits = bits;
    this.h = h;
}

/**
 * Builds a new Bloom filter of the concrete subclass matching {@code type}.
 *
 * @param seed seed handed to the selected filter's constructor
 * @param n    forwarded unchanged to the selected constructor (presumably the expected number of
 *             elements - TODO confirm against the subclass constructors)
 * @param p    forwarded unchanged to the selected constructor (presumably the target false
 *             positive probability - TODO confirm against the subclass constructors)
 * @param type the kind of element the filter should handle
 * @return the filter created for the requested type
 * @throws IllegalArgumentException if {@code type} is not one of the handled types
 */
@SuppressWarnings("unchecked")
public static <Q> BloomFilter<Q> create(long seed, int n, double p, Biff.Type type) {
    final BloomFilter<?> filter;
    switch (type) {
    case DIGEST:
        filter = new DigestBloomFilter(seed, n, p);
        break;
    case INT:
        filter = new IntBloomFilter(seed, n, p);
        break;
    case LONG:
        filter = new LongBloomFilter(seed, n, p);
        break;
    case BYTES:
        filter = new BytesBloomFilter(seed, n, p);
        break;
    case STRING:
        filter = new StringBloomFilter(seed, n, p);
        break;
    case ULONG:
        filter = new ULongBloomFilter(seed, n, p);
        break;
    default:
        throw new IllegalArgumentException("Invalid type: " + type);
    }
    // Unchecked by design: the caller chooses Q and must make it agree with the requested type.
    return (BloomFilter<Q>) filter;
}

/**
 * Rebuilds a Bloom filter of the concrete subclass matching {@code type} from explicit state.
 *
 * @param seed seed handed to the selected filter's constructor
 * @param m    forwarded unchanged to the selected constructor (the {@code m} value a filter
 *             reports through {@link #toBff()})
 * @param k    forwarded unchanged to the selected constructor (the {@code k} value a filter
 *             reports through {@link #toBff()})
 * @param bits the filter's bits as an array of longs, forwarded unchanged
 * @param type the kind of element the filter should handle
 * @return the filter created for the requested type
 * @throws IllegalArgumentException if {@code type} is not one of the handled types
 */
@SuppressWarnings("unchecked")
public static <Q> BloomFilter<Q> create(long seed, int m, int k, long[] bits, Biff.Type type) {
    final BloomFilter<?> filter;
    switch (type) {
    case DIGEST:
        filter = new DigestBloomFilter(seed, m, k, bits);
        break;
    case INT:
        filter = new IntBloomFilter(seed, m, k, bits);
        break;
    case LONG:
        filter = new LongBloomFilter(seed, m, k, bits);
        break;
    case BYTES:
        filter = new BytesBloomFilter(seed, m, k, bits);
        break;
    case STRING:
        filter = new StringBloomFilter(seed, m, k, bits);
        break;
    case ULONG:
        filter = new ULongBloomFilter(seed, m, k, bits);
        break;
    default:
        throw new IllegalArgumentException("Invalid type: " + type);
    }
    // Unchecked by design: the caller chooses Q and must make it agree with the requested type.
    return (BloomFilter<Q>) filter;
}

/**
 * Reconstructs a Bloom filter from its {@link Biff} message form.
 *
 * @param bff the message carrying seed, m, k, type and the bit words
 * @return the filter created from the message contents
 */
public static <Q> BloomFilter<Q> from(Biff bff) {
    // Unbox the repeated field into the primitive array expected by create(...).
    final long[] words = bff.getBitsList().stream().mapToLong(Long::longValue).toArray();
    return create(bff.getSeed(), bff.getM(), bff.getK(), words, bff.getType());
}

/**
 * Estimates how many elements were inserted, as {@code -(m / k) * ln(1 - X / m)} where
 * {@code X} is the number of set bits.
 *
 * @param bitSet the filter's bits
 * @param k      the k parameter of the filter
 * @param m      the m parameter of the filter
 * @return the estimate; positive infinity when exactly {@code m} bits are set
 */
private static double population(BitSet bitSet, int k, int m) {
    final int setBits = bitSet.cardinality();
    final double fillRatio = setBits / ((double) m);
    final double scale = -m / ((double) k);
    return scale * Math.log(1 - fillRatio);
}

/**
 * Adds {@code element} to the filter by setting every bit its hashes select.
 *
 * @param element the element to add
 * @return {@code true} if at least one of the element's bits was not yet set, i.e. the filter did
 *         not already report the element as contained; {@code false} otherwise
 */
public boolean add(T element) {
    boolean changed = false;
    for (int index : h.hashes(element)) {
        if (!bits.get(index)) {
            changed = true;
        }
        bits.set(index);
    }
    return changed;
}

/**
 * @return the string form of the underlying bit set, as produced by {@link BitSet#toString()}
 */
public String biffString() {
    return this.bits.toString();
}

/**
 * Resets the filter by clearing every bit of the underlying bit set.
 */
public void clear() {
    this.bits.clear();
}

/**
 * Tests whether every bit selected by the element's hashes is set.
 *
 * @param element the element to look up
 * @return {@code false} as soon as one selected bit is found unset, {@code true} otherwise
 */
public boolean contains(T element) {
    boolean allSet = true;
    for (int index : h.hashes(element)) {
        if (!bits.get(index)) {
            allSet = false;
            break;
        }
    }
    return allSet;
}

/**
 * Compares this filter with another one.
 *
 * @param other the filter to compare against
 * @return {@code true} when the two hashes are equivalent and the two bit sets are equal
 */
public boolean equivalent(BloomFilter<T> other) {
    if (!h.equivalent(other.h)) {
        return false;
    }
    return bits.equals(other.bits);
}

/**
 * Delegates to the hash's {@code fpp} computation.
 *
 * @param n passed through to the hash (presumably the number of inserted elements - TODO confirm)
 * @return the value reported by the hash for {@code n}
 */
public double fpp(int n) {
    return this.h.fpp(n);
}

/**
 * Estimates the current population of the Bloom filter from the number of set bits and the
 * hash's k and m parameters. See:
 * http://en.wikipedia.org/wiki/Bloom_filter#Approximating_the_number_of_items_in_a_Bloom_filter
 *
 * @return the estimated number of elements in the filter
 */
public double getEstimatedPopulation() {
    final int k = h.getK();
    final int m = h.getM();
    return population(bits, k, m);
}

/**
 * Serializes this filter into a {@link Biff} message: seed, m and k from the hash, the filter
 * type, and the bit set as a sequence of longs.
 *
 * @return the built message
 */
public Biff toBff() {
    final Biff.Builder builder = Biff.newBuilder();
    builder.setSeed(h.getSeed());
    builder.setM(h.getM());
    builder.setK(h.getK());
    builder.setType(getType());

    final long[] words = bits.toLongArray();
    for (int i = 0; i < words.length; i++) {
        builder.addBits(words[i]);
    }
    return builder.build();
}

/**
 * @return the type tag of this filter; it is written into the {@link Biff} message by
 *         {@link #toBff()}
 */
protected abstract Biff.Type getType();

public static class BytesBloomFilter extends BloomFilter<byte[]> {

Expand All @@ -44,7 +165,7 @@ protected Hasher<byte[]> newHasher() {
}

@Override
protected int getType() {
protected Biff.Type getType() {
return BYTES;
}
}
Expand All @@ -70,7 +191,7 @@ protected Hasher<Digest> newHasher() {
}

@Override
protected int getType() {
protected Biff.Type getType() {
return DIGEST;
}

Expand All @@ -97,7 +218,7 @@ protected Hasher<Integer> newHasher() {
}

@Override
protected int getType() {
protected Biff.Type getType() {
return INT;
}

Expand All @@ -123,7 +244,7 @@ protected Hasher<Long> newHasher() {
}

@Override
protected int getType() {
protected Biff.Type getType() {
return LONG;
}

Expand All @@ -150,7 +271,7 @@ protected Hasher<String> newHasher() {
}

@Override
protected int getType() {
protected Biff.Type getType() {
return STRING;
}
}
Expand All @@ -175,152 +296,9 @@ protected Hasher<ULong> newHasher() {
}

@Override
protected int getType() {
protected Biff.Type getType() {
return ULONG;
}

}

// Integer type tags: matched by the switch statements in create(...) and returned by the
// getType() implementations of the concrete filters.
private static final int BYTES = 3;
private static final int DIGEST = 0;
private static final int INT = 1;
private static final int LONG = 2;
private static final int STRING = 4;
private static final int ULONG = 5;

/**
 * Builds a new Bloom filter of the concrete subclass matching the integer {@code type} tag.
 *
 * @param seed seed handed to the selected filter's constructor
 * @param n    forwarded unchanged to the selected constructor (presumably the expected number of
 *             elements - TODO confirm against the subclass constructors)
 * @param p    forwarded unchanged to the selected constructor (presumably the target false
 *             positive probability - TODO confirm against the subclass constructors)
 * @param type one of the integer type tags declared by this class
 * @return the filter created for the requested type
 * @throws IllegalArgumentException if {@code type} is not one of the handled tags
 */
@SuppressWarnings("unchecked")
public static <Q> BloomFilter<Q> create(long seed, int n, double p, int type) {
    final BloomFilter<?> filter;
    switch (type) {
    case DIGEST:
        filter = new DigestBloomFilter(seed, n, p);
        break;
    case INT:
        filter = new IntBloomFilter(seed, n, p);
        break;
    case LONG:
        filter = new LongBloomFilter(seed, n, p);
        break;
    case BYTES:
        filter = new BytesBloomFilter(seed, n, p);
        break;
    case STRING:
        filter = new StringBloomFilter(seed, n, p);
        break;
    case ULONG:
        filter = new ULongBloomFilter(seed, n, p);
        break;
    default:
        throw new IllegalArgumentException("Invalid type: " + type);
    }
    // Unchecked by design: the caller chooses Q and must make it agree with the requested type.
    return (BloomFilter<Q>) filter;
}

/**
 * Rebuilds a Bloom filter of the concrete subclass matching the integer {@code type} tag from
 * explicit state.
 *
 * @param seed seed handed to the selected filter's constructor
 * @param m    forwarded unchanged to the selected constructor (the {@code m} value a filter
 *             reports through {@link #toBff()})
 * @param k    forwarded unchanged to the selected constructor (the {@code k} value a filter
 *             reports through {@link #toBff()})
 * @param bits the filter's bits as an array of longs, forwarded unchanged
 * @param type one of the integer type tags declared by this class
 * @return the filter created for the requested type
 * @throws IllegalArgumentException if {@code type} is not one of the handled tags
 */
@SuppressWarnings("unchecked")
public static <Q> BloomFilter<Q> create(long seed, int m, int k, long[] bits, int type) {
    final BloomFilter<?> filter;
    switch (type) {
    case DIGEST:
        filter = new DigestBloomFilter(seed, m, k, bits);
        break;
    case INT:
        filter = new IntBloomFilter(seed, m, k, bits);
        break;
    case LONG:
        filter = new LongBloomFilter(seed, m, k, bits);
        break;
    case BYTES:
        filter = new BytesBloomFilter(seed, m, k, bits);
        break;
    case STRING:
        filter = new StringBloomFilter(seed, m, k, bits);
        break;
    case ULONG:
        filter = new ULongBloomFilter(seed, m, k, bits);
        break;
    default:
        throw new IllegalArgumentException("Invalid type: " + type);
    }
    // Unchecked by design: the caller chooses Q and must make it agree with the requested type.
    return (BloomFilter<Q>) filter;
}

/**
 * Reconstructs a Bloom filter from its {@link Biff} message form.
 *
 * @param bff the message carrying seed, m, k, type and the bit words
 * @return the filter created from the message contents
 */
public static <Q> BloomFilter<Q> from(Biff bff) {
    // Unbox the repeated field into the primitive array expected by create(...).
    final long[] words = bff.getBitsList().stream().mapToLong(Long::longValue).toArray();
    return create(bff.getSeed(), bff.getM(), bff.getK(), words, bff.getType());
}

/**
 * Estimates how many elements were inserted, as {@code -(m / k) * ln(1 - X / m)} where
 * {@code X} is the number of set bits.
 *
 * @param bitSet the filter's bits
 * @param k      the k parameter of the filter
 * @param m      the m parameter of the filter
 * @return the estimate; positive infinity when exactly {@code m} bits are set
 */
private static double population(BitSet bitSet, int k, int m) {
    final int setBits = bitSet.cardinality();
    final double fillRatio = setBits / ((double) m);
    final double scale = -m / ((double) k);
    return scale * Math.log(1 - fillRatio);
}

// Bit vector backing the filter; the positions read and written are the indices produced by h.
private final BitSet bits;
// Hashing strategy: yields the bit indices for an element and exposes the seed, m and k parameters.
private final Hash<T> h;

/**
 * Creates a filter backed by a fresh {@link BitSet} allocated with an initial size of
 * {@code h.getM()} bits.
 *
 * @param h the hash that supplies bit indices and the filter parameters
 */
private BloomFilter(Hash<T> h) {
this(h, new BitSet(h.getM()));
}

/**
 * Creates a filter over an existing bit set. The bit set is stored as given (no copy is made).
 *
 * @param h    the hash that supplies bit indices and the filter parameters
 * @param bits the bit set to use as this filter's state
 */
private BloomFilter(Hash<T> h, BitSet bits) {
    this.bits = bits;
    this.h = h;
}

/**
 * Adds {@code element} to the filter by setting every bit its hashes select.
 *
 * @param element the element to add
 */
public void add(T element) {
    for (int index : this.h.hashes(element)) {
        this.bits.set(index);
    }
}

/**
 * Adds {@code element} only if the filter does not already report it as contained, notifying
 * {@code ifAbsent} before the element's bits are set.
 *
 * @param element  the element to add
 * @param ifAbsent callback invoked with the element when at least one of its bits was unset
 * @return {@code true} if the element was added, {@code false} if all of its bits were already set
 */
public boolean add(T element, Consumer<T> ifAbsent) {
    final var indices = h.hashes(element);
    boolean absent = false;
    for (int index : indices) {
        if (!bits.get(index)) {
            absent = true;
            break;
        }
    }
    if (!absent) {
        return false;
    }
    // The callback runs first, so the bits stay untouched if it throws.
    ifAbsent.accept(element);
    for (int index : indices) {
        bits.set(index);
    }
    return true;
}

/**
 * @return the string form of the underlying bit set, as produced by {@link BitSet#toString()}
 */
public String biffString() {
    return this.bits.toString();
}

/**
 * Resets the filter by clearing every bit of the underlying bit set.
 */
public void clear() {
    this.bits.clear();
}

/**
 * Tests whether every bit selected by the element's hashes is set.
 *
 * @param element the element to look up
 * @return {@code false} as soon as one selected bit is found unset, {@code true} otherwise
 */
public boolean contains(T element) {
    boolean allSet = true;
    for (int index : h.hashes(element)) {
        if (!bits.get(index)) {
            allSet = false;
            break;
        }
    }
    return allSet;
}

/**
 * Compares this filter with another one.
 *
 * @param other the filter to compare against
 * @return {@code true} when the two hashes are equivalent and the two bit sets are equal
 */
public boolean equivalent(BloomFilter<T> other) {
    if (!h.equivalent(other.h)) {
        return false;
    }
    return bits.equals(other.bits);
}

/**
 * Delegates to the hash's {@code fpp} computation.
 *
 * @param n passed through to the hash (presumably the number of inserted elements - TODO confirm)
 * @return the value reported by the hash for {@code n}
 */
public double fpp(int n) {
    return this.h.fpp(n);
}

/**
 * Estimates the current population of the Bloom filter from the number of set bits and the
 * hash's k and m parameters. See:
 * http://en.wikipedia.org/wiki/Bloom_filter#Approximating_the_number_of_items_in_a_Bloom_filter
 *
 * @return the estimated number of elements in the filter
 */
public double getEstimatedPopulation() {
    final int k = h.getK();
    final int m = h.getM();
    return population(bits, k, m);
}

/**
 * Serializes this filter into a {@link Biff} message: seed, m and k from the hash, the filter
 * type, and the bit set as a sequence of longs.
 *
 * @return the built message
 */
public Biff toBff() {
    final Biff.Builder builder = Biff.newBuilder();
    builder.setSeed(h.getSeed());
    builder.setM(h.getM());
    builder.setK(h.getK());
    builder.setType(getType());

    final long[] words = bits.toLongArray();
    for (int i = 0; i < words.length; i++) {
        builder.addBits(words[i]);
    }
    return builder.build();
}

/**
 * @return the integer type tag of this filter; it is written into the {@link Biff} message by
 *         {@link #toBff()}
 */
protected abstract int getType();
}
Loading

0 comments on commit 7dad046

Please sign in to comment.