From 7c39955b3548141af27ccd0bcf5e4fa8179b464f Mon Sep 17 00:00:00 2001
From: Lee Rhodes
This implementation uses xxHash64 and follows the approach in Kirsch and Mitzenmacher, - * "Less Hashing, Same Performance: Building a Better Bloom Filter," Wiley Interscience, 2008, - * pp. 187-218.
+ * "Less Hashing, Same Performance: Building a Better Bloom Filter," Wiley Interscience, 2008, pp. 187-218. */ public final class BloomFilter { + /** + * The maximum size of a bloom filter in bits. + */ public static final long MAX_SIZE_BITS = (Integer.MAX_VALUE - Family.BLOOMFILTER.getMaxPreLongs()) * (long) Long.SIZE; private static final int SER_VER = 1; private static final int EMPTY_FLAG_MASK = 4; @@ -133,11 +135,23 @@ public static BloomFilter heapify(final Memory mem) { return internalHeapifyOrWrap((WritableMemory) mem, false, false); } + /** + * Wraps the given Memory into this filter class. The class itself only contains a few metadata items and holds + * a reference to the Memory object, which contains all the data. + * @param mem the given Memory object + * @return the wrapping BloomFilter class. + */ public static BloomFilter wrap(final Memory mem) { // casting to writable, but tracking that the object is read-only return internalHeapifyOrWrap((WritableMemory) mem, true, false); } + /** + * Wraps the given WritableMemory into this filter class. The class itself only contains a few metadata items and holds + * a reference to the Memory object, which contains all the data. + * @param wmem the given WritableMemory object + * @return the wrapping BloomFilter class. + */ public static BloomFilter writableWrap(final WritableMemory wmem) { return internalHeapifyOrWrap(wmem, true, true); } diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayR.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayR.java index 19c495af6..8acc36be2 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayR.java +++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayR.java @@ -24,6 +24,9 @@ import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; +/** + * This class can maintain the BitArray object off-heap. 
+ */ public class DirectBitArrayR extends BitArray { final static protected long NUM_BITS_OFFSET = Long.BYTES; final static protected long DATA_OFFSET = 2L * Long.BYTES; diff --git a/src/main/java/org/apache/datasketches/hll/TgtHllType.java b/src/main/java/org/apache/datasketches/hll/TgtHllType.java index a0ee79a45..a5dc395ce 100644 --- a/src/main/java/org/apache/datasketches/hll/TgtHllType.java +++ b/src/main/java/org/apache/datasketches/hll/TgtHllType.java @@ -50,10 +50,27 @@ * * @author Lee Rhodes */ -public enum TgtHllType { HLL_4, HLL_6, HLL_8; +public enum TgtHllType { + /** + * An HLL sketch with a bin size of 4 bits + */ + HLL_4, + /** + * An HLL sketch with a bin size of 6 bits + */ + HLL_6, + /** + * An Hll Sketch with a bin size of 8 bits + */ + HLL_8; private static final TgtHllType values[] = values(); + /** + * Convert the typeId to the enum type + * @param typeId the given typeId + * @return the enum type + */ public static final TgtHllType fromOrdinal(final int typeId) { return values[typeId]; } diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java index 392da0673..6fb9772fb 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java @@ -290,6 +290,10 @@ public void reset() { itemsSV = null; } + /** + * Export the current sketch as a compact byte array. + * @return the current sketch as a compact byte array. + */ public byte[] toByteArray() { return KllHelper.toByteArray(this, false); } diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsSketchIterator.java b/src/main/java/org/apache/datasketches/kll/KllItemsSketchIterator.java index 3a0a8da0f..02bda7a20 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsSketchIterator.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketchIterator.java @@ -23,6 +23,7 @@ /** * Iterator over KllItemsSketch. 
The order is not defined. + * @paramThe sample may be smaller than k and the resulting size of the sample potentially includes
* a probabilistic component, meaning the resulting sample size is not always constant.
- *
+ * @param S[] copySummaryArray(final S[] summaryArr) {
return tmpSummaryArr;
}
+ /**
+ * Creates a new Summary Array with the specified length
+ * @param summaryArr example array, only used to obtain the component type. It has no data.
+ * @param length the desired length of the returned array.
+ * @param <S> the summary class type
+ * @return a new Summary Array with the specified length
+ */
@SuppressWarnings("unchecked")
public static S[] newSummaryArray(final S[] summaryArr, final int length) {
final Class summaryType = (Class) summaryArr.getClass().getComponentType();