From 955b0180368d5c1bd5748da90f47a9917c94fdc5 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 5 Oct 2016 10:21:59 -0700 Subject: [PATCH] Improvements in PairwiseSetOperations. Many other minor javadoc edits and updates. --- .../java/com/yahoo/memory/MemoryRegionR.java | 3 +- .../yahoo/sketches/ArrayOfDoublesSerDe.java | 2 + .../com/yahoo/sketches/ArrayOfItemsSerDe.java | 16 +-- .../com/yahoo/sketches/ArrayOfLongsSerDe.java | 2 + .../yahoo/sketches/ArrayOfNumbersSerDe.java | 4 + .../yahoo/sketches/ArrayOfStringsSerDe.java | 2 + .../sketches/ArrayOfUtf16StringsSerDe.java | 2 + .../BoundsOnRatiosInThetaSketchedSets.java | 45 ++++---- .../sketches/SketchesArgumentException.java | 16 +-- .../com/yahoo/sketches/SketchesException.java | 30 ++--- .../sketches/SketchesStateException.java | 16 +-- .../java/com/yahoo/sketches/package-info.java | 9 +- .../sketches/quantiles/DoublesMergeImpl.java | 39 ++++--- .../sketches/quantiles/DoublesPmfCdfImpl.java | 22 ++-- .../quantiles/DoublesToByteArrayImpl.java | 43 ++++--- .../sketches/quantiles/DoublesUpdateImpl.java | 42 ++++--- .../yahoo/sketches/quantiles/DoublesUtil.java | 41 ++++--- .../sketches/quantiles/HeapDoublesSketch.java | 1 - .../yahoo/sketches/quantiles/ItemsUnion.java | 77 +++++++------ .../com/yahoo/sketches/quantiles/Util.java | 55 +++++---- .../sampling/ReservoirItemsSketch.java | 5 +- .../sampling/ReservoirLongsSketch.java | 4 +- .../sketches/sampling/ReservoirSize.java | 2 +- .../yahoo/sketches/sampling/package-info.java | 4 +- .../sketches/theta/PairwiseSetOperations.java | 109 +++++++++--------- .../com/yahoo/sketches/theta/Rebuilder.java | 42 +++---- .../theta/PairwiseSetOperationsTest.java | 95 ++++++++++----- 27 files changed, 407 insertions(+), 321 deletions(-) diff --git a/memory/src/main/java/com/yahoo/memory/MemoryRegionR.java b/memory/src/main/java/com/yahoo/memory/MemoryRegionR.java index 8ed4c548c..cf8ac9cac 100644 --- a/memory/src/main/java/com/yahoo/memory/MemoryRegionR.java +++ b/memory/src/main/java/com/yahoo/memory/MemoryRegionR.java @@ -9,7 +9,8 @@ */ public class MemoryRegionR extends MemoryRegion { - public MemoryRegionR(Memory memory, long memOffsetBytes, long capacityBytes, MemoryRequest memReq) { + public MemoryRegionR(Memory memory, long memOffsetBytes, long capacityBytes, + MemoryRequest memReq) { super(memory, memOffsetBytes, capacityBytes, memReq); } diff --git a/sketches/src/main/java/com/yahoo/sketches/ArrayOfDoublesSerDe.java b/sketches/src/main/java/com/yahoo/sketches/ArrayOfDoublesSerDe.java index 44df78fd8..4b787ffe8 100644 --- a/sketches/src/main/java/com/yahoo/sketches/ArrayOfDoublesSerDe.java +++ b/sketches/src/main/java/com/yahoo/sketches/ArrayOfDoublesSerDe.java @@ -10,6 +10,8 @@ /** * Methods of serializing and deserializing arrays of Double. + * + * @author Alex Saydakov */ public class ArrayOfDoublesSerDe extends ArrayOfItemsSerDe { diff --git a/sketches/src/main/java/com/yahoo/sketches/ArrayOfItemsSerDe.java b/sketches/src/main/java/com/yahoo/sketches/ArrayOfItemsSerDe.java index 5892f49ca..be849a37a 100644 --- a/sketches/src/main/java/com/yahoo/sketches/ArrayOfItemsSerDe.java +++ b/sketches/src/main/java/com/yahoo/sketches/ArrayOfItemsSerDe.java @@ -10,6 +10,8 @@ /** * Base class for serializing and deserializing custom types. * @param Type of item + * + * @author Alex Saydakov */ public abstract class ArrayOfItemsSerDe { @@ -17,7 +19,7 @@ public abstract class ArrayOfItemsSerDe { * Serialize an array of items to byte array. * The size of the array doesn't need to be serialized. * This method is called by the sketch serialization process. - * + * * @param items array of items to be serialized * @return serialized representation of the given array of items */ @@ -26,7 +28,7 @@ public abstract class ArrayOfItemsSerDe { /** * Deserialize an array of items from a given Memory object. * This method is called by the sketch deserialization process. - * + * * @param mem Memory containing a serialized array of items * @param numItems number of items in the serialized array * @return deserialized array of items @@ -35,12 +37,12 @@ public abstract class ArrayOfItemsSerDe { /** * Provides a simple mechanism to check compatibility between SerDe implementations. - * + * *

You will need to override this in the following cases: - *

  • If you want to rename the class or change its package hierarchy and keep the ID the + *
    • If you want to rename the class or change its package hierarchy and keep the ID the * same. This enables compatible deserialization of binary images that were serialized with a * different class name or package hierarchy.
    • - *
    • If you wish to change the binary layout of the serialization and don't want to change the + *
    • If you wish to change the binary layout of the serialization and don't want to change the * class name or package hierarchy, you will need to change the returned code.
    • *
    * @return a unique identifier of this SerDe @@ -48,9 +50,9 @@ public abstract class ArrayOfItemsSerDe { public short getId() { /* * Note that the hashCode() of a String is strictly a function of the content of the String - * and will be the same across different JVMs. This is not the case for Object.hashCode(), + * and will be the same across different JVMs. This is not the case for Object.hashCode(), * which generally computes the hash code from the native internal address of the object and - * will be DIFFERENT when computed on different JVMs. So if you override this method, make + * will be DIFFERENT when computed on different JVMs. So if you override this method, make * sure it will be repeatable across JVMs. */ return (short) getClass().getName().hashCode(); diff --git a/sketches/src/main/java/com/yahoo/sketches/ArrayOfLongsSerDe.java b/sketches/src/main/java/com/yahoo/sketches/ArrayOfLongsSerDe.java index 1a57115b1..9b9fc6991 100644 --- a/sketches/src/main/java/com/yahoo/sketches/ArrayOfLongsSerDe.java +++ b/sketches/src/main/java/com/yahoo/sketches/ArrayOfLongsSerDe.java @@ -10,6 +10,8 @@ /** * Methods of serializing and deserializing arrays of Long. + * + * @author Alex Saydakov */ public class ArrayOfLongsSerDe extends ArrayOfItemsSerDe { diff --git a/sketches/src/main/java/com/yahoo/sketches/ArrayOfNumbersSerDe.java b/sketches/src/main/java/com/yahoo/sketches/ArrayOfNumbersSerDe.java index 097ba9e34..0c63acc06 100644 --- a/sketches/src/main/java/com/yahoo/sketches/ArrayOfNumbersSerDe.java +++ b/sketches/src/main/java/com/yahoo/sketches/ArrayOfNumbersSerDe.java @@ -10,11 +10,15 @@ /** * Methods of serializing and deserializing arrays of the object version of primitive types of Number. + * *

    This class serializes numbers with a leading byte (ASCII character) indicating the type. The class keeps * the values byte aligned, even though only 3 bits are strictly necessary to encode one of * the 6 different primitives with object types that extend Number.

    + * *

    Classes handled are: Long, Integer, Short, Byte, Double, and * Float.

    + * + * @author Alex Saydakov */ public class ArrayOfNumbersSerDe extends ArrayOfItemsSerDe { diff --git a/sketches/src/main/java/com/yahoo/sketches/ArrayOfStringsSerDe.java b/sketches/src/main/java/com/yahoo/sketches/ArrayOfStringsSerDe.java index c49690ae5..e70ccd8c3 100644 --- a/sketches/src/main/java/com/yahoo/sketches/ArrayOfStringsSerDe.java +++ b/sketches/src/main/java/com/yahoo/sketches/ArrayOfStringsSerDe.java @@ -16,6 +16,8 @@ * {@link ArrayOfUtf16StringsSerDe}. In an extreme case when all strings are in ASCII, * this method is 2 times more compact, but it takes more time to encode and decode * by a factor of 1.5 to 2. + * + * @author Alex Saydakov */ public class ArrayOfStringsSerDe extends ArrayOfItemsSerDe { diff --git a/sketches/src/main/java/com/yahoo/sketches/ArrayOfUtf16StringsSerDe.java b/sketches/src/main/java/com/yahoo/sketches/ArrayOfUtf16StringsSerDe.java index 316122441..4d84a5ea8 100644 --- a/sketches/src/main/java/com/yahoo/sketches/ArrayOfUtf16StringsSerDe.java +++ b/sketches/src/main/java/com/yahoo/sketches/ArrayOfUtf16StringsSerDe.java @@ -14,6 +14,8 @@ * is a 16-bit code. The result is larger than one from {@link ArrayOfStringsSerDe}. * In an extreme case when all strings are in ASCII, the size is doubled. However it takes * less time to serialize and deserialize by a factor of 1.5 to 2. + * + * @author Alex Saydakov */ public class ArrayOfUtf16StringsSerDe extends ArrayOfItemsSerDe { diff --git a/sketches/src/main/java/com/yahoo/sketches/BoundsOnRatiosInThetaSketchedSets.java b/sketches/src/main/java/com/yahoo/sketches/BoundsOnRatiosInThetaSketchedSets.java index 31aaead64..94f08e95f 100644 --- a/sketches/src/main/java/com/yahoo/sketches/BoundsOnRatiosInThetaSketchedSets.java +++ b/sketches/src/main/java/com/yahoo/sketches/BoundsOnRatiosInThetaSketchedSets.java @@ -11,28 +11,27 @@ * This class is used to compute the bounds on the estimate of the ratio B / A, where: *
      *
    • A is a Theta Sketch of population PopA.
    • - *
    • B is a Theta Sketch of population PopB that is a proper subset of A, - * obtained by an intersection of A with some other Theta Sketch C, + *
    • B is a Theta Sketch of population PopB that is a proper subset of A, + * obtained by an intersection of A with some other Theta Sketch C, * which acts like a predicate or selection clause.
    • *
    • The estimate of the ratio PopB/PopA is * BoundsOnRatiosInThetaSketchedSets.getEstimateOfBoverA(A, B).
    • - *
    • The Upper Bound estimate on the ratio PopB/PopA is + *
    • The Upper Bound estimate on the ratio PopB/PopA is * BoundsOnRatiosInThetaSketchedSets.getUpperBoundForBoverA(A, B).
    • - *
    • The Lower Bound estimate on the ratio PopB/PopA is + *
    • The Lower Bound estimate on the ratio PopB/PopA is * BoundsOnRatiosInThetaSketchedSets.getLowerBoundForBoverA(A, B).
    • *
    - * Note: The theta of A cannot be greater than the theta of B. - * If B is formed as an intersection of A and some other set C, + * Note: The theta of A cannot be greater than the theta of B. + * If B is formed as an intersection of A and some other set C, * then the theta of B is guaranteed to be less than or equal to the theta of B. - * - * @version 0.5.1 + * * @author Kevin Lang * @author Lee Rhodes */ public final class BoundsOnRatiosInThetaSketchedSets { - + private BoundsOnRatiosInThetaSketchedSets() {} - + /** * Gets the approximate lower bound for B over A based on a 95% confidence interval * @param sketchA the sketch A @@ -43,16 +42,16 @@ public static double getLowerBoundForBoverA(Sketch sketchA, Sketch sketchB) { double thetaA = sketchA.getTheta(); double thetaB = sketchB.getTheta(); checkThetas(thetaA, thetaB); - + int countB = sketchB.getRetainedEntries(true); - int countA = (thetaB == thetaA) ? sketchA.getRetainedEntries(true) + int countA = (thetaB == thetaA) ? sketchA.getRetainedEntries(true) : sketchA.getCountLessThanTheta(thetaB); - + if (countA <= 0) return 0; - + return BoundsOnRatiosInSampledSets.getLowerBoundForBoverA(countA, countB, thetaB); } - + /** * Gets the approximate upper bound for B over A based on a 95% confidence interval * @param sketchA the sketch A @@ -65,14 +64,14 @@ public static double getUpperBoundForBoverA(Sketch sketchA, Sketch sketchB) { checkThetas(thetaA, thetaB); int countB = sketchB.getRetainedEntries(true); - int countA = (thetaB == thetaA) ? sketchA.getRetainedEntries(true) + int countA = (thetaB == thetaA) ? sketchA.getRetainedEntries(true) : sketchA.getCountLessThanTheta(thetaB); - + if (countA <= 0) return 1.0; - + return BoundsOnRatiosInSampledSets.getUpperBoundForBoverA(countA, countB, thetaB); } - + /** * Gets the estimate for B over A * @param sketchA the sketch A @@ -85,14 +84,14 @@ public static double getEstimateOfBoverA(Sketch sketchA, Sketch sketchB) { checkThetas(thetaA, thetaB); int countB = sketchB.getRetainedEntries(true); - int countA = (thetaB == thetaA) ? sketchA.getRetainedEntries(true) + int countA = (thetaB == thetaA) ? sketchA.getRetainedEntries(true) : sketchA.getCountLessThanTheta(thetaB); - + if (countA <= 0) return 0.5; - + return (double) countB / (double) countA; } - + static void checkThetas(double thetaA, double thetaB) { if (thetaB > thetaA) { throw new SketchesArgumentException("ThetaB cannot be > ThetaA."); diff --git a/sketches/src/main/java/com/yahoo/sketches/SketchesArgumentException.java b/sketches/src/main/java/com/yahoo/sketches/SketchesArgumentException.java index 4823403e9..083b160df 100644 --- a/sketches/src/main/java/com/yahoo/sketches/SketchesArgumentException.java +++ b/sketches/src/main/java/com/yahoo/sketches/SketchesArgumentException.java @@ -7,19 +7,21 @@ /** * Illegal Arguments Exception class for the library + * + * @author Lee Rhodes */ public class SketchesArgumentException extends SketchesException { private static final long serialVersionUID = 1L; - + //other constructors to be added as needed. - + /** - * Constructs a new runtime exception with the specified detail message. The cause is not - * initialized, and may subsequently be initialized by a call to + * Constructs a new runtime exception with the specified detail message. The cause is not + * initialized, and may subsequently be initialized by a call to * Throwable.initCause(java.lang.Throwable). - * - * @param message the detail message. The detail message is saved for later retrieval by the - * Throwable.getMessage() method. + * + * @param message the detail message. The detail message is saved for later retrieval by the + * Throwable.getMessage() method. */ public SketchesArgumentException(String message) { super(message); diff --git a/sketches/src/main/java/com/yahoo/sketches/SketchesException.java b/sketches/src/main/java/com/yahoo/sketches/SketchesException.java index 75e0e3303..7eeddffc3 100644 --- a/sketches/src/main/java/com/yahoo/sketches/SketchesException.java +++ b/sketches/src/main/java/com/yahoo/sketches/SketchesException.java @@ -7,37 +7,39 @@ /** * Exception class for the library + * + * @author Lee Rhodes */ public class SketchesException extends RuntimeException { private static final long serialVersionUID = 1L; - + //other constructors to be added as needed. - + /** - * Constructs a new runtime exception with the specified detail message. The cause is not - * initialized, and may subsequently be initialized by a call to + * Constructs a new runtime exception with the specified detail message. The cause is not + * initialized, and may subsequently be initialized by a call to * Throwable.initCause(java.lang.Throwable). - * - * @param message the detail message. The detail message is saved for later retrieval by the - * Throwable.getMessage() method. + * + * @param message the detail message. The detail message is saved for later retrieval by the + * Throwable.getMessage() method. */ public SketchesException(String message) { super(message); } - + /** * Constructs a new runtime exception with the specified detail message and cause. - * - *

    Note that the detail message associated with cause is not automatically incorporated + * + *

    Note that the detail message associated with cause is not automatically incorporated * in this runtime exception's detail message.

    - * - * @param message the detail message (which is saved for later retrieval by the + * + * @param message the detail message (which is saved for later retrieval by the * Throwable.getMessage() method). - * @param cause the cause (which is saved for later retrieval by the Throwable.getCause() + * @param cause the cause (which is saved for later retrieval by the Throwable.getCause() * method). (A null value is permitted, and indicates that the cause is nonexistent or unknown.) */ public SketchesException(String message, Throwable cause) { super(message, cause); } - + } diff --git a/sketches/src/main/java/com/yahoo/sketches/SketchesStateException.java b/sketches/src/main/java/com/yahoo/sketches/SketchesStateException.java index 36378b112..f106a0f8d 100644 --- a/sketches/src/main/java/com/yahoo/sketches/SketchesStateException.java +++ b/sketches/src/main/java/com/yahoo/sketches/SketchesStateException.java @@ -7,19 +7,21 @@ /** * Illegal State Exception class for the library + * + * @author Lee Rhodes */ public class SketchesStateException extends SketchesException { private static final long serialVersionUID = 1L; - + //other constructors to be added as needed. - + /** - * Constructs a new runtime exception with the specified detail message. The cause is not - * initialized, and may subsequently be initialized by a call to + * Constructs a new runtime exception with the specified detail message. The cause is not + * initialized, and may subsequently be initialized by a call to * Throwable.initCause(java.lang.Throwable). - * - * @param message the detail message. The detail message is saved for later retrieval by the - * Throwable.getMessage() method. + * + * @param message the detail message. The detail message is saved for later retrieval by the + * Throwable.getMessage() method. */ public SketchesStateException(String message) { super(message); diff --git a/sketches/src/main/java/com/yahoo/sketches/package-info.java b/sketches/src/main/java/com/yahoo/sketches/package-info.java index 36b39f259..9988b090f 100644 --- a/sketches/src/main/java/com/yahoo/sketches/package-info.java +++ b/sketches/src/main/java/com/yahoo/sketches/package-info.java @@ -1,12 +1,13 @@ /* * Copyright 2016, Yahoo! Inc. - * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root + * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root * for terms. */ /** - *

    This package is the parent package for all sketch algorithms as well as support packages - * such as the memory and hash packages. Classes at this level are used by more than - * one sub-package. + *

    This package is the parent package for all sketch algorithms. + * Classes at this level are used by more than one sub-package. + * + * @author Lee Rhodes */ package com.yahoo.sketches; diff --git a/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesMergeImpl.java b/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesMergeImpl.java index 8ba79d0c0..e1a5525fe 100644 --- a/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesMergeImpl.java +++ b/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesMergeImpl.java @@ -1,5 +1,5 @@ /* - * Copyright 2016, Yahoo! Inc. Licensed under the terms of the + * Copyright 2016, Yahoo! Inc. Licensed under the terms of the * Apache License 2.0. See LICENSE file at the project root for terms. */ @@ -12,14 +12,20 @@ import com.yahoo.sketches.SketchesArgumentException; -public class DoublesMergeImpl { +/** + * Down-sampling and merge algorithms for quantiles. + * + * @author Lee Rhodes + * @author Kevin Lang + */ +class DoublesMergeImpl { /** * Merges the source sketch into the target sketch that can have a smaller value of K. * However, it is required that the ratio of the two K values be a power of 2. * I.e., source.getK() = target.getK() * 2^(nonnegative integer). * The source is not modified. - * + * * @param src The source sketch * @param tgt The target sketch */ @@ -45,7 +51,7 @@ static void downSamplingMergeInto(final HeapDoublesSketch src, final HeapDoubles tgt.update(sourceBaseBuffer[i]); } - DoublesUpdateImpl.maybeGrowLevels(nFinal, tgt); + DoublesUpdateImpl.maybeGrowLevels(nFinal, tgt); final double[] scratchBuf = new double [2 * targetK]; final double[] downBuf = new double [targetK]; @@ -66,7 +72,7 @@ static void downSamplingMergeInto(final HeapDoublesSketch src, final HeapDoubles // won't update target.n_ until the very end } } - tgt.n_ = nFinal; + tgt.n_ = nFinal; assert tgt.getN() / (2 * targetK) == tgt.getBitPattern(); // internal consistency check @@ -99,11 +105,11 @@ private static void justZipWithStride( * as discussed above. * @param keyArr array of keys * @param valArr array of values - * @param arrLen length of keyArr and valArr + * @param arrLen length of keyArr and valArr * @param blkSize size of internal sorted blocks */ //used by DoublesAuxiliary and UtilTest - static void blockyTandemMergeSort(final double[] keyArr, final long[] valArr, final int arrLen, + static void blockyTandemMergeSort(final double[] keyArr, final long[] valArr, final int arrLen, final int blkSize) { assert blkSize >= 1; if (arrLen <= blkSize) return; @@ -111,7 +117,7 @@ static void blockyTandemMergeSort(final double[] keyArr, final long[] valArr, fi if (numblks * blkSize < arrLen) numblks += 1; assert (numblks * blkSize >= arrLen); - // duplicate the input is preparation for the "ping-pong" copy reduction strategy. + // duplicate the input is preparation for the "ping-pong" copy reduction strategy. final double[] keyTmp = Arrays.copyOf(keyArr, arrLen); final long[] valTmp = Arrays.copyOf(valArr, arrLen); @@ -125,7 +131,7 @@ static void blockyTandemMergeSort(final double[] keyArr, final long[] valArr, fi * blockyTandemMergeSortRecursion() is called by blockyTandemMergeSort(). * In addition to performing the algorithm's top down recursion, * it manages the buffer swapping that eliminates most copying. - * It also maps the input's pre-sorted blocks into the subarrays + * It also maps the input's pre-sorted blocks into the subarrays * that are processed by tandemMerge(). * @param keySrc key source * @param valSrc value source @@ -137,7 +143,7 @@ static void blockyTandemMergeSort(final double[] keyArr, final long[] valArr, fi * @param arrLim array limit */ private static void blockyTandemMergeSortRecursion(final double[] keySrc, final long[] valSrc, - final double[] keyDst, final long[] valDst, final int grpStart, final int grpLen, + final double[] keyDst, final long[] valDst, final int grpStart, final int grpLen, /* indices of blocks */ final int blkSize, final int arrLim) { // Important note: grpStart and grpLen do NOT refer to positions in the underlying array. // Instead, they refer to the pre-sorted blocks, such as block 0, block 1, etc. @@ -172,7 +178,7 @@ private static void blockyTandemMergeSortRecursion(final double[] keySrc, final if (arrStart2 + arrLen2 > arrLim) arrLen2 = arrLim - arrStart2; tandemMerge(keySrc, valSrc, - arrStart1, arrLen1, + arrStart1, arrLen1, arrStart2, arrLen2, keyDst, valDst, arrStart1); // which will be arrStart3 @@ -198,22 +204,22 @@ private static void tandemMerge(final double[] keySrc, final long[] valSrc, final int arrStart3) { final int arrStop1 = arrStart1 + arrLen1; final int arrStop2 = arrStart2 + arrLen2; - + int i1 = arrStart1; int i2 = arrStart2; int i3 = arrStart3; while (i1 < arrStop1 && i2 < arrStop2) { - if (keySrc[i2] < keySrc[i1]) { + if (keySrc[i2] < keySrc[i1]) { keyDst[i3] = keySrc[i2]; valDst[i3] = valSrc[i2]; i3++; i2++; - } else { + } else { keyDst[i3] = keySrc[i1]; valDst[i3] = valSrc[i1]; i3++; i1++; } } - + if (i1 < arrStop1) { arraycopy(keySrc, i1, keyDst, i3, arrStop1 - i1); arraycopy(valSrc, i1, valDst, i3, arrStop1 - i1); @@ -223,6 +229,5 @@ private static void tandemMerge(final double[] keySrc, final long[] valSrc, arraycopy(valSrc, i2, valDst, i3, arrStop2 - i2); } } - - + } diff --git a/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesPmfCdfImpl.java b/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesPmfCdfImpl.java index 4dac04283..930f6c758 100644 --- a/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesPmfCdfImpl.java +++ b/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesPmfCdfImpl.java @@ -1,5 +1,5 @@ /* - * Copyright 2016, Yahoo! Inc. Licensed under the terms of the + * Copyright 2016, Yahoo! Inc. Licensed under the terms of the * Apache License 2.0. See LICENSE file at the project root for terms. */ @@ -7,7 +7,13 @@ import java.util.Arrays; -public class DoublesPmfCdfImpl { +/** + * The PMF and CDF algorithms for quantiles. + * + * @author Lee Rhodes + * @author Kevin Lang + */ +class DoublesPmfCdfImpl { static double[] getPMFOrCDF(DoublesSketch sketch, double[] splitPoints, boolean isCDF) { long[] counters = internalBuildHistogram(sketch, splitPoints); @@ -31,7 +37,7 @@ static double[] getPMFOrCDF(DoublesSketch sketch, double[] splitPoints, boolean assert subtotal == n; //internal consistency check return result; } - + /** * Shared algorithm for both PMF and CDF functions. The splitPoints must be unique, monotonically * increasing values. @@ -75,7 +81,7 @@ static long[] internalBuildHistogram(final DoublesSketch sketch, final double[] } return counters; } - + /** * Because of the nested loop, cost is O(numSamples * numSplitPoints), which is bilinear. * This method does NOT require the samples to be sorted. @@ -86,10 +92,10 @@ static long[] internalBuildHistogram(final DoublesSketch sketch, final double[] * @param splitPoints must be unique and sorted. Number of splitPoints + 1 == counters.length. * @param counters array of counters */ - static void bilinearTimeIncrementHistogramCounters(final double[] samples, final int offset, + static void bilinearTimeIncrementHistogramCounters(final double[] samples, final int offset, final int numSamples, final long weight, final double[] splitPoints, final long[] counters) { assert (splitPoints.length + 1 == counters.length); - for (int i = 0; i < numSamples; i++) { + for (int i = 0; i < numSamples; i++) { final double sample = samples[i + offset]; int j = 0; for (j = 0; j < splitPoints.length; j++) { @@ -118,7 +124,7 @@ static void bilinearTimeIncrementHistogramCounters(final double[] samples, final * @param splitPoints must be unique and sorted. Number of splitPoints + 1 = counters.length. * @param counters array of counters */ - static void linearTimeIncrementHistogramCounters(final double[] samples, final int offset, + static void linearTimeIncrementHistogramCounters(final double[] samples, final int offset, final int numSamples, final long weight, final double[] splitPoints, final long[] counters) { int i = 0; int j = 0; @@ -138,5 +144,5 @@ static void linearTimeIncrementHistogramCounters(final double[] samples, final i counters[j] += (weight * (numSamples - i)); } } - + } diff --git a/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesToByteArrayImpl.java b/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesToByteArrayImpl.java index f89c66159..55e1ba86f 100644 --- a/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesToByteArrayImpl.java +++ b/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesToByteArrayImpl.java @@ -1,5 +1,5 @@ /* - * Copyright 2016, Yahoo! Inc. Licensed under the terms of the + * Copyright 2016, Yahoo! Inc. Licensed under the terms of the * Apache License 2.0. See LICENSE file at the project root for terms. */ @@ -24,15 +24,20 @@ import com.yahoo.memory.NativeMemory; import com.yahoo.sketches.Family; -public class DoublesToByteArrayImpl { +/** + * The doubles to byte array algorithms. + * + * @author Lee Rhodes + */ +class DoublesToByteArrayImpl { static byte[] toByteArray(DoublesSketch sketch, boolean ordered, boolean compact) { boolean empty = sketch.isEmpty(); - int flags = (empty ? EMPTY_FLAG_MASK : 0) - | (ordered ? ORDERED_FLAG_MASK : 0) + int flags = (empty ? EMPTY_FLAG_MASK : 0) + | (ordered ? ORDERED_FLAG_MASK : 0) | (compact ? COMPACT_FLAG_MASK : 0); - + if (empty) { byte[] outByteArr = new byte[Long.BYTES]; Memory memOut = new NativeMemory(outByteArr); @@ -44,14 +49,14 @@ static byte[] toByteArray(DoublesSketch sketch, boolean ordered, boolean compact //not empty return combinedBufferToByteArray(sketch, ordered, compact); } - + /** * Returns a byte array, including preamble, min, max and data extracted from the Combined Buffer. * @param ordered true if the desired form of the resulting array has the base buffer sorted. * @param compact true if the desired form of the resulting array is in compact form. * @return a byte array, including preamble, min, max and data extracted from the Combined Buffer. */ - static byte[] combinedBufferToByteArray(DoublesSketch sketch, boolean ordered, + static byte[] combinedBufferToByteArray(DoublesSketch sketch, boolean ordered, boolean compact) { final int preLongs = 2; final int extra = 2; // extra space for min and max values @@ -61,7 +66,7 @@ static byte[] combinedBufferToByteArray(DoublesSketch sketch, boolean ordered, long n = sketch.getN(); double[] combinedBuffer = sketch.getCombinedBuffer(); double[] bbItemsArr = null; - + final int bbCnt = Util.computeBaseBufferItems(k, n); if (bbCnt > 0) { bbItemsArr = new double[bbCnt]; @@ -69,21 +74,21 @@ static byte[] combinedBufferToByteArray(DoublesSketch sketch, boolean ordered, if (ordered) { Arrays.sort(bbItemsArr); } } byte[] outByteArr = null; - + if (compact) { final int retainedItems = sketch.getRetainedItems(); int outBytes = (retainedItems << 3) + preBytes; outByteArr = new byte[outBytes]; - + Memory memOut = new NativeMemory(outByteArr); long cumOffset = memOut.getCumulativeOffset(0L); - + //insert preamble, min, max insertPre0(outByteArr, cumOffset, preLongs, flags, k); insertN(outByteArr, cumOffset, n); insertMinDouble(outByteArr, cumOffset, sketch.getMinValue()); insertMaxDouble(outByteArr, cumOffset, sketch.getMaxValue()); - + //insert base buffer if (bbCnt > 0) { memOut.putDoubleArray(preBytes, bbItemsArr, 0, bbCnt); @@ -102,23 +107,23 @@ static byte[] combinedBufferToByteArray(DoublesSketch sketch, boolean ordered, bits >>>= 1; } } - + } else { //not compact final int totLevels = Util.computeNumLevelsNeeded(k, n); int outBytes = (totLevels == 0) ? (bbCnt << 3) + preBytes : (((2 + totLevels) * k) << 3) + preBytes; outByteArr = new byte[outBytes]; - + Memory memOut = new NativeMemory(outByteArr); long cumOffset = memOut.getCumulativeOffset(0L); - + //insert preamble, min, max insertPre0(outByteArr, cumOffset, preLongs, flags, k); insertN(outByteArr, cumOffset, n); insertMinDouble(outByteArr, cumOffset, sketch.getMinValue()); insertMaxDouble(outByteArr, cumOffset, sketch.getMaxValue()); - + //insert base buffer if (bbCnt > 0) { memOut.putDoubleArray(preBytes, bbItemsArr, 0, bbCnt); @@ -132,8 +137,8 @@ static byte[] combinedBufferToByteArray(DoublesSketch sketch, boolean ordered, } return outByteArr; } - - static void insertPre0(byte[] outArr, long cumOffset, int preLongs, int flags, + + static void insertPre0(byte[] outArr, long cumOffset, int preLongs, int flags, int k) { insertPreLongs(outArr, cumOffset, preLongs); insertSerVer(outArr, cumOffset, DoublesSketch.DOUBLES_SER_VER); @@ -142,5 +147,5 @@ static void insertPre0(byte[] outArr, long cumOffset, int preLongs, int flags, insertK(outArr, cumOffset, k); insertSerDeId(outArr, cumOffset, DoublesSketch.ARRAY_OF_DOUBLES_SERDE_ID); } - + } diff --git a/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesUpdateImpl.java b/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesUpdateImpl.java index 909825fce..41ee5f905 100644 --- a/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesUpdateImpl.java +++ b/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesUpdateImpl.java @@ -1,5 +1,5 @@ /* - * Copyright 2016, Yahoo! Inc. Licensed under the terms of the + * Copyright 2016, Yahoo! Inc. Licensed under the terms of the * Apache License 2.0. See LICENSE file at the project root for terms. */ @@ -7,6 +7,12 @@ import java.util.Arrays; +/** + * The doubles update algorithms for quantiles. + * + * @author Lee Rhodes + * @author Kevin Lang + */ public class DoublesUpdateImpl { static void growBaseBuffer(final DoublesSketch sketch) { //n has not been incremented yet @@ -18,7 +24,7 @@ static void growBaseBuffer(final DoublesSketch sketch) { //n has not been increm sketch.putCombinedBufferItemCapacity(newSize); sketch.putCombinedBuffer(Arrays.copyOf(baseBuffer, newSize)); } - + /** * Called when the base buffer has just acquired 2*k elements. * @param sketch the given quantiles sketch @@ -34,16 +40,16 @@ static void processFullBaseBuffer(final HeapDoublesSketch sketch) { maybeGrowLevels(newN, sketch); // notice that this is acquired after the possible resizing - final double[] baseBuffer = sketch.getCombinedBuffer(); + final double[] baseBuffer = sketch.getCombinedBuffer(); Arrays.sort(baseBuffer, 0, bbCount); //sort the BB inPlacePropagateCarry( 0, //starting level null, //sizeKbuf, not needed here 0, //sizeKStart, not needed here - baseBuffer, //size2Kbuf, the base buffer = the Combined Buffer + baseBuffer, //size2Kbuf, the base buffer = the Combined Buffer 0, //size2KStart - true, //doUpdateVersion + true, //doUpdateVersion sketch); //the sketch sketch.baseBufferCount_ = 0; assert newN / (2 * k) == sketch.getBitPattern(); // internal consistency check @@ -56,20 +62,20 @@ static void maybeGrowLevels(final long newN, final HeapDoublesSketch sketch) { final int numLevelsNeeded = Util.computeNumLevelsNeeded(k, newN); if (numLevelsNeeded == 0) { // don't need any levels yet, and might have small base buffer; this can happen during a merge - return; + return; } // from here on we need a full-size base buffer and at least one level assert newN >= 2L * k; - assert numLevelsNeeded > 0; + assert numLevelsNeeded > 0; final int spaceNeeded = (2 + numLevelsNeeded) * k; if (spaceNeeded <= sketch.getCombinedBufferItemCapacity()) { return; } // copies base buffer plus old levels - sketch.combinedBuffer_ = Arrays.copyOf(sketch.getCombinedBuffer(), spaceNeeded); + sketch.combinedBuffer_ = Arrays.copyOf(sketch.getCombinedBuffer(), spaceNeeded); sketch.combinedBufferItemCapacity_ = spaceNeeded; } - + static void inPlacePropagateCarry( final int startingLevel, final double[] sizeKBuf, final int sizeKStart, @@ -80,7 +86,7 @@ static void inPlacePropagateCarry( final int k = sketch.getK(); final long bitPattern = sketch.bitPattern_; //the one prior to the last increment of n_ final int endingLevel = Util.positionOfLowestZeroBitStartingAt(bitPattern, startingLevel); - + if (doUpdateVersion) { // update version of computation // its is okay for sizeKbuf to be null in this case zipSize2KBuffer( @@ -93,7 +99,7 @@ static void inPlacePropagateCarry( levelsArr, (2 + endingLevel) * k, k); } - + for (int lvl = startingLevel; lvl < endingLevel; lvl++) { assert (bitPattern & (1L << lvl)) > 0; // internal consistency check mergeTwoSizeKBuffers( @@ -110,7 +116,7 @@ static void inPlacePropagateCarry( // update bit pattern with binary-arithmetic ripple carry sketch.bitPattern_ = bitPattern + (1L << startingLevel); } - + private static void zipSize2KBuffer( final double[] bufA, final int startA, // input final double[] bufC, final int startC, // output @@ -121,7 +127,7 @@ private static void zipSize2KBuffer( bufC[c] = bufA[a]; } } - + private static void mergeTwoSizeKBuffers( final double[] keySrc1, final int arrStart1, final double[] keySrc2, final int arrStart2, @@ -134,13 +140,13 @@ private static void mergeTwoSizeKBuffers( int i2 = arrStart2; int i3 = arrStart3; while (i1 < arrStop1 && i2 < arrStop2) { - if (keySrc2[i2] < keySrc1[i1]) { + if (keySrc2[i2] < keySrc1[i1]) { keyDst[i3++] = keySrc2[i2++]; - } else { + } else { keyDst[i3++] = keySrc1[i1++]; - } + } } - + if (i1 < arrStop1) { System.arraycopy(keySrc1, i1, keyDst, i3, arrStop1 - i1); } else { @@ -148,5 +154,5 @@ private static void mergeTwoSizeKBuffers( System.arraycopy(keySrc1, i2, keyDst, i3, arrStop2 - i2); } } - + } diff --git a/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesUtil.java b/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesUtil.java index d9944f35b..fb31ef0c3 100644 --- a/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesUtil.java +++ b/sketches/src/main/java/com/yahoo/sketches/quantiles/DoublesUtil.java @@ -16,18 +16,17 @@ /** * Utilities that support the doubles quantiles algorithms. - * + * *

    This class contains a highly specialized sort called blockyTandemMergeSort(). * It also contains methods that are used while building histograms and other common * functions.

    - * - * @author Kevin Lang + * * @author Lee Rhodes */ final class DoublesUtil { - private DoublesUtil() {} - + private DoublesUtil() {} + /** * Returns an on-heap copy of the given sketch * @param sketch the given sketch @@ -46,7 +45,7 @@ static HeapDoublesSketch copy(DoublesSketch sketch) { qsCopy.combinedBuffer_ = Arrays.copyOf(combBuf, combBuf.length); return qsCopy; } - + /** * Checks the validity of the memory capacity assuming n, k and compact. * @param k the given value of k @@ -62,8 +61,8 @@ static void checkMemCapacity(int k, long n, boolean compact, long memCapBytes) { reqBufBytes = (metaPre + retainedItems) << 3; } else { //not compact int totLevels = Util.computeNumLevelsNeeded(k, n); - reqBufBytes = (totLevels == 0) - ? (metaPre + retainedItems) << 3 + reqBufBytes = (totLevels == 0) + ? (metaPre + retainedItems) << 3 : (metaPre + (2 + totLevels) * k) << 3; } if (memCapBytes < reqBufBytes) { @@ -71,7 +70,7 @@ static void checkMemCapacity(int k, long n, boolean compact, long memCapBytes) { + memCapBytes + " < " + reqBufBytes); } } - + /** * Check the validity of the given serialization version * @param serVer the given serialization version @@ -79,13 +78,13 @@ static void checkMemCapacity(int k, long n, boolean compact, long memCapBytes) { static void checkDoublesSerVer(int serVer) { int max = DoublesSketch.DOUBLES_SER_VER; int min = DoublesSketch.MIN_DOUBLES_SER_VER; - if ((serVer > max) || (serVer < min)) { + if ((serVer > max) || (serVer < min)) { throw new SketchesArgumentException( "Possible corruption: Unsupported Serialization Version: " + serVer); } } - - static String toString(final boolean sketchSummary, final boolean dataDetail, + + static String toString(final boolean sketchSummary, final boolean dataDetail, final DoublesSketch sketch) { final StringBuilder sb = new StringBuilder(); if (dataDetail) { @@ -96,26 +95,26 @@ static String toString(final boolean sketchSummary, final boolean dataDetail, } return sb.toString(); } - + static String getDataDetail(final DoublesSketch sketch) { final StringBuilder sb = new StringBuilder(); final String thisSimpleName = sketch.getClass().getSimpleName(); sb.append(LS).append("### ").append(thisSimpleName).append(" DATA DETAIL: ").append(LS); - + final int k = sketch.getK(); final long n = sketch.getN(); final int bbCount = sketch.getBaseBufferCount(); final long bitPattern = sketch.getBitPattern(); final double[] combBuf = sketch.getCombinedBuffer(); - + //output the base buffer - + sb.append(" BaseBuffer : "); - for (int i = 0; i < bbCount; i++) { + for (int i = 0; i < bbCount; i++) { sb.append(String.format("%10.1f", combBuf[i])); } sb.append(LS); - + //output all the levels int combBufSize = combBuf.length; if (n >= 2 * k) { @@ -134,7 +133,7 @@ static String getDataDetail(final DoublesSketch sketch) { sb.append("### END DATA DETAIL").append(LS); return sb.toString(); } - + static String getSummary(final DoublesSketch sketch) { final StringBuilder sb = new StringBuilder(); final String thisSimpleName = sketch.getClass().getSimpleName(); @@ -171,7 +170,7 @@ static String getSummary(final DoublesSketch sketch) { sb.append("### END SKETCH SUMMARY").append(LS); return sb.toString(); } - + static String printMemData(Memory mem, int k, int n) { if (n == 0) return ""; final StringBuilder sb = new StringBuilder(); @@ -195,5 +194,5 @@ static String printMemData(Memory mem, int k, int n) { sb.append(LS + "### END DATA DETAIL").append(LS); return sb.toString(); } - + } diff --git a/sketches/src/main/java/com/yahoo/sketches/quantiles/HeapDoublesSketch.java b/sketches/src/main/java/com/yahoo/sketches/quantiles/HeapDoublesSketch.java index c18f3607d..96d8904f4 100644 --- a/sketches/src/main/java/com/yahoo/sketches/quantiles/HeapDoublesSketch.java +++ b/sketches/src/main/java/com/yahoo/sketches/quantiles/HeapDoublesSketch.java @@ -25,7 +25,6 @@ /** * Implements the DoublesSketch on the Java heap. * - * @author Kevin Lang * @author Lee Rhodes */ final class HeapDoublesSketch extends DoublesSketch { diff --git a/sketches/src/main/java/com/yahoo/sketches/quantiles/ItemsUnion.java b/sketches/src/main/java/com/yahoo/sketches/quantiles/ItemsUnion.java index 655e488e6..0ba5668fe 100644 --- a/sketches/src/main/java/com/yahoo/sketches/quantiles/ItemsUnion.java +++ b/sketches/src/main/java/com/yahoo/sketches/quantiles/ItemsUnion.java @@ -12,10 +12,11 @@ /** * The API for Union operations for GenericQuantilesSketches - * + * + * @param type of item + * * @author Lee Rhodes * @author Alex Saydakov - * @param type of item */ public final class ItemsUnion { @@ -44,7 +45,7 @@ public static ItemsUnion getInstance(final Comparator comparat * @param type of item * @param k Parameter that controls space usage of sketch and accuracy of estimates. * It is recommended that k be a power of 2 to enable merging of sketches with - * different values of k. However, in this case it is only possible to merge from + * different values of k. However, in this case it is only possible to merge from * larger values of k to smaller values. * @param comparator to compare items * @return an instance of ItemsUnion @@ -56,15 +57,15 @@ public static ItemsUnion getInstance(final int k, final Comparator type of item - * @param srcMem the given srcMem. + * @param srcMem the given srcMem. * A reference to srcMem will not be maintained internally. * @param comparator to compare items * @param serDe an instance of ArrayOfItemsSerDe * @return an instance of ItemsUnion */ - public static ItemsUnion getInstance(final Memory srcMem, + public static ItemsUnion getInstance(final Memory srcMem, final Comparator comparator, final ArrayOfItemsSerDe serDe) { - final ItemsSketch gadget = ItemsSketch.getInstance(srcMem, comparator, serDe); + final ItemsSketch gadget = ItemsSketch.getInstance(srcMem, comparator, serDe); return new ItemsUnion(gadget.getK(), gadget.getComparator(), gadget); } @@ -77,19 +78,19 @@ public static ItemsUnion getInstance(final Memory srcMem, public static ItemsUnion getInstance(final ItemsSketch sketch) { return new ItemsUnion(sketch.getK(), sketch.getComparator(), sketch); } - + //@formatter:off @SuppressWarnings("null") static ItemsSketch updateLogic(final int myK, final Comparator comparator, final ItemsSketch myQS, final ItemsSketch other) { int sw1 = ((myQS == null) ? 0 : myQS.isEmpty() ? 4 : 8); sw1 |= ((other == null) ? 0 : other.isEmpty() ? 1 : 2); - int outCase = 0; //0=null, 1=NOOP, 2=copy, 3=merge + int outCase = 0; //0=null, 1=NOOP, 2=copy, 3=merge switch (sw1) { case 0: outCase = 0; break; //myQS = null, other = null ; return null case 1: outCase = 4; break; //myQS = null, other = empty; copy or downsample(myK) case 2: outCase = 2; break; //myQS = null, other = valid; copy or downsample(myK) - case 4: outCase = 1; break; //myQS = empty, other = null ; no-op + case 4: outCase = 1; break; //myQS = empty, other = null ; no-op case 5: outCase = 1; break; //myQS = empty, other = empty; no-op case 6: outCase = 3; break; //myQS = empty, other = valid; merge case 8: outCase = 1; break; //myQS = valid, other = null ; no-op @@ -137,21 +138,21 @@ static ItemsSketch updateLogic(final int myK, final Comparator * However, it is required that the ratio of the two K values be a power of 2. * I.e., source.getK() = target.getK() * 2^(nonnegative integer). * The source is not modified. - * - *

    Note: It is easy to prove that the following simplified code which launches multiple waves of - * carry propagation does exactly the same amount of merging work (including the work of - * allocating fresh buffers) as the more complicated and seemingly more efficient approach that + * + *

    Note: It is easy to prove that the following simplified code which launches multiple waves of + * carry propagation does exactly the same amount of merging work (including the work of + * allocating fresh buffers) as the more complicated and seemingly more efficient approach that * tracks a single carry propagation wave through both sketches. - * - *

    This simplified code probably does do slightly more "outer loop" work, but I am pretty - * sure that even that is within a constant factor of the more complicated code, plus the - * total amount of "outer loop" work is at least a factor of K smaller than the total amount of + * + *

    This simplified code probably does do slightly more "outer loop" work, but I am pretty + * sure that even that is within a constant factor of the more complicated code, plus the + * total amount of "outer loop" work is at least a factor of K smaller than the total amount of * merging work, which is identical in the two approaches. * - *

    Note: a two-way merge that doesn't modify either of its two inputs could be implemented + *

    Note: a two-way merge that doesn't modify either of its two inputs could be implemented * by making a deep copy of the larger sketch and then merging the smaller one into it. * However, it was decided not to do this. - * + * * @param source The source sketch * @param target The target sketch */ @@ -161,25 +162,25 @@ static void mergeInto(final ItemsSketch source, final ItemsSketch targ final int tgtK = target.getK(); final long srcN = source.getN(); final long tgtN = target.getN(); - + if (srcK != tgtK) { ItemsUtil.downSamplingMergeInto(source, target); return; } - + final Object[] srcLevels = source.getCombinedBuffer(); // aliasing is a bit dangerous final Object[] srcBaseBuffer = srcLevels; // aliasing is a bit dangerous - + final long nFinal = tgtN + srcN; - + for (int i = 0; i < source.getBaseBufferCount(); i++) { target.update((T) srcBaseBuffer[i]); } - + ItemsUtil.maybeGrowLevels(nFinal, target); - + final Object[] scratchBuf = new Object[2 * tgtK]; - + long srcBitPattern = source.getBitPattern(); assert srcBitPattern == (srcN / (2L * srcK)); for (int srcLvl = 0; srcBitPattern != 0L; srcLvl++, srcBitPattern >>>= 1) { @@ -193,9 +194,9 @@ static void mergeInto(final ItemsSketch source, final ItemsSketch targ } } target.n_ = nFinal; - + assert target.getN() / (2 * tgtK) == target.getBitPattern(); // internal consistency check - + final T srcMax = source.getMaxValue(); final T srcMin = source.getMinValue(); final T tgtMax = target.getMaxValue(); @@ -206,15 +207,15 @@ static void mergeInto(final ItemsSketch source, final ItemsSketch targ /** * Iterative union operation, which means this method can be repeatedly called. - * Merges the given sketch into this union object. + * Merges the given sketch into this union object. * The given sketch is not modified. * It is required that the ratio of the two K values be a power of 2. * This is easily satisfied if each of the K values is already a power of 2. * If the given sketch is null or empty it is ignored. - * - *

    It is required that the results of the union operation, which can be obtained at any time, + * + *

    It is required that the results of the union operation, which can be obtained at any time, * is obtained from {@link #getResult() }.

    - * + * * @param sketchIn the sketch to be merged into this one. */ public void update(final ItemsSketch sketchIn) { @@ -228,8 +229,8 @@ public void update(final ItemsSketch sketchIn) { * It is required that the ratio of the two K values be a power of 2. * This is easily satisfied if each of the K values is already a power of 2. * If the given sketch is null or empty it is ignored. - * - *

    It is required that the results of the union operation, which can be obtained at any time, + * + *

    It is required that the results of the union operation, which can be obtained at any time, * is obtained from {@link #getResult() }.

    * @param srcMem Memory image of sketch to be merged * @param serDe an instance of ArrayOfItemsSerDe @@ -240,8 +241,8 @@ public void update(final Memory srcMem, final ArrayOfItemsSerDe serDe) { } /** - * Update this union with the given double (or float) data Item. - * + * Update this union with the given double (or float) data Item. + * * @param dataItem The given datum. */ public void update(final T dataItem) { @@ -261,9 +262,9 @@ public ItemsSketch getResult() { } /** - * Gets the result of this Union operation (without a copy) and resets this Union to the + * Gets the result of this Union operation (without a copy) and resets this Union to the * virgin state. - * + * * @return the result of this Union operation and reset. */ public ItemsSketch getResultAndReset() { diff --git a/sketches/src/main/java/com/yahoo/sketches/quantiles/Util.java b/sketches/src/main/java/com/yahoo/sketches/quantiles/Util.java index 6fb5dc5d9..d9535f2c1 100644 --- a/sketches/src/main/java/com/yahoo/sketches/quantiles/Util.java +++ b/sketches/src/main/java/com/yahoo/sketches/quantiles/Util.java @@ -17,16 +17,15 @@ /** * Utility class for quantiles sketches. - * + * *

    This class contains a highly specialized sort called blockyTandemMergeSort(). * It also contains methods that are used while building histograms and other common * functions.

    - * - * @author Kevin Lang + * * @author Lee Rhodes */ final class Util { - + private Util() {} static final int MIN_BASE_BUF_SIZE = 4; @@ -35,12 +34,12 @@ private Util() {} * The java line separator character as a String. */ public static final String LS = System.getProperty("line.separator"); - + /** * The tab character */ public static final char TAB = '\t'; - + /** * Checks the validity of the given value k * @param k must be greater than 0 and less than 65536. @@ -66,7 +65,7 @@ static void checkFamilyID(int familyID) { /** * Checks the consistency of the flag bits and the state of preambleLong and the memory * capacity and returns the empty state. - * @param preambleLongs the size of preamble in longs + * @param preambleLongs the size of preamble in longs * @param flags the flags field * @param memCapBytes the memory capacity * @return the value of the empty state @@ -91,9 +90,9 @@ static boolean checkPreLongsFlagsCap(int preambleLongs, int flags, long memCapBy /** * Checks just the flags field of the preamble * @param flags the flags field - */ + */ static void checkFlags(int flags) { //only used by checkPreLongsFlagsCap and test - int allowedFlags = + int allowedFlags = READ_ONLY_FLAG_MASK | EMPTY_FLAG_MASK | COMPACT_FLAG_MASK | ORDERED_FLAG_MASK; int flagsMask = ~allowedFlags; if ((flags & flagsMask) > 0) { @@ -103,7 +102,7 @@ static void checkFlags(int flags) { //only used by checkPreLongsFlagsCap and te } /** - * Checks the sequential validity of the given array of fractions. + * Checks the sequential validity of the given array of fractions. * They must be unique, monotonically increasing and not NaN, not < 0 and not > 1.0. * @param fractions array */ @@ -124,7 +123,7 @@ static final void validateFractions(double[] fractions) { } /** - * Checks the sequential validity of the given array of double values. + * Checks the sequential validity of the given array of double values. * They must be unique, monotonically increasing and not NaN. * @param values the given array of double values */ @@ -139,7 +138,7 @@ static final void validateValues(final double[] values) { "Values must be unique, monotonically increasing and not NaN."); } } - + /** * Returns the number of retained items in the sketch given k and n. * @param k the given configured k of the sketch @@ -152,15 +151,15 @@ static int computeRetainedItems(int k, long n) { int validLevels = Long.bitCount(bitPattern); return bbCnt + validLevels * k; } - + /** - * Returns the current item capacity of the non-compact, expanded combined data buffer - * given k and n. If total levels = 0, this returns the ceiling power of 2 + * Returns the current item capacity of the non-compact, expanded combined data buffer + * given k and n. If total levels = 0, this returns the ceiling power of 2 * size for the base buffer or the MIN_BASE_BUF_SIZE, whichever is larger. - * - * @param k sketch parameter. This determines the accuracy of the sketch and the + * + * @param k sketch parameter. This determines the accuracy of the sketch and the * size of the updatable data structure, which is a function of k and n. - * + * * @param n The number of items in the input stream * @return the current item capacity of the combined data buffer */ @@ -175,7 +174,7 @@ static int computeExpandedCombinedBufferItemCapacity(int k, long n) { } return ret; } - + /** * Computes the number of valid levels above the base buffer * @param bitPattern the bit pattern for valid log levels @@ -196,12 +195,12 @@ static int computeValidLevels(long bitPattern) { static int computeNumLevelsNeeded(int k, long n) { return 1 + hiBitPos(n / (2L * k)); } - + /** * Computes the number of base buffer items given k, n * @param k the configured size of the sketch * @param n the total values presented to the sketch - * @return the number of base buffer items + * @return the number of base buffer items */ static int computeBaseBufferItems(int k, long n) { return (int) (n % (2L * k)); @@ -228,7 +227,7 @@ static double lg(double x) { } /** - * Zero based position of the highest one-bit of the given long. + * Zero based position of the highest one-bit of the given long. * Returns minus one if num is zero. * @param num the given long * @return Zero based position of the highest one-bit of the given long @@ -283,7 +282,7 @@ static class EpsilonFromK { /** * A heuristic fudge factor that causes the inverted formula to better match the empirical. - * The value of 4/3 is directly associated with the deltaForEps value of 0.01. + * The value of 4/3 is directly associated with the deltaForEps value of 0.01. * Don't touch this! */ private static final double adjustKForEps = 4.0 / 3.0; // fudge factor @@ -294,7 +293,7 @@ static class EpsilonFromK { private static final double bracketedBinarySearchForEpsTol = 1e-15; /** - * From extensive empirical testing we recommend most users use this method for deriving + * From extensive empirical testing we recommend most users use this method for deriving * epsilon. This uses a fudge factor of 4/3 times the theoretical calculation of epsilon. * @param k the given k that must be greater than one. * @return the resulting epsilon @@ -306,13 +305,13 @@ static double getAdjustedEpsilon(int k) { //used by HeapQS, so far /** * Finds the epsilon given K and a fudge factor. - * See Cormode's Mergeable Summaries paper, Journal version, Theorem 3.6. - * This has a good fit between values of k between 16 and 1024. + * See Cormode's Mergeable Summaries paper, Journal version, Theorem 3.6. + * This has a good fit between values of k between 16 and 1024. * Beyond that has not been empirically tested. * @param k The given value of k - * @param ff The given fudge factor. No fudge factor = 1.0. + * @param ff The given fudge factor. No fudge factor = 1.0. * @return the resulting epsilon - */ + */ private static double getTheoreticalEpsilon(int k, double ff) { //used only by getAdjustedEpsilon() if (k < 2) { throw new SketchesArgumentException("K must be greater than one."); diff --git a/sketches/src/main/java/com/yahoo/sketches/sampling/ReservoirItemsSketch.java b/sketches/src/main/java/com/yahoo/sketches/sampling/ReservoirItemsSketch.java index 770125ee3..871b27f67 100644 --- a/sketches/src/main/java/com/yahoo/sketches/sampling/ReservoirItemsSketch.java +++ b/sketches/src/main/java/com/yahoo/sketches/sampling/ReservoirItemsSketch.java @@ -31,8 +31,9 @@ * uniform random sample of unweighted items from the stream. * * @param The type of object held in the reservoir. - * @author jmalkin - * @author langk + * + * @author Jon Malkin + * @author Kevin Lang */ public class ReservoirItemsSketch { diff --git a/sketches/src/main/java/com/yahoo/sketches/sampling/ReservoirLongsSketch.java b/sketches/src/main/java/com/yahoo/sketches/sampling/ReservoirLongsSketch.java index 78c925dd1..0226acaff 100644 --- a/sketches/src/main/java/com/yahoo/sketches/sampling/ReservoirLongsSketch.java +++ b/sketches/src/main/java/com/yahoo/sketches/sampling/ReservoirLongsSketch.java @@ -27,8 +27,8 @@ * This sketch provides a reservoir sample over an input stream of longs. The sketch * contains a uniform random sample of items from the stream. * - * @author jmalkin - * @author langk + * @author Jon Malkin + * @author Kevin Lang */ public class ReservoirLongsSketch { diff --git a/sketches/src/main/java/com/yahoo/sketches/sampling/ReservoirSize.java b/sketches/src/main/java/com/yahoo/sketches/sampling/ReservoirSize.java index 5e71f04d0..fc9a81544 100644 --- a/sketches/src/main/java/com/yahoo/sketches/sampling/ReservoirSize.java +++ b/sketches/src/main/java/com/yahoo/sketches/sampling/ReservoirSize.java @@ -18,7 +18,7 @@ *

    NOTE: Numerical instability may cause an off-by-one error on reservoir size, causing a * slight increase in storage over the optimal value.

    * - * @author jmalkin + * @author Jon Malkin */ final class ReservoirSize { /** diff --git a/sketches/src/main/java/com/yahoo/sketches/sampling/package-info.java b/sketches/src/main/java/com/yahoo/sketches/sampling/package-info.java index 0f8eb7f80..63c92881a 100644 --- a/sketches/src/main/java/com/yahoo/sketches/sampling/package-info.java +++ b/sketches/src/main/java/com/yahoo/sketches/sampling/package-info.java @@ -5,8 +5,8 @@ */ /** - *

    This package is dedicated to streaming algorithms that enable fixed size, uniform sampling of unweighted items - * from a stream.

    + *

    This package is dedicated to streaming algorithms that enable fixed size, uniform sampling of + * unweighted items from a stream.

    * *

    These sketches are mergeable and can be serialized and deserialized to/from a compact * form.

    diff --git a/sketches/src/main/java/com/yahoo/sketches/theta/PairwiseSetOperations.java b/sketches/src/main/java/com/yahoo/sketches/theta/PairwiseSetOperations.java index 0b9c53f75..d8fe4a61a 100644 --- a/sketches/src/main/java/com/yahoo/sketches/theta/PairwiseSetOperations.java +++ b/sketches/src/main/java/com/yahoo/sketches/theta/PairwiseSetOperations.java @@ -10,29 +10,31 @@ import com.yahoo.sketches.SketchesArgumentException; import com.yahoo.sketches.Util; +/** + * Set Operations where the arguments are presented in pairs as in C = Op(A,B). These are + * stateless operations and the result is returned immediately. These operations are designed for + * high performance and only accept ordered, CompactSketches that may be either Heap-based or + * Direct. The returned results are always in the form of a Heap-based, ordered CompactSketch. + * + * @author Lee Rhodes + */ public class PairwiseSetOperations { /** - * This implements a stateless, pair-wise intersection on Sketches that are already compact and - * ordered. This will work with sketches that are either on-heap or off-heap. + * This implements a stateless, pair-wise intersection operation on ordered, + * CompactSketches that are either Heap-based or Direct. * - * @param skA The first sketch argument. Must be compact, ordered and not null. - * @param skB The second sketch argument. Must be compact, ordered and not null. - * @return the result of the intersection as a heap, compact, ordered sketch. + * @param skA The first ordered, CompactSketch argument that must not be null. + * @param skB The second ordered, CompactSketch argument that must not be null. + * @return the result as a Heap-based, ordered CompactSketch. */ - public static Sketch intersect(Sketch skA, Sketch skB) { - if (!skA.isCompact() || !skA.isOrdered() || !skB.isCompact() || !skB.isOrdered()) { - throw new SketchesArgumentException("Require compact, ordered sketch, got: " - + skA.getClass().getSimpleName() + ", " + skB.getClass().getSimpleName()); - } - short seedHashA = skA.getSeedHash(); - short seedHashB = skB.getSeedHash(); - Util.checkSeedHashes(seedHashA, seedHashB); + public static CompactSketch intersect(CompactSketch skA, CompactSketch skB) { + final short seedHash = checkOrderedAndSeedHash(skA, skB); long thetaLong = Math.min(skA.getThetaLong(), skB.getThetaLong()); //Theta rule int indexA = 0; int indexB = 0; - int count = 0; + int outCount = 0; long[] cacheA = skA.getCache(); long[] cacheB = skB.getCache(); @@ -48,7 +50,7 @@ public static Sketch intersect(Sketch skA, Sketch skB) { } if (hashA == hashB) { - outCache[count++] = hashA; + outCache[outCount++] = hashA; ++indexA; ++indexB; } else if (hashA < hashB) { @@ -58,35 +60,27 @@ public static Sketch intersect(Sketch skA, Sketch skB) { } } - boolean empty = skA.isEmpty() || skB.isEmpty(); //empty rule is OR + boolean empty = skA.isEmpty() || skB.isEmpty(); //Empty rule is OR return new HeapCompactOrderedSketch( - Arrays.copyOf(outCache, count), empty, seedHashA, count, thetaLong); + Arrays.copyOf(outCache, outCount), empty, seedHash, outCount, thetaLong); } - - /** - * This implements a stateless, pair-wise A AND NOT B operation on Sketches that are - * already compact and ordered. This will work with sketches that are either on-heap or off-heap. + * This implements a stateless, pair-wise A AND NOT B operation on ordered, + * CompactSketches that are either Heap-based or Direct. * - * @param skA The first sketch argument. Must be compact, ordered and not null. - * @param skB The second sketch argument. Must be compact, ordered and not null. - * @return the result of the A AND NOT B as a heap, compact, ordered sketch. + * @param skA The first ordered, CompactSketch argument that must not be null. + * @param skB The second ordered, CompactSketch argument that must not be null. + * @return the result as a Heap-based, ordered CompactSketch. */ - public static Sketch aNotB(Sketch skA, Sketch skB) { - if (!skA.isCompact() || !skA.isOrdered() || !skB.isCompact() || !skB.isOrdered()) { - throw new SketchesArgumentException("Require compact, ordered sketch, got: " - + skA.getClass().getSimpleName() + ", " + skB.getClass().getSimpleName()); - } - short seedHashA = skA.getSeedHash(); - short seedHashB = skB.getSeedHash(); - Util.checkSeedHashes(seedHashA, seedHashB); + public static CompactSketch aNotB(CompactSketch skA, CompactSketch skB) { + final short seedHash = checkOrderedAndSeedHash(skA, skB); long thetaLong = Math.min(skA.getThetaLong(), skB.getThetaLong()); //Theta rule int indexA = 0; int indexB = 0; - int count = 0; + int outCount = 0; long[] cacheA = skA.getCache(); long[] cacheB = skB.getCache(); @@ -105,40 +99,34 @@ public static Sketch aNotB(Sketch skA, Sketch skB) { ++indexA; ++indexB; } else if (hashA < hashB) { - outCache[count++] = hashA; + outCache[outCount++] = hashA; ++indexA; } else { ++indexB; } } - boolean empty = skA.isEmpty(); + boolean empty = skA.isEmpty(); //Empty rule is whatever A is return new HeapCompactOrderedSketch( - Arrays.copyOf(outCache, count), empty, seedHashA, count, thetaLong); + Arrays.copyOf(outCache, outCount), empty, seedHash, outCount, thetaLong); } /** - * This implements a stateless, pair-wise union on Sketches that are already compact and - * ordered. This will work with sketches that are either on-heap or off-heap. + * This implements a stateless, pair-wise union operation on ordered, + * CompactSketches that are either Heap-based or Direct. * - * @param skA The first sketch argument. Must be compact, ordered and not null. - * @param skB The second sketch argument. Must be compact, ordered and not null. - * @return the result of the union as a heap, compact, ordered sketch. + * @param skA The first ordered, CompactSketch argument that must not be null. + * @param skB The second ordered, CompactSketch argument that must not be null. + * @return the result as a Heap-based, ordered CompactSketch. */ - public static Sketch union(Sketch skA, Sketch skB) { - if (!skA.isCompact() || !skA.isOrdered() || !skB.isCompact() || !skB.isOrdered()) { - throw new SketchesArgumentException("Require compact, ordered sketch, got: " - + skA.getClass().getSimpleName() + ", " + skB.getClass().getSimpleName()); - } - short seedHashA = skA.getSeedHash(); - short seedHashB = skB.getSeedHash(); - Util.checkSeedHashes(seedHashA, seedHashB); + public static CompactSketch union(CompactSketch skA, CompactSketch skB) { + final short seedHash = checkOrderedAndSeedHash(skA, skB); long thetaLong = Math.min(skA.getThetaLong(), skB.getThetaLong()); //Theta rule int indexA = 0; int indexB = 0; - int count = 0; + int outCount = 0; long[] cacheA = skA.getCache(); long[] cacheB = skB.getCache(); @@ -154,22 +142,33 @@ public static Sketch union(Sketch skA, Sketch skB) { } if (hashA == hashB) { - outCache[count++] = hashA; + outCache[outCount++] = hashA; ++indexA; ++indexB; } else if (hashA < hashB) { - outCache[count++] = hashA; + outCache[outCount++] = hashA; ++indexA; } else { - outCache[count++] = hashB; + outCache[outCount++] = hashB; ++indexB; } } - boolean empty = skA.isEmpty() || skB.isEmpty(); //empty rule is OR + boolean empty = skA.isEmpty() && skB.isEmpty(); //Empty rule is AND return new HeapCompactOrderedSketch( - Arrays.copyOf(outCache, count), empty, seedHashA, count, thetaLong); + Arrays.copyOf(outCache, outCount), empty, seedHash, outCount, thetaLong); } + private static final short checkOrderedAndSeedHash( + final CompactSketch skA, final CompactSketch skB) { + if (!skA.isOrdered() || !skB.isOrdered()) { + throw new SketchesArgumentException("Sketch must be ordered, got: " + + skA.getClass().getSimpleName() + ", " + skB.getClass().getSimpleName()); + } + short seedHashA = skA.getSeedHash(); + short seedHashB = skB.getSeedHash(); + Util.checkSeedHashes(seedHashA, seedHashB); + return seedHashA; + } } diff --git a/sketches/src/main/java/com/yahoo/sketches/theta/Rebuilder.java b/sketches/src/main/java/com/yahoo/sketches/theta/Rebuilder.java index f39dfb6aa..e6273aee6 100644 --- a/sketches/src/main/java/com/yahoo/sketches/theta/Rebuilder.java +++ b/sketches/src/main/java/com/yahoo/sketches/theta/Rebuilder.java @@ -16,53 +16,55 @@ import com.yahoo.sketches.Util; /** - * Handles common resize, rebuild and move operations. + * Handles common resize, rebuild and move operations. * The Memory based operations assume a specific data structure that is unique to the theta sketches. + * + * @author Lee Rhodes */ final class Rebuilder { - + private Rebuilder() {} - + /** * Rebuild the hashTable in the given Memory at its current size. Changes theta and thus count. * This assumes a Memory preamble of standard form with correct values of curCount and thetaLong. * ThetaLong and curCount will change. * Afterwards, caller must update local class members curCount and thetaLong from Memory. - * + * * @param mem the Memory the given Memory * @param preambleLongs size of preamble in longs * @param lgNomLongs the log_base2 of k, the configuration parameter of the sketch * @param lgArrLongs the log_base2 of the current size of the hash table * @param curCount the number of valid entries */ - static final void quickSelectAndRebuild(final Memory mem, final int preambleLongs, + static final void quickSelectAndRebuild(final Memory mem, final int preambleLongs, final int lgNomLongs, final int lgArrLongs, int curCount) { //Pull data into tmp arr for QS algo int arrLongs = 1 << lgArrLongs; long[] tmpArr = new long[arrLongs]; int preBytes = preambleLongs << 3; mem.getLongArray(preBytes, tmpArr, 0, arrLongs); //copy mem data to tmpArr - + //Do the QuickSelect on a tmp arr to create new thetaLong int pivot = (1 << lgNomLongs) + 1; // (K+1) pivot for QS long newThetaLong = selectExcludingZeros(tmpArr, curCount, pivot); mem.putLong(THETA_LONG, newThetaLong); //UPDATE thetalong - + //Rebuild to clean up dirty data, update count long[] tgtArr = new long[arrLongs]; int newCurCount = HashOperations.hashArrayInsert(tmpArr, tgtArr, lgArrLongs, newThetaLong); mem.putInt(RETAINED_ENTRIES_INT, newCurCount); //UPDATE curCount - + //put the rebuilt array back into memory mem.putLongArray(preBytes, tgtArr, 0, arrLongs); } - + /** * Moves me (the entire sketch) to a new larger Memory location and rebuilds the hash table. - * This assumes a Memory preamble of standard form with the correct value of thetaLong. - * Afterwards, the caller must update the local Memory reference, lgArrLongs + * This assumes a Memory preamble of standard form with the correct value of thetaLong. + * Afterwards, the caller must update the local Memory reference, lgArrLongs * and hashTableThreshold from the dstMemory and free the source Memory. - * + * * @param srcMem the source Memory * @param preambleLongs size of preamble in longs * @param srcLgArrLongs size (log_base2) of source hash table @@ -70,7 +72,7 @@ static final void quickSelectAndRebuild(final Memory mem, final int preambleLong * @param dstLgArrLongs the destination hash table target size * @param thetaLong theta as a long */ - static final void moveAndResize(final Memory srcMem, final int preambleLongs, + static final void moveAndResize(final Memory srcMem, final int preambleLongs, final int srcLgArrLongs, final Memory dstMem, final int dstLgArrLongs, final long thetaLong) { //Move Preamble to destination memory int preBytes = preambleLongs << 3; @@ -88,20 +90,20 @@ static final void moveAndResize(final Memory srcMem, final int preambleLongs, dstMem.putLongArray(preBytes, dstHTArr, 0, dstHTLen); dstMem.putByte(LG_ARR_LONGS_BYTE, (byte)dstLgArrLongs); //update in dstMem } - + /** * Resizes existing hash array into a larger one within a single Memory assuming enough space. - * This assumes a Memory preamble of standard form with the correct value of thetaLong. + * This assumes a Memory preamble of standard form with the correct value of thetaLong. * The Memory lgArrLongs will change. * Afterwards, the caller must update local copies of lgArrLongs and hashTableThreshold from * Memory. - * + * * @param mem the Memory * @param preambleLongs the size of the preamble in longs * @param srcLgArrLongs the size of the source hash table * @param dstLgArrLongs the LgArrLongs value for the new hash table */ - static final void resize(final Memory mem, final int preambleLongs, + static final void resize(final Memory mem, final int preambleLongs, final int srcLgArrLongs, final int dstLgArrLongs) { //Preamble stays in place int preBytes = preambleLongs << 3; @@ -119,9 +121,9 @@ static final void resize(final Memory mem, final int preambleLongs, mem.putLongArray(preBytes, dstHTArr, 0, dstHTLen); //put it back, no need to clear mem.putByte(LG_ARR_LONGS_BYTE, (byte) dstLgArrLongs); //update in mem } - + /** - * Returns the actual log2 Resize Factor that can be used to grow the hash table. This will be + * Returns the actual log2 Resize Factor that can be used to grow the hash table. This will be * an integer value between zero and the given lgRF, inclusive; * @param capBytes the current memory capacity in bytes * @param lgArrLongs the current lg hash table size in longs @@ -134,5 +136,5 @@ static final int actLgResizeFactor(long capBytes, int lgArrLongs, int preLongs, int lgFactor = Math.max(Integer.numberOfTrailingZeros(maxHTLongs) - lgArrLongs, 0); return (lgFactor >= lgRF) ? lgRF : lgFactor; } - + } diff --git a/sketches/src/test/java/com/yahoo/sketches/theta/PairwiseSetOperationsTest.java b/sketches/src/test/java/com/yahoo/sketches/theta/PairwiseSetOperationsTest.java index 38fff0481..aceee3d57 100644 --- a/sketches/src/test/java/com/yahoo/sketches/theta/PairwiseSetOperationsTest.java +++ b/sketches/src/test/java/com/yahoo/sketches/theta/PairwiseSetOperationsTest.java @@ -85,24 +85,14 @@ public void checkIntersectionEarlyStop() { CompactSketch csk3 = inter.getResult(true, null); double result2 = csk3.getEstimate(); - println(result1 + ", "+ result2); assertEquals(result1, result2, 0.0); + usk1.reset(); usk2.reset(); inter.reset(); } } - @Test(expectedExceptions = SketchesArgumentException.class) - public void checkIntersectionBadArguments() { - int lgK = 10; - int k = 1<