From 0521a13ac86a1e73dec166392b1c16898a5bd858 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sun, 24 Dec 2023 12:37:35 -0800 Subject: [PATCH 1/4] First implementation of weighted update for KllDoubles. Not fully tested for off heap yet. Not implemented for floats or items yet. --- .../org/apache/datasketches/common/Util.java | 15 ++ .../datasketches/kll/KllDoublesHelper.java | 66 ++++--- .../datasketches/kll/KllDoublesSketch.java | 18 +- .../kll/KllHeapDoublesSketch.java | 28 +++ .../apache/datasketches/kll/KllHelper.java | 46 ++--- .../apache/datasketches/kll/KllSketch.java | 2 +- .../quantilescommon/QuantilesDoublesAPI.java | 2 +- .../datasketches/kll/KllMiscDoublesTest.java | 163 +++++++++++++----- 8 files changed, 246 insertions(+), 94 deletions(-) diff --git a/src/main/java/org/apache/datasketches/common/Util.java b/src/main/java/org/apache/datasketches/common/Util.java index f713171e6..558a8d4a4 100644 --- a/src/main/java/org/apache/datasketches/common/Util.java +++ b/src/main/java/org/apache/datasketches/common/Util.java @@ -729,6 +729,19 @@ public static boolean isOdd(final long n) { return (n & 1L) == 1L; } + //Other + + /** + * Returns a one if the bit at bitPos is a one, otherwise zero. + * @param number the number to examine + * @param bitPos the given zero-based bit position, where the least significant + * bit is at position zero. + * @return a one if the bit at bitPos is a one, otherwise zero. + */ + public static final int bitAt(final long number, final int bitPos) { + return (number & (1L << bitPos)) > 0 ? 1 : 0; + } + /** * Computes the number of decimal digits of the number n * @param n the given number @@ -756,6 +769,8 @@ public static String intToFixedLengthString(final int number, final int length) return characterPad(num, length, ' ', false); } + //Generic tests + /** * Finds the minimum of two generic items * @param the type diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java index a3effa917..e92709463 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java @@ -39,6 +39,20 @@ // final class KllDoublesHelper { + /** + * Create Items Array from given item and weight. + * Used with weighted update only. + * @param item the given item + * @param weight the given weight + * @return the Items Array. + */ + static double[] createItemsArray(final double item, final int weight) { + final int itemsArrLen = Integer.bitCount(weight); + final double[] itemsArr = new double[itemsArrLen]; + Arrays.fill(itemsArr, item); + return itemsArr; + } + /** * The following code is only valid in the special case of exactly reaching capacity while updating. * It cannot be used while merging, while reducing k, or anything else. @@ -135,12 +149,12 @@ static void mergeDoubleImpl(final KllDoublesSketch mySketch, } } - //After the level 0 update, we capture the intermediate state of levels and items arrays... + //After the level 0 update, we capture the intermediate state of my levels and items arrays... final int myCurNumLevels = mySketch.getNumLevels(); final int[] myCurLevelsArr = mySketch.levelsArr; final double[] myCurDoubleItemsArr = mySketch.getDoubleItemsArray(); - // then rename them and initialize in case there are no higher levels + // create aliases in case there are no higher levels int myNewNumLevels = myCurNumLevels; int[] myNewLevelsArr = myCurLevelsArr; double[] myNewDoubleItemsArr = myCurDoubleItemsArr; @@ -150,12 +164,13 @@ static void mergeDoubleImpl(final KllDoublesSketch mySketch, final int tmpSpaceNeeded = mySketch.getNumRetained() + KllHelper.getNumRetainedAboveLevelZero(otherNumLevels, otherLevelsArr); final double[] workbuf = new double[tmpSpaceNeeded]; - final int ub = KllHelper.ubOnNumLevels(finalN); - final int[] worklevels = new int[ub + 2]; // ub+1 does not work - final int[] outlevels = new int[ub + 2]; final int provisionalNumLevels = max(myCurNumLevels, otherNumLevels); + final int ub = max(KllHelper.ubOnNumLevels(finalN), provisionalNumLevels); + final int[] worklevels = new int[ub + 2]; // ub+1 does not work + final int[] outlevels = new int[ub + 2]; + populateDoubleWorkArrays(workbuf, worklevels, provisionalNumLevels, myCurNumLevels, myCurLevelsArr, myCurDoubleItemsArr, otherNumLevels, otherLevelsArr, otherDoubleItemsArr); @@ -199,7 +214,7 @@ static void mergeDoubleImpl(final KllDoublesSketch mySketch, KllHelper.memorySpaceMgmt(mySketch, myNewLevelsArr.length, myNewDoubleItemsArr.length); mySketch.setWritableMemory(wmem); } - } + } //end of updating levels above level 0 //Update Preamble: mySketch.setN(finalN); @@ -225,7 +240,7 @@ static void mergeDoubleImpl(final KllDoublesSketch mySketch, assert KllHelper.sumTheSampleWeights(mySketch.getNumLevels(), mySketch.levelsArr) == mySketch.getN(); } - private static void mergeSortedDoubleArrays( + private static void mergeSortedDoubleArrays( //only bufC is modified final double[] bufA, final int startA, final int lenA, final double[] bufB, final int startB, final int lenB, final double[] bufC, final int startC) { @@ -299,8 +314,7 @@ private static void randomlyHalveUpDoubles(final double[] buf, final int start, } //Called from KllDoublesSketch::update and this - static void updateDouble(final KllDoublesSketch dblSk, - final double item) { + static void updateDouble(final KllDoublesSketch dblSk, final double item) { if (Double.isNaN(item)) { return; } //ignore if (dblSk.isEmpty()) { dblSk.setMinItem(item); @@ -445,32 +459,36 @@ private static int[] generalDoublesCompress( return new int[] {numLevels, targetItemCount, currentItemCount}; } - private static void populateDoubleWorkArrays( - final double[] workbuf, final int[] worklevels, final int provisionalNumLevels, + private static void populateDoubleWorkArrays( //workBuf and workLevels are modified + final double[] workBuf, final int[] workLevels, final int provisionalNumLevels, final int myCurNumLevels, final int[] myCurLevelsArr, final double[] myCurDoubleItemsArr, final int otherNumLevels, final int[] otherLevelsArr, final double[] otherDoubleItemsArr) { - worklevels[0] = 0; + workLevels[0] = 0; - // Note: the level zero data from "other" was already inserted into "self" + // Note: the level zero data from "other" was already inserted into "self", + // This copies into workbuf. final int selfPopZero = KllHelper.currentLevelSizeItems(0, myCurNumLevels, myCurLevelsArr); - System.arraycopy(myCurDoubleItemsArr, myCurLevelsArr[0], workbuf, worklevels[0], selfPopZero); - worklevels[1] = worklevels[0] + selfPopZero; + System.arraycopy(myCurDoubleItemsArr, myCurLevelsArr[0], workBuf, workLevels[0], selfPopZero); + workLevels[1] = workLevels[0] + selfPopZero; for (int lvl = 1; lvl < provisionalNumLevels; lvl++) { final int selfPop = KllHelper.currentLevelSizeItems(lvl, myCurNumLevels, myCurLevelsArr); final int otherPop = KllHelper.currentLevelSizeItems(lvl, otherNumLevels, otherLevelsArr); - worklevels[lvl + 1] = worklevels[lvl] + selfPop + otherPop; - - if (selfPop > 0 && otherPop == 0) { - System.arraycopy(myCurDoubleItemsArr, myCurLevelsArr[lvl], workbuf, worklevels[lvl], selfPop); - } else if (selfPop == 0 && otherPop > 0) { - System.arraycopy(otherDoubleItemsArr, otherLevelsArr[lvl], workbuf, worklevels[lvl], otherPop); - } else if (selfPop > 0 && otherPop > 0) { - mergeSortedDoubleArrays( + workLevels[lvl + 1] = workLevels[lvl] + selfPop + otherPop; + + if (selfPop == 0 && otherPop == 0) { continue; } + else if (selfPop > 0 && otherPop == 0) { + System.arraycopy(myCurDoubleItemsArr, myCurLevelsArr[lvl], workBuf, workLevels[lvl], selfPop); + } + else if (selfPop == 0 && otherPop > 0) { + System.arraycopy(otherDoubleItemsArr, otherLevelsArr[lvl], workBuf, workLevels[lvl], otherPop); + } + else if (selfPop > 0 && otherPop > 0) { + mergeSortedDoubleArrays( //only workbuf is modified myCurDoubleItemsArr, myCurLevelsArr[lvl], selfPop, otherDoubleItemsArr, otherLevelsArr[lvl], otherPop, - workbuf, worklevels[lvl]); + workBuf, workLevels[lvl]); } } } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index f8cd538e6..d007535c9 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -276,9 +276,8 @@ public QuantilesDoublesSketchIterator iterator() { @Override public final void merge(final KllSketch other) { if (readOnly || sketchStructure != UPDATABLE) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } - final KllDoublesSketch othDblSk = (KllDoublesSketch)other; - if (othDblSk.isEmpty()) { return; } - KllDoublesHelper.mergeDoubleImpl(this, othDblSk); + if (other.isEmpty()) { return; } + KllDoublesHelper.mergeDoubleImpl(this, (KllDoublesSketch)other); kllDoublesSV = null; } @@ -324,6 +323,19 @@ public void update(final double item) { kllDoublesSV = null; } + /** + * Updates this sketch with the given item the number of times specified by the given weight. + * @param item the item to be repeated. NaNs are ignored. + * @param weight the number of times the update of item is to be repeated. It must be ≥ one. + */ + public void weightedUpdate(final double item, final int weight) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + if (weight < 1) { throw new SketchesArgumentException("Weight is less than one."); } + if (Double.isNaN(item)) { return; } //ignore + KllHeapDoublesSketch.weightedUpdateDouble(this, item, weight); + kllDoublesSV = null; + } + //restricted /** diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java index df81a34c8..8e0ef93d5 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java @@ -81,6 +81,25 @@ final class KllHeapDoublesSketch extends KllDoublesSketch { this.doubleItems = new double[k]; } + /** + * Used for creating a temporary sketch for use with weighted updates. + */ + KllHeapDoublesSketch(final int k, final int m, final double item, final int weight) { + super(UPDATABLE); + KllHelper.checkM(m); + KllHelper.checkK(k, m); + this.levelsArr = KllHelper.createLevelsArray(weight); + this.readOnly = false; + this.k = k; + this.m = m; + this.n = weight; + this.minK = k; + this.isLevelZeroSorted = false; + this.minDoubleItem = item; + this.maxDoubleItem = item; + this.doubleItems = KllDoublesHelper.createItemsArray(item, weight); + } + /** * Heapify constructor. * @param srcMem Memory object that contains data serialized by this sketch. @@ -282,4 +301,13 @@ void setNumLevels(final int numLevels) { @Override void setWritableMemory(final WritableMemory wmem) { } + static void weightedUpdateDouble(final KllDoublesSketch dblSk, final double item, final int weight) { + if (weight < dblSk.getLevelsArray(UPDATABLE)[0]) { + for (int i = 0; i < weight; i++) { dblSk.update(item); } + } else { + final KllHeapDoublesSketch tmpSk = new KllHeapDoublesSketch(dblSk.getK(), DEFAULT_M, item, weight); + dblSk.merge(tmpSk); + } + } + } diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 00cf2793b..6ef4197d2 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -28,6 +28,7 @@ import static java.lang.Math.pow; import static java.lang.Math.round; import static org.apache.datasketches.common.Family.KLL; +import static org.apache.datasketches.common.Util.bitAt; import static org.apache.datasketches.common.Util.floorPowerOf2; import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; import static org.apache.datasketches.kll.KllPreambleUtil.EMPTY_BIT_MASK; @@ -93,7 +94,7 @@ static class LevelStats { /** * This is the exact powers of 3 from 3^0 to 3^30 where the exponent is the index */ - private static long[] powersOfThree = + static long[] powersOfThree = new long[] {1, 3, 9, 27, 81, 243, 729, 2187, 6561, 19683, 59049, 177147, 531441, 1594323, 4782969, 14348907, 43046721, 129140163, 387420489, 1162261467, 3486784401L, 10460353203L, 31381059609L, 94143178827L, 282429536481L, @@ -150,6 +151,23 @@ public static long convertToCumulative(final long[] array) { return subtotal; } + /** + * Create the Levels Array from given weight + * Used with weighted update only. + * @param weight the given weight + * @return the Levels Array + */ + static int[] createLevelsArray(final int weight) { + final int numLevels = 32 - Integer.numberOfLeadingZeros(weight); + final int[] levelsArr = new int[numLevels + 1]; //always one more than numLevels + int itemsArrIndex = 0; + levelsArr[0] = itemsArrIndex; + for (int level = 0; level < numLevels; level++) { + levelsArr[level + 1] = itemsArrIndex += bitAt(weight, level); + } + return levelsArr; + } + static int currentLevelSizeItems(final int level, final int numLevels, final int[] levels) { if (level >= numLevels) { return 0; } return levels[level + 1] - levels[level]; @@ -180,7 +198,9 @@ static LevelStats getFinalSketchStatsAtNumLevels( printf("%6s %8s %12s %18s %18s\n", "Level", "Items", "CumItems", "N at Level", "CumN"); } for (int level = 0; level < numLevels; level++) { - final LevelStats lvlStats = getLevelCapacityItems(k, m, numLevels, level); + final int items = KllHelper.levelCapacity(k, numLevels, level, m); + final long n = (long)items << level; + final LevelStats lvlStats = new LevelStats(n, numLevels, items); cumItems += lvlStats.numItems; cumN += lvlStats.n; if (printSketchStructure) { @@ -257,24 +277,6 @@ static int getKFromEpsilon(final double epsilon, final boolean pmf) { return max(KllSketch.MIN_M, min(KllSketch.MAX_K, k)); } - /** - * Given k, m, numLevels, this computes the item capacity of a single level. - * @param k the given user sketch configuration parameter - * @param m the given user sketch configuration parameter - * @param numLevels the given number of levels of the sketch - * @param level the specific level to compute its item capacity - * @return LevelStats with the computed N and items for the given level. - */ - static LevelStats getLevelCapacityItems( - final int k, - final int m, - final int numLevels, - final int level) { - final int items = KllHelper.levelCapacity(k, numLevels, level, m); - final long n = (long)items << level; - return new LevelStats(n, numLevels, items); - } - /** * Gets the normalized rank error given k and pmf. * Static method version of the getNormalizedRankError(boolean). @@ -696,7 +698,7 @@ static int findLevelToCompact(final int k, final int m, final int numLevels, fin * @param depth the zero-based index of the level being computed. * @return the actual capacity of a given level given its depth index. */ - private static long intCapAux(final int k, final int depth) { + static long intCapAux(final int k, final int depth) { if (depth <= 30) { return intCapAuxAux(k, depth); } final int half = depth / 2; final int rest = depth - half; @@ -710,7 +712,7 @@ private static long intCapAux(final int k, final int depth) { * @param depth the zero-based index of the level being computed. The max depth is 30! * @return the actual capacity of a given level given its depth index. */ - private static long intCapAuxAux(final long k, final int depth) { + static long intCapAuxAux(final long k, final int depth) { final long twok = k << 1; // for rounding at the end, pre-multiply by 2 here, divide by 2 during rounding. final long tmp = ((twok << depth) / powersOfThree[depth]); //2k* (2/3)^depth. 2k also keeps the fraction larger. final long result = ((tmp + 1L) >>> 1); // (tmp + 1)/2. If odd, round up. This guarantees an integer. diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index 67f6bb98e..684cfd841 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -113,7 +113,7 @@ public abstract class KllSketch implements QuantilesAPI { final SketchType sketchType; final SketchStructure sketchStructure; boolean readOnly; - int[] levelsArr; //Always writable form + int[] levelsArr; //Always updatable form /** * Constructor for on-heap and off-heap. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java index 21348407d..e8e5310f5 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java @@ -293,7 +293,7 @@ default double[] getRanks(double[] quantiles) { /** * Updates this sketch with the given item. - * @param item from a stream of quantiles. NaNs are ignored. + * @param item from a stream of items. NaNs are ignored. */ void update(double item); diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java index 79f2b5e6e..59a845fc7 100644 --- a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java @@ -19,6 +19,7 @@ package org.apache.datasketches.kll; +import static org.apache.datasketches.common.Util.bitAt; import static org.apache.datasketches.kll.KllHelper.getGrowthSchemeForGivenN; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.testng.Assert.assertEquals; @@ -167,6 +168,7 @@ public void visualCheckToString() { assertEquals(sk2.getNumRetained(), 56); } + //Disable this test for releases @Test //set static enablePrinting = true for visual checking public void viewHeapCompactions() { int k = 20; @@ -193,8 +195,75 @@ public void viewHeapCompactions() { println(""); } + @Test //set static enablePrinting = true for visual checking + public void checkWeightedUpdates() { + int k = 20; + int n1 = 0; + int weight = 127; + double item = 10.0; + KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(k); + println(sk.toString(true, true)); + sk.weightedUpdate(item, weight); +// sk.weightedUpdate(item, n2); +// println(sk.toString(true, true)); +// assertEquals(sk.getNumRetained(), 8); +// assertEquals(sk.getN(), 216); + } + + @Test //set static enablePrinting = true for visual checking + public void checkCreateItemsArray() { + double item = 10.0; + int weight = 108; + double[] itemsArr = KllDoublesHelper.createItemsArray(item, weight); + assertEquals(itemsArr.length, 4); + for (int i = 0; i < itemsArr.length; i++) { itemsArr[i] = item; } + outputItems(itemsArr); + } + + private static void outputItems(double[] itemsArr) { + String[] hdr2 = {"Index", "Value"}; + String hdr2fmt = "%6s %15s\n"; + String d2fmt = "%6d %15f\n"; + println("ItemsArr"); + printf(hdr2fmt, (Object[]) hdr2); + for (int i = 0; i < itemsArr.length; i++) { + printf(d2fmt, i, itemsArr[i]); + } + println(""); + } + + @Test //set static enablePrinting = true for visual checking + public void checkCreateLevelsArray() { + int weight = 108; + int[] levelsArr = KllHelper.createLevelsArray(weight); + assertEquals(levelsArr.length, 8); + int[] correct = {0,0,0,1,2,2,3,4}; + for (int i = 0; i < levelsArr.length; i++) { + assertEquals(levelsArr[i], correct[i]); + } + outputLevels(weight, levelsArr); + } + + private static void outputLevels(int weight, int[] levelsArr) { + String[] hdr = {"Lvl", "StartAdr", "BitPattern", "Weight"}; + String hdrfmt = "%3s %9s %10s %s\n"; + String dfmt = "%3d %9d %10d %d\n"; + String dfmt_2 = "%3d %9d %s\n"; + println("Count = " + weight + " => " + (Integer.toBinaryString(weight))); + println("LevelsArr"); + printf(hdrfmt, (Object[]) hdr); + for (int i = 0; i < levelsArr.length; i++) { + if (i == levelsArr.length - 1) { printf(dfmt_2, i, levelsArr[i], "ItemsArr.length"); } + else { + int j = bitAt(weight, i); + printf(dfmt, i, levelsArr[i], j, 1 << (i)); + } + } + println(""); + } + @Test - public void viewCompactSketchData() { + public void viewMemorySketchData() { int k = 20; int n = 109; boolean withSummary = true; @@ -205,50 +274,55 @@ public void viewCompactSketchData() { Memory mem = Memory.wrap(byteArr); KllDoublesSketch ddSk = KllDoublesSketch.wrap(mem); println(ddSk.toString(withSummary, withData)); + assertEquals(ddSk.getN(), n); } - //@Test //set static enablePrinting = true for visual checking - // // must also make KllHelper.intCapAux(...) visible - // public void checkIntCapAux() { - // String[] hdr = {"level", "depth", "wt", "cap", "(end)", "MaxN"}; - // String hdrFmt = "%6s %6s %28s %10s %10s %34s\n"; - // String dataFmt = "%6d %6d %,28d %,10d %,10d %,34.0f\n"; - // int k = 1000; - // int m = 8; - // int numLevels = 20; - // println("k=" + k + ", m=" + m + ", numLevels=" + numLevels); - // printf(hdrFmt, (Object[]) hdr); - // double maxN = 0; - // for (int i = 0; i < numLevels; i++) { - // int depth = numLevels - i - 1; - // long cap = KllHelper.intCapAux(k, depth); - // long end = Math.max(m, cap); - // long wt = 1L << i; - // maxN += (double)wt * (double)end; - // printf(dataFmt, i, depth, wt, cap, end, maxN); - // } - // } - - //@Test //set static enablePrinting = true for visual checking - // // must also make KllHelper.powersOfThree visible - // public void checkIntCapAuxAux() { - // String[] hdr = {"d","twoK","2k*2^d","3^d","tmp=2k*2^d/3^d","(tmp + 1)/2", "(end)"}; - // String hdrFmt = "%6s %10s %20s %20s %15s %12s %10s\n"; - // String dataFmt = "%6d %10d %,20d %,20d %15d %12d %10d\n"; - // long k = (1L << 16) - 1L; - // long m = 8; - // println("k = " + k + ", m = " + m); - // printf(hdrFmt, (Object[]) hdr); - // for (int i = 0; i < 31; i++) { - // long twoK = k << 1; - // long twoKxtwoD = twoK << i; - // long threeToD = KllHelper.powersOfThree[i]; - // long tmp = twoKxtwoD / threeToD; - // long result = (tmp + 1L) >>> 1; - // long end = Math.max(m, result); //performed later - // printf(dataFmt, i, twoK, twoKxtwoD, threeToD, tmp, result, end); - // } - // } + @Test //set static enablePrinting = true for visual checking + public void checkIntCapAux() { + String[] hdr = {"level", "depth", "wt", "cap", "(end)", "MaxN"}; + String hdrFmt = "%6s %6s %28s %10s %10s %34s\n"; + String dataFmt = "%6d %6d %,28d %,10d %,10d %,34.0f\n"; + int k = 1000; + int m = 8; + int numLevels = 20; + println("k=" + k + ", m=" + m + ", numLevels=" + numLevels); + printf(hdrFmt, (Object[]) hdr); + double maxN = 0; + double[] correct = {0,1,1,2,2,3,5,8,12,17,26,39,59,88,132,198,296,444,667,1000}; + for (int i = 0; i < numLevels; i++) { + int depth = numLevels - i - 1; + long cap = KllHelper.intCapAux(k, depth); + long end = Math.max(m, cap); + long wt = 1L << i; + maxN += (double)wt * (double)end; + printf(dataFmt, i, depth, wt, cap, end, maxN); + assertEquals(cap, correct[i]); + } + } + + @Test //set static enablePrinting = true for visual checking + public void checkIntCapAuxAux() { + String[] hdr = {"d","twoK","2k*2^d","3^d","tmp=2k*2^d/3^d","(tmp + 1)/2", "(end)"}; + String hdrFmt = "%6s %10s %20s %20s %15s %12s %10s\n"; + String dataFmt = "%6d %10d %,20d %,20d %15d %12d %10d\n"; + long k = (1L << 16) - 1L; + long m = 8; + println("k = " + k + ", m = " + m); + printf(hdrFmt, (Object[]) hdr); + long[] correct = + {65535,43690,29127,19418,12945,8630,5753,3836,2557,1705,1136,758,505,337,224,150,100,67,44,30,20,13,9,6,4,3,2,1,1,1,0}; + for (int i = 0; i < 31; i++) { + long twoK = k << 1; + long twoKxtwoD = twoK << i; + long threeToD = KllHelper.powersOfThree[i]; + long tmp = twoKxtwoD / threeToD; + long result = (tmp + 1L) >>> 1; + long end = Math.max(m, result); //performed later + printf(dataFmt, i, twoK, twoKxtwoD, threeToD, tmp, result, end); + assertEquals(result,correct[i]); + assertEquals(result, KllHelper.intCapAuxAux(k, i)); + } + } @Test //set static enablePrinting = true for visual checking public void viewDirectCompactions() { @@ -278,10 +352,13 @@ public void viewCompactionAndSortedView() { DoublesSortedViewIterator itr = sv.iterator(); println("### SORTED VIEW"); printf("%12s%12s\n", "Value", "CumWeight"); + long[] correct = {2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; + int i = 0; while (itr.next()) { double v = itr.getQuantile(); long wt = itr.getWeight(); printf("%12.1f%12d\n", v, wt); + assertEquals(wt, correct[i++]); } } From a0e833e46ddc734990beb8bdb40264c60fefc243 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sun, 24 Dec 2023 16:30:48 -0800 Subject: [PATCH 2/4] Fix test problem --- .../java/org/apache/datasketches/kll/KllDoublesSketch.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index d007535c9..d2f3dc38a 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -276,8 +276,9 @@ public QuantilesDoublesSketchIterator iterator() { @Override public final void merge(final KllSketch other) { if (readOnly || sketchStructure != UPDATABLE) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } - if (other.isEmpty()) { return; } - KllDoublesHelper.mergeDoubleImpl(this, (KllDoublesSketch)other); + final KllDoublesSketch othDblSk = (KllDoublesSketch)other; //check cast first + if (othDblSk.isEmpty()) { return; } //then check empty + KllDoublesHelper.mergeDoubleImpl(this, othDblSk); kllDoublesSV = null; } From cd3af18a54c948908cd3e24917d62c272b914da3 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 3 Jan 2024 11:06:40 -0800 Subject: [PATCH 3/4] Add "integer" to weightedUpdate Change enablePrinting to false --- src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index d2f3dc38a..a23b8c3d1 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -325,7 +325,7 @@ public void update(final double item) { } /** - * Updates this sketch with the given item the number of times specified by the given weight. + * Weighted update. Updates this sketch with the given item the number of times specified by the given integer weight. * @param item the item to be repeated. NaNs are ignored. * @param weight the number of times the update of item is to be repeated. It must be ≥ one. */ From 6c1503c3775d14ae553620cce955cab751580057 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 3 Jan 2024 11:20:43 -0800 Subject: [PATCH 4/4] Add asserts to checkWeightedUpdates(). --- .../apache/datasketches/kll/KllMiscDoublesTest.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java index 59a845fc7..677dbd527 100644 --- a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java @@ -198,16 +198,18 @@ public void viewHeapCompactions() { @Test //set static enablePrinting = true for visual checking public void checkWeightedUpdates() { int k = 20; - int n1 = 0; int weight = 127; double item = 10.0; KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(k); println(sk.toString(true, true)); sk.weightedUpdate(item, weight); -// sk.weightedUpdate(item, n2); -// println(sk.toString(true, true)); -// assertEquals(sk.getNumRetained(), 8); -// assertEquals(sk.getN(), 216); + println(sk.toString(true, true)); + assertEquals(sk.getNumRetained(), 7); + assertEquals(sk.getN(), weight); + sk.weightedUpdate(item, weight); + println(sk.toString(true, true)); + assertEquals(sk.getNumRetained(), 14); + assertEquals(sk.getN(), 254); } @Test //set static enablePrinting = true for visual checking