From fbe8076f1a59abe652df0c3e554421dc7e2fd000 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 8 Dec 2023 16:52:55 -0800 Subject: [PATCH 1/4] updating the KllMiscDoublesTest::viewHeapCompactions() --- .../apache/datasketches/kll/KllHelper.java | 17 +++++++++-------- .../datasketches/kll/KllMiscDoublesTest.java | 19 +++++++++++++------ 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 6956ccb67..4127c9f1e 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -817,20 +817,21 @@ private static long intCapAuxAux(final long k, final int depth) { return result; } + private final static boolean enablePrinting = true; + /** - * @param fmt format - * @param args arguments + * @param format the format + * @param args the args */ - private static void printf(final String fmt, final Object ... args) { - //System.out.printf(fmt, args); //Disable + private static final void printf(final String format, final Object ... 
args) { + if (enablePrinting) { System.out.printf(format, args); } } /** - * Println Object o - * @param o object to print + * @param o the Object to println */ - private static void println(final Object o) { - //System.out.println(o.toString()); //Disable + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } } } diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java index fe2080884..b6b9801cf 100644 --- a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java @@ -19,6 +19,7 @@ package org.apache.datasketches.kll; +import static org.apache.datasketches.kll.KllHelper.getGrowthSchemeForGivenN; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; @@ -27,6 +28,7 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.kll.KllDirectDoublesSketch.KllDirectCompactDoublesSketch; +import org.apache.datasketches.kll.KllSketch.SketchType; import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; @@ -164,23 +166,28 @@ public void visualCheckToString() { assertEquals(sk2.getNumRetained(), 56); } - @Test //set static enablePrinting = true for visual checking + @Test //set static enablePrinting = true for visual checking //HERE public void viewHeapCompactions() { int k = 20; - int n = 108; + int n = 21; int compaction = 0; KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(k); for (int i = 1; i <= n; i++) { - sk.update(i); + sk.update(1);//i if (sk.levelsArr[0] == 0) { println(LS + "#<<< BEFORE COMPACTION # " + (++compaction) + " >>>"); println(sk.toString(true, true)); - 
sk.update(++i); + if (i == n) { break; } + sk.update(1); ++i; //++i println(LS + "#<<< AFTER COMPACTION # " + (compaction) + " >>>"); println(sk.toString(true, true)); - assertEquals(sk.getDoubleItemsArray()[sk.levelsArr[0]], i); + //assertEquals(sk.getDoubleItemsArray()[sk.levelsArr[0]], i); } } + println(LS + "#<<< END STATE # >>>"); + println(sk.toString(true, true)); + println(""); + getGrowthSchemeForGivenN(k,8,n,SketchType.DOUBLES_SKETCH, true); } @Test //set static enablePrinting = true for visual checking @@ -592,7 +599,7 @@ public void printlnTest() { printf("%s\n", s); } - private final static boolean enablePrinting = false; + private final static boolean enablePrinting = true; /** * @param format the format From 677e1da812901664dad2823790c401d4203e0474 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sun, 17 Dec 2023 11:26:35 -0800 Subject: [PATCH 2/4] By adding 3 small package-private methods I was able to eliminate a whole bunch of duplicate code in KLL. Plus some other code cleanup and javadoc cleanup. Changes to some test code to help with diagnostic understanding of the operation of the sketch. 
--- .../kll/KllDirectCompactItemsSketch.java | 20 ++ .../kll/KllDirectDoublesSketch.java | 20 +- .../kll/KllDirectFloatsSketch.java | 18 ++ .../datasketches/kll/KllDoublesHelper.java | 8 +- .../datasketches/kll/KllFloatsHelper.java | 8 +- .../kll/KllHeapDoublesSketch.java | 20 ++ .../datasketches/kll/KllHeapFloatsSketch.java | 20 ++ .../datasketches/kll/KllHeapItemsSketch.java | 20 ++ .../apache/datasketches/kll/KllHelper.java | 225 +++++------------- .../datasketches/kll/KllItemsHelper.java | 8 +- .../apache/datasketches/kll/KllSketch.java | 27 ++- .../quantilescommon/QuantilesDoublesAPI.java | 8 +- .../datasketches/kll/KllMiscDoublesTest.java | 65 ++++- 13 files changed, 274 insertions(+), 193 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectCompactItemsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectCompactItemsSketch.java index b8d91fef5..443ff4ae1 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectCompactItemsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectCompactItemsSketch.java @@ -65,6 +65,14 @@ final class KllDirectCompactItemsSketch extends KllItemsSketch { levelsArr = memVal.levelsArr; //always converted to writable form. 
} + //End of constructors + + @Override + String getItemAsString(final int index) { + if (isEmpty()) { return "Null"; } + return serDe.toString(getTotalItemsArray()[index]); + } + @Override public int getK() { return getMemoryK(mem); @@ -83,6 +91,12 @@ public T getMaxItem() { return serDe.deserializeFromMemory(mem, offset, 2)[1]; } + @Override + String getMaxItemAsString() { + if (isEmpty()) { return "Null"; } + return serDe.toString(getMaxItem()); + } + @Override public T getMinItem() { if (sketchStructure == COMPACT_EMPTY || isEmpty()) { @@ -96,6 +110,12 @@ public T getMinItem() { return serDe.deserializeFromMemory(mem, offset, 1)[0]; } + @Override + String getMinItemAsString() { + if (isEmpty()) { return "Null"; } + return serDe.toString(getMinItem()); + } + @Override public long getN() { if (sketchStructure == COMPACT_EMPTY) { return 0; } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 64e340d9e..21a46069c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -116,7 +116,13 @@ static KllDirectDoublesSketch newDirectUpdatableInstance( return new KllDirectDoublesSketch(UPDATABLE, wMem, memReqSvr, memVal); } - //END of Constructors + //End of constructors + + @Override + String getItemAsString(final int index) { + if (isEmpty()) { return "NaN"; } + return Double.toString(getDoubleItemsArray()[index]); + } @Override public int getK() { @@ -137,6 +143,12 @@ else if (sketchStructure == COMPACT_FULL) { return wmem.getDouble(offset); } + @Override + String getMaxItemAsString() { + if (isEmpty()) { return "NaN"; } + return Double.toString(getMaxItem()); + } + @Override public double getMinItem() { int levelsArrBytes = 0; @@ -151,6 +163,12 @@ else if (sketchStructure == COMPACT_FULL) { return wmem.getDouble(offset); } + @Override + String 
getMinItemAsString() { + if (isEmpty()) { return "NaN"; } + return Double.toString(getMinItem()); + } + @Override public long getN() { if (sketchStructure == COMPACT_EMPTY) { return 0; } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index c86651477..542eda596 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -118,6 +118,12 @@ static KllDirectFloatsSketch newDirectUpdatableInstance( //END of Constructors + @Override + String getItemAsString(final int index) { + if (isEmpty()) { return "NaN"; } + return Float.toString(getFloatItemsArray()[index]); + } + @Override public int getK() { return getMemoryK(wmem); @@ -137,6 +143,12 @@ else if (sketchStructure == COMPACT_FULL) { return wmem.getFloat(offset); } + @Override + String getMaxItemAsString() { + if (isEmpty()) { return "NaN"; } + return Float.toString(getMaxItem()); + } + @Override public float getMinItem() { int levelsArrBytes = 0; @@ -151,6 +163,12 @@ else if (sketchStructure == COMPACT_FULL) { return wmem.getFloat(offset); } + @Override + String getMinItemAsString() { + if (isEmpty()) { return "NaN"; } + return Float.toString(getMinItem()); + } + @Override public long getN() { if (sketchStructure == COMPACT_EMPTY) { return 0; } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java index 49bfe220a..9f5c3732d 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java @@ -309,12 +309,12 @@ static void updateDouble(final KllDoublesSketch dblSk, dblSk.setMinItem(min(dblSk.getMinItem(), item)); dblSk.setMaxItem(max(dblSk.getMaxItem(), item)); } - if (dblSk.levelsArr[0] == 0) { compressWhileUpdatingSketch(dblSk); } - final int 
myLevelsArrAtZero = dblSk.levelsArr[0]; //LevelsArr could be expanded + if (dblSk.levelsArr[0] == 0) { compressWhileUpdatingSketch(dblSk); } + final int level0space = dblSk.levelsArr[0]; //read after compaction: LevelsArr could be expanded + assert level0space > 0; //nextPos below must be a valid index dblSk.incN(); dblSk.setLevelZeroSorted(false); - final int nextPos = myLevelsArrAtZero - 1; - assert myLevelsArrAtZero >= 0; + final int nextPos = level0space - 1; dblSk.setLevelsArrayAt(0, nextPos); dblSk.setDoubleItemsArrayAt(nextPos, item); } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java index 9f732e947..cc4e9aca7 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java @@ -309,12 +309,12 @@ static void updateFloat(final KllFloatsSketch fltSk, fltSk.setMinItem(min(fltSk.getMinItem(), item)); fltSk.setMaxItem(max(fltSk.getMaxItem(), item)); } - if (fltSk.levelsArr[0] == 0) { compressWhileUpdatingSketch(fltSk); } - final int myLevelsArrAtZero = fltSk.levelsArr[0]; //LevelsArr could be expanded + if (fltSk.levelsArr[0] == 0) { compressWhileUpdatingSketch(fltSk); } + final int level0space = fltSk.levelsArr[0]; //read after compaction: LevelsArr could be expanded + assert level0space > 0; //nextPos below must be a valid index fltSk.incN(); fltSk.setLevelZeroSorted(false); - final int nextPos = myLevelsArrAtZero - 1; - assert myLevelsArrAtZero >= 0; + final int nextPos = level0space - 1; fltSk.setLevelsArrayAt(0, nextPos); fltSk.setFloatItemsArrayAt(nextPos, item); } diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java index 61d8243dc..df81a34c8 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java @@ -141,6 +141,14 @@ static KllHeapDoublesSketch heapifyImpl(final Memory srcMem) { return new KllHeapDoublesSketch(srcMem, memVal); } + //End of constructors + + @Override + String
getItemAsString(final int index) { + if (isEmpty()) { return "NaN"; } + return Double.toString(doubleItems[index]); + } + @Override public int getK() { return k; } @@ -150,12 +158,24 @@ public double getMaxItem() { return maxDoubleItem; } + @Override + String getMaxItemAsString() { + if (isEmpty()) { return "NaN"; } + return Double.toString(maxDoubleItem); + } + @Override public double getMinItem() { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } return minDoubleItem; } + @Override + String getMinItemAsString() { + if (isEmpty()) { return "NaN"; } + return Double.toString(minDoubleItem); + } + @Override public long getN() { return n; } diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java index 0af731286..472871854 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java @@ -141,6 +141,14 @@ static KllHeapFloatsSketch heapifyImpl(final Memory srcMem) { return new KllHeapFloatsSketch(srcMem, memVal); } + //End of constructors + + @Override + String getItemAsString(final int index) { + if (isEmpty()) { return "NaN"; } + return Float.toString(floatItems[index]); + } + @Override public int getK() { return k; } @@ -150,12 +158,24 @@ public float getMaxItem() { return maxFloatItem; } + @Override + String getMaxItemAsString() { + if (isEmpty()) { return "NaN"; } + return Float.toString(maxFloatItem); + } + @Override public float getMinItem() { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } return minFloatItem; } + @Override + String getMinItemAsString() { + if (isEmpty()) { return "NaN"; } + return Float.toString(minFloatItem); + } + @Override public long getN() { return n; } diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java index
4b3c6c7c2..3ed776f25 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java @@ -117,6 +117,14 @@ final class KllHeapItemsSketch extends KllItemsSketch { } } + //End of constructors + + @Override + String getItemAsString(final int index) { + if (isEmpty()) { return "Null"; } + return serDe.toString((T)(itemsArr[index])); + } + @Override public int getK() { return k; @@ -128,12 +136,24 @@ public T getMaxItem() { return maxItem; } + @Override + String getMaxItemAsString() { + if (isEmpty()) { return "Null"; } + return serDe.toString(maxItem); + } + @Override public T getMinItem() { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } return minItem; } + @Override + String getMinItemAsString() { + if (isEmpty()) { return "Null"; } + return serDe.toString(minItem); + } + @Override public long getN() { return n; diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 4127c9f1e..a73c5caf1 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -47,7 +47,6 @@ import org.apache.datasketches.common.ArrayOfItemsSerDe; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; import org.apache.datasketches.kll.KllSketch.SketchStructure; import org.apache.datasketches.kll.KllSketch.SketchType; import org.apache.datasketches.memory.WritableBuffer; @@ -58,8 +57,14 @@ * * @author Lee Rhodes */ -@SuppressWarnings("unchecked") final class KllHelper { + public static final String LS = System.getProperty("line.separator"); + static final double EPS_DELTA_THRESHOLD = 1E-6; + static final double MIN_EPS = 4.7634E-5; + static final double PMF_COEF = 2.446; + static final double PMF_EXP = 0.9433; + static final double CDF_COEF = 2.296; + static final double CDF_EXP = 
0.9723; static class GrowthStats { SketchType sketchType; @@ -85,13 +90,6 @@ static class LevelStats { } } - static final double EPS_DELTA_THRESHOLD = 1E-6; - static final double MIN_EPS = 4.7634E-5; - static final double PMF_COEF = 2.446; - static final double PMF_EXP = 0.9433; - static final double CDF_COEF = 2.296; - static final double CDF_EXP = 0.9723; - /** * This is the exact powers of 3 from 3^0 to 3^30 where the exponent is the index */ @@ -313,7 +311,7 @@ static int levelCapacity(final int k, final int numLevels, final int level, fina assert (k <= (1 << 29)); assert (numLevels >= 1) && (numLevels <= 61); assert (level >= 0) && (level < numLevels); - final int depth = numLevels - level - 1; + final int depth = numLevels - level - 1; //depth is # levels from the top level (= 0) return (int) Math.max(m, intCapAux(k, depth)); } @@ -359,108 +357,44 @@ static WritableMemory memorySpaceMgmt( return newWmem; } - private static String outputItemsData(final int numLevels, final int[] levelsArr, final Object[] itemsArr, - final ArrayOfItemsSerDe serDe) { - final StringBuilder sb = new StringBuilder(); - sb.append("### KLL items data {index, item}:").append(Util.LS); - if (levelsArr[0] > 0) { - sb.append(" Empty/Garbage:" + Util.LS); - for (int i = 0; i < levelsArr[0]; i++) { - sb.append(" ").append(i + ", ").append(serDe.toString((T)itemsArr[i])).append(Util.LS); - } - } - int level = 0; - while (level < numLevels) { - final int fromIndex = levelsArr[level]; - final int toIndex = levelsArr[level + 1]; // exclusive - if (fromIndex < toIndex) { - sb.append(" level[").append(level).append("]: offset: " + levelsArr[level] + " wt: " + (1 << level)); - sb.append(Util.LS); - } - - for (int i = fromIndex; i < toIndex; i++) { - sb.append(" ").append(i + ", ").append(serDe.toString((T)itemsArr[i])).append(Util.LS); - } - level++; - } - sb.append(" level[" + level + "]: offset: " + levelsArr[level] + " (Exclusive)"); - sb.append(Util.LS); - sb.append("### End items 
data").append(Util.LS); - return sb.toString(); - - } - - private static String outputDoublesData(final int numLevels, final int[] levelsArr, final double[] doubleItemsArr) { - final StringBuilder sb = new StringBuilder(); - sb.append("### KLL items data {index, item}:").append(Util.LS); - if (levelsArr[0] > 0) { - sb.append(" Empty/Garbage:" + Util.LS); - for (int i = 0; i < levelsArr[0]; i++) { - sb.append(" ").append(i + ", ").append(doubleItemsArr[i]).append(Util.LS); - } - } - int level = 0; - while (level < numLevels) { - final int fromIndex = levelsArr[level]; - final int toIndex = levelsArr[level + 1]; // exclusive - if (fromIndex < toIndex) { - sb.append(" level[").append(level).append("]: offset: " + levelsArr[level] + " wt: " + (1 << level)); - sb.append(Util.LS); - } - - for (int i = fromIndex; i < toIndex; i++) { - sb.append(" ").append(i + ", ").append(doubleItemsArr[i]).append(Util.LS); - } - level++; - } - sb.append(" level[" + level + "]: offset: " + levelsArr[level] + " (Exclusive)"); - sb.append(Util.LS); - sb.append("### End items data").append(Util.LS); - return sb.toString(); - } - - private static String outputFloatsData(final int numLevels, final int[] levelsArr, final float[] floatsItemsArr) { + private static String outputData(final KllSketch sketch) { + final int[] levelsArr = sketch.getLevelsArray(sketch.sketchStructure); + final int numLevels = sketch.getNumLevels(); + final int k = sketch.getK(); + final int m = sketch.getM(); final StringBuilder sb = new StringBuilder(); - sb.append("### KLL items data {index, item}:").append(Util.LS); + sb.append("### KllSketch itemsArray & levelsArray data:").append(LS); + sb.append("Index, Value").append(LS); if (levelsArr[0] > 0) { - sb.append(" Empty/Garbage:" + Util.LS); + final String gbg = " Empty or Garbage, size = " + levelsArr[0]; for (int i = 0; i < levelsArr[0]; i++) { - sb.append(" ").append(i + ", ").append(floatsItemsArr[i]).append(Util.LS); + sb.append(" ").append(i + ", 
").append(sketch.getItemAsString(i)); + if (i == 0) { sb.append(gbg); } + sb.append(LS); } } int level = 0; while (level < numLevels) { final int fromIndex = levelsArr[level]; final int toIndex = levelsArr[level + 1]; // exclusive + String lvlData = ""; if (fromIndex < toIndex) { - sb.append(" level[").append(level).append("]: offset: " + levelsArr[level] + " wt: " + (1 << level)); - sb.append(Util.LS); + lvlData = " level[" + level + "]=" + levelsArr[level] + + ", cap=" + KllHelper.levelCapacity(k, numLevels, level, m) + + ", size=" + KllHelper.currentLevelSizeItems(level, numLevels, levelsArr) + + ", wt=" + (1 << level) + LS; } for (int i = fromIndex; i < toIndex; i++) { - sb.append(" ").append(i + ", ").append(floatsItemsArr[i]).append(Util.LS); + sb.append(" ").append(i + ", ").append(sketch.getItemAsString(i)); + if (i == fromIndex) { sb.append(lvlData); } else { sb.append(LS); } } level++; } - sb.append(" level[" + level + "]: offset: " + levelsArr[level] + " (Exclusive)"); - sb.append(Util.LS); - sb.append("### End items data").append(Util.LS); - return sb.toString(); - } + sb.append(" ----------level[" + level + "]=" + levelsArr[level] + ": itemsArray[].length"); + sb.append(LS); + sb.append("### End data").append(LS); - static String outputLevels(final int k, final int m, final int numLevels, final int[] levelsArr) { - final StringBuilder sb = new StringBuilder(); - sb.append("### KLL levels array:").append(Util.LS) - .append(" level, offset: nominal capacity, actual size").append(Util.LS); - int level = 0; - for ( ; level < numLevels; level++) { - sb.append(" ").append(level).append(", ").append(levelsArr[level]).append(": ") - .append(KllHelper.levelCapacity(k, numLevels, level, m)) - .append(", ").append(KllHelper.currentLevelSizeItems(level, numLevels, levelsArr)).append(Util.LS); - } - sb.append(" ").append(level).append(", ").append(levelsArr[level]).append(": (Exclusive)") - .append(Util.LS); - sb.append("### End levels array").append(Util.LS); 
return sb.toString(); } @@ -541,7 +475,7 @@ static byte[] toByteArray(final KllSketch srcSk, final boolean updatable) { return bytesOut; } - static String toStringImpl(final KllSketch sketch, final boolean withLevels, final boolean withData, + static String toStringImpl(final KllSketch sketch, final boolean withSummary, final boolean withData, final ArrayOfItemsSerDe serDe) { final SketchType sketchType = sketch.sketchType; final boolean hasMemory = sketch.hasMemory(); @@ -562,70 +496,36 @@ static String toStringImpl(final KllSketch sketch, final boolean withLevels, final String skTypeStr = sketchType.getName(); final String className = "Kll" + directStr + compactStr + skTypeStr; - sb.append(Util.LS).append("### ").append(className).append(" Summary:").append(Util.LS); - sb.append(" K : ").append(k).append(Util.LS); - sb.append(" Dynamic min K : ").append(sketch.getMinK()).append(Util.LS); - sb.append(" M : ").append(m).append(Util.LS); - sb.append(" N : ").append(n).append(Util.LS); - sb.append(" Epsilon : ").append(epsPct).append(Util.LS); - sb.append(" Epsilon PMF : ").append(epsPMFPct).append(Util.LS); - sb.append(" Empty : ").append(sketch.isEmpty()).append(Util.LS); - sb.append(" Estimation Mode : ").append(sketch.isEstimationMode()).append(Util.LS); - sb.append(" Levels : ").append(numLevels).append(Util.LS); - sb.append(" Level 0 Sorted : ").append(sketch.isLevelZeroSorted()).append(Util.LS); - sb.append(" Capacity Items : ").append(fullLevelsArr[numLevels]).append(Util.LS); - sb.append(" Retained Items : ").append(sketch.getNumRetained()).append(Util.LS); - sb.append(" Empty/Garbage Items : ").append(sketch.levelsArr[0]).append(Util.LS); - sb.append(" ReadOnly : ").append(readOnlyStr).append(Util.LS); + sb.append(LS).append("### ").append(className).append(" Summary:").append(LS); + sb.append(" K : ").append(k).append(LS); + sb.append(" Dynamic min K : ").append(sketch.getMinK()).append(LS); + sb.append(" M : ").append(m).append(LS); + sb.append(" N : 
").append(n).append(LS); + sb.append(" Epsilon : ").append(epsPct).append(LS); + sb.append(" Epsilon PMF : ").append(epsPMFPct).append(LS); + sb.append(" Empty : ").append(sketch.isEmpty()).append(LS); + sb.append(" Estimation Mode : ").append(sketch.isEstimationMode()).append(LS); + sb.append(" Levels : ").append(numLevels).append(LS); + sb.append(" Level 0 Sorted : ").append(sketch.isLevelZeroSorted()).append(LS); + sb.append(" Capacity Items : ").append(fullLevelsArr[numLevels]).append(LS); + sb.append(" Retained Items : ").append(sketch.getNumRetained()).append(LS); + sb.append(" Empty/Garbage Items : ").append(sketch.levelsArr[0]).append(LS); + sb.append(" ReadOnly : ").append(readOnlyStr).append(LS); if (sketchType != ITEMS_SKETCH) { - sb.append(" Updatable Storage Bytes: ").append(sketch.currentSerializedSizeBytes(true)) - .append(Util.LS); + sb.append(" Updatable Storage Bytes: ").append(sketch.currentSerializedSizeBytes(true)).append(LS); } - sb.append(" Compact Storage Bytes : ").append(sketch.currentSerializedSizeBytes(false)) - .append(Util.LS); + sb.append(" Compact Storage Bytes : ").append(sketch.currentSerializedSizeBytes(false)).append(LS); - if (sketchType == DOUBLES_SKETCH) { - final KllDoublesSketch dblSk = (KllDoublesSketch) sketch; - sb.append(" Min Item : ").append(dblSk.isEmpty() ? Double.NaN : dblSk.getMinItem()) - .append(Util.LS); - sb.append(" Max Item : ").append(dblSk.isEmpty() ? Double.NaN : dblSk.getMaxItem()) - .append(Util.LS); - } - else if (sketchType == FLOATS_SKETCH) { - final KllFloatsSketch fltSk = (KllFloatsSketch) sketch; - sb.append(" Min Item : ").append(fltSk.isEmpty() ? Float.NaN : fltSk.getMinItem()) - .append(Util.LS); - sb.append(" Max Item : ").append(fltSk.isEmpty() ? Float.NaN : fltSk.getMaxItem()) - .append(Util.LS); - } - else { //sketchType == ITEMS_SKETCH - final KllItemsSketch itmSk = (KllItemsSketch) sketch; - sb.append(" Min Item : ").append(itmSk.isEmpty() ? 
"null" : serDe.toString(itmSk.getMinItem())) - .append(Util.LS); - sb.append(" Max Item : ").append(itmSk.isEmpty() ? "null" : serDe.toString(itmSk.getMaxItem())) - .append(Util.LS); - } - sb.append("### End sketch summary").append(Util.LS); + final String emptyStr = (sketchType == ITEMS_SKETCH) ? "Null" : "NaN"; - if (withLevels) { - sb.append(outputLevels(k, m, numLevels, fullLevelsArr)); - } - if (withData) { - if (sketchType == DOUBLES_SKETCH) { - final KllDoublesSketch dblSk = (KllDoublesSketch) sketch; - final double[] myDoubleItemsArr = dblSk.getDoubleItemsArray(); - sb.append(outputDoublesData(numLevels, fullLevelsArr, myDoubleItemsArr)); - } else if (sketchType == FLOATS_SKETCH) { - final KllFloatsSketch fltSk = (KllFloatsSketch) sketch; - final float[] myFloatItemsArr = fltSk.getFloatItemsArray(); - sb.append(outputFloatsData(numLevels, fullLevelsArr, myFloatItemsArr)); - } - else { //sketchType == KllItemsSketch - final KllItemsSketch itmSk = (KllItemsSketch) sketch; - final T[] myItemsArr = itmSk.getTotalItemsArray(); - sb.append(outputItemsData(numLevels, fullLevelsArr, myItemsArr, serDe)); - } - } + sb.append(" Min Item : ").append(sketch.isEmpty() ? emptyStr : sketch.getMinItemAsString()) + .append(LS); + sb.append(" Max Item : ").append(sketch.isEmpty() ? emptyStr : sketch.getMaxItemAsString()) + .append(LS); + sb.append("### End sketch summary").append(LS); + + if (! withSummary) { sb.setLength(0); } + if (withData) { sb.append(outputData(sketch)); } return sb.toString(); } @@ -789,8 +689,9 @@ static int findLevelToCompact(final int k, final int m, final int numLevels, fin /** * Computes the actual item capacity of a given level given its depth index. * If the depth of levels exceeds 30, this uses a folding technique to accurately compute the - * actual level capacity up to a depth of 60. Without folding, the internal calculations would - * exceed the capacity of a long. + * actual level capacity up to a depth of 60 (or 61 levels). 
+ * Without folding, the internal calculations would exceed the capacity of a long. + * This method just decides whether folding is required or not. * @param k the configured k of the sketch * @param depth the zero-based index of the level being computed. * @return the actual capacity of a given level given its depth index. @@ -806,13 +707,13 @@ private static long intCapAux(final int k, final int depth) { /** * Performs the integer based calculation of an individual level (or folded level). * @param k the configured k of the sketch - * @param depth depth the zero-based index of the level being computed. + * @param depth the zero-based index of the level being computed. The max depth is 30! * @return the actual capacity of a given level given its depth index. */ private static long intCapAuxAux(final long k, final int depth) { - final long twok = k << 1; // for rounding pre-multiply by 2 - final long tmp = ((twok << depth) / powersOfThree[depth]); - final long result = ((tmp + 1L) >>> 1); // add 1 and divide by 2 + final long twok = k << 1; // for rounding at the end, pre-multiply by 2 here, divide by 2 during rounding. + final long tmp = ((twok << depth) / powersOfThree[depth]); //2k* (2/3)^depth. 2k also keeps the fraction larger. + final long result = ((tmp + 1L) >>> 1); // (tmp + 1)/2. If odd, round up. This guarantees an integer. 
assert (result <= k); return result; } diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java index 03bc931ac..a15960328 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java @@ -305,12 +305,12 @@ static void updateItem(final KllItemsSketch itmSk, itmSk.setMinItem(Util.minT(itmSk.getMinItem(), item, comp)); itmSk.setMaxItem(Util.maxT(itmSk.getMaxItem(), item, comp)); } - if (itmSk.levelsArr[0] == 0) { compressWhileUpdatingSketch(itmSk); } - final int myLevelsArrAtZero = itmSk.levelsArr[0]; //LevelsArr could be expanded + if (itmSk.levelsArr[0] == 0) { compressWhileUpdatingSketch(itmSk); } + final int level0space = itmSk.levelsArr[0]; //read after compaction: LevelsArr could be expanded + assert level0space > 0; //nextPos below must be a valid index itmSk.incN(); itmSk.setLevelZeroSorted(false); - final int nextPos = myLevelsArrAtZero - 1; - assert myLevelsArrAtZero >= 0; + final int nextPos = level0space - 1; itmSk.setLevelsArrayAt(0, nextPos); itmSk.setItemsArrayAt(nextPos, item); } diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index 874d64fa9..67f6bb98e 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -130,6 +130,13 @@ public abstract class KllSketch implements QuantilesAPI { this.sketchStructure = sketchStructure; } + /** + * Gets the string value of the item at the given index. + * @param index the index of the value + * @return the string value of the item at the given index. + */ + abstract String getItemAsString(int index); + /** * Gets the approximate k to use given epsilon, the normalized rank error. * @param epsilon the normalized rank error between zero and one. @@ -159,6 +166,18 @@ public static int getMaxSerializedSizeBytes(final int k, final long n, return updatableMemFormat ?
gStats.updatableBytes : gStats.compactBytes; } + /** + * Gets the string value of the max item + * @return the string value of the max item + */ + abstract String getMaxItemAsString(); + + /** + * Gets the string value of the min item + * @return the string value of the min item + */ + abstract String getMinItemAsString(); + /** * Gets the normalized rank error given k and pmf. * Static method version of the getNormalizedRankError(boolean). @@ -262,17 +281,17 @@ public final boolean isSameResource(final Memory that) { @Override public final String toString() { - return toString(false, false); + return toString(true, false); } /** * Returns a summary of the sketch as a string. - * @param withLevels if true include information about levels + * @param withSummary if true includes sketch summary information * @param withData if true include sketch data * @return string representation of sketch summary */ - public String toString(final boolean withLevels, final boolean withData) { - return KllHelper.toStringImpl(this, withLevels, withData, getSerDe()); + public String toString(final boolean withSummary, final boolean withData) { + return KllHelper.toStringImpl(this, withSummary, withData, getSerDe()); } //restricted diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java index 31a5bedf9..02aab3c6a 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java @@ -75,8 +75,8 @@ default double[] getCDF(double[] splitPoints) { double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit); /** - * Returns the maximum item of the stream. This is provided for convenience, but may be different from the largest - * item retained by the sketch algorithm. + * Returns the maximum item of the stream. 
This is provided for convenience and may be different from the + * item returned by getQuantile(1.0). * * @return the maximum item of the stream * @throws IllegalArgumentException if sketch is empty. @@ -84,8 +84,8 @@ default double[] getCDF(double[] splitPoints) { double getMaxItem(); /** - * Returns the minimum item of the stream. This is provided for convenience, but is distinct from the smallest - * item retained by the sketch algorithm. + * Returns the minimum item of the stream. This is provided for convenience and may be different from the + * item returned by getQuantile(0.0). * * @return the minimum item of the stream * @throws IllegalArgumentException if sketch is empty. diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java index b6b9801cf..aeeb31fd5 100644 --- a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java @@ -40,6 +40,7 @@ /** * @author Lee Rhodes */ +@SuppressWarnings("unused") public class KllMiscDoublesTest { static final String LS = System.getProperty("line.separator"); private final MemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); @@ -166,30 +167,74 @@ public void visualCheckToString() { assertEquals(sk2.getNumRetained(), 56); } - @Test //set static enablePrinting = true for visual checking //HERE + @Test //set static enablePrinting = true for visual checking public void viewHeapCompactions() { int k = 20; - int n = 21; + int n = 108; + boolean withSummary = false; + boolean withData = true; int compaction = 0; KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(k); for (int i = 1; i <= n; i++) { - sk.update(1);//i + sk.update(i); if (sk.levelsArr[0] == 0) { println(LS + "#<<< BEFORE COMPACTION # " + (++compaction) + " >>>"); - println(sk.toString(true, true)); - if (i == n) { break; } - sk.update(1); ++i; //++i + println(sk.toString(withSummary, 
withData)); + sk.update(++i); println(LS + "#<<< AFTER COMPACTION # " + (compaction) + " >>>"); - println(sk.toString(true, true)); - //assertEquals(sk.getDoubleItemsArray()[sk.levelsArr[0]], i); + println(sk.toString(withSummary, withData)); + assertEquals(sk.getDoubleItemsArray()[sk.levelsArr[0]], i); } } println(LS + "#<<< END STATE # >>>"); - println(sk.toString(true, true)); + println(sk.toString(withSummary, withData)); println(""); - getGrowthSchemeForGivenN(k,8,n,SketchType.DOUBLES_SKETCH, true); + //getGrowthSchemeForGivenN(k,8,n,SketchType.DOUBLES_SKETCH, true); } + //@Test //set static enablePrinting = true for visual checking + // // must also make KllHelper.intCapAux(...) visible + // public void checkIntCapAux() { + // String[] hdr = {"level", "depth", "wt", "cap", "(end)", "MaxN"}; + // String hdrFmt = "%6s %6s %28s %10s %10s %34s\n"; + // String dataFmt = "%6d %6d %,28d %,10d %,10d %,34.0f\n"; + // int k = 1000; + // int m = 8; + // int numLevels = 20; + // println("k=" + k + ", m=" + m + ", numLevels=" + numLevels); + // printf(hdrFmt, (Object[]) hdr); + // double maxN = 0; + // for (int i = 0; i < numLevels; i++) { + // int depth = numLevels - i - 1; + // long cap = KllHelper.intCapAux(k, depth); + // long end = Math.max(m, cap); + // long wt = 1L << i; + // maxN += (double)wt * (double)end; + // printf(dataFmt, i, depth, wt, cap, end, maxN); + // } + // } + + //@Test //set static enablePrinting = true for visual checking + // // must also make KllHelper.powersOfThree visible + // public void checkIntCapAuxAux() { + // String[] hdr = {"d","twoK","2k*2^d","3^d","tmp=2k*2^d/3^d","(tmp + 1)/2", "(end)"}; + // String hdrFmt = "%6s %10s %20s %20s %15s %12s %10s\n"; + // String dataFmt = "%6d %10d %,20d %,20d %15d %12d %10d\n"; + // long k = (1L << 16) - 1L; + // long m = 8; + // println("k = " + k + ", m = " + m); + // printf(hdrFmt, (Object[]) hdr); + // for (int i = 0; i < 31; i++) { + // long twoK = k << 1; + // long twoKxtwoD = twoK << i; + // long 
threeToD = KllHelper.powersOfThree[i]; + // long tmp = twoKxtwoD / threeToD; + // long result = (tmp + 1L) >>> 1; + // long end = Math.max(m, result); //performed later + // printf(dataFmt, i, twoK, twoKxtwoD, threeToD, tmp, result, end); + // } + // } + @Test //set static enablePrinting = true for visual checking public void viewDirectCompactions() { int k = 20; From 6b3776e2c00bb231e28b73288ea0a9810ada9437 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sun, 17 Dec 2023 18:21:29 -0800 Subject: [PATCH 3/4] Fixed a printing bug. Fixed some SpotBugs issues. --- .../datasketches/kll/KllDoublesHelper.java | 10 ++++-- .../datasketches/kll/KllDoublesSketch.java | 11 ++++++ .../datasketches/kll/KllFloatsHelper.java | 8 +++-- .../datasketches/kll/KllFloatsSketch.java | 11 ++++++ .../apache/datasketches/kll/KllHelper.java | 2 +- .../datasketches/kll/KllItemsHelper.java | 8 +++-- .../datasketches/kll/KllItemsSketch.java | 11 ++++++ .../datasketches/partitions/Partitioner.java | 3 +- .../GenericPartitionBoundaries.java | 2 +- .../quantilescommon/QuantilesDoublesAPI.java | 35 ------------------- .../quantilescommon/QuantilesFloatsAPI.java | 35 ------------------- .../datasketches/kll/KllMiscDoublesTest.java | 19 ++++++++-- tools/FindBugsExcludeFilter.xml | 30 ++++++++++++++++ 13 files changed, 103 insertions(+), 82 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java index 9f5c3732d..a3effa917 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java @@ -309,9 +309,13 @@ static void updateDouble(final KllDoublesSketch dblSk, dblSk.setMinItem(min(dblSk.getMinItem(), item)); dblSk.setMaxItem(max(dblSk.getMaxItem(), item)); } - final int level0space = dblSk.levelsArr[0]; - assert level0space >= 0; - if (level0space == 0) { compressWhileUpdatingSketch(dblSk); } + int level0space = 
dblSk.levelsArr[0]; + assert (level0space >= 0); + if (level0space == 0) { + compressWhileUpdatingSketch(dblSk); + level0space = dblSk.levelsArr[0]; + assert (level0space > 0); + } dblSk.incN(); dblSk.setLevelZeroSorted(false); final int nextPos = level0space - 1; diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 7c175512a..f8cd538e6 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -306,6 +306,17 @@ public byte[] toByteArray() { return KllHelper.toByteArray(this, false); } + @Override + public String toString(final boolean withSummary, final boolean withData) { + KllSketch sketch = this; + if (withData && sketchStructure != UPDATABLE) { + final Memory mem = getWritableMemory(); + assert mem != null; + sketch = KllDoublesSketch.heapify(getWritableMemory()); + } + return KllHelper.toStringImpl(sketch, withSummary, withData, getSerDe()); + } + @Override public void update(final double item) { if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java index cc4e9aca7..d2a604a08 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java @@ -309,9 +309,13 @@ static void updateFloat(final KllFloatsSketch fltSk, fltSk.setMinItem(min(fltSk.getMinItem(), item)); fltSk.setMaxItem(max(fltSk.getMaxItem(), item)); } - final int level0space = fltSk.levelsArr[0]; + int level0space = fltSk.levelsArr[0]; assert level0space >= 0; - if (level0space == 0) { compressWhileUpdatingSketch(fltSk); } + if (level0space == 0) { + compressWhileUpdatingSketch(fltSk); + level0space = fltSk.levelsArr[0]; + assert (level0space > 0); + } fltSk.incN(); 
fltSk.setLevelZeroSorted(false); final int nextPos = level0space - 1; diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 5484e8bf1..613ecaf7c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -306,6 +306,17 @@ public byte[] toByteArray() { return KllHelper.toByteArray(this, false); } + @Override + public String toString(final boolean withSummary, final boolean withData) { + KllSketch sketch = this; + if (withData && sketchStructure != UPDATABLE) { + final Memory mem = getWritableMemory(); + assert mem != null; + sketch = KllFloatsSketch.heapify(getWritableMemory()); + } + return KllHelper.toStringImpl(sketch, withSummary, withData, getSerDe()); + } + @Override public void update(final float item) { if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index a73c5caf1..5163854a4 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -358,7 +358,7 @@ static WritableMemory memorySpaceMgmt( } private static String outputData(final KllSketch sketch) { - final int[] levelsArr = sketch.getLevelsArray(sketch.sketchStructure); + final int[] levelsArr = sketch.getLevelsArray(SketchStructure.UPDATABLE); final int numLevels = sketch.getNumLevels(); final int k = sketch.getK(); final int m = sketch.getM(); diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java index a15960328..502d43278 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java @@ -305,9 +305,13 @@ static void updateItem(final 
KllItemsSketch itmSk, itmSk.setMinItem(Util.minT(itmSk.getMinItem(), item, comp)); itmSk.setMaxItem(Util.maxT(itmSk.getMaxItem(), item, comp)); } - final int level0space = itmSk.levelsArr[0]; + int level0space = itmSk.levelsArr[0]; assert level0space >= 0; - if (level0space == 0) { compressWhileUpdatingSketch(itmSk); } + if (level0space == 0) { + compressWhileUpdatingSketch(itmSk); + level0space = itmSk.levelsArr[0]; + assert (level0space > 0); + } itmSk.incN(); itmSk.setLevelZeroSorted(false); final int nextPos = level0space - 1; diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java index f0e923fbd..589c1fa30 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java @@ -275,6 +275,17 @@ public byte[] toByteArray() { return KllHelper.toByteArray(this, false); } + @Override + public String toString(final boolean withSummary, final boolean withData) { + KllSketch sketch = this; + if (withData && sketchStructure != UPDATABLE) { + final Memory mem = getWritableMemory(); + assert mem != null; + sketch = KllItemsSketch.heapify((Memory)getWritableMemory(), comparator, serDe); + } + return KllHelper.toStringImpl(sketch, withSummary, withData, getSerDe()); + } + @Override public void update(final T item) { if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } diff --git a/src/main/java/org/apache/datasketches/partitions/Partitioner.java b/src/main/java/org/apache/datasketches/partitions/Partitioner.java index be256e479..9bc3eeec5 100644 --- a/src/main/java/org/apache/datasketches/partitions/Partitioner.java +++ b/src/main/java/org/apache/datasketches/partitions/Partitioner.java @@ -25,6 +25,7 @@ import static java.lang.Math.min; import static java.lang.Math.pow; import static java.lang.Math.round; +import static java.util.Collections.unmodifiableList; import static 
org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; @@ -116,7 +117,7 @@ public List> partition(final S sk) { final StackElement se = new StackElement<>(gpb, 0, "1"); stack.push(se); partitionSearch(stack); - return finalPartitionList; + return unmodifiableList(finalPartitionList); } private void partitionSearch(final ArrayDeque> stack) { diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java index 4db851460..5c0098a5e 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java @@ -48,7 +48,7 @@ public GenericPartitionBoundaries( final T minItem, final QuantileSearchCriteria searchCrit) { this.totalN = totalN; - this.boundaries = boundaries; + this.boundaries = boundaries; //SpotBugs copy this.natRanks = natRanks; this.normRanks = normRanks; this.maxItem = maxItem; diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java index 02aab3c6a..21348407d 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java @@ -297,39 +297,4 @@ default double[] getRanks(double[] quantiles) { */ void update(double item); - /** - * This encapsulates the essential information needed to construct actual partitions and is returned from the - * getPartitionBoundaries(int, QuantileSearchCritera) method. - */ - static class DoublesPartitionBoundaries { - - /** - * The total number of items presented to the sketch. - * - *

To compute the weight or density of a specific - * partition i where i varies from 1 to m partitions: - *

{@code
-     * long N = getN();
-     * double[] ranks = getRanks();
-     * long weight = Math.round((ranks[i] - ranks[i - 1]) * N);
-     * }
- */ - public long N; - - /** - * The normalized ranks that correspond to the returned boundaries. - * The returned array is of size (m + 1), where m is the requested number of partitions. - * Index 0 of the returned array is always 0.0, and index m is always 1.0. - */ - public double[] ranks; - - /** - * The partition boundaries as quantiles. - * The returned array is of size (m + 1), where m is the requested number of partitions. - * Index 0 of the returned array is always {@link #getMinItem() getMinItem()}, and index m is always - * {@link #getMaxItem() getMaxItem()}. - */ - public double[] boundaries; - } } - diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java index 2fcbdd99f..986780444 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java @@ -296,40 +296,5 @@ default double[] getRanks(float[] quantiles) { */ void update(float item); - /** - * This encapsulates the essential information needed to construct actual partitions and is returned from the - * getPartitionBoundaries(int, QuantileSearchCritera) method. - */ - - static class FloatsPartitionBoundaries { - - /** - * The total number of items presented to the sketch. - * - *

To compute the weight or density of a specific - * partition i where i varies from 1 to m partitions: - *

{@code
-     * long N = getN();
-     * double[] ranks = getRanks();
-     * long weight = Math.round((ranks[i] - ranks[i - 1]) * N);
-     * }
- */ - public long N; - - /** - * The normalized ranks that correspond to the returned boundaries. - * The returned array is of size (m + 1), where m is the requested number of partitions. - * Index 0 of the returned array is always 0.0, and index m is always 1.0. - */ - public double[] ranks; - - /** - * The partition boundaries as quantiles. - * The returned array is of size (m + 1), where m is the requested number of partitions. - * Index 0 of the returned array is always {@link #getMinItem() getMinItem()}, and index m is always - * {@link #getMaxItem() getMaxItem()}. - */ - public float[] boundaries; - } } diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java index aeeb31fd5..76137f268 100644 --- a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java @@ -174,7 +174,9 @@ public void viewHeapCompactions() { boolean withSummary = false; boolean withData = true; int compaction = 0; - KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(k); + WritableMemory wmem = WritableMemory.allocate(1 << 20); + MemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + KllDoublesSketch sk = KllDoublesSketch.newDirectInstance(k, wmem, memReqSvr); for (int i = 1; i <= n; i++) { sk.update(i); if (sk.levelsArr[0] == 0) { @@ -189,7 +191,20 @@ public void viewHeapCompactions() { println(LS + "#<<< END STATE # >>>"); println(sk.toString(withSummary, withData)); println(""); - //getGrowthSchemeForGivenN(k,8,n,SketchType.DOUBLES_SKETCH, true); + } + + @Test + public void viewCompactSketchData() { + int k = 20; + int n = 109; + boolean withSummary = true; + boolean withData = true; + KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = sk.toByteArray(); + Memory mem = Memory.wrap(byteArr); + KllDoublesSketch ddSk = 
KllDoublesSketch.wrap(mem); + println(ddSk.toString(withSummary, withData)); } //@Test //set static enablePrinting = true for visual checking diff --git a/tools/FindBugsExcludeFilter.xml b/tools/FindBugsExcludeFilter.xml index 6ce8f010c..62cf08f0b 100644 --- a/tools/FindBugsExcludeFilter.xml +++ b/tools/FindBugsExcludeFilter.xml @@ -1,4 +1,5 @@ + @@ -44,8 +49,33 @@ under the License. + + + + + + + + + + + + + + + + + + + + + + + + + From ee24ca464c5c7efcba7e6baf89a80c0cbb11db11 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sun, 17 Dec 2023 21:55:04 -0800 Subject: [PATCH 4/4] Fixed two test methods that were accidentally printing. --- src/main/java/org/apache/datasketches/kll/KllHelper.java | 2 +- .../java/org/apache/datasketches/kll/KllMiscDoublesTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 5163854a4..da20654d6 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -718,7 +718,7 @@ private static long intCapAuxAux(final long k, final int depth) { return result; } - private final static boolean enablePrinting = true; + private final static boolean enablePrinting = false; /** * @param format the format diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java index 76137f268..79f2b5e6e 100644 --- a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java @@ -659,7 +659,7 @@ public void printlnTest() { printf("%s\n", s); } - private final static boolean enablePrinting = true; + private final static boolean enablePrinting = false; /** * @param format the format