diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectCompactItemsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectCompactItemsSketch.java index b8d91fef5..443ff4ae1 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectCompactItemsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectCompactItemsSketch.java @@ -65,6 +65,14 @@ final class KllDirectCompactItemsSketch extends KllItemsSketch { levelsArr = memVal.levelsArr; //always converted to writable form. } + //End of constructors + + @Override + String getItemAsString(final int index) { + if (isEmpty()) { return "Null"; } + return serDe.toString(getTotalItemsArray()[index]); + } + @Override public int getK() { return getMemoryK(mem); @@ -83,6 +91,12 @@ public T getMaxItem() { return serDe.deserializeFromMemory(mem, offset, 2)[1]; } + @Override + String getMaxItemAsString() { + if (isEmpty()) { return "Null"; } + return serDe.toString(getMaxItem()); + } + @Override public T getMinItem() { if (sketchStructure == COMPACT_EMPTY || isEmpty()) { @@ -96,6 +110,12 @@ public T getMinItem() { return serDe.deserializeFromMemory(mem, offset, 1)[0]; } + @Override + String getMinItemAsString() { + if (isEmpty()) { return "Null"; } + return serDe.toString(getMinItem()); + } + @Override public long getN() { if (sketchStructure == COMPACT_EMPTY) { return 0; } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java index 64e340d9e..21a46069c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java @@ -116,7 +116,13 @@ static KllDirectDoublesSketch newDirectUpdatableInstance( return new KllDirectDoublesSketch(UPDATABLE, wMem, memReqSvr, memVal); } - //END of Constructors + //End of constructors + + @Override + String getItemAsString(final int index) { + if (isEmpty()) { return "NaN"; } + return Double.toString(getDoubleItemsArray()[index]); + } @Override public int getK() { @@ -137,6 +143,12 @@ else if (sketchStructure == COMPACT_FULL) { return wmem.getDouble(offset); } + @Override + String getMaxItemAsString() { + if (isEmpty()) { return "NaN"; } + return Double.toString(getMaxItem()); + } + @Override public double getMinItem() { int levelsArrBytes = 0; @@ -151,6 +163,12 @@ else if (sketchStructure == COMPACT_FULL) { return wmem.getDouble(offset); } + @Override + String getMinItemAsString() { + if (isEmpty()) { return "NaN"; } + return Double.toString(getMinItem()); + } + @Override public long getN() { if (sketchStructure == COMPACT_EMPTY) { return 0; } diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java index c86651477..542eda596 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java @@ -118,6 +118,12 @@ static KllDirectFloatsSketch newDirectUpdatableInstance( //END of Constructors + @Override + String getItemAsString(final int index) { + if (isEmpty()) { return "NaN"; } + return Float.toString(getFloatItemsArray()[index]); + } + @Override public int getK() { return getMemoryK(wmem); @@ -137,6 +143,12 @@ else if (sketchStructure == COMPACT_FULL) { return wmem.getFloat(offset); } + @Override + String getMaxItemAsString() { + if (isEmpty()) { return "NaN"; } + return Float.toString(getMaxItem()); + } + @Override public float getMinItem() { int levelsArrBytes = 0; @@ -151,6 +163,12 @@ else if (sketchStructure == COMPACT_FULL) { return wmem.getFloat(offset); } + @Override + String getMinItemAsString() { + if (isEmpty()) { return "NaN"; } + return Float.toString(getMinItem()); + } + @Override public long getN() { if (sketchStructure == COMPACT_EMPTY) { return 0; } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java index 49bfe220a..a3effa917 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java @@ -309,12 +309,16 @@ static void updateDouble(final KllDoublesSketch dblSk, dblSk.setMinItem(min(dblSk.getMinItem(), item)); dblSk.setMaxItem(max(dblSk.getMaxItem(), item)); } - if (dblSk.levelsArr[0] == 0) { compressWhileUpdatingSketch(dblSk); } - final int myLevelsArrAtZero = dblSk.levelsArr[0]; //LevelsArr could be expanded + int level0space = dblSk.levelsArr[0]; + assert (level0space >= 0); + if (level0space == 0) { + compressWhileUpdatingSketch(dblSk); + level0space = dblSk.levelsArr[0]; + assert (level0space > 0); + } dblSk.incN(); dblSk.setLevelZeroSorted(false); - final int nextPos = myLevelsArrAtZero - 1; - assert myLevelsArrAtZero >= 0; + final int nextPos = level0space - 1; dblSk.setLevelsArrayAt(0, nextPos); dblSk.setDoubleItemsArrayAt(nextPos, item); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 7c175512a..f8cd538e6 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -306,6 +306,17 @@ public byte[] toByteArray() { return KllHelper.toByteArray(this, false); } + @Override + public String toString(final boolean withSummary, final boolean withData) { + KllSketch sketch = this; + if (withData && sketchStructure != UPDATABLE) { + final Memory mem = getWritableMemory(); + assert mem != null; + sketch = KllDoublesSketch.heapify(getWritableMemory()); + } + return KllHelper.toStringImpl(sketch, withSummary, withData, getSerDe()); + } + @Override public void update(final double item) { if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java index 9f732e947..d2a604a08 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java @@ -309,12 +309,16 @@ static void updateFloat(final KllFloatsSketch fltSk, fltSk.setMinItem(min(fltSk.getMinItem(), item)); fltSk.setMaxItem(max(fltSk.getMaxItem(), item)); } - if (fltSk.levelsArr[0] == 0) { compressWhileUpdatingSketch(fltSk); } - final int myLevelsArrAtZero = fltSk.levelsArr[0]; //LevelsArr could be expanded + int level0space = fltSk.levelsArr[0]; + assert level0space >= 0; + if (level0space == 0) { + compressWhileUpdatingSketch(fltSk); + level0space = fltSk.levelsArr[0]; + assert (level0space > 0); + } fltSk.incN(); fltSk.setLevelZeroSorted(false); - final int nextPos = myLevelsArrAtZero - 1; - assert myLevelsArrAtZero >= 0; + final int nextPos = level0space - 1; fltSk.setLevelsArrayAt(0, nextPos); fltSk.setFloatItemsArrayAt(nextPos, item); } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index 5484e8bf1..613ecaf7c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -306,6 +306,17 @@ public byte[] toByteArray() { return KllHelper.toByteArray(this, false); } + @Override + public String toString(final boolean withSummary, final boolean withData) { + KllSketch sketch = this; + if (withData && sketchStructure != UPDATABLE) { + final Memory mem = getWritableMemory(); + assert mem != null; + sketch = KllFloatsSketch.heapify(getWritableMemory()); + } + return KllHelper.toStringImpl(sketch, withSummary, withData, getSerDe()); + } + @Override public void update(final float item) { if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java index 61d8243dc..df81a34c8 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java @@ -141,6 +141,14 @@ static KllHeapDoublesSketch heapifyImpl(final Memory srcMem) { return new KllHeapDoublesSketch(srcMem, memVal); } + //End of constructors + + @Override + String getItemAsString(final int index) { + if (isEmpty()) { return "NaN"; } + return Double.toString(doubleItems[index]); + } + @Override public int getK() { return k; } @@ -150,12 +158,24 @@ public double getMaxItem() { return maxDoubleItem; } + @Override + String getMaxItemAsString() { + if (isEmpty()) { return "NaN"; } + return Double.toString(maxDoubleItem); + } + @Override public double getMinItem() { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } return minDoubleItem; } + @Override + String getMinItemAsString() { + if (isEmpty()) { return "NaN"; } + return Double.toString(minDoubleItem); + } + @Override public long getN() { return n; } diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java index 0af731286..472871854 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java @@ -141,6 +141,14 @@ static KllHeapFloatsSketch heapifyImpl(final Memory srcMem) { return new KllHeapFloatsSketch(srcMem, memVal); } + //End of constructors + + @Override + String getItemAsString(final int index) { + if (isEmpty()) { return "NaN"; } + return Double.toString(floatItems[index]); + } + @Override public int getK() { return k; } @@ -150,12 +158,24 @@ public float getMaxItem() { return maxFloatItem; } + @Override + String getMaxItemAsString() { + if (isEmpty()) { return "NaN"; } + return Float.toString(maxFloatItem); + } + @Override public float getMinItem() { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } return minFloatItem; } + @Override + String getMinItemAsString() { + if (isEmpty()) { return "NaN"; } + return Float.toString(minFloatItem); + } + @Override public long getN() { return n; } diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java index 4b3c6c7c2..3ed776f25 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java @@ -117,6 +117,14 @@ final class KllHeapItemsSketch extends KllItemsSketch { } } + //End of constructors + + @Override + String getItemAsString(final int index) { + if (isEmpty()) { return "Null"; } + return serDe.toString((T)(itemsArr[index])); + } + @Override public int getK() { return k; @@ -128,12 +136,24 @@ public T getMaxItem() { return maxItem; } + @Override + String getMaxItemAsString() { + if (isEmpty()) { return "Null"; } + return serDe.toString(maxItem); + } + @Override public T getMinItem() { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } return minItem; } + @Override + String getMinItemAsString() { + if (isEmpty()) { return "Null"; } + return serDe.toString(minItem); + } + @Override public long getN() { return n; diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java index 6956ccb67..da20654d6 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java @@ -47,7 +47,6 @@ import org.apache.datasketches.common.ArrayOfItemsSerDe; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; import org.apache.datasketches.kll.KllSketch.SketchStructure; import org.apache.datasketches.kll.KllSketch.SketchType; import org.apache.datasketches.memory.WritableBuffer; @@ -58,8 +57,14 @@ * * @author Lee Rhodes */ -@SuppressWarnings("unchecked") final class KllHelper { + public static final String LS = System.getProperty("line.separator"); + static final double EPS_DELTA_THRESHOLD = 1E-6; + static final double MIN_EPS = 4.7634E-5; + static final double PMF_COEF = 2.446; + static final double PMF_EXP = 0.9433; + static final double CDF_COEF = 2.296; + static final double CDF_EXP = 0.9723; static class GrowthStats { SketchType sketchType; @@ -85,13 +90,6 @@ static class LevelStats { } } - static final double EPS_DELTA_THRESHOLD = 1E-6; - static final double MIN_EPS = 4.7634E-5; - static final double PMF_COEF = 2.446; - static final double PMF_EXP = 0.9433; - static final double CDF_COEF = 2.296; - static final double CDF_EXP = 0.9723; - /** * This is the exact powers of 3 from 3^0 to 3^30 where the exponent is the index */ @@ -313,7 +311,7 @@ static int levelCapacity(final int k, final int numLevels, final int level, fina assert (k <= (1 << 29)); assert (numLevels >= 1) && (numLevels <= 61); assert (level >= 0) && (level < numLevels); - final int depth = numLevels - level - 1; + final int depth = numLevels - level - 1; //depth is # levels from the top level (= 0) return (int) Math.max(m, intCapAux(k, depth)); } @@ -359,108 +357,44 @@ static WritableMemory memorySpaceMgmt( return newWmem; } - private static String outputItemsData(final int numLevels, final int[] levelsArr, final Object[] itemsArr, - final ArrayOfItemsSerDe serDe) { - final StringBuilder sb = new StringBuilder(); - sb.append("### KLL items data {index, item}:").append(Util.LS); - if (levelsArr[0] > 0) { - sb.append(" Empty/Garbage:" + Util.LS); - for (int i = 0; i < levelsArr[0]; i++) { - sb.append(" ").append(i + ", ").append(serDe.toString((T)itemsArr[i])).append(Util.LS); - } - } - int level = 0; - while (level < numLevels) { - final int fromIndex = levelsArr[level]; - final int toIndex = levelsArr[level + 1]; // exclusive - if (fromIndex < toIndex) { - sb.append(" level[").append(level).append("]: offset: " + levelsArr[level] + " wt: " + (1 << level)); - sb.append(Util.LS); - } - - for (int i = fromIndex; i < toIndex; i++) { - sb.append(" ").append(i + ", ").append(serDe.toString((T)itemsArr[i])).append(Util.LS); - } - level++; - } - sb.append(" level[" + level + "]: offset: " + levelsArr[level] + " (Exclusive)"); - sb.append(Util.LS); - sb.append("### End items data").append(Util.LS); - return sb.toString(); - - } - - private static String outputDoublesData(final int numLevels, final int[] levelsArr, final double[] doubleItemsArr) { - final StringBuilder sb = new StringBuilder(); - sb.append("### KLL items data {index, item}:").append(Util.LS); - if (levelsArr[0] > 0) { - sb.append(" Empty/Garbage:" + Util.LS); - for (int i = 0; i < levelsArr[0]; i++) { - sb.append(" ").append(i + ", ").append(doubleItemsArr[i]).append(Util.LS); - } - } - int level = 0; - while (level < numLevels) { - final int fromIndex = levelsArr[level]; - final int toIndex = levelsArr[level + 1]; // exclusive - if (fromIndex < toIndex) { - sb.append(" level[").append(level).append("]: offset: " + levelsArr[level] + " wt: " + (1 << level)); - sb.append(Util.LS); - } - - for (int i = fromIndex; i < toIndex; i++) { - sb.append(" ").append(i + ", ").append(doubleItemsArr[i]).append(Util.LS); - } - level++; - } - sb.append(" level[" + level + "]: offset: " + levelsArr[level] + " (Exclusive)"); - sb.append(Util.LS); - sb.append("### End items data").append(Util.LS); - return sb.toString(); - } - - private static String outputFloatsData(final int numLevels, final int[] levelsArr, final float[] floatsItemsArr) { + private static String outputData(final KllSketch sketch) { + final int[] levelsArr = sketch.getLevelsArray(SketchStructure.UPDATABLE); + final int numLevels = sketch.getNumLevels(); + final int k = sketch.getK(); + final int m = sketch.getM(); final StringBuilder sb = new StringBuilder(); - sb.append("### KLL items data {index, item}:").append(Util.LS); + sb.append("### KllSketch itemsArray & levelsArray data:").append(LS); + sb.append("Index, Value").append(LS); if (levelsArr[0] > 0) { - sb.append(" Empty/Garbage:" + Util.LS); + final String gbg = " Empty or Garbage, size = " + levelsArr[0]; for (int i = 0; i < levelsArr[0]; i++) { - sb.append(" ").append(i + ", ").append(floatsItemsArr[i]).append(Util.LS); + sb.append(" ").append(i + ", ").append(sketch.getItemAsString(i)); + if (i == 0) { sb.append(gbg); } + sb.append(LS); } } int level = 0; while (level < numLevels) { final int fromIndex = levelsArr[level]; final int toIndex = levelsArr[level + 1]; // exclusive + String lvlData = ""; if (fromIndex < toIndex) { - sb.append(" level[").append(level).append("]: offset: " + levelsArr[level] + " wt: " + (1 << level)); - sb.append(Util.LS); + lvlData = " level[" + level + "]=" + levelsArr[level] + + ", cap=" + KllHelper.levelCapacity(k, numLevels, level, m) + + ", size=" + KllHelper.currentLevelSizeItems(level, numLevels, levelsArr) + + ", wt=" + (1 << level) + LS; } for (int i = fromIndex; i < toIndex; i++) { - sb.append(" ").append(i + ", ").append(floatsItemsArr[i]).append(Util.LS); + sb.append(" ").append(i + ", ").append(sketch.getItemAsString(i)); + if (i == fromIndex) { sb.append(lvlData); } else { sb.append(LS); } } level++; } - sb.append(" level[" + level + "]: offset: " + levelsArr[level] + " (Exclusive)"); - sb.append(Util.LS); - sb.append("### End items data").append(Util.LS); - return sb.toString(); - } + sb.append(" ----------level[" + level + "]=" + levelsArr[level] + ": itemsArray[].length"); + sb.append(LS); + sb.append("### End data").append(LS); - static String outputLevels(final int k, final int m, final int numLevels, final int[] levelsArr) { - final StringBuilder sb = new StringBuilder(); - sb.append("### KLL levels array:").append(Util.LS) - .append(" level, offset: nominal capacity, actual size").append(Util.LS); - int level = 0; - for ( ; level < numLevels; level++) { - sb.append(" ").append(level).append(", ").append(levelsArr[level]).append(": ") - .append(KllHelper.levelCapacity(k, numLevels, level, m)) - .append(", ").append(KllHelper.currentLevelSizeItems(level, numLevels, levelsArr)).append(Util.LS); - } - sb.append(" ").append(level).append(", ").append(levelsArr[level]).append(": (Exclusive)") - .append(Util.LS); - sb.append("### End levels array").append(Util.LS); return sb.toString(); } @@ -541,7 +475,7 @@ static byte[] toByteArray(final KllSketch srcSk, final boolean updatable) { return bytesOut; } - static String toStringImpl(final KllSketch sketch, final boolean withLevels, final boolean withData, + static String toStringImpl(final KllSketch sketch, final boolean withSummary, final boolean withData, final ArrayOfItemsSerDe serDe) { final SketchType sketchType = sketch.sketchType; final boolean hasMemory = sketch.hasMemory(); @@ -562,70 +496,36 @@ static String toStringImpl(final KllSketch sketch, final boolean withLevels, final String skTypeStr = sketchType.getName(); final String className = "Kll" + directStr + compactStr + skTypeStr; - sb.append(Util.LS).append("### ").append(className).append(" Summary:").append(Util.LS); - sb.append(" K : ").append(k).append(Util.LS); - sb.append(" Dynamic min K : ").append(sketch.getMinK()).append(Util.LS); - sb.append(" M : ").append(m).append(Util.LS); - sb.append(" N : ").append(n).append(Util.LS); - sb.append(" Epsilon : ").append(epsPct).append(Util.LS); - sb.append(" Epsilon PMF : ").append(epsPMFPct).append(Util.LS); - sb.append(" Empty : ").append(sketch.isEmpty()).append(Util.LS); - sb.append(" Estimation Mode : ").append(sketch.isEstimationMode()).append(Util.LS); - sb.append(" Levels : ").append(numLevels).append(Util.LS); - sb.append(" Level 0 Sorted : ").append(sketch.isLevelZeroSorted()).append(Util.LS); - sb.append(" Capacity Items : ").append(fullLevelsArr[numLevels]).append(Util.LS); - sb.append(" Retained Items : ").append(sketch.getNumRetained()).append(Util.LS); - sb.append(" Empty/Garbage Items : ").append(sketch.levelsArr[0]).append(Util.LS); - sb.append(" ReadOnly : ").append(readOnlyStr).append(Util.LS); + sb.append(LS).append("### ").append(className).append(" Summary:").append(LS); + sb.append(" K : ").append(k).append(LS); + sb.append(" Dynamic min K : ").append(sketch.getMinK()).append(LS); + sb.append(" M : ").append(m).append(LS); + sb.append(" N : ").append(n).append(LS); + sb.append(" Epsilon : ").append(epsPct).append(LS); + sb.append(" Epsilon PMF : ").append(epsPMFPct).append(LS); + sb.append(" Empty : ").append(sketch.isEmpty()).append(LS); + sb.append(" Estimation Mode : ").append(sketch.isEstimationMode()).append(LS); + sb.append(" Levels : ").append(numLevels).append(LS); + sb.append(" Level 0 Sorted : ").append(sketch.isLevelZeroSorted()).append(LS); + sb.append(" Capacity Items : ").append(fullLevelsArr[numLevels]).append(LS); + sb.append(" Retained Items : ").append(sketch.getNumRetained()).append(LS); + sb.append(" Empty/Garbage Items : ").append(sketch.levelsArr[0]).append(LS); + sb.append(" ReadOnly : ").append(readOnlyStr).append(LS); if (sketchType != ITEMS_SKETCH) { - sb.append(" Updatable Storage Bytes: ").append(sketch.currentSerializedSizeBytes(true)) - .append(Util.LS); + sb.append(" Updatable Storage Bytes: ").append(sketch.currentSerializedSizeBytes(true)).append(LS); } - sb.append(" Compact Storage Bytes : ").append(sketch.currentSerializedSizeBytes(false)) - .append(Util.LS); + sb.append(" Compact Storage Bytes : ").append(sketch.currentSerializedSizeBytes(false)).append(LS); - if (sketchType == DOUBLES_SKETCH) { - final KllDoublesSketch dblSk = (KllDoublesSketch) sketch; - sb.append(" Min Item : ").append(dblSk.isEmpty() ? Double.NaN : dblSk.getMinItem()) - .append(Util.LS); - sb.append(" Max Item : ").append(dblSk.isEmpty() ? Double.NaN : dblSk.getMaxItem()) - .append(Util.LS); - } - else if (sketchType == FLOATS_SKETCH) { - final KllFloatsSketch fltSk = (KllFloatsSketch) sketch; - sb.append(" Min Item : ").append(fltSk.isEmpty() ? Float.NaN : fltSk.getMinItem()) - .append(Util.LS); - sb.append(" Max Item : ").append(fltSk.isEmpty() ? Float.NaN : fltSk.getMaxItem()) - .append(Util.LS); - } - else { //sketchType == ITEMS_SKETCH - final KllItemsSketch itmSk = (KllItemsSketch) sketch; - sb.append(" Min Item : ").append(itmSk.isEmpty() ? "null" : serDe.toString(itmSk.getMinItem())) - .append(Util.LS); - sb.append(" Max Item : ").append(itmSk.isEmpty() ? "null" : serDe.toString(itmSk.getMaxItem())) - .append(Util.LS); - } - sb.append("### End sketch summary").append(Util.LS); + final String emptyStr = (sketchType == ITEMS_SKETCH) ? "Null" : "NaN"; - if (withLevels) { - sb.append(outputLevels(k, m, numLevels, fullLevelsArr)); - } - if (withData) { - if (sketchType == DOUBLES_SKETCH) { - final KllDoublesSketch dblSk = (KllDoublesSketch) sketch; - final double[] myDoubleItemsArr = dblSk.getDoubleItemsArray(); - sb.append(outputDoublesData(numLevels, fullLevelsArr, myDoubleItemsArr)); - } else if (sketchType == FLOATS_SKETCH) { - final KllFloatsSketch fltSk = (KllFloatsSketch) sketch; - final float[] myFloatItemsArr = fltSk.getFloatItemsArray(); - sb.append(outputFloatsData(numLevels, fullLevelsArr, myFloatItemsArr)); - } - else { //sketchType == KllItemsSketch - final KllItemsSketch itmSk = (KllItemsSketch) sketch; - final T[] myItemsArr = itmSk.getTotalItemsArray(); - sb.append(outputItemsData(numLevels, fullLevelsArr, myItemsArr, serDe)); - } - } + sb.append(" Min Item : ").append(sketch.isEmpty() ? emptyStr : sketch.getMinItemAsString()) + .append(LS); + sb.append(" Max Item : ").append(sketch.isEmpty() ? emptyStr : sketch.getMaxItemAsString()) + .append(LS); + sb.append("### End sketch summary").append(LS); + + if (! withSummary) { sb.setLength(0); } + if (withData) { sb.append(outputData(sketch)); } return sb.toString(); } @@ -789,8 +689,9 @@ static int findLevelToCompact(final int k, final int m, final int numLevels, fin /** * Computes the actual item capacity of a given level given its depth index. * If the depth of levels exceeds 30, this uses a folding technique to accurately compute the - * actual level capacity up to a depth of 60. Without folding, the internal calculations would - * exceed the capacity of a long. + * actual level capacity up to a depth of 60 (or 61 levels). + * Without folding, the internal calculations would exceed the capacity of a long. + * This method just decides whether folding is required or not. * @param k the configured k of the sketch * @param depth the zero-based index of the level being computed. * @return the actual capacity of a given level given its depth index. @@ -806,31 +707,32 @@ private static long intCapAux(final int k, final int depth) { /** * Performs the integer based calculation of an individual level (or folded level). * @param k the configured k of the sketch - * @param depth depth the zero-based index of the level being computed. + * @param depth the zero-based index of the level being computed. The max depth is 30! * @return the actual capacity of a given level given its depth index. */ private static long intCapAuxAux(final long k, final int depth) { - final long twok = k << 1; // for rounding pre-multiply by 2 - final long tmp = ((twok << depth) / powersOfThree[depth]); - final long result = ((tmp + 1L) >>> 1); // add 1 and divide by 2 + final long twok = k << 1; // for rounding at the end, pre-multiply by 2 here, divide by 2 during rounding. + final long tmp = ((twok << depth) / powersOfThree[depth]); //2k* (2/3)^depth. 2k also keeps the fraction larger. + final long result = ((tmp + 1L) >>> 1); // (tmp + 1)/2. If odd, round up. This guarantees an integer. assert (result <= k); return result; } + private final static boolean enablePrinting = false; + /** - * @param fmt format - * @param args arguments + * @param format the format + * @param args the args */ - private static void printf(final String fmt, final Object ... args) { - //System.out.printf(fmt, args); //Disable + private static final void printf(final String format, final Object ... args) { + if (enablePrinting) { System.out.printf(format, args); } } /** - * Println Object o - * @param o object to print + * @param o the Object to println */ - private static void println(final Object o) { - //System.out.println(o.toString()); //Disable + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } } } diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java index 03bc931ac..502d43278 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java @@ -305,12 +305,16 @@ static void updateItem(final KllItemsSketch itmSk, itmSk.setMinItem(Util.minT(itmSk.getMinItem(), item, comp)); itmSk.setMaxItem(Util.maxT(itmSk.getMaxItem(), item, comp)); } - if (itmSk.levelsArr[0] == 0) { compressWhileUpdatingSketch(itmSk); } - final int myLevelsArrAtZero = itmSk.levelsArr[0]; //LevelsArr could be expanded + int level0space = itmSk.levelsArr[0]; + assert level0space >= 0; + if (level0space == 0) { + compressWhileUpdatingSketch(itmSk); + level0space = itmSk.levelsArr[0]; + assert (level0space > 0); + } itmSk.incN(); itmSk.setLevelZeroSorted(false); - final int nextPos = myLevelsArrAtZero - 1; - assert myLevelsArrAtZero >= 0; + final int nextPos = level0space - 1; itmSk.setLevelsArrayAt(0, nextPos); itmSk.setItemsArrayAt(nextPos, item); } diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java index f0e923fbd..589c1fa30 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java @@ -275,6 +275,17 @@ public byte[] toByteArray() { return KllHelper.toByteArray(this, false); } + @Override + public String toString(final boolean withSummary, final boolean withData) { + KllSketch sketch = this; + if (withData && sketchStructure != UPDATABLE) { + final Memory mem = getWritableMemory(); + assert mem != null; + sketch = KllItemsSketch.heapify((Memory)getWritableMemory(), comparator, serDe); + } + return KllHelper.toStringImpl(sketch, withSummary, withData, getSerDe()); + } + @Override public void update(final T item) { if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java index 874d64fa9..67f6bb98e 100644 --- a/src/main/java/org/apache/datasketches/kll/KllSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java @@ -130,6 +130,13 @@ public abstract class KllSketch implements QuantilesAPI { this.sketchStructure = sketchStructure; } + /** + * Gets the string value of the item at the given index. + * @param index the index of the value + * @return the string value of the item at the given index. + */ + abstract String getItemAsString(int index); + /** * Gets the approximate k to use given epsilon, the normalized rank error. * @param epsilon the normalized rank error between zero and one. @@ -159,6 +166,18 @@ public static int getMaxSerializedSizeBytes(final int k, final long n, return updatableMemFormat ? gStats.updatableBytes : gStats.compactBytes; } + /** + * Gets the string value of the max item + * @return the string value of the max item + */ + abstract String getMaxItemAsString(); + + /** + * Gets the string value of the min item + * @return the string value of the min item + */ + abstract String getMinItemAsString(); + /** * Gets the normalized rank error given k and pmf. * Static method version of the getNormalizedRankError(boolean). @@ -262,17 +281,17 @@ public final boolean isSameResource(final Memory that) { @Override public final String toString() { - return toString(false, false); + return toString(true, false); } /** * Returns a summary of the sketch as a string. - * @param withLevels if true include information about levels + * @param withSummary if true includes sketch summary information * @param withData if true include sketch data * @return string representation of sketch summary */ - public String toString(final boolean withLevels, final boolean withData) { - return KllHelper.toStringImpl(this, withLevels, withData, getSerDe()); + public String toString(final boolean withSummary, final boolean withData) { + return KllHelper.toStringImpl(this, withSummary, withData, getSerDe()); } //restricted diff --git a/src/main/java/org/apache/datasketches/partitions/Partitioner.java b/src/main/java/org/apache/datasketches/partitions/Partitioner.java index be256e479..9bc3eeec5 100644 --- a/src/main/java/org/apache/datasketches/partitions/Partitioner.java +++ b/src/main/java/org/apache/datasketches/partitions/Partitioner.java @@ -25,6 +25,7 @@ import static java.lang.Math.min; import static java.lang.Math.pow; import static java.lang.Math.round; +import static java.util.Collections.unmodifiableList; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; @@ -116,7 +117,7 @@ public List> partition(final S sk) { final StackElement se = new StackElement<>(gpb, 0, "1"); stack.push(se); partitionSearch(stack); - return finalPartitionList; + return unmodifiableList(finalPartitionList); } private void partitionSearch(final ArrayDeque> stack) { diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java index 4db851460..5c0098a5e 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java @@ -48,7 +48,7 @@ public GenericPartitionBoundaries( final T minItem, final QuantileSearchCriteria searchCrit) { this.totalN = totalN; - this.boundaries = boundaries; + this.boundaries = boundaries; //SpotBugs copy this.natRanks = natRanks; this.normRanks = normRanks; this.maxItem = maxItem; diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java index 31a5bedf9..21348407d 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java @@ -75,8 +75,8 @@ default double[] getCDF(double[] splitPoints) { double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit); /** - * Returns the maximum item of the stream. This is provided for convenience, but may be different from the largest - * item retained by the sketch algorithm. + * Returns the maximum item of the stream. This is provided for convenience and may be different from the + * item returned by getQuantile(1.0). * * @return the maximum item of the stream * @throws IllegalArgumentException if sketch is empty. @@ -84,8 +84,8 @@ default double[] getCDF(double[] splitPoints) { double getMaxItem(); /** - * Returns the minimum item of the stream. This is provided for convenience, but is distinct from the smallest - * item retained by the sketch algorithm. + * Returns the minimum item of the stream. This is provided for convenience and may be different from the + * item returned by getQuantile(0.0). * * @return the minimum item of the stream * @throws IllegalArgumentException if sketch is empty. @@ -297,39 +297,4 @@ default double[] getRanks(double[] quantiles) { */ void update(double item); - /** - * This encapsulates the essential information needed to construct actual partitions and is returned from the - * getPartitionBoundaries(int, QuantileSearchCritera) method. - */ - static class DoublesPartitionBoundaries { - - /** - * The total number of items presented to the sketch. - * - *

To compute the weight or density of a specific - * partition i where i varies from 1 to m partitions: - *

{@code
-     * long N = getN();
-     * double[] ranks = getRanks();
-     * long weight = Math.round((ranks[i] - ranks[i - 1]) * N);
-     * }
- */ - public long N; - - /** - * The normalized ranks that correspond to the returned boundaries. - * The returned array is of size (m + 1), where m is the requested number of partitions. - * Index 0 of the returned array is always 0.0, and index m is always 1.0. - */ - public double[] ranks; - - /** - * The partition boundaries as quantiles. - * The returned array is of size (m + 1), where m is the requested number of partitions. - * Index 0 of the returned array is always {@link #getMinItem() getMinItem()}, and index m is always - * {@link #getMaxItem() getMaxItem()}. - */ - public double[] boundaries; - } } - diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java index 2fcbdd99f..986780444 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java @@ -296,40 +296,5 @@ default double[] getRanks(float[] quantiles) { */ void update(float item); - /** - * This encapsulates the essential information needed to construct actual partitions and is returned from the - * getPartitionBoundaries(int, QuantileSearchCritera) method. - */ - - static class FloatsPartitionBoundaries { - - /** - * The total number of items presented to the sketch. - * - *

To compute the weight or density of a specific - * partition i where i varies from 1 to m partitions: - *

{@code
-     * long N = getN();
-     * double[] ranks = getRanks();
-     * long weight = Math.round((ranks[i] - ranks[i - 1]) * N);
-     * }
- */ - public long N; - - /** - * The normalized ranks that correspond to the returned boundaries. - * The returned array is of size (m + 1), where m is the requested number of partitions. - * Index 0 of the returned array is always 0.0, and index m is always 1.0. - */ - public double[] ranks; - - /** - * The partition boundaries as quantiles. - * The returned array is of size (m + 1), where m is the requested number of partitions. - * Index 0 of the returned array is always {@link #getMinItem() getMinItem()}, and index m is always - * {@link #getMaxItem() getMaxItem()}. - */ - public float[] boundaries; - } } diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java index fe2080884..79f2b5e6e 100644 --- a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java @@ -19,6 +19,7 @@ package org.apache.datasketches.kll; +import static org.apache.datasketches.kll.KllHelper.getGrowthSchemeForGivenN; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; @@ -27,6 +28,7 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.kll.KllDirectDoublesSketch.KllDirectCompactDoublesSketch; +import org.apache.datasketches.kll.KllSketch.SketchType; import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; @@ -38,6 +40,7 @@ /** * @author Lee Rhodes */ +@SuppressWarnings("unused") public class KllMiscDoublesTest { static final String LS = System.getProperty("line.separator"); private final MemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); @@ -168,21 +171,85 @@ public void visualCheckToString() { public void viewHeapCompactions() { int k = 20; int n = 108; + boolean withSummary = false; + boolean withData = true; int compaction = 0; - KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(k); + WritableMemory wmem = WritableMemory.allocate(1 << 20); + MemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + KllDoublesSketch sk = KllDoublesSketch.newDirectInstance(k, wmem, memReqSvr); for (int i = 1; i <= n; i++) { sk.update(i); if (sk.levelsArr[0] == 0) { println(LS + "#<<< BEFORE COMPACTION # " + (++compaction) + " >>>"); - println(sk.toString(true, true)); + println(sk.toString(withSummary, withData)); sk.update(++i); println(LS + "#<<< AFTER COMPACTION # " + (compaction) + " >>>"); - println(sk.toString(true, true)); + println(sk.toString(withSummary, withData)); assertEquals(sk.getDoubleItemsArray()[sk.levelsArr[0]], i); } } + println(LS + "#<<< END STATE # >>>"); + println(sk.toString(withSummary, withData)); + println(""); } + @Test + public void viewCompactSketchData() { + int k = 20; + int n = 109; + boolean withSummary = true; + boolean withData = true; + KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = sk.toByteArray(); + Memory mem = Memory.wrap(byteArr); + KllDoublesSketch ddSk = KllDoublesSketch.wrap(mem); + println(ddSk.toString(withSummary, withData)); + } + + //@Test //set static enablePrinting = true for visual checking + // // must also make KllHelper.intCapAux(...) visible + // public void checkIntCapAux() { + // String[] hdr = {"level", "depth", "wt", "cap", "(end)", "MaxN"}; + // String hdrFmt = "%6s %6s %28s %10s %10s %34s\n"; + // String dataFmt = "%6d %6d %,28d %,10d %,10d %,34.0f\n"; + // int k = 1000; + // int m = 8; + // int numLevels = 20; + // println("k=" + k + ", m=" + m + ", numLevels=" + numLevels); + // printf(hdrFmt, (Object[]) hdr); + // double maxN = 0; + // for (int i = 0; i < numLevels; i++) { + // int depth = numLevels - i - 1; + // long cap = KllHelper.intCapAux(k, depth); + // long end = Math.max(m, cap); + // long wt = 1L << i; + // maxN += (double)wt * (double)end; + // printf(dataFmt, i, depth, wt, cap, end, maxN); + // } + // } + + //@Test //set static enablePrinting = true for visual checking + // // must also make KllHelper.powersOfThree visible + // public void checkIntCapAuxAux() { + // String[] hdr = {"d","twoK","2k*2^d","3^d","tmp=2k*2^d/3^d","(tmp + 1)/2", "(end)"}; + // String hdrFmt = "%6s %10s %20s %20s %15s %12s %10s\n"; + // String dataFmt = "%6d %10d %,20d %,20d %15d %12d %10d\n"; + // long k = (1L << 16) - 1L; + // long m = 8; + // println("k = " + k + ", m = " + m); + // printf(hdrFmt, (Object[]) hdr); + // for (int i = 0; i < 31; i++) { + // long twoK = k << 1; + // long twoKxtwoD = twoK << i; + // long threeToD = KllHelper.powersOfThree[i]; + // long tmp = twoKxtwoD / threeToD; + // long result = (tmp + 1L) >>> 1; + // long end = Math.max(m, result); //performed later + // printf(dataFmt, i, twoK, twoKxtwoD, threeToD, tmp, result, end); + // } + // } + @Test //set static enablePrinting = true for visual checking public void viewDirectCompactions() { int k = 20; diff --git a/tools/FindBugsExcludeFilter.xml b/tools/FindBugsExcludeFilter.xml index 6ce8f010c..62cf08f0b 100644 --- a/tools/FindBugsExcludeFilter.xml +++ b/tools/FindBugsExcludeFilter.xml @@ -1,4 +1,5 @@ + @@ -44,8 +49,33 @@ under the License. + + + + + + + + + + + + + + + + + + + + + + + + +