From 23e36accc87d0e78a98aeefaa905686fe9fad6c5 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Thu, 16 Nov 2023 14:11:35 -0800 Subject: [PATCH] This commit includes a number of enhancements for the 5.0 release: - This is a large number of changes. - The problem detected by the Druid team is fixed, so now "getPartitionBoundaries" works for input streams that are larger than Integer.MAX_VALUE. - This fix applies to both the KllItemsSketch and the classic ItemsSketch. These are the only two sketches, for now, that will support the "getPartitionBoundaries" functionality. This is enforced via a new "PartitioningFeature" API interface. - In addition, there is a new "partitions" package that solves the problem of limited accuracy of our quantiles sketches when being asked to partition very large input streams. This package can partition very large streams of almost unlimited size with very small variation in the resulting partition sizes. I have tested this with streams as large as 30E12 elements. - I have reduced code duplication in a number of places. Specifically, all the quantile sketch sorted view classes use only 3 iterator implementations, which are for float, double and generic. Further consolidation of classes can be done across the sorted view classes themselves, but that will have to be done later. - Javadocs have been improved in a number of places and I have fixed spelling errors where I found them. 
--- README.md | 2 +- pom.xml | 7 + .../org/apache/datasketches/common/Util.java | 83 +++---- .../datasketches/kll/KllDoublesSketch.java | 16 -- .../kll/KllDoublesSketchIterator.java | 42 +--- .../kll/KllDoublesSketchSortedView.java | 81 ++++--- .../KllDoublesSketchSortedViewIterator.java | 79 ------- .../datasketches/kll/KllFloatsSketch.java | 16 -- .../kll/KllFloatsSketchIterator.java | 42 +--- .../kll/KllFloatsSketchSortedView.java | 64 +++++- .../KllFloatsSketchSortedViewIterator.java | 79 ------- .../datasketches/kll/KllItemsSketch.java | 21 +- .../kll/KllItemsSketchIterator.java | 42 +--- .../kll/KllItemsSketchSortedView.java | 186 ++++++++++----- .../datasketches/kll/KllSketchIterator.java | 82 +++++++ .../datasketches/partitions/BoundsRule.java | 37 +++ .../datasketches/partitions/Partitioner.java | 211 ++++++++++++++++++ .../partitions/SketchFillRequest.java | 46 ++++ .../datasketches/partitions/package-info.java | 23 ++ .../datasketches/quantiles/DoublesSketch.java | 16 -- .../quantiles/DoublesSketchSortedView.java | 76 ++++--- .../DoublesSketchSortedViewIterator.java | 77 ------- .../datasketches/quantiles/ItemsSketch.java | 95 +------- .../quantiles/ItemsSketchSortedView.java | 152 +++++++++---- .../quantilescommon/DoublesSortedView.java | 20 +- .../DoublesSortedViewIterator.java | 18 +- .../quantilescommon/FloatsSortedView.java | 18 ++ .../FloatsSortedViewIterator.java | 18 +- .../GenericPartitionBoundaries.java | 136 +++++++++++ .../quantilescommon/GenericSortedView.java | 23 +- .../GenericSortedViewIterator.java | 54 +---- .../quantilescommon/PartitionBoundaries.java | 67 ++++++ .../quantilescommon/PartitioningFeature.java | 83 +++++++ .../quantilescommon/QuantilesAPI.java | 4 +- .../quantilescommon/QuantilesDoublesAPI.java | 50 ----- .../quantilescommon/QuantilesFloatsAPI.java | 50 ----- .../quantilescommon/QuantilesGenericAPI.java | 94 -------- .../quantilescommon/QuantilesUtil.java | 22 +- .../quantilescommon/SortedView.java | 36 +-- 
.../quantilescommon/SortedViewIterator.java | 55 +++-- .../datasketches/quantilescommon/Stack.java | 68 ++++++ .../datasketches/req/BaseReqSketch.java | 18 -- .../datasketches/req/ReqSketchSortedView.java | 80 ++++--- .../req/ReqSketchSortedViewIterator.java | 80 ------- .../apache/datasketches/common/UtilTest.java | 9 +- ...lDirectCompactItemsSketchIteratorTest.java | 8 +- .../kll/KllDirectDoublesSketchTest.java | 16 -- .../kll/KllDirectFloatsSketchTest.java | 16 -- .../kll/KllDoublesSketchIteratorTest.java | 8 +- .../kll/KllDoublesSketchTest.java | 27 +-- .../kll/KllFloatsSketchIteratorTest.java | 8 +- .../datasketches/kll/KllFloatsSketchTest.java | 27 +-- .../kll/KllItemsSketchSortedViewString.java | 7 +- .../datasketches/kll/KllItemsSketchTest.java | 16 +- .../kll/KllItemsSketchiteratorTest.java | 8 +- .../kll/KllMiscDirectDoublesTest.java | 13 -- .../kll/KllMiscDirectFloatsTest.java | 13 -- .../datasketches/kll/KllMiscItemsTest.java | 4 +- .../partitions/ClassicPartitionsTest.java | 127 +++++++++++ .../ItemsSketchFillRequestLongAsString.java | 121 ++++++++++ ...KllItemsSketchFillRequestLongAsString.java | 121 ++++++++++ .../partitions/KllPartitionsTest.java | 127 +++++++++++ .../quantiles/CustomQuantilesTest.java | 4 +- .../quantiles/DoublesSketchTest.java | 7 +- .../HeapUpdateDoublesSketchTest.java | 28 +-- .../ItemsSketchSortedViewString.java | 6 +- .../quantiles/ItemsSketchTest.java | 12 +- .../quantiles/SkewedDataTest.java | 114 ++++++++++ .../CrossCheckQuantilesTest.java | 49 ++-- .../LongsAsOrderableStrings.java | 64 ++++++ .../quantilescommon/ReflectUtilityTest.java | 14 +- .../req/ReqSketchSortedViewTest.java | 21 +- .../datasketches/req/ReqSketchTest.java | 20 +- 73 files changed, 2222 insertions(+), 1362 deletions(-) delete mode 100644 src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedViewIterator.java delete mode 100644 src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedViewIterator.java create mode 100644 
src/main/java/org/apache/datasketches/kll/KllSketchIterator.java create mode 100644 src/main/java/org/apache/datasketches/partitions/BoundsRule.java create mode 100644 src/main/java/org/apache/datasketches/partitions/Partitioner.java create mode 100644 src/main/java/org/apache/datasketches/partitions/SketchFillRequest.java create mode 100644 src/main/java/org/apache/datasketches/partitions/package-info.java delete mode 100644 src/main/java/org/apache/datasketches/quantiles/DoublesSketchSortedViewIterator.java create mode 100644 src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java create mode 100644 src/main/java/org/apache/datasketches/quantilescommon/PartitionBoundaries.java create mode 100644 src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java create mode 100644 src/main/java/org/apache/datasketches/quantilescommon/Stack.java delete mode 100644 src/main/java/org/apache/datasketches/req/ReqSketchSortedViewIterator.java create mode 100644 src/test/java/org/apache/datasketches/partitions/ClassicPartitionsTest.java create mode 100644 src/test/java/org/apache/datasketches/partitions/ItemsSketchFillRequestLongAsString.java create mode 100644 src/test/java/org/apache/datasketches/partitions/KllItemsSketchFillRequestLongAsString.java create mode 100644 src/test/java/org/apache/datasketches/partitions/KllPartitionsTest.java create mode 100644 src/test/java/org/apache/datasketches/quantiles/SkewedDataTest.java create mode 100644 src/test/java/org/apache/datasketches/quantilescommon/LongsAsOrderableStrings.java diff --git a/README.md b/README.md index 8da5faac3..3190036d1 100644 --- a/README.md +++ b/README.md @@ -154,5 +154,5 @@ In Eclipse, open the project *Properties / Java Build Path / Module Dependencies #### SpotBugs -* Make sure you configure SpotBugs with the /tools/FindBugsExcludeFilter.xml file. 
Otherwise, you will get a lot of false positive or low risk issues that we have examined and exliminated with this exclusion file. +* Make sure you configure SpotBugs with the /tools/FindBugsExcludeFilter.xml file. Otherwise, you may get a lot of false positive or low risk issues that we have examined and eliminated with this exclusion file. diff --git a/pom.xml b/pom.xml index 75016d7f7..02765d07f 100644 --- a/pom.xml +++ b/pom.xml @@ -150,6 +150,13 @@ under the License. ${testng.version} test + diff --git a/src/main/java/org/apache/datasketches/common/Util.java b/src/main/java/org/apache/datasketches/common/Util.java index 602b40b0b..c9a749e55 100644 --- a/src/main/java/org/apache/datasketches/common/Util.java +++ b/src/main/java/org/apache/datasketches/common/Util.java @@ -24,6 +24,7 @@ import static java.lang.Math.log; import static java.lang.Math.pow; import static java.lang.Math.round; +import static java.util.Arrays.fill; import java.util.Comparator; @@ -217,7 +218,7 @@ public static String nanoSecToString(final long nS) { /** * Returns the given time in milliseconds formatted as Hours:Min:Sec.mSec - * @param mS the given nanoseconds + * @param mS the given milliseconds * @return the given time in milliseconds formatted as Hours:Min:Sec.mSec */ public static String milliSecToString(final long mS) { @@ -244,40 +245,20 @@ public static String zeroPad(final String s, final int fieldLength) { /** * Prepend or postpend the given string with the given character to fill the given field length. - * If the given string is equal or greater than the given field length, it will be returned - * without modification. + * If the given string is equal to or greater than the given field length, it will be returned without modification. * @param s the given string * @param fieldLength the desired field length * @param padChar the desired pad character * @param postpend if true append the pacCharacters to the end of the string. 
- * @return prepended or postpended given string with the given character to fill the given field - * length. + * @return prepended or postpended given string with the given character to fill the given field length. */ - public static String characterPad(final String s, final int fieldLength, final char padChar, - final boolean postpend) { - final char[] chArr = s.toCharArray(); - final int sLen = chArr.length; + public static String characterPad(final String s, final int fieldLength, final char padChar, final boolean postpend) { + final int sLen = s.length(); if (sLen < fieldLength) { - final char[] out = new char[fieldLength]; - final int blanks = fieldLength - sLen; - - if (postpend) { - for (int i = 0; i < sLen; i++) { - out[i] = chArr[i]; - } - for (int i = sLen; i < fieldLength; i++) { - out[i] = padChar; - } - } else { //prepend - for (int i = 0; i < blanks; i++) { - out[i] = padChar; - } - for (int i = blanks; i < fieldLength; i++) { - out[i] = chArr[i - blanks]; - } - } - - return String.valueOf(out); + final char[] cArr = new char[fieldLength - sLen]; + fill(cArr, padChar); + final String addstr = String.valueOf(cArr); + return (postpend) ? s.concat(addstr) : addstr.concat(s); } return s; } @@ -550,56 +531,60 @@ public static double powerSeriesNextDouble(final int ppb, final double curPoint, } /** - * Computes the ceiling power of given base and n as doubles. - * This is the smallest positive power - * of base that equal to or greater than the given n and equal to a mathematical integer. + * Returns the ceiling of a given n given a radix, where the ceiling is an integral power of the radix. + * This is the smallest positive power of radix that is equal to or greater than the given n + * and equal to a mathematical integer. * The result of this function is consistent with {@link #ceilingIntPowerOf2(int)} for values * less than one. I.e., if n < 1, the result is 1. * - * @param base The base in the expression ⌈basen⌉. + *

The formula is: $\mathrm{radix}^{\mathrm{ceiling}(\log_{\mathrm{radix}}(x))}$

+ * + * @param radix The base of the number system. * @param n The input argument. - * @return the ceiling power of base as a double and equal to a mathematical integer. + * @return the ceiling power of radix as a double and equal to a mathematical integer. */ - public static double ceilingPowerBaseOfDouble(final double base, final double n) { + public static double ceilingPowerBaseOfDouble(final double radix, final double n) { final double x = n < 1.0 ? 1.0 : n; - return pow(base, ceil(logBaseOfX(base, x))); + return Math.round(pow(radix, ceil(logBaseOfX(radix, x)))); } /** - * Computes the floor power of given base and n as doubles. - * This is the largest positive power - * of base that equal to or less than the given n and equal to a mathematical integer. + * Computes the floor of a given n given radix, where the floor is an integral power of the radix. + * This is the largest positive power of radix that is equal to or less than the given n + * and equal to a mathematical integer. * The result of this function is consistent with {@link #floorPowerOf2(int)} for values * less than one. I.e., if n < 1, the result is 1. * - * @param base The base in the expression ⌊basen⌋. + *

The formula is: $\mathrm{radix}^{\mathrm{floor}(\log_{\mathrm{radix}}(x))}$

+ * + * @param radix The base of the number system. * @param n The input argument. * @return the floor power of 2 and equal to a mathematical integer. */ - public static double floorPowerBaseOfDouble(final double base, final double n) { + public static double floorPowerBaseOfDouble(final double radix, final double n) { final double x = n < 1.0 ? 1.0 : n; - return pow(base, floor(logBaseOfX(base, x))); + return Math.round(pow(radix, floor(logBaseOfX(radix, x)))); } // Logarithm related /** - * The log base 2 of the value + * The log2(value) * @param value the given value - * @return The log base 2 of the value + * @return log2(value) */ public static double log2(final double value) { return log(value) / LOG2; } /** - * Returns the logarithm_logBase of x. Example: logB(2.0, x) = log(x) / log(2.0). - * @param logBase the base of the logarithm used + * Returns the logradix(x). Example: logB(2.0, x) = log(x) / log(2.0). + * @param radix the base of the number system * @param x the given value - * @return the logarithm_logBase of x: Example: logB(2.0, x) = log(x) / log(2.0). + * @return the logradix(x): Example: logB(2.0, x) = log(x) / log(2.0). 
*/ - public static double logBaseOfX(final double logBase, final double x) { - return log(x) / log(logBase); + public static double logBaseOfX(final double radix, final double x) { + return log(x) / log(radix); } /** diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java index 213544021..7c175512a 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java @@ -24,7 +24,6 @@ import static org.apache.datasketches.common.ByteArrayUtil.putDoubleLE; import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE; import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; -import static org.apache.datasketches.quantilescommon.QuantilesUtil.equallySpacedDoubles; import java.util.Objects; @@ -175,21 +174,6 @@ public double[] getCDF(final double[] splitPoints, final QuantileSearchCriteria return kllDoublesSV.getCDF(splitPoints, searchCrit); } - @Override - public DoublesPartitionBoundaries getPartitionBoundaries(final int numEquallyWeighted, - final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - final double[] ranks = equallySpacedDoubles(numEquallyWeighted); - final double[] boundaries = getQuantiles(ranks, searchCrit); - boundaries[0] = getMinItem(); - boundaries[boundaries.length - 1] = getMaxItem(); - final DoublesPartitionBoundaries dpb = new DoublesPartitionBoundaries(); - dpb.N = this.getN(); - dpb.ranks = ranks; - dpb.boundaries = boundaries; - return dpb; - } - @Override public double[] getPMF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java index 
473d5f1bb..bc18c5347 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketchIterator.java @@ -24,20 +24,12 @@ /** * Iterator over KllDoublesSketch. The order is not defined. */ -public final class KllDoublesSketchIterator implements QuantilesDoublesSketchIterator { +public final class KllDoublesSketchIterator extends KllSketchIterator implements QuantilesDoublesSketchIterator { private final double[] quantiles; - private final int[] levelsArr; - private final int numLevels; - private int level; - private int index; - private long weight; - private boolean isInitialized; KllDoublesSketchIterator(final double[] quantiles, final int[] levelsArr, final int numLevels) { + super(levelsArr, numLevels); this.quantiles = quantiles; - this.levelsArr = levelsArr; - this.numLevels = numLevels; - this.isInitialized = false; } @Override @@ -45,34 +37,4 @@ public double getQuantile() { return quantiles[index]; } - @Override - public long getWeight() { - return weight; - } - - @Override - public boolean next() { - if (!isInitialized) { - level = 0; - index = levelsArr[level]; - weight = 1; - isInitialized = true; - } else { - index++; - } - if (index < levelsArr[level + 1]) { - return true; - } - // go to the next non-empty level - do { - level++; - if (level == numLevels) { - return false; // run out of levels - } - weight *= 2; - } while (levelsArr[level] == levelsArr[level + 1]); - index = levelsArr[level]; - return true; - } - } diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedView.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedView.java index 8f8ae5d63..e8bed53eb 100644 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedView.java @@ -27,6 +27,7 @@ import org.apache.datasketches.common.SketchesArgumentException; import 
org.apache.datasketches.quantilescommon.DoublesSortedView; +import org.apache.datasketches.quantilescommon.DoublesSortedViewIterator; import org.apache.datasketches.quantilescommon.InequalitySearch; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; import org.apache.datasketches.quantilescommon.QuantilesUtil; @@ -40,6 +41,9 @@ public final class KllDoublesSketchSortedView implements DoublesSortedView { private final double[] quantiles; private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights private final long totalN; + private final double[] normRanks; + private final double maxItem; + private final double minItem; /** * Construct from elements for testing. @@ -47,31 +51,44 @@ public final class KllDoublesSketchSortedView implements DoublesSortedView { * @param cumWeights sorted, monotonically increasing cumulative weights. * @param totalN the total number of items presented to the sketch. */ - KllDoublesSketchSortedView(final double[] quantiles, final long[] cumWeights, final long totalN) { + KllDoublesSketchSortedView(final double[] quantiles, final long[] cumWeights, final long totalN, + final double maxItem, final double minItem) { this.quantiles = quantiles; this.cumWeights = cumWeights; this.totalN = totalN; + this.maxItem = maxItem; + this.minItem = minItem; + final int len = cumWeights.length; + final double[] normRanks = new double[len]; + for (int i = 0; i < len; i++) { normRanks[i] = (double)cumWeights[i] / totalN; } + this.normRanks = normRanks; } /** * Constructs this Sorted View given the sketch - * @param sk the given KllDoublesSketch. + * @param sketch the given KllDoublesSketch. 
*/ - public KllDoublesSketchSortedView(final KllDoublesSketch sk) { - this.totalN = sk.getN(); - final double[] srcQuantiles = sk.getDoubleItemsArray(); - final int[] srcLevels = sk.levelsArr; - final int srcNumLevels = sk.getNumLevels(); + public KllDoublesSketchSortedView(final KllDoublesSketch sketch) { + if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + this.totalN = sketch.getN(); + this.maxItem = sketch.getMaxItem(); + this.minItem = sketch.getMinItem(); + final double[] srcQuantiles = sketch.getDoubleItemsArray(); + final int[] srcLevels = sketch.levelsArr; + final int srcNumLevels = sketch.getNumLevels(); - if (!sk.isLevelZeroSorted()) { + if (!sketch.isLevelZeroSorted()) { Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1]); - if (!sk.hasMemory()) { sk.setLevelZeroSorted(true); } + if (!sketch.hasMemory()) { sketch.setLevelZeroSorted(true); } } final int numQuantiles = srcLevels[srcNumLevels] - srcLevels[0]; //remove garbage quantiles = new double[numQuantiles]; cumWeights = new long[numQuantiles]; populateFromSketch(srcQuantiles, srcLevels, srcNumLevels, numQuantiles); + final double[] normRanks = new double[numQuantiles]; + for (int i = 0; i < numQuantiles; i++) { normRanks[i] = (double)cumWeights[i] / totalN; } + this.normRanks = normRanks; } @Override @@ -79,34 +96,36 @@ public long[] getCumulativeWeights() { return cumWeights.clone(); } + @Override + public double getMaxItem() { + return maxItem; + } + + @Override + public double getMinItem() { + return minItem; + } + + @Override + public long getN() { + return totalN; + } + + @Override + public double[] getNormalizedRanks() { + return normRanks; + } + @Override public double getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } QuantilesUtil.checkNormalizedRankBounds(rank); final int len = cumWeights.length; - final double naturalRank = getNaturalRank(rank, totalN); + final double 
naturalRank = getNaturalRank(rank, totalN, searchCrit); final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT; final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit); if (index == -1) { - return quantiles[quantiles.length - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; - } - return quantiles[index]; - } - - /** - * Special version of getQuantile to support the getPartitionBoundaries(int) function. - * @param weight ultimately comes from selected integral weights computed by the sketch. - * @param searchCrit If INCLUSIVE, the given rank includes all quantiles ≤ - * the quantile directly corresponding to the given weight internal to the sketch. - * @return the approximate quantile given the weight. - */ - double getQuantile(final long weight, final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new IllegalArgumentException(EMPTY_MSG); } - final int len = cumWeights.length; - final InequalitySearch crit = (searchCrit == INCLUSIVE) ? 
InequalitySearch.GE : InequalitySearch.GT; - final int index = InequalitySearch.find(cumWeights, 0, len - 1, weight, crit); - if (index == -1) { - return quantiles[quantiles.length - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; + return quantiles[len - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; } return quantiles[index]; } @@ -134,8 +153,8 @@ public boolean isEmpty() { } @Override - public KllDoublesSketchSortedViewIterator iterator() { - return new KllDoublesSketchSortedViewIterator(quantiles, cumWeights); + public DoublesSortedViewIterator iterator() { + return new DoublesSortedViewIterator(quantiles, cumWeights); } //restricted methods diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedViewIterator.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedViewIterator.java deleted file mode 100644 index 29131bd2c..000000000 --- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketchSortedViewIterator.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.kll; - -import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; - -import org.apache.datasketches.quantilescommon.DoublesSortedViewIterator; -import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; - -/** - * Iterator over KllDoublesSketchSortedView - * @author Alexander Saydakov - * @author Lee Rhodes - */ -public final class KllDoublesSketchSortedViewIterator implements DoublesSortedViewIterator { - - private final double[] quantiles; - private final long[] cumWeights; - private final long totalN; - private int index; - - KllDoublesSketchSortedViewIterator(final double[] quantiles, final long[] cumWeights) { - this.quantiles = quantiles; - this.cumWeights = cumWeights; - this.totalN = (cumWeights.length > 0) ? cumWeights[cumWeights.length - 1] : 0; - index = -1; - } - - @Override - public long getCumulativeWeight(final QuantileSearchCriteria searchCrit) { - if (searchCrit == INCLUSIVE) { return cumWeights[index]; } - return (index == 0) ? 
0 : cumWeights[index - 1]; - } - - @Override - public long getN() { - return totalN; - } - - @Override - public double getNormalizedRank(final QuantileSearchCriteria searchCrit) { - return (double) getCumulativeWeight(searchCrit) / totalN; - } - - @Override - public double getQuantile() { - return quantiles[index]; - } - - @Override - public long getWeight() { - if (index == 0) { return cumWeights[0]; } - return cumWeights[index] - cumWeights[index - 1]; - } - - @Override - public boolean next() { - index++; - return index < quantiles.length; - } - -} diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java index e2e4d808a..5484e8bf1 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java @@ -24,7 +24,6 @@ import static org.apache.datasketches.common.ByteArrayUtil.putFloatLE; import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; -import static org.apache.datasketches.quantilescommon.QuantilesUtil.equallySpacedDoubles; import java.util.Objects; @@ -175,21 +174,6 @@ public double[] getCDF(final float[] splitPoints, final QuantileSearchCriteria s return kllFloatsSV.getCDF(splitPoints, searchCrit); } - @Override - public FloatsPartitionBoundaries getPartitionBoundaries(final int numEquallyWeighted, - final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - final double[] ranks = equallySpacedDoubles(numEquallyWeighted); - final float[] boundaries = getQuantiles(ranks, searchCrit); - boundaries[0] = getMinItem(); - boundaries[boundaries.length - 1] = getMaxItem(); - final FloatsPartitionBoundaries fpb = new FloatsPartitionBoundaries(); - fpb.N = this.getN(); - fpb.ranks = ranks; - fpb.boundaries = boundaries; - return fpb; - } - @Override 
public double[] getPMF(final float[] splitPoints, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java index 8c5808ead..accf039de 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchIterator.java @@ -24,20 +24,12 @@ /** * Iterator over KllFloatsSketch. The order is not defined. */ -public final class KllFloatsSketchIterator implements QuantilesFloatsSketchIterator { +public final class KllFloatsSketchIterator extends KllSketchIterator implements QuantilesFloatsSketchIterator { private final float[] quantiles; - private final int[] levelsArr; - private final int numLevels; - private int level; - private int index; - private long weight; - private boolean isInitialized; KllFloatsSketchIterator(final float[] quantiles, final int[] levelsArr, final int numLevels) { + super(levelsArr, numLevels); this.quantiles = quantiles; - this.levelsArr = levelsArr; - this.numLevels = numLevels; - this.isInitialized = false; } @Override @@ -45,34 +37,4 @@ public float getQuantile() { return quantiles[index]; } - @Override - public long getWeight() { - return weight; - } - - @Override - public boolean next() { - if (!isInitialized) { - level = 0; - index = levelsArr[level]; - weight = 1; - isInitialized = true; - } else { - index++; - } - if (index < levelsArr[level + 1]) { - return true; - } - // go to the next non-empty level - do { - level++; - if (level == numLevels) { - return false; // run out of levels - } - weight *= 2; - } while (levelsArr[level] == levelsArr[level + 1]); - index = levelsArr[level]; - return true; - } - } diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java 
b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java index 8f47a8da7..08678503c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java @@ -27,6 +27,7 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.quantilescommon.FloatsSortedView; +import org.apache.datasketches.quantilescommon.FloatsSortedViewIterator; import org.apache.datasketches.quantilescommon.InequalitySearch; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; import org.apache.datasketches.quantilescommon.QuantilesUtil; @@ -40,6 +41,9 @@ public final class KllFloatsSketchSortedView implements FloatsSortedView { private final float[] quantiles; private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights private final long totalN; + private final double[] normRanks; + private final float maxItem; + private final float minItem; /** * Construct from elements for testing. @@ -47,44 +51,80 @@ public final class KllFloatsSketchSortedView implements FloatsSortedView { * @param cumWeights sorted, monotonically increasing cumulative weights. * @param totalN the total number of items presented to the sketch. */ - KllFloatsSketchSortedView(final float[] quantiles, final long[] cumWeights, final long totalN) { + KllFloatsSketchSortedView(final float[] quantiles, final long[] cumWeights, final long totalN, + final float maxItem, final float minItem) { this.quantiles = quantiles; this.cumWeights = cumWeights; this.totalN = totalN; + this.maxItem = maxItem; + this.minItem = minItem; + final int len = cumWeights.length; + final double[] normRanks = new double[len]; + for (int i = 0; i < len; i++) { normRanks[i] = (double)cumWeights[i] / totalN; } + this.normRanks = normRanks; } /** * Constructs this Sorted View given the sketch - * @param sk the given KllFloatsSketch. 
+ * @param sketch the given KllFloatsSketch. */ - public KllFloatsSketchSortedView(final KllFloatsSketch sk) { - this.totalN = sk.getN(); - final float[] srcQuantiles = sk.getFloatItemsArray(); - final int[] srcLevels = sk.levelsArr; - final int srcNumLevels = sk.getNumLevels(); + public KllFloatsSketchSortedView(final KllFloatsSketch sketch) { + if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + this.totalN = sketch.getN(); + this.maxItem = sketch.getMaxItem(); + this.minItem = sketch.getMinItem(); + final float[] srcQuantiles = sketch.getFloatItemsArray(); + final int[] srcLevels = sketch.levelsArr; + final int srcNumLevels = sketch.getNumLevels(); - if (!sk.isLevelZeroSorted()) { + if (!sketch.isLevelZeroSorted()) { Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1]); - if (!sk.hasMemory()) { sk.setLevelZeroSorted(true); } + if (!sketch.hasMemory()) { sketch.setLevelZeroSorted(true); } } final int numQuantiles = srcLevels[srcNumLevels] - srcLevels[0]; //remove garbage quantiles = new float[numQuantiles]; cumWeights = new long[numQuantiles]; populateFromSketch(srcQuantiles, srcLevels, srcNumLevels, numQuantiles); + final int len = cumWeights.length; + final double[] normRanks = new double[len]; + for (int i = 0; i < len; i++) { normRanks[i] = (double)cumWeights[i] / totalN; } + this.normRanks = normRanks; } + //end of constructors + @Override public long[] getCumulativeWeights() { return cumWeights.clone(); } + @Override + public float getMaxItem() { + return maxItem; + } + + @Override + public float getMinItem() { + return minItem; + } + + @Override + public long getN() { + return totalN; + } + + @Override + public double[] getNormalizedRanks() { + return normRanks; + } + @Override public float getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } QuantilesUtil.checkNormalizedRankBounds(rank); final int len = cumWeights.length; - final double 
naturalRank = getNaturalRank(rank, totalN); + final double naturalRank = getNaturalRank(rank, totalN, searchCrit); final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT; final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit); if (index == -1) { @@ -116,8 +156,8 @@ public boolean isEmpty() { } @Override - public KllFloatsSketchSortedViewIterator iterator() { - return new KllFloatsSketchSortedViewIterator(quantiles, cumWeights); + public FloatsSortedViewIterator iterator() { + return new FloatsSortedViewIterator(quantiles, cumWeights); } //restricted methods diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedViewIterator.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedViewIterator.java deleted file mode 100644 index 87c2e88bd..000000000 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedViewIterator.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.kll; - -import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; - -import org.apache.datasketches.quantilescommon.FloatsSortedViewIterator; -import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; - -/** - * Iterator over KllFloatsSketchSortedView - * @author Alexander Saydakov - * @author Lee Rhodes - */ -public final class KllFloatsSketchSortedViewIterator implements FloatsSortedViewIterator { - - private final float[] quantiles; - private final long[] cumWeights; - private final long totalN; - private int index; - - KllFloatsSketchSortedViewIterator(final float[] quantiles, final long[] cumWeights) { - this.quantiles = quantiles; - this.cumWeights = cumWeights; - this.totalN = (cumWeights.length > 0) ? cumWeights[cumWeights.length - 1] : 0; - index = -1; - } - - @Override - public long getCumulativeWeight(final QuantileSearchCriteria searchCrit) { - if (searchCrit == INCLUSIVE) { return cumWeights[index]; } - return (index == 0) ? 
0 : cumWeights[index - 1]; - } - - @Override - public long getN() { - return totalN; - } - - @Override - public double getNormalizedRank(final QuantileSearchCriteria searchCrit) { - return (double) getCumulativeWeight(searchCrit) / totalN; - } - - @Override - public float getQuantile() { - return quantiles[index]; - } - - @Override - public long getWeight() { - if (index == 0) { return cumWeights[0]; } - return cumWeights[index] - cumWeights[index - 1]; - } - - @Override - public boolean next() { - index++; - return index < quantiles.length; - } - -} diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java index d5f73b00d..f0e923fbd 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java @@ -23,7 +23,6 @@ import static java.lang.Math.min; import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE; import static org.apache.datasketches.kll.KllSketch.SketchType.ITEMS_SKETCH; -import static org.apache.datasketches.quantilescommon.QuantilesUtil.equallySpacedDoubles; import java.lang.reflect.Array; import java.util.Comparator; @@ -34,7 +33,10 @@ import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; +import org.apache.datasketches.quantilescommon.PartitioningFeature; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; +import org.apache.datasketches.quantilescommon.QuantilesAPI; import org.apache.datasketches.quantilescommon.QuantilesGenericAPI; import org.apache.datasketches.quantilescommon.QuantilesGenericSketchIterator; @@ -46,7 +48,7 @@ * @see org.apache.datasketches.kll.KllSketch */ @SuppressWarnings("unchecked") -public abstract class KllItemsSketch extends KllSketch implements 
QuantilesGenericAPI { +public abstract class KllItemsSketch extends KllSketch implements QuantilesGenericAPI, PartitioningFeature { private KllItemsSketchSortedView kllItemsSV = null; final Comparator comparator; final ArrayOfItemsSerDe serDe; @@ -150,18 +152,11 @@ public double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria searc } @Override - public GenericPartitionBoundaries getPartitionBoundaries(final int numEquallyWeighted, + public GenericPartitionBoundaries getPartitionBoundaries(final int numEquallySized, final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - final double[] ranks = equallySpacedDoubles(numEquallyWeighted); - final Object[] boundaries = getQuantiles(ranks, searchCrit); - boundaries[0] = getMinItem(); - boundaries[boundaries.length - 1] = getMaxItem(); - final GenericPartitionBoundaries gpb = new GenericPartitionBoundaries<>(); - gpb.N = this.getN(); - gpb.ranks = ranks; - gpb.boundaries = (T[])boundaries; - return gpb; + if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } + refreshSortedView(); + return kllItemsSV.getPartitionBoundaries(numEquallySized, searchCrit); } @Override diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsSketchIterator.java b/src/main/java/org/apache/datasketches/kll/KllItemsSketchIterator.java index 4adb9d79b..3a0a8da0f 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsSketchIterator.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketchIterator.java @@ -24,20 +24,12 @@ /** * Iterator over KllItemsSketch. The order is not defined. 
*/ -public final class KllItemsSketchIterator implements QuantilesGenericSketchIterator { +public final class KllItemsSketchIterator extends KllSketchIterator implements QuantilesGenericSketchIterator { private final Object[] quantiles; - private final int[] levelsArr; - private final int numLevels; - private int level; - private int index; - private long weight; - private boolean isInitialized_; KllItemsSketchIterator(final Object[] quantiles, final int[] levelsArr, final int numLevels) { + super(levelsArr, numLevels); this.quantiles = quantiles; - this.levelsArr = levelsArr; - this.numLevels = numLevels; - this.isInitialized_ = false; } @SuppressWarnings("unchecked") @@ -46,34 +38,4 @@ public T getQuantile() { return (T)quantiles[index]; } - @Override - public long getWeight() { - return weight; - } - - @Override - public boolean next() { - if (!isInitialized_) { - level = 0; - index = levelsArr[level]; - weight = 1; - isInitialized_ = true; - } else { - index++; - } - if (index < levelsArr[level + 1]) { - return true; - } - // go to the next non-empty level - do { - level++; - if (level == numLevels) { - return false; // run out of levels - } - weight *= 2; - } while (levelsArr[level] == levelsArr[level + 1]); - index = levelsArr[level]; - return true; - } - } diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java index 7c066dff1..4b901f54a 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java @@ -22,6 +22,7 @@ import static org.apache.datasketches.quantilescommon.GenericInequalitySearch.find; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; +import static org.apache.datasketches.quantilescommon.QuantilesUtil.evenlySpacedDoubles; import 
static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank; import java.lang.reflect.Array; @@ -31,10 +32,13 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; import org.apache.datasketches.quantilescommon.GenericInequalitySearch.Inequality; +import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; import org.apache.datasketches.quantilescommon.GenericSortedView; import org.apache.datasketches.quantilescommon.GenericSortedViewIterator; import org.apache.datasketches.quantilescommon.InequalitySearch; +import org.apache.datasketches.quantilescommon.PartitioningFeature; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; +import org.apache.datasketches.quantilescommon.QuantilesAPI; import org.apache.datasketches.quantilescommon.QuantilesUtil; /** @@ -43,13 +47,15 @@ * @author Alexander Saydakov * @author Lee Rhodes */ -@SuppressWarnings("unchecked") -public class KllItemsSketchSortedView implements GenericSortedView { - private final Object[] quantiles; +public class KllItemsSketchSortedView implements GenericSortedView, PartitioningFeature { + private final T[] quantiles; private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights private final long totalN; + private final Comparator comparator; + private final T maxItem; private final T minItem; - private final Comparator comp; + private final Class clazz; + private final double[] normRanks; /** * Construct from elements for testing only. 
@@ -59,49 +65,59 @@ public class KllItemsSketchSortedView implements GenericSortedView { * @param minItem used to extract the type of T * @param comparator the Comparator for type T */ + @SuppressWarnings("unchecked") KllItemsSketchSortedView( final T[] quantiles, final long[] cumWeights, final long totalN, - final T minItem, - final Comparator comparator) { + final Comparator comparator, + final T maxItem, + final T minItem) { this.quantiles = quantiles; this.cumWeights = cumWeights; this.totalN = totalN; + this.comparator = comparator; + this.maxItem = maxItem; this.minItem = minItem; - this.comp = comparator; + this.clazz = (Class)quantiles[0].getClass(); + this.normRanks = convertCumWtsToNormRanks(cumWeights, totalN); } /** * Constructs this Sorted View given the sketch - * @param sk the given KllItemsSketch. + * @param sketch the given KllItemsSketch. */ - KllItemsSketchSortedView(final KllItemsSketch sk) { - this.totalN = sk.getN(); - this.minItem = sk.getMinItem(); - final Object[] srcQuantiles = sk.getTotalItemsArray(); - final int[] srcLevels = sk.levelsArr; - final int srcNumLevels = sk.getNumLevels(); - this.comp = sk.comparator; + @SuppressWarnings("unchecked") + KllItemsSketchSortedView(final KllItemsSketch sketch) { + if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + this.totalN = sketch.getN(); + final T[] srcQuantiles = sketch.getTotalItemsArray(); + final int[] srcLevels = sketch.levelsArr; + final int srcNumLevels = sketch.getNumLevels(); + this.comparator = sketch.comparator; + this.maxItem = sketch.getMaxItem(); + this.minItem = sketch.getMinItem(); + this.clazz = (Class)sketch.serDe.getClassOfT(); if (totalN == 0) { throw new SketchesArgumentException(EMPTY_MSG); } - if (!sk.isLevelZeroSorted()) { - Arrays.sort((T[])srcQuantiles, srcLevels[0], srcLevels[1], comp); - if (!sk.hasMemory()) { sk.setLevelZeroSorted(true); } + if (!sketch.isLevelZeroSorted()) { + Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1], 
comparator); + if (!sketch.hasMemory()) { sketch.setLevelZeroSorted(true); } } final int numQuantiles = srcLevels[srcNumLevels] - srcLevels[0]; //remove garbage - quantiles = new Object[numQuantiles]; + quantiles = (T[]) Array.newInstance(sketch.serDe.getClassOfT(), numQuantiles); cumWeights = new long[numQuantiles]; populateFromSketch(srcQuantiles, srcLevels, srcNumLevels, numQuantiles); + this.normRanks = convertCumWtsToNormRanks(cumWeights, totalN); } //end of constructors - @Override //implemented here because it needs the comparator + @Override public double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - GenericSortedView.validateItems(splitPoints, comp); + GenericSortedView.validateItems(splitPoints, comparator); final int len = splitPoints.length + 1; final double[] buckets = new double[len]; for (int i = 0; i < len - 1; i++) { @@ -116,10 +132,66 @@ public long[] getCumulativeWeights() { return cumWeights.clone(); } - @Override //implemented here because it needs the comparator + @Override + public T getMaxItem() { + return maxItem; + } + + @Override + public T getMinItem() { + return minItem; + } + + @Override + public long getN() { + return totalN; + } + + @Override + public double[] getNormalizedRanks() { + return normRanks.clone(); + } + + @Override + @SuppressWarnings("unchecked") + public GenericPartitionBoundaries getPartitionBoundaries(final int numEquallySized, + final QuantileSearchCriteria searchCrit) { + if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } + final long totalN = this.totalN; + final int svLen = cumWeights.length; + //adjust ends of sortedView arrays + cumWeights[0] = 1L; + cumWeights[svLen - 1] = totalN; + normRanks[0] = 1.0 / totalN; + normRanks[svLen - 1] = 1.0; + quantiles[0] = this.getMinItem(); + quantiles[svLen - 1] = this.getMaxItem(); + + final double[] evSpNormRanks = evenlySpacedDoubles(0, 1.0, 
numEquallySized + 1); + final int len = evSpNormRanks.length; + final T[] evSpQuantiles = (T[]) Array.newInstance(clazz, len); + + final long[] evSpNatRanks = new long[len]; + for (int i = 0; i < len; i++) { + final int index = getQuantileIndex(evSpNormRanks[i], searchCrit); + evSpQuantiles[i] = getQuantileFromIndex(index); + evSpNatRanks[i] = getCumWeightFromIndex(index); + } + final GenericPartitionBoundaries gpb = new GenericPartitionBoundaries<>( + this.totalN, + evSpQuantiles.clone(), + evSpNatRanks.clone(), + evSpNormRanks.clone(), + getMaxItem(), + getMinItem(), + searchCrit); + return gpb; + } + + @Override public double[] getPMF(final T[] splitPoints, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - GenericSortedView.validateItems(splitPoints, comp); + GenericSortedView.validateItems(splitPoints, comparator); final double[] buckets = getCDF(splitPoints, searchCrit); final int len = buckets.length; for (int i = len; i-- > 1; ) { @@ -132,35 +204,36 @@ public double[] getPMF(final T[] splitPoints, final QuantileSearchCriteria searc public T getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } QuantilesUtil.checkNormalizedRankBounds(rank); + final int index = getQuantileIndex(rank, searchCrit); + return getQuantileFromIndex(index); + } + + private T getQuantileFromIndex(final int index) { return quantiles[index]; } + + private long getCumWeightFromIndex(final int index) { return cumWeights[index]; } + + private int getQuantileIndex(final double rank, final QuantileSearchCriteria searchCrit) { final int len = cumWeights.length; - final double naturalRank = getNaturalRank(rank, totalN); + final double naturalRank = getNaturalRank(rank, totalN, searchCrit); final InequalitySearch crit = (searchCrit == INCLUSIVE) ? 
InequalitySearch.GE : InequalitySearch.GT; final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit); - if (index == -1) { - return (T) quantiles[quantiles.length - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; - } - return (T) quantiles[index]; + if (index == -1) { return len - 1; } + return index; } - /** - * Special version of getQuantile to support the getPartitionBoundaries(int) function. - * @param weight ultimately comes from selected integral weights computed by the sketch. - * @param searchCrit If INCLUSIVE, the given rank includes all quantiles ≤ - * the quantile directly corresponding to the given weight internal to the sketch. - * @return the approximate quantile given the weight. - */ - T getQuantile(final long weight, final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new IllegalArgumentException(EMPTY_MSG); } - final int len = cumWeights.length; - final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT; - final int index = InequalitySearch.find(cumWeights, 0, len - 1, weight, crit); - if (index == -1) { - return (T) quantiles[quantiles.length - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; + @SuppressWarnings("unchecked") + public T[] getQuantiles(final double[] ranks, final QuantileSearchCriteria searchCrit) { + if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } + final int len = ranks.length; + final T[] quants = (T[]) Array.newInstance(clazz, len); + for (int i = 0; i < len; i++) { + quants[i] = getQuantile(ranks[i], searchCrit); } - return (T) quantiles[index]; + return quants; } @Override + @SuppressWarnings("unchecked") public T[] getQuantiles() { final T[] quants = (T[]) Array.newInstance(minItem.getClass(), quantiles.length); System.arraycopy(quantiles, 0, quants, 0, quantiles.length); @@ -172,7 +245,7 @@ public double getRank(final T quantile, final QuantileSearchCriteria searchCrit) if (isEmpty()) { throw new 
SketchesArgumentException(EMPTY_MSG); } final int len = quantiles.length; final Inequality crit = (searchCrit == INCLUSIVE) ? Inequality.LE : Inequality.LT; - final int index = find((T[])quantiles, 0, len - 1, quantile, crit, comp); + final int index = find(quantiles, 0, len - 1, quantile, crit, comparator); if (index == -1) { return 0; //EXCLUSIVE (LT) case: quantile <= minQuantile; INCLUSIVE (LE) case: quantile < minQuantile } @@ -185,12 +258,19 @@ public boolean isEmpty() { } @Override - public KllItemsSketchSortedViewIterator iterator() { - return new KllItemsSketchSortedViewIterator<>((T[])quantiles, cumWeights); + public GenericSortedViewIterator iterator() { + return new GenericSortedViewIterator<>(quantiles, cumWeights); } //restricted methods + private static double[] convertCumWtsToNormRanks(final long[] cumWeights, final long totalN) { + final int len = cumWeights.length; + final double[] normRanks = new double[len]; + for (int i = 0; i < len; i++) { normRanks[i] = (double)cumWeights[i] / totalN; } + return normRanks; + } + private void populateFromSketch(final Object[] srcQuantiles, final int[] srcLevels, final int srcNumLevels, final int numItems) { final int[] myLevels = new int[srcNumLevels + 1]; @@ -212,7 +292,7 @@ private void populateFromSketch(final Object[] srcQuantiles, final int[] srcLeve weight *= 2; } final int numLevels = dstLevel; - blockyTandemMergeSort(quantiles, cumWeights, myLevels, numLevels, comp); //create unit weights + blockyTandemMergeSort(quantiles, cumWeights, myLevels, numLevels, comparator); //create unit weights KllHelper.convertToCumulative(cumWeights); } @@ -255,6 +335,7 @@ private static void blockyTandemMergeSortRecursion( startingLevel2, numLevels2, comp); } + @SuppressWarnings("unchecked") private static void tandemMerge( final Object[] quantilesSrc, final long[] weightsSrc, final Object[] quantilesDst, final long[] weightsDst, @@ -290,15 +371,4 @@ private static void tandemMerge( } } - /** - * Iterator over 
KllItemsSketchSortedView. - * @param type of quantile (item) - */ - public static final class KllItemsSketchSortedViewIterator extends GenericSortedViewIterator { - - KllItemsSketchSortedViewIterator(final T[] quantiles, final long[] cumWeights) { - super(quantiles, cumWeights); - } - } - } diff --git a/src/main/java/org/apache/datasketches/kll/KllSketchIterator.java b/src/main/java/org/apache/datasketches/kll/KllSketchIterator.java new file mode 100644 index 000000000..feaf33f53 --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/KllSketchIterator.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import org.apache.datasketches.quantilescommon.QuantilesSketchIterator; + +/** + * The base implementation for the KLL sketch iterator hierarchy used for viewing the + * non-ordered quantiles retained by a sketch. + * + *

<p>Prototype example of the recommended iteration loop:</p>
+ * <pre>{@code
+ *   SketchIterator itr = sketch.iterator();
+ *   while (itr.next()) {
+ *     ...get*();
+ *   }
+ * }</pre>
+ * + * @author Lee Rhodes + */ +public class KllSketchIterator implements QuantilesSketchIterator { + protected final int[] levelsArr; + protected final int numLevels; + protected int level; + protected int index; + protected long weight; + protected boolean isInitialized_; + + KllSketchIterator(final int[] levelsArr, final int numLevels) { + this.levelsArr = levelsArr; + this.numLevels = numLevels; + this.isInitialized_ = false; + } + + @Override + public long getWeight() { + return weight; + } + + @Override + public boolean next() { + if (!isInitialized_) { + level = 0; + index = levelsArr[level]; + weight = 1; + isInitialized_ = true; + } else { + index++; + } + if (index < levelsArr[level + 1]) { + return true; + } + // go to the next non-empty level + do { + level++; + if (level == numLevels) { + return false; // run out of levels + } + weight *= 2; + } while (levelsArr[level] == levelsArr[level + 1]); + index = levelsArr[level]; + return true; + } + +} diff --git a/src/main/java/org/apache/datasketches/partitions/BoundsRule.java b/src/main/java/org/apache/datasketches/partitions/BoundsRule.java new file mode 100644 index 000000000..68dc87bc1 --- /dev/null +++ b/src/main/java/org/apache/datasketches/partitions/BoundsRule.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.partitions; + +public enum BoundsRule { + + /** + * Include both the upper and lower bounds + */ + INCLUDE_BOTH, + + /** + * Include only the upper bound but not the lower bound + */ + INCLUDE_UPPER, + /** + * Include only the lower bound but not the upper bound + */ + INCLUDE_LOWER +} diff --git a/src/main/java/org/apache/datasketches/partitions/Partitioner.java b/src/main/java/org/apache/datasketches/partitions/Partitioner.java new file mode 100644 index 000000000..65577385a --- /dev/null +++ b/src/main/java/org/apache/datasketches/partitions/Partitioner.java @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.partitions; + +import static java.lang.Math.ceil; +import static java.lang.Math.log; +import static java.lang.Math.max; +import static java.lang.Math.min; +import static java.lang.Math.pow; +import static java.lang.Math.round; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; +import org.apache.datasketches.quantilescommon.PartitioningFeature; +import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; +import org.apache.datasketches.quantilescommon.QuantilesGenericAPI; +import org.apache.datasketches.quantilescommon.Stack; + +/** + * A partitioning process that can partition very large data sets into thousands to millions + * of partitions of approximately the same size. + * @param T the data type + * @param S the quantiles sketch that implements both QuantilesGenericAPI and PartitioningFeature. + */ +//@SuppressWarnings("unused") +public class Partitioner & PartitioningFeature> { + private static final QuantileSearchCriteria defaultCriteria = INCLUSIVE; + private final long tgtPartitionSize; + private final int maxPartsPerSk; + private final SketchFillRequest fillReq; + private final QuantileSearchCriteria criteria; + private final Stack> stack = new Stack<>(); + + //computed once at the beginning + private int numLevels; + private int partitionsPerSk; + //output + private final List> finalPartitionList = new ArrayList<>(); + + /** + * This constructor assumes a QuantileSearchCriteria of INCLUSIVE. + * @param tgtPartitionSize the target size of the resulting partitions in number of items. + * @param maxPartsPerPass The maximum number of partitions to request from the sketch. 
The smaller this number is + * the smaller the variance will be of the resulting partitions, but this will increase the number of passes of the + * source data set. + * @param fillReq This is an implementation of the SketchFillRequest call-back supplied by the user and implements + * the SketchFillRequest interface. + */ + public Partitioner( + final long tgtPartitionSize, + final int maxPartsPerPass, + final SketchFillRequest fillReq) { + this(tgtPartitionSize, maxPartsPerPass, fillReq, defaultCriteria); + } + + /** + * This constructor includes the QuantileSearchCriteria criteria as a parameter. + * @param tgtPartitionSize the target size of the resulting partitions in number of items. + * @param maxPartsPerSk The maximum number of partitions to request from the sketch. The smaller this number is + * the smaller the variance will be of the resulting partitions, but this will increase the number of passes of the + * source data set. + * @param fillReq This is an implementation of the SketchFillRequest call-back supplied by the user. + * @param criteria This is the desired QuantileSearchCriteria to be used. + */ + public Partitioner( + final long tgtPartitionSize, + final int maxPartsPerSk, + final SketchFillRequest fillReq, + final QuantileSearchCriteria criteria) { + this.tgtPartitionSize = tgtPartitionSize; + this.maxPartsPerSk = maxPartsPerSk; + this.fillReq = fillReq; + this.criteria = criteria; + } + + /** + * This initiates the partitioning process. + * @param sk A sketch of the entire data set.
+ * @return the final partitioning list + */ + public List> partition(final S sk) { + if (sk.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + final long inputN = sk.getN(); + final double guessNumParts = max(1.0, ceil((double)inputN / tgtPartitionSize)); + this.numLevels = (int)max(1, ceil(log(guessNumParts) / log(maxPartsPerSk))); + final int partsPerSk = (int)round(pow(guessNumParts, 1.0 / numLevels)); + this.partitionsPerSk = min(partsPerSk, maxPartsPerSk); + final GenericPartitionBoundaries gpb = sk.getPartitionBoundaries(partitionsPerSk, criteria); + final StackElement se = new StackElement<>(gpb, stack.size() + 1, 0, "1"); + stack.push(se); + partitionSearch(stack); + return finalPartitionList; + } + + private void partitionSearch(final Stack> stack) { + if (stack.isEmpty()) { + return; + } + final StackElement se = stack.peek(); + final GenericPartitionBoundaries gpb = se.gpb; + final int numParts = gpb.getNumPartitions(); + + if (stack.size() == numLevels) { //at max level + while (++se.part <= numParts) { //add rows to final partition list + final PartitionBoundsRow row = new PartitionBoundsRow<>(se); + finalPartitionList.add(row); + } + stack.pop(); + partitionSearch(stack); + } + else { //not at max level + if (++se.part <= numParts) { + final PartitionBoundsRow row = new PartitionBoundsRow<>(se); + final S sk = fillReq.getRange(row.lowerBound, row.upperBound, row.rule); + final GenericPartitionBoundaries gpb2 = sk.getPartitionBoundaries(this.partitionsPerSk, criteria); + final int level = stack.size() + 1; + final String partId = se.partId + "." 
+ se.part + "," + level; + final StackElement se2 = new StackElement<>(gpb2, level, 0, partId); + stack.push(se2); + partitionSearch(stack); + } + //done with all parts at this level + if (stack.isEmpty()) { + return; + } + stack.pop(); + partitionSearch(stack); + } + } + + /** + * Holds data for a Stack element + */ + public static class StackElement { + public final GenericPartitionBoundaries gpb; + public int part; + public String partId; + + public StackElement(final GenericPartitionBoundaries gpb, final int level, final int part, final String partId) { + this.gpb = gpb; + this.part = part; + this.partId = partId; + } + } + + /** + * Defines a row for List of PartitionBounds. + */ + public static class PartitionBoundsRow { + public int part; + public String partId; + public long approxNumDeltaItems; + public BoundsRule rule; + public T lowerBound; + public T upperBound; + + public PartitionBoundsRow(final StackElement se) { + final GenericPartitionBoundaries gpb = se.gpb; + this.part = se.part; + this.partId = se.partId + "." 
+ part; + final QuantileSearchCriteria searchCrit = gpb.getSearchCriteria(); + final T[] boundaries = gpb.getBoundaries(); + final int numParts = gpb.getNumPartitions(); + if (searchCrit == INCLUSIVE) { + if (part == 1) { + lowerBound = gpb.getMinItem(); + upperBound = boundaries[part]; + rule = BoundsRule.INCLUDE_BOTH; + } else { + lowerBound = boundaries[part - 1]; + upperBound = boundaries[part]; + rule = BoundsRule.INCLUDE_UPPER; + } + } else { //EXCLUSIVE + if (part == numParts) { + lowerBound = boundaries[part - 1]; + upperBound = gpb.getMaxItem(); + rule = BoundsRule.INCLUDE_BOTH; + } else { + lowerBound = boundaries[part - 1]; + upperBound = boundaries[part]; + rule = BoundsRule.INCLUDE_LOWER; + } + } + approxNumDeltaItems = gpb.getNumDeltaItems()[part]; + } + } + +} diff --git a/src/main/java/org/apache/datasketches/partitions/SketchFillRequest.java b/src/main/java/org/apache/datasketches/partitions/SketchFillRequest.java new file mode 100644 index 000000000..d005561d0 --- /dev/null +++ b/src/main/java/org/apache/datasketches/partitions/SketchFillRequest.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.partitions; + +import org.apache.datasketches.quantilescommon.PartitioningFeature; +import org.apache.datasketches.quantilescommon.QuantilesGenericAPI; + +/** + * This is a callback request to the data source to fill a quantiles sketch, + * which is returned to the caller. + * + * @author Lee Rhodes + */ +public interface SketchFillRequest & PartitioningFeature> { + + /** + * This is a callback request to the data source to fill a quantiles sketch + * with a range of data between upper and lower bounds. Which of these bounds are to be included is determined by + * the BoundsRule. + * + *

This range of data may or may not be subsequently further partitioned.

+ * @param lowerQuantile the lowest quantile of a range + * @param upperQuantile the highest quantile of a range + * @param boundsRule determines which quantile bounds to include + * @return a quantiles sketch filled from the given upper and lower bounds. + */ + public S getRange(final T lowerQuantile, final T upperQuantile, final BoundsRule boundsRule); + +} diff --git a/src/main/java/org/apache/datasketches/partitions/package-info.java b/src/main/java/org/apache/datasketches/partitions/package-info.java new file mode 100644 index 000000000..cee11ec1d --- /dev/null +++ b/src/main/java/org/apache/datasketches/partitions/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +/** + * Tools for partitioning very large data sets into approximately equal-sized partitions using quantiles sketches. + */ +package org.apache.datasketches.partitions; diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java index b3a78d5af..bbcdf44f7 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java @@ -28,7 +28,6 @@ import static org.apache.datasketches.quantiles.ClassicUtil.checkK; import static org.apache.datasketches.quantiles.ClassicUtil.computeNumLevelsNeeded; import static org.apache.datasketches.quantiles.ClassicUtil.computeRetainedItems; -import static org.apache.datasketches.quantilescommon.QuantilesUtil.equallySpacedDoubles; import java.util.Random; @@ -170,21 +169,6 @@ public double[] getCDF(final double[] splitPoints, final QuantileSearchCriteria @Override public abstract double getMinItem(); - @Override - public DoublesPartitionBoundaries getPartitionBoundaries(final int numEquallyWeighted, - final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } - final double[] ranks = equallySpacedDoubles(numEquallyWeighted); - final double[] boundaries = getQuantiles(ranks, searchCrit); - boundaries[0] = getMinItem(); - boundaries[boundaries.length - 1] = getMaxItem(); - final DoublesPartitionBoundaries dpb = new DoublesPartitionBoundaries(); - dpb.N = this.getN(); - dpb.ranks = ranks; - dpb.boundaries = boundaries; - return dpb; - } - @Override public double[] getPMF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketchSortedView.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketchSortedView.java index ef250fe5f..b746bae15 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketchSortedView.java +++
b/src/main/java/org/apache/datasketches/quantiles/DoublesSketchSortedView.java @@ -27,8 +27,10 @@ import java.util.Arrays; +import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.quantilescommon.DoublesSortedView; +import org.apache.datasketches.quantilescommon.DoublesSortedViewIterator; import org.apache.datasketches.quantilescommon.InequalitySearch; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; import org.apache.datasketches.quantilescommon.QuantilesUtil; @@ -42,6 +44,9 @@ public final class DoublesSketchSortedView implements DoublesSortedView { private final double[] quantiles; private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights private final long totalN; + private final double[] normRanks; + private final double maxItem; + private final double minItem; /** * Construct from elements for testing. @@ -49,10 +54,17 @@ public final class DoublesSketchSortedView implements DoublesSortedView { * @param cumWeights sorted, monotonically increasing cumulative weights. * @param totalN the total number of items presented to the sketch. 
*/ - DoublesSketchSortedView(final double[] quantiles, final long[] cumWeights, final long totalN) { + DoublesSketchSortedView(final double[] quantiles, final long[] cumWeights, final long totalN, + final double maxItem, final double minItem) { this.quantiles = quantiles; this.cumWeights = cumWeights; this.totalN = totalN; + this.maxItem = maxItem; + this.minItem = minItem; + final int len = cumWeights.length; + final double[] normRanks = new double[len]; + for (int i = 0; i < len; i++) { normRanks[i] = (double)cumWeights[i] / totalN; } + this.normRanks = normRanks; } /** @@ -60,7 +72,10 @@ public final class DoublesSketchSortedView implements DoublesSortedView { * @param sketch the given Classic Quantiles DoublesSketch */ public DoublesSketchSortedView(final DoublesSketch sketch) { + if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } this.totalN = sketch.getN(); + this.maxItem = sketch.getMaxItem(); + this.minItem = sketch.getMinItem(); final int k = sketch.getK(); final int numQuantiles = sketch.getNumRetained(); quantiles = new double[numQuantiles]; @@ -77,6 +92,34 @@ public DoublesSketchSortedView(final DoublesSketch sketch) { if (convertToCumulative(cumWeights) != totalN) { throw new SketchesStateException("Sorted View is misconfigured. 
TotalN does not match cumWeights."); } + final double[] normRanks = new double[numQuantiles]; + for (int i = 0; i < numQuantiles; i++) { normRanks[i] = (double)cumWeights[i] / totalN; } + this.normRanks = normRanks; + } + + @Override + public long[] getCumulativeWeights() { + return cumWeights.clone(); + } + + @Override + public double getMaxItem() { + return maxItem; + } + + @Override + public double getMinItem() { + return minItem; + } + + @Override + public long getN() { + return totalN; + } + + @Override + public double[] getNormalizedRanks() { + return normRanks.clone(); } @Override @@ -84,29 +127,11 @@ public double getQuantile(final double rank, final QuantileSearchCriteria search if (isEmpty()) { throw new IllegalArgumentException(EMPTY_MSG); } QuantilesUtil.checkNormalizedRankBounds(rank); final int len = cumWeights.length; - final double naturalRank = getNaturalRank(rank, totalN); + final double naturalRank = getNaturalRank(rank, totalN, searchCrit); final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT; final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit); if (index == -1) { - return quantiles[quantiles.length - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; - } - return quantiles[index]; - } - - /** - * Special version of getQuantile to support the getPartitionBoundaries(int) function. - * @param weight ultimately comes from selected integral weights computed by the sketch. - * @param searchCrit If INCLUSIVE, the given rank includes all quantiles ≤ - * the quantile directly corresponding to the given weight internal to the sketch. - * @return the approximate quantile given the weight. - */ - double getQuantile(final long weight, final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new IllegalArgumentException(EMPTY_MSG); } - final int len = cumWeights.length; - final InequalitySearch crit = (searchCrit == INCLUSIVE) ? 
InequalitySearch.GE : InequalitySearch.GT; - final int index = InequalitySearch.find(cumWeights, 0, len - 1, weight, crit); - if (index == -1) { - return quantiles[quantiles.length - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; + return quantiles[len - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; } return quantiles[index]; } @@ -123,11 +148,6 @@ public double getRank(final double quantile, final QuantileSearchCriteria search return (double)cumWeights[index] / totalN; } - @Override - public long[] getCumulativeWeights() { - return cumWeights.clone(); - } - @Override public double[] getQuantiles() { return quantiles.clone(); @@ -139,8 +159,8 @@ public boolean isEmpty() { } @Override - public DoublesSketchSortedViewIterator iterator() { - return new DoublesSketchSortedViewIterator(quantiles, cumWeights); + public DoublesSortedViewIterator iterator() { + return new DoublesSortedViewIterator(quantiles, cumWeights); } //restricted methods diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketchSortedViewIterator.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketchSortedViewIterator.java deleted file mode 100644 index f834fb2aa..000000000 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketchSortedViewIterator.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.quantiles; - -import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; - -import org.apache.datasketches.quantilescommon.DoublesSortedViewIterator; -import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; - -/** - * Iterator over DoublesSketchSortedView. - */ -public final class DoublesSketchSortedViewIterator implements DoublesSortedViewIterator { - - private final double[] quantiles; - private final long[] cumWeights; - private final long totalN; - private int index; - - DoublesSketchSortedViewIterator(final double[] quantiles, final long[] cumWeights) { - this.quantiles = quantiles; - this.cumWeights = cumWeights; - this.totalN = (cumWeights.length > 0) ? cumWeights[cumWeights.length - 1] : 0; - index = -1; - } - - @Override - public long getCumulativeWeight(final QuantileSearchCriteria searchCrit) { - if (searchCrit == INCLUSIVE) { return cumWeights[index]; } - return (index == 0) ? 
0 : cumWeights[index - 1]; - } - - @Override - public long getN() { - return totalN; - } - - @Override - public double getNormalizedRank(final QuantileSearchCriteria searchCrit) { - return (double) getCumulativeWeight(searchCrit) / totalN; - } - - @Override - public double getQuantile() { - return quantiles[index]; - } - - @Override - public long getWeight() { - if (index == 0) { return cumWeights[0]; } - return cumWeights[index] - cumWeights[index - 1]; - } - - @Override - public boolean next() { - index++; - return index < quantiles.length; - } - -} diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java index 64f66fde2..6b247347a 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java @@ -36,10 +36,7 @@ import static org.apache.datasketches.quantiles.PreambleUtil.extractN; import static org.apache.datasketches.quantiles.PreambleUtil.extractPreLongs; import static org.apache.datasketches.quantiles.PreambleUtil.extractSerVer; -import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; -import static org.apache.datasketches.quantilescommon.QuantilesUtil.equallySpacedLongs; -import java.lang.reflect.Array; import java.util.Arrays; import java.util.Comparator; import java.util.Objects; @@ -49,7 +46,8 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; -import org.apache.datasketches.quantilescommon.GenericSortedView; +import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; +import org.apache.datasketches.quantilescommon.PartitioningFeature; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; import org.apache.datasketches.quantilescommon.QuantilesAPI; import 
org.apache.datasketches.quantilescommon.QuantilesGenericAPI; @@ -74,25 +72,13 @@ * * @param The sketch data type */ -public final class ItemsSketch implements QuantilesGenericAPI { - +public final class ItemsSketch implements QuantilesGenericAPI, PartitioningFeature { final Class clazz; - private final Comparator comparator_; - final int k_; - long n_; - - /** - * The largest item ever seen in the stream. - */ - T maxItem_; - - /** - * The smallest item ever seen in the stream. - */ - T minItem_; + T maxItem_; //The largest item ever seen in the stream. + T minItem_; //The smallest item ever seen in the stream. /** * In the initial on-heap version, equals combinedBuffer_.length. @@ -132,7 +118,7 @@ public final class ItemsSketch implements QuantilesGenericAPI { /** * Setting the seed makes the results of the sketch deterministic if the input items are * received in exactly the same order. This is only useful when performing test comparisons, - * otherwise is not recommended. + * otherwise, it is not recommended. 
*/ public static final Random rand = new Random(); @@ -220,7 +206,6 @@ public static ItemsSketch getInstance( final boolean empty = checkPreLongsFlagsCap(preambleLongs, flags, memCapBytes); checkFamilyID(familyID); - final ItemsSketch sk = getInstance(clazz, k, comparator); //checks k if (empty) { return sk; } @@ -265,10 +250,7 @@ static ItemsSketch copy(final ItemsSketch sketch) { return qsCopy; } - @Override - public double[] getCDF(final T[] splitPoints) { - return getCDF(splitPoints, INCLUSIVE); - } + //END of Constructors @Override public double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria searchCrit) { @@ -295,25 +277,11 @@ public T getMinItem() { } @Override - public GenericPartitionBoundaries getPartitionBoundaries(final int numEquallyWeighted, + public GenericPartitionBoundaries getPartitionBoundaries(final int numEquallySized, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } refreshSortedView(); - final long[] weights = equallySpacedLongs(1, getN(), numEquallyWeighted); - final T[] boundaries = getQuantiles(weights, searchCrit); - final GenericPartitionBoundaries gpb = new GenericPartitionBoundaries<>(); - gpb.N = this.getN(); - gpb.boundaries = boundaries; - gpb.weights = weights; - final double[] ranks = new double[weights.length]; - for (int i = 0; i < weights.length; i++) { ranks[i] = (double)weights[i] / getN(); } - gpb.ranks = ranks; - return gpb; - } - - @Override - public double[] getPMF(final T[] splitPoints) { - return getPMF(splitPoints, INCLUSIVE); + return classicQisSV.getPartitionBoundaries(numEquallySized, searchCrit); } @Override @@ -323,11 +291,6 @@ public double[] getPMF(final T[] splitPoints, final QuantileSearchCriteria searc return classicQisSV.getPMF(splitPoints, searchCrit); } - @Override - public T getQuantile(final double rank) { - return getQuantile(rank, INCLUSIVE); - } - @Override public T getQuantile(final double rank, final 
QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } @@ -348,36 +311,10 @@ public T getQuantileUpperBound(final double rank) { } @Override - public T[] getQuantiles(final double[] ranks) { - return getQuantiles(ranks, INCLUSIVE); - } - - @Override - @SuppressWarnings("unchecked") public T[] getQuantiles(final double[] ranks, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } refreshSortedView(); - final int len = ranks.length; - final T[] quantiles = (T[]) Array.newInstance(minItem_.getClass(), len); - for (int i = 0; i < len; i++) { - quantiles[i] = classicQisSV.getQuantile(ranks[i], searchCrit); - } - return quantiles; - } - - @SuppressWarnings("unchecked") - private T[] getQuantiles(final long[] weights, final QuantileSearchCriteria crit) { - final int len = weights.length; - final T[] quantiles = (T[]) Array.newInstance(minItem_.getClass(), len); - for (int i = 0; i < len; i++) { - quantiles[i] = classicQisSV.getQuantile(weights[i], crit); - } - return quantiles; - } - - @Override - public double getRank(final T quantile) { - return getRank(quantile, INCLUSIVE); + return classicQisSV.getQuantiles(ranks, searchCrit); } @Override @@ -397,11 +334,6 @@ public double getRankUpperBound(final double rank) { return min(1.0, rank + getNormalizedRankError(k_, false)); } - @Override - public double[] getRanks(final T[] quantiles) { - return getRanks(quantiles, INCLUSIVE); - } - @Override public double[] getRanks(final T[] quantiles, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } @@ -522,11 +454,6 @@ public byte[] toByteArray(final boolean ordered, final ArrayOfItemsSerDe serD return ItemsByteArrayImpl.toByteArray(this, ordered, serDe); } - @Override - public String toString() { - return toString(true, false); - } - /** * Returns summary information about this sketch. 
Used for debugging. * @param sketchSummary if true includes sketch summary @@ -592,7 +519,7 @@ public void putMemory(final WritableMemory dstMem, final ArrayOfItemsSerDe se } @Override - public GenericSortedView getSortedView() { + public ItemsSketchSortedView getSortedView() { if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } return refreshSortedView(); } diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java index 68ec30e36..869b68021 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java @@ -22,18 +22,23 @@ import static org.apache.datasketches.quantilescommon.GenericInequalitySearch.find; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; +import static org.apache.datasketches.quantilescommon.QuantilesUtil.evenlySpacedDoubles; import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank; import java.lang.reflect.Array; import java.util.Arrays; import java.util.Comparator; +import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.quantilescommon.GenericInequalitySearch.Inequality; +import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; import org.apache.datasketches.quantilescommon.GenericSortedView; import org.apache.datasketches.quantilescommon.GenericSortedViewIterator; import org.apache.datasketches.quantilescommon.InequalitySearch; +import org.apache.datasketches.quantilescommon.PartitioningFeature; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; +import org.apache.datasketches.quantilescommon.QuantilesAPI; import 
org.apache.datasketches.quantilescommon.QuantilesUtil; /** @@ -42,11 +47,15 @@ * @author Kevin Lang * @author Alexander Saydakov */ -public class ItemsSketchSortedView implements GenericSortedView { +public class ItemsSketchSortedView implements GenericSortedView, PartitioningFeature { private final T[] quantiles; private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights private final long totalN; private final Comparator comparator; + private final T maxItem; + private final T minItem; + private final Class clazz; + private final double[] normRanks; /** * Construct from elements for testing. @@ -55,15 +64,22 @@ public class ItemsSketchSortedView implements GenericSortedView { * @param totalN the total number of items presented to the sketch. * @param comparator comparator for type T */ + @SuppressWarnings("unchecked") ItemsSketchSortedView( final T[] quantiles, - final long[] cumWeights, + final long[] cumWeights, //or Natural Ranks final long totalN, - final Comparator comparator) { + final Comparator comparator, + final T maxItem, + final T minItem) { this.quantiles = quantiles; this.cumWeights = cumWeights; this.totalN = totalN; this.comparator = comparator; + this.maxItem = maxItem; + this.minItem = minItem; + this.clazz = (Class)quantiles[0].getClass(); + this.normRanks = convertCumWtsToNormRanks(cumWeights, totalN); } /** @@ -72,12 +88,16 @@ public class ItemsSketchSortedView implements GenericSortedView { */ @SuppressWarnings("unchecked") ItemsSketchSortedView(final ItemsSketch sketch) { + if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } this.totalN = sketch.getN(); final int k = sketch.getK(); final int numQuantiles = sketch.getNumRetained(); - quantiles = (T[]) Array.newInstance(sketch.clazz, numQuantiles); + this.quantiles = (T[]) Array.newInstance(sketch.clazz, numQuantiles); + this.minItem = sketch.minItem_; + this.maxItem = sketch.maxItem_; cumWeights = new long[numQuantiles]; 
comparator = sketch.getComparator(); + clazz = sketch.clazz; final Object[] combinedBuffer = sketch.getCombinedBuffer(); final int baseBufferCount = sketch.getBaseBufferCount(); @@ -94,9 +114,12 @@ public class ItemsSketchSortedView implements GenericSortedView { if (convertToCumulative(cumWeights) != totalN) { throw new SketchesStateException("Sorted View is misconfigured. TotalN does not match cumWeights."); } + this.normRanks = convertCumWtsToNormRanks(cumWeights, totalN); } - @Override //implemented here because it needs the comparator + //end of constructors + + @Override public double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new IllegalArgumentException(EMPTY_MSG); } GenericSortedView.validateItems(splitPoints, comparator); @@ -114,7 +137,62 @@ public long[] getCumulativeWeights() { return cumWeights.clone(); } - @Override //implemented here because it needs the comparator + @Override + public T getMaxItem() { + return maxItem; + } + + @Override + public T getMinItem() { + return minItem; + } + + @Override + public long getN() { + return totalN; + } + + @Override + public double[] getNormalizedRanks() { + return normRanks.clone(); + } + + @Override + @SuppressWarnings("unchecked") + public GenericPartitionBoundaries getPartitionBoundaries(final int numEquallySized, + final QuantileSearchCriteria searchCrit) { + if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } + final long totalN = this.totalN; + final int svLen = cumWeights.length; + //adjust ends of sortedView arrays + cumWeights[0] = 1L; + cumWeights[svLen - 1] = totalN; + normRanks[0] = 1.0 / totalN; + normRanks[svLen - 1] = 1.0; + quantiles[0] = this.getMinItem(); + quantiles[svLen - 1] = this.getMaxItem(); + + final double[] evSpNormRanks = evenlySpacedDoubles(0, 1.0, numEquallySized + 1); + final int len = evSpNormRanks.length; + final T[] evSpQuantiles = (T[]) Array.newInstance(clazz, len); + final long[] 
evSpNatRanks = new long[len]; + for (int i = 0; i < len; i++) { + final int index = getQuantileIndex(evSpNormRanks[i], searchCrit); + evSpQuantiles[i] = getQuantileFromIndex(index); + evSpNatRanks[i] = getCumWeightFromIndex(index); + } + final GenericPartitionBoundaries gpb = new GenericPartitionBoundaries<>( + this.totalN, + evSpQuantiles.clone(), + evSpNatRanks.clone(), + evSpNormRanks.clone(), + getMaxItem(), + getMinItem(), + searchCrit); + return gpb; + } + + @Override public double[] getPMF(final T[] splitPoints, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new IllegalArgumentException(EMPTY_MSG); } GenericSortedView.validateItems(splitPoints, comparator); @@ -130,32 +208,32 @@ public double[] getPMF(final T[] splitPoints, final QuantileSearchCriteria searc public T getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new IllegalArgumentException(EMPTY_MSG); } QuantilesUtil.checkNormalizedRankBounds(rank); + final int index = getQuantileIndex(rank, searchCrit); + return getQuantileFromIndex(index); + } + + private T getQuantileFromIndex(final int index) { return quantiles[index]; } + + private long getCumWeightFromIndex(final int index) { return cumWeights[index]; } + + private int getQuantileIndex(final double rank, final QuantileSearchCriteria searchCrit) { final int len = cumWeights.length; - final double naturalRank = getNaturalRank(rank, totalN); + final double naturalRank = getNaturalRank(rank, totalN, searchCrit); final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT; final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit); - if (index == -1) { - return quantiles[quantiles.length - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; - } - return quantiles[index]; + if (index == -1) { return len - 1; } + return index; } - /** - * Special version of getQuantile to support the getPartitionBoundaries(int) function. 
- * @param weight ultimately comes from selected integral weights computed by the sketch. - * @param searchCrit If INCLUSIVE, the given rank includes all quantiles ≤ - * the quantile directly corresponding to the given weight internal to the sketch. - * @return the approximate quantile given the weight. - */ - T getQuantile(final long weight, final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new IllegalArgumentException(EMPTY_MSG); } - final int len = cumWeights.length; - final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT; - final int index = InequalitySearch.find(cumWeights, 0, len - 1, weight, crit); - if (index == -1) { - return quantiles[quantiles.length - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; + @SuppressWarnings("unchecked") + public T[] getQuantiles(final double[] ranks, final QuantileSearchCriteria searchCrit) { + if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } + final int len = ranks.length; + final T[] quants = (T[]) Array.newInstance(clazz, len); + for (int i = 0; i < len; i++) { + quants[i] = getQuantile(ranks[i], searchCrit); } - return quantiles[index]; + return quants; } @Override @@ -181,8 +259,8 @@ public boolean isEmpty() { } @Override - public ItemsSketchSortedViewIterator iterator() { - return new ItemsSketchSortedViewIterator<>(quantiles, cumWeights); + public GenericSortedViewIterator iterator() { + return new GenericSortedViewIterator<>(quantiles, cumWeights); } //restricted methods @@ -236,6 +314,13 @@ private final static void populateFromItemsSketch( Arrays.sort(quantilesArr, startOfBaseBufferBlock, numQuantiles, comparator); } + private static double[] convertCumWtsToNormRanks(final long[] cumWeights, final long totalN) { + final int len = cumWeights.length; + final double[] normRanks = new double[len]; + for (int i = 0; i < len; i++) { normRanks[i] = (double)cumWeights[i] / totalN; } + return normRanks; + } + /** * Convert the 
individual weights into cumulative weights. * An array of {1,1,1,1} becomes {1,2,3,4} @@ -251,15 +336,4 @@ private static long convertToCumulative(final long[] array) { return subtotal; } - /** - * Iterator over ItemsSketchSortedView. - * @param type of quantile (item) - */ - public static final class ItemsSketchSortedViewIterator extends GenericSortedViewIterator { - - ItemsSketchSortedViewIterator(final T[] quantiles, final long[] cumWeights) { - super(quantiles, cumWeights); - } - } - } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java index 8c299321e..bdc3cc75c 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java @@ -20,7 +20,7 @@ package org.apache.datasketches.quantilescommon; /** - * The Sorted View for quantiles of primitive type double. + * The Sorted View for quantile sketches of primitive type double. * @see SortedView * @author Alexander Saydakov * @author Lee Rhodes @@ -71,6 +71,24 @@ default double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit) return buckets; } + /** + * Returns the maximum item of the stream. This may be distinct from the largest item retained by the + * sketch algorithm. + * + * @return the maximum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + double getMaxItem(); + + /** + * Returns the minimum item of the stream. This may be distinct from the smallest item retained by the + * sketch algorithm. + * + * @return the minimum item of the stream + * @throws IllegalArgumentException if sketch is empty. 
+ */ + double getMinItem(); + /** * Returns an approximation to the Probability Mass Function (PMF) of the input stream * as an array of probability masses as doubles on the interval [0.0, 1.0], diff --git a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedViewIterator.java b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedViewIterator.java index df9c41f23..da112dc2e 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedViewIterator.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedViewIterator.java @@ -20,12 +20,15 @@ package org.apache.datasketches.quantilescommon; /** - * The quantiles SortedView iterator for type double. - * @see SortedViewIterator - * @author Alexander Saydakov - * @author Lee Rhodes + * Iterator over quantile sketches of primitive type double. */ -public interface DoublesSortedViewIterator extends SortedViewIterator { +public final class DoublesSortedViewIterator extends SortedViewIterator { + private final double[] quantiles; + + public DoublesSortedViewIterator(final double[] quantiles, final long[] cumWeights) { + super(cumWeights); + this.quantiles = quantiles; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter + } /** * Gets the quantile at the current index. @@ -35,7 +38,8 @@ public interface DoublesSortedViewIterator extends SortedViewIterator { * * @return the quantile at the current index. 
*/ - double getQuantile(); + public double getQuantile() { + return quantiles[index]; + } } - diff --git a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java index 7127b5928..0a0c54b5a 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java @@ -71,6 +71,24 @@ default double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit) return buckets; } + /** + * Returns the maximum item of the stream. This may be distinct from the largest item retained by the + * sketch algorithm. + * + * @return the maximum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + float getMaxItem(); + + /** + * Returns the minimum item of the stream. This may be distinct from the smallest item retained by the + * sketch algorithm. + * + * @return the minimum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + float getMinItem(); + /** * Returns an approximation to the Probability Mass Function (PMF) of the input stream * as an array of probability masses as doubles on the interval [0.0, 1.0], diff --git a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedViewIterator.java b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedViewIterator.java index ff6203f45..a40bacef1 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedViewIterator.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedViewIterator.java @@ -20,12 +20,15 @@ package org.apache.datasketches.quantilescommon; /** - * The quantiles SortedView Iterator for type float. - * @see SortedViewIterator - * @author Alexander Saydakov - * @author Lee Rhodes + * Iterator over quantile sketches of primitive type float. 
*/ -public interface FloatsSortedViewIterator extends SortedViewIterator { +public final class FloatsSortedViewIterator extends SortedViewIterator { + private final float[] quantiles; + + public FloatsSortedViewIterator(final float[] quantiles, final long[] cumWeights) { + super(cumWeights); + this.quantiles = quantiles; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter + } /** * Gets the quantile at the current index. @@ -35,7 +38,8 @@ public interface FloatsSortedViewIterator extends SortedViewIterator { * * @return the quantile at the current index. */ - float getQuantile(); + public float getQuantile() { + return quantiles[index]; + } } - diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java new file mode 100644 index 000000000..733f7846d --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.quantilescommon; + +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; + +import org.apache.datasketches.common.SketchesStateException; + +/** + * Implements PartitionBoundaries + */ +public class GenericPartitionBoundaries implements PartitionBoundaries { + private long totalN; //totalN of source sketch + private T[] boundaries; //quantiles at the boundaries + private long[] natRanks; //natural ranks at the boundaries + private double[] normRanks; //normalized ranks at the boundaries + private T maxItem; //of the source sketch + private T minItem; //of the source sketch + private QuantileSearchCriteria searchCrit; //of the source sketch query to getPartitionBoundaries. + //computed + private long[] numDeltaItems; //num of items in each part + private int numPartitions; //num of partitions + + public GenericPartitionBoundaries( + final long totalN, + final T[] boundaries, + final long[] natRanks, + final double[] normRanks, + final T maxItem, + final T minItem, + final QuantileSearchCriteria searchCrit) { + this.totalN = totalN; + this.boundaries = boundaries; + this.natRanks = natRanks; + this.normRanks = normRanks; + this.maxItem = maxItem; + this.minItem = minItem; + this.searchCrit = searchCrit; + //check and compute + final int len = boundaries.length; + if (len < 2) { throw new SketchesStateException("Source sketch is empty"); } + numDeltaItems = new long[len]; + numDeltaItems[0] = 0; // index 0 is always 0 + for (int i = 1; i < len; i++) { + final int addOne = ( (i == 1 && (this.searchCrit == INCLUSIVE)) + || ((i == (len - 1)) && this.searchCrit == EXCLUSIVE) ) ? 
1 : 0; + numDeltaItems[i] = natRanks[i] - natRanks[i - 1] + addOne; + } + this.numPartitions = len - 1; + } + + @Override + public long getN() { return totalN; } + + /** + * Gets an ordered array of boundaries that sequentially define the upper and lower boundaries of partitions. + * These partitions are to be constructed by an external process. Each boundary is essentially a reference and + * should uniquely identify an item or a set of identical items from the original stream of data fed to the + * originating sketch. + * + *

Assume boundaries array has size N + 1. Let the indices be sequentially numbered from 0 to N. + * The number of partitions is always one less than the size of the boundaries array. + * Let the partitions be sequentially numbered from 1 to N. + * + *

If these results were computed using QuantileSearchCriteria.INCLUSIVE then these sequential boundaries + * are to be interpreted as follows: + *

    + *
  • Partition 1: include all items >= index 0 and <= index 1.
  • + *
  • Partition 2: include all items > index 1 and <= index 2.
  • + *
  • Partition N: include all items > index N-1 and <= index N.
  • + *
+ * + *

If these results were computed using QuantileSearchCriteria.EXCLUSIVE then these sequential boundaries + * are to be interpreted as follows: + *

    + *
  • Partition 1: include all items >= index 0 and < index 1.
  • + *
  • Partition 2: include all items >= index 1 and < index 2.
  • + *
  • Partition N: include all items >= index N-1 and <= index N.
  • + *
+ * + * @return an array of boundaries that sequentially define the upper and lower boundaries of partitions. + */ + public T[] getBoundaries() { return boundaries; } + + @Override + public long[] getNaturalRanks() { return natRanks; } + + @Override + public double[] getNormalizedRanks() { return normRanks; } + + @Override + public long[] getNumDeltaItems() { return numDeltaItems; } + + @Override + public int getNumPartitions() { return numPartitions; } + + /** + * Returns the maximum item of the stream. This may be distinct from the largest item retained by the + * sketch algorithm. + * + * @return the maximum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + public T getMaxItem() { return maxItem; } + + /** + * Returns the minimum item of the stream. This may be distinct from the smallest item retained by the + * sketch algorithm. + * + * @return the minimum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + public T getMinItem() { return minItem; } + + @Override + public QuantileSearchCriteria getSearchCriteria() { return searchCrit; } + +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java index 452467bb7..e3d89a6e2 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java @@ -69,6 +69,24 @@ public interface GenericSortedView extends SortedView { */ double[] getCDF(T[] splitPoints, QuantileSearchCriteria searchCrit); + /** + * Returns the maximum item of the stream. This may be distinct from the largest item retained by the + * sketch algorithm. + * + * @return the maximum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + T getMaxItem(); + + /** + * Returns the minimum item of the stream. 
This may be distinct from the smallest item retained by the + * sketch algorithm. + * + * @return the minimum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + T getMinItem(); + /** * Returns an approximation to the Probability Mass Function (PMF) of the input stream * as an array of probability masses as doubles on the interval [0.0, 1.0], @@ -126,8 +144,8 @@ public interface GenericSortedView extends SortedView { T getQuantile(double rank, QuantileSearchCriteria searchCrit); /** - * Returns the array of quantiles. - * @return the array of quantiles. + * Returns the full array of quantiles. + * @return the full array of quantiles. */ T[] getQuantiles(); @@ -169,4 +187,3 @@ static void validateItems(final T[] items, final Comparator compa } } - diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedViewIterator.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedViewIterator.java index 69b454a92..5a5c00e26 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedViewIterator.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedViewIterator.java @@ -19,58 +19,28 @@ package org.apache.datasketches.quantilescommon; -import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; - /** - * The quantiles SortedView Iterator for generic types. - * @see SortedViewIterator + * Iterator over quantile sketches of generic type. 
* @param The generic quantile type - * @author Alexander Saydakov - * @author Lee Rhodes */ -public class GenericSortedViewIterator implements SortedViewIterator { +public class GenericSortedViewIterator extends SortedViewIterator { private final T[] quantiles; - private final long[] cumWeights; - private final long totalN; - private int index; public GenericSortedViewIterator(final T[] quantiles, final long[] cumWeights) { - this.quantiles = quantiles; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter - this.cumWeights = cumWeights; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter - this.totalN = (cumWeights.length > 0) ? cumWeights[cumWeights.length - 1] : 0; - index = -1; - } - - @Override - public long getCumulativeWeight(final QuantileSearchCriteria searchCrit) { - if (searchCrit == INCLUSIVE) { return cumWeights[index]; } - return (index == 0) ? 0 : cumWeights[index - 1]; + super(cumWeights); + this.quantiles = quantiles; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter } + /** + * Gets the quantile at the current index. + * + *

Don't call this before calling next() for the first time + * or after getting false from next().

+ * + * @return the quantile at the current index. + */ public T getQuantile() { return quantiles[index]; } - @Override - public long getN() { - return totalN; - } - - @Override - public double getNormalizedRank(final QuantileSearchCriteria searchCrit) { - return (double) getCumulativeWeight(searchCrit) / totalN; - } - - @Override - public long getWeight() { - if (index == 0) { return cumWeights[0]; } - return cumWeights[index] - cumWeights[index - 1]; - } - - @Override - public boolean next() { - index++; - return index < quantiles.length; - } - } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/PartitionBoundaries.java b/src/main/java/org/apache/datasketches/quantilescommon/PartitionBoundaries.java new file mode 100644 index 000000000..e3c59d2c7 --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/PartitionBoundaries.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +/** + * This defines a set of results computed from the getPartitionBoundaries() function and + * encapsulates the basic methods needed to construct actual partitions based on generic items.
+ */ +public interface PartitionBoundaries { + + /** + * Gets the length of the input stream offered to the underlying sketch. + * @return the length of the input stream offered to the underlying sketch. + */ + long getN(); + + /** + * Gets an ordered array of natural ranks of the associated array of partition boundaries utilizing + * a specified search criterion. Natural ranks are integral values on the interval [1, N] + * @return an array of natural ranks. + */ + long[] getNaturalRanks(); + + /** + * Gets an ordered array of normalized ranks of the associated array of partition boundaries utilizing + * a specified search criterion. Normalized ranks are double values on the interval [0.0, 1.0]. + * @return an array of normalized ranks. + */ + double[] getNormalizedRanks(); + + /** + * Gets the number of items to be included for each partition as an array. + * The count at index 0 is 0. The number of items included in the first partition, defined by the boundaries at + * index 0 and index 1, is at index 1 in this array, etc. + * @return the number of items to be included for each partition as an array. + */ + long[] getNumDeltaItems(); + + /** + * Gets the number of partitions + * @return the number of partitions + */ + int getNumPartitions(); + + /** + * Gets the search criteria specified for the source sketch + * @return The search criteria specified for the source sketch + */ + QuantileSearchCriteria getSearchCriteria(); +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java b/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java new file mode 100644 index 000000000..3ff51a3b4 --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/PartitioningFeature.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; + +/** + * This enables the special functions for performing efficient partitioning of massive data. + */ +public interface PartitioningFeature { + + /** + * This method returns an instance of + * {@link GenericPartitionBoundaries GenericPartitionBoundaries} which provides + * sufficient information for the user to create the given number of equally sized partitions, where "equally sized" + * refers to an approximately equal number of items per partition. + * + *

This method is equivalent to + * {@link #getPartitionBoundaries(int, QuantileSearchCriteria) getPartitionBoundaries(numEquallySized, INCLUSIVE)}. + *

+ * + * @param numEquallySized an integer that specifies the number of equally sized partitions between + * {@link #getMinItem() getMinItem()} and {@link #getMaxItem() getMaxItem()}. + * This must be a positive integer greater than zero. + *
    + *
  • A 1 will return: minItem, maxItem.
  • + *
  • A 2 will return: minItem, median quantile, maxItem.
  • + *
  • Etc.
  • + *
+ * + * @return an instance of {@link GenericPartitionBoundaries GenericPartitionBoundaries}. + * @throws IllegalArgumentException if sketch is empty. + * @throws IllegalArgumentException if numEquallySized is less than 1. + */ + default GenericPartitionBoundaries getPartitionBoundaries(int numEquallySized) { + return getPartitionBoundaries(numEquallySized, INCLUSIVE); + } + + /** + * This method returns an instance of + * {@link GenericPartitionBoundaries GenericPartitionBoundaries} which provides + * sufficient information for the user to create the given number of equally sized partitions, where "equally sized" + * refers to an approximately equal number of items per partition. + * + * @param numEquallySized an integer that specifies the number of equally sized partitions between + * {@link #getMinItem() getMinItem()} and {@link #getMaxItem() getMaxItem()}. + * This must be a positive integer greater than zero. + *
    + *
  • A 1 will return: minItem, maxItem.
  • + *
  • A 2 will return: minItem, median quantile, maxItem.
  • + *
  • Etc.
  • + *
+ * + * @param searchCrit + * If INCLUSIVE, all the returned quantiles are the upper boundaries of the equally sized partitions + * with the exception of the lowest returned quantile, which is the lowest boundary of the lowest ranked partition. + * If EXCLUSIVE, all the returned quantiles are the lower boundaries of the equally sized partitions + * with the exception of the highest returned quantile, which is the upper boundary of the highest ranked partition. + * + * @return an instance of {@link GenericPartitionBoundaries GenericPartitionBoundaries}. + * @throws IllegalArgumentException if sketch is empty. + * @throws IllegalArgumentException if numEquallySized is less than 1. + */ + GenericPartitionBoundaries getPartitionBoundaries(int numEquallySized, QuantileSearchCriteria searchCrit); + +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java index 74e5d8061..38502ecaa 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java @@ -219,8 +219,8 @@ public interface QuantilesAPI { int getK(); /** - * Gets the length of the input stream. - * @return the length of the input stream. + * Gets the length of the input stream offered to the sketch. + * @return the length of the input stream offered to the sketch.
*/ long getN(); diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java index a70b08372..31a5bedf9 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java @@ -92,56 +92,6 @@ default double[] getCDF(double[] splitPoints) { */ double getMinItem(); - /** - * This method returns an instance of {@link DoublesPartitionBoundaries DoublesPartitionBoundaries} which provides - * sufficient information for the user to create the given number of equally weighted partitions. - * - *

This method is equivalent to - * {@link #getPartitionBoundaries(int, QuantileSearchCriteria) getPartitionBoundaries(numEquallyWeighted, INCLUSIVE)}. - *

- * - * @param numEquallyWeighted an integer that specifies the number of equally weighted partitions between - * {@link #getMinItem() getMinItem()} and {@link #getMaxItem() getMaxItem()}. - * This must be a positive integer greater than zero. - *
    - *
  • A 1 will return: minItem, maxItem.
  • - *
  • A 2 will return: minItem, median quantile, maxItem.
  • - *
  • Etc.
  • - *
- * - * @return an instance of {@link DoublesPartitionBoundaries DoublesPartitionBoundaries}. - * @throws IllegalArgumentException if sketch is empty. - * @throws IllegalArgumentException if numEquallyWeighted is less than 1. - */ - default DoublesPartitionBoundaries getPartitionBoundaries(int numEquallyWeighted) { - return getPartitionBoundaries(numEquallyWeighted, INCLUSIVE); - } - - /** - * This method returns an instance of {@link DoublesPartitionBoundaries DoublesPartitionBoundaries} which provides - * sufficient information for the user to create the given number of equally weighted partitions. - * - * @param numEquallyWeighted an integer that specifies the number of equally weighted partitions between - * {@link #getMinItem() getMinItem()} and {@link #getMaxItem() getMaxItem()}. - * This must be a positive integer greater than zero. - *
    - *
  • A 1 will return: minItem, maxItem.
  • - *
  • A 2 will return: minItem, median quantile, maxItem.
  • - *
  • Etc.
  • - *
- * - * @param searchCrit - * If INCLUSIVE, all the returned quantiles are the upper boundaries of the equally weighted partitions - * with the exception of the lowest returned quantile, which is the lowest boundary of the lowest ranked partition. - * If EXCLUSIVE, all the returned quantiles are the lower boundaries of the equally weighted partitions - * with the exception of the highest returned quantile, which is the upper boundary of the highest ranked partition. - * - * @return an instance of {@link DoublesPartitionBoundaries DoublesPartitionBoundaries}. - * @throws IllegalArgumentException if sketch is empty. - * @throws IllegalArgumentException if numEquallyWeighted is less than 1. - */ - DoublesPartitionBoundaries getPartitionBoundaries(int numEquallyWeighted, QuantileSearchCriteria searchCrit); - /** * This is equivalent to {@link #getPMF(double[], QuantileSearchCriteria) getPMF(splitPoints, INCLUSIVE)} * @param splitPoints an array of m unique, monotonically increasing items. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java index c6ea484cc..2fcbdd99f 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java @@ -91,56 +91,6 @@ default double[] getCDF(float[] splitPoints) { */ float getMinItem(); - /** - * This method returns an instance of {@link FloatsPartitionBoundaries FloatsPartitionBoundaries} which provides - * sufficient information for the user to create the given number of equally weighted partitions. - * - *

This method is equivalent to - * {@link #getPartitionBoundaries(int, QuantileSearchCriteria) getPartitionBoundaries(numEquallyWeighted, INCLUSIVE)}. - *

- * - * @param numEquallyWeighted an integer that specifies the number of equally weighted partitions between - * {@link #getMinItem() getMinItem()} and {@link #getMaxItem() getMaxItem()}. - * This must be a positive integer greater than zero. - *
    - *
  • A 1 will return: minItem, maxItem.
  • - *
  • A 2 will return: minItem, median quantile, maxItem.
  • - *
  • Etc.
  • - *
- * - * @return an instance of {@link FloatsPartitionBoundaries FloatsPartitionBoundaries}. - * @throws IllegalArgumentException if sketch is empty. - * @throws IllegalArgumentException if numEquallyWeighted is less than 1. - */ - default FloatsPartitionBoundaries getPartitionBoundaries(int numEquallyWeighted) { - return getPartitionBoundaries(numEquallyWeighted, INCLUSIVE); - } - - /** - * This method returns an instance of {@link FloatsPartitionBoundaries FloatsPartitionBoundaries} which provides - * sufficient information for the user to create the given number of equally weighted partitions. - * - * @param numEquallyWeighted an integer that specifies the number of equally weighted partitions between - * {@link #getMinItem() getMinItem()} and {@link #getMaxItem() getMaxItem()}. - * This must be a positive integer greater than zero. - *
    - *
  • A 1 will return: minItem, maxItem.
  • - *
  • A 2 will return: minItem, median quantile, maxItem.
  • - *
  • Etc.
  • - *
- * - * @param searchCrit - * If INCLUSIVE, all the returned quantiles are the upper boundaries of the equally weighted partitions - * with the exception of the lowest returned quantile, which is the lowest boundary of the lowest ranked partition. - * If EXCLUSIVE, all the returned quantiles are the lower boundaries of the equally weighted partitions - * with the exception of the highest returned quantile, which is the upper boundary of the highest ranked partition. - * - * @return an instance of {@link FloatsPartitionBoundaries FloatsPartitionBoundaries}. - * @throws IllegalArgumentException if sketch is empty. - * @throws IllegalArgumentException if numEquallyWeighted is less than 1. - */ - FloatsPartitionBoundaries getPartitionBoundaries(int numEquallyWeighted, QuantileSearchCriteria searchCrit); - /** * This is equivalent to {@link #getPMF(float[], QuantileSearchCriteria) getPMF(splitPoints, INCLUSIVE)} * @param splitPoints an array of m unique, monotonically increasing items. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java index f8dd8e62d..fbd7f691f 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java @@ -92,58 +92,6 @@ default double[] getCDF(T[] splitPoints) { */ T getMinItem(); - /** - * This method returns an instance of - * {@link GenericPartitionBoundaries GenericPartitionBoundaries} which provides - * sufficient information for the user to create the given number of equally weighted partitions. - * - *

<p>This method is equivalent to - * {@link #getPartitionBoundaries(int, QuantileSearchCriteria) getPartitionBoundaries(numEquallyWeighted, INCLUSIVE)}. - * </p>

- * - * @param numEquallyWeighted an integer that specifies the number of equally weighted partitions between - * {@link #getMinItem() getMinItem()} and {@link #getMaxItem() getMaxItem()}. - * This must be a positive integer greater than zero. - *
<ul>
- * <li>A 1 will return: minItem, maxItem.</li>
- * <li>A 2 will return: minItem, median quantile, maxItem.</li>
- * <li>Etc.</li>
- * </ul>
- * - * @return an instance of {@link GenericPartitionBoundaries GenericPartitionBoundaries}. - * @throws IllegalArgumentException if sketch is empty. - * @throws IllegalArgumentException if numEquallyWeighted is less than 1. - */ - default GenericPartitionBoundaries getPartitionBoundaries(int numEquallyWeighted) { - return getPartitionBoundaries(numEquallyWeighted, INCLUSIVE); - } - - /** - * This method returns an instance of - * {@link GenericPartitionBoundaries GenericPartitionBoundaries} which provides - * sufficient information for the user to create the given number of equally weighted partitions. - * - * @param numEquallyWeighted an integer that specifies the number of equally weighted partitions between - * {@link #getMinItem() getMinItem()} and {@link #getMaxItem() getMaxItem()}. - * This must be a positive integer greater than zero. - *
<ul>
- * <li>A 1 will return: minItem, maxItem.</li>
- * <li>A 2 will return: minItem, median quantile, maxItem.</li>
- * <li>Etc.</li>
- * </ul>
- * - * @param searchCrit - * If INCLUSIVE, all the returned quantiles are the upper boundaries of the equally weighted partitions - * with the exception of the lowest returned quantile, which is the lowest boundary of the lowest ranked partition. - * If EXCLUSIVE, all the returned quantiles are the lower boundaries of the equally weighted partitions - * with the exception of the highest returned quantile, which is the upper boundary of the highest ranked partition. - * - * @return an instance of {@link GenericPartitionBoundaries GenericPartitionBoundaries}. - * @throws IllegalArgumentException if sketch is empty. - * @throws IllegalArgumentException if numEquallyWeighted is less than 1. - */ - GenericPartitionBoundaries getPartitionBoundaries(int numEquallyWeighted, QuantileSearchCriteria searchCrit); - /** * This is equivalent to {@link #getPMF(Object[], QuantileSearchCriteria) getPMF(splitPoints, INCLUSIVE)} * @param splitPoints an array of m unique, monotonically increasing items. @@ -337,47 +285,5 @@ default double[] getRanks(T[] quantiles) { */ void update(T item); - /** - * This encapsulates the essential information needed to construct actual partitions and is returned from the - * getPartitionBoundaries(int, QuantileSearchCritera) method. - * @param generic value T for the item type - */ - static class GenericPartitionBoundaries { - - /** - * The total number of items presented to the sketch. - * - *

<p>To compute the weight or density of a specific - * partition i where i varies from 1 to m partitions: - *
<pre>{@code
-     * long N = getN();
-     * double[] ranks = getRanks();
-     * long weight = Math.round((ranks[i] - ranks[i - 1]) * N);
-     * }</pre>
- */ - public long N; - - /** - * The normalized ranks that correspond to the returned boundaries. - * The returned array is of size (m + 1), where m is the requested number of partitions. - * Index 0 of the returned array is always 0.0, and index m is always 1.0. - */ - public double[] ranks; - - /** - * The cumulative weights that correspond to the returned boundaries. - * The returned array is of size (m + 1), where m is the requested number of partitions. - * Index 0 of the returned array is always 1, and index m is always n. - */ - public long[] weights; - - /** - * The partition boundaries as quantiles. - * The returned array is of size (m + 1), where m is the requested number of partitions. - * Index 0 of the returned array is always {@link #getMinItem() getMinItem()}, and index m is always - * {@link #getMaxItem() getMaxItem()}. - */ - public T[] boundaries; - } } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java index 848ee3105..a35aa27cd 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java @@ -21,6 +21,7 @@ import static java.lang.Math.log; import static java.lang.Math.pow; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; import java.util.Objects; @@ -208,18 +209,17 @@ public static double[] evenlyLogSpaced(final double value1, final double value2, return arr; } - public static double maxPrecision; - - public static double getNaturalRank(final double normalizedRank, final long totalN) { - final double naturalRank = normalizedRank * totalN; - if (totalN <= 1_000_000L) { - final double precision = Util.ceilingPowerBaseOfDouble(10.0, totalN) ; - maxPrecision = precision; - final double trimmedNatRank = Math.round(naturalRank * precision) / precision; - return trimmedNatRank; - } else { - return 
naturalRank; + public static final double tailRoundingFactor = 1e7; + + public static double getNaturalRank( + final double normalizedRank, + final long totalN, + final QuantileSearchCriteria searchCrit) { + double naturalRank = (normalizedRank * totalN); + if (totalN <= tailRoundingFactor) { + naturalRank = Math.round(naturalRank * tailRoundingFactor) / tailRoundingFactor; } + return (searchCrit == INCLUSIVE) ? (long)Math.ceil(naturalRank) : (long)Math.floor(naturalRank); } } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/SortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/SortedView.java index 434b548a9..92acfb2d4 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/SortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/SortedView.java @@ -20,19 +20,15 @@ package org.apache.datasketches.quantilescommon; /** - * This is the base interface for the Sorted View interface hierarchy. + * This is the base interface for the Sorted View interface hierarchy and defines the methods that are type independent. * - *

<p>The Sorted View provides a view of the data retained by a quantiles-type sketch - * that would be cumbersome to get any other way. - * One can iterate over the contents of the sketch using the sketch's iterator, but the result is not sorted.</p>

+ *

<p>The SortedView interface hierarchy provides a sorted view of the data retained by a quantiles-type sketch that + * would be cumbersome to get any other way. + * One could use the sketch's iterator to iterate over the contents of the sketch, + * but the result would not be sorted.</p>

* - *

<p>Once this sorted view has been created, it provides not only a sorted view of the data retained by the sketch - * but also the basic queries, such as getRank(), getQuantile(), and getCDF() and getPMF(). - * In addition, the iterator obtained from this sorted view provides useful detailed information about each entry.</p>

- * - *

<p>The data from a Sorted view is an unbiased sample of the input stream that can be used for other kinds of - * analysis not directly provided by the sketch. For example, comparing two sketches using the Kolmogorov-Smirnov - * test.</p>

+ *

<p>The data from a Sorted view is an unbiased random sample of the input stream that can be used for other kinds of + * analysis not directly provided by the sketch.</p>

* * @author Alexander Saydakov * @author Lee Rhodes @@ -40,11 +36,25 @@ public interface SortedView { /** - * Returns the array of cumulative weights - * @return the array of cumulative weights + * Returns the array of cumulative weights from the sketch. + * Also known as the natural ranks, which are the Natural Numbers on the interval [1, N]. + * @return the array of cumulative weights (or natural ranks). */ long[] getCumulativeWeights(); + /** + * Returns the array of normalized ranks. The normalized ranks are the natural ranks divided by N. + * The normalized ranks are fractional numbers on the interval (0,1.0]. + * @return the array of normalized ranks. + */ + double[] getNormalizedRanks(); + + /** + * Returns the total number of items presented to the sourcing sketch. + * @return the total number of items presented to the sourcing sketch. + */ + long getN(); + /** * Returns true if this sorted view is empty. * @return true if this sorted view is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/SortedViewIterator.java b/src/main/java/org/apache/datasketches/quantilescommon/SortedViewIterator.java index b36a2594e..06c298d4e 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/SortedViewIterator.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/SortedViewIterator.java @@ -19,6 +19,8 @@ package org.apache.datasketches.quantilescommon; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; + /** * This is the base interface for the SortedViewIterator hierarchy used with a SortedView obtained * from a quantile-type sketch. 
This provides an ordered iterator over the retained quantiles of @@ -35,30 +37,47 @@ * @author Alexander Saydakov * @author Lee Rhodes */ -public interface SortedViewIterator { +public class SortedViewIterator { + protected final long[] cumWeights; + protected long totalN; + protected int index; + + SortedViewIterator(final long[] cumWeights) { + this.cumWeights = cumWeights; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter + this.totalN = (cumWeights.length > 0) ? cumWeights[cumWeights.length - 1] : 0; + index = -1; + } /** - * Gets the cumulative weight at the current index (or previous index) based on the chosen search criterion. - * This is also referred to as the "Natural Rank". + * Gets the natural rank at the current index (or previous index) based on the chosen search criterion. + * This is also referred to as the "cumulative weight". The natural rank is a number in the range [1, N], + * where N ({@link #getN()}) is the total number of items fed to the sketch. * *

<p>Don't call this before calling next() for the first time * or after getting false from next().</p>

* - * @param searchCrit if INCLUSIVE, includes the weight at the current index in the cumulative sum. - * Otherwise, it will return the cumulative weight of the previous index. - * @return cumulative weight at the current index on the chosen search criterion. + * @param searchCrit if INCLUSIVE, includes the weight of the item at the current index in the computation of + * the natural rank. + * Otherwise, it will return the natural rank of the previous index. + * @return the natural rank at the current index (or previous index) based on the chosen search criterion. */ - long getCumulativeWeight(QuantileSearchCriteria searchCrit); + public long getNaturalRank(final QuantileSearchCriteria searchCrit) { + if (searchCrit == INCLUSIVE) { return cumWeights[index]; } + return (index == 0) ? 0 : cumWeights[index - 1]; + } /** * Gets the total count of all items presented to the sketch. * @return the total count of all items presented to the sketch. */ - long getN(); + public long getN() { + return totalN; + } /** * Gets the normalized rank at the current index (or previous index) - * based on the chosen search criterion. + * based on the chosen search criterion. Where normalized rank = natural rank / N ({@link #getN()}) + * and is a fraction in the range (0,1.0]. * *

<p>Don't call this before calling next() for the first time * or after getting false from next().</p>

@@ -68,24 +87,32 @@ public interface SortedViewIterator { * @return the normalized rank at the current index (or previous index) * based on the chosen search criterion. */ - double getNormalizedRank(QuantileSearchCriteria searchCrit); + public double getNormalizedRank(final QuantileSearchCriteria searchCrit) { + return (double) getNaturalRank(searchCrit) / totalN; + } /** - * Gets the natural weight at the current index. + * Gets the weight contribution of the item at the current index. * *

<p>Don't call this before calling next() for the first time * or after getting false from next().</p>

* - * @return the natural weight at the current index. + * @return the weight contribution of the item at the current index. */ - long getWeight(); + public long getWeight() { + if (index == 0) { return cumWeights[0]; } + return cumWeights[index] - cumWeights[index - 1]; + } /** * Advances the index and checks if it is valid. * The state of this iterator is undefined before the first call of this method. * @return true if the next index is valid. */ - boolean next(); + public boolean next() { + index++; + return index < cumWeights.length; + } } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/Stack.java b/src/main/java/org/apache/datasketches/quantilescommon/Stack.java new file mode 100644 index 000000000..68d6378b5 --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/Stack.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +import java.util.ArrayList; + +import org.apache.datasketches.common.SketchesStateException; + +/** + * A classic LIFO stack based on ArrayList (as opposed to Vector). + * All of the methods of ArrayList are available. 
+ */ +public class Stack extends ArrayList { + private static final long serialVersionUID = 1L; + + /** + * Creates an empty stack. + */ + public Stack() { } + + /** + * Pushes an item onto the stack + * @param item the given item + * @return the given element + */ + public E push(final E item) { + add(item); + return item; + } + + /** + * Removes the item at the top of the stack. + * @return the item at the top of the stack. + */ + public E pop() { + final E item = peek(); + remove(size() - 1); + return item; + } + + /** + * Allows examination of the top item without removing it. + * @return the top item without removing it + */ + public E peek() { + final int len = size(); + if (len == 0) { throw new SketchesStateException("Stack is empty"); } + return get(len - 1); + } + +} diff --git a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java index 7c11ee2ab..e587cd633 100644 --- a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java +++ b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java @@ -19,11 +19,8 @@ package org.apache.datasketches.req; -import static org.apache.datasketches.quantilescommon.QuantilesUtil.equallySpacedDoubles; - import org.apache.datasketches.quantilescommon.FloatsSortedView; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; -import org.apache.datasketches.quantilescommon.QuantilesAPI; import org.apache.datasketches.quantilescommon.QuantilesFloatsAPI; import org.apache.datasketches.quantilescommon.QuantilesFloatsSketchIterator; @@ -62,21 +59,6 @@ abstract class BaseReqSketch implements QuantilesFloatsAPI { @Override public abstract float getMinItem(); - @Override - public FloatsPartitionBoundaries getPartitionBoundaries(final int numEquallyWeighted, - final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } - final double[] ranks = 
equallySpacedDoubles(numEquallyWeighted); - final float[] boundaries = getQuantiles(ranks, searchCrit); - boundaries[0] = getMinItem(); - boundaries[boundaries.length - 1] = getMaxItem(); - final FloatsPartitionBoundaries fpb = new FloatsPartitionBoundaries(); - fpb.N = this.getN(); - fpb.ranks = ranks; - fpb.boundaries = boundaries; - return fpb; - } - /** * Returns an a priori estimate of relative standard error (RSE, expressed as a number in [0,1]). * Derived from Lemma 12 in https://arxiv.org/abs/2004.01668v2, but the constant factors were diff --git a/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java b/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java index f06461650..dbf14be6d 100644 --- a/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java @@ -20,11 +20,14 @@ package org.apache.datasketches.req; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank; import java.util.List; +import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.quantilescommon.FloatsSortedView; +import org.apache.datasketches.quantilescommon.FloatsSortedViewIterator; import org.apache.datasketches.quantilescommon.InequalitySearch; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; import org.apache.datasketches.quantilescommon.QuantilesAPI; @@ -39,6 +42,9 @@ public final class ReqSketchSortedView implements FloatsSortedView { private float[] quantiles; private long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights private final long totalN; + private final double[] normRanks; + private final float maxItem; + private final float minItem; /** * Construct from elements for testing. 
@@ -46,60 +52,76 @@ public final class ReqSketchSortedView implements FloatsSortedView { * @param cumWeights sorted, monotonically increasing cumulative weights. * @param totalN the total number of items presented to the sketch. */ - ReqSketchSortedView(final float[] quantiles, final long[] cumWeights, final long totalN) { + ReqSketchSortedView(final float[] quantiles, final long[] cumWeights, final long totalN, + final float maxItem, final float minItem) { this.quantiles = quantiles; this.cumWeights = cumWeights; this.totalN = totalN; + this.maxItem = maxItem; + this.minItem = minItem; + final int len = cumWeights.length; + final double[] normRanks = new double[len]; + for (int i = 0; i < len; i++) { normRanks[i] = (double)cumWeights[i] / totalN; } + this.normRanks = normRanks; } /** * Constructs this Sorted View given the sketch - * @param sk the given ReqSketch + * @param sketch the given ReqSketch */ - public ReqSketchSortedView(final ReqSketch sk) { - totalN = sk.getN(); - buildSortedViewArrays(sk); + public ReqSketchSortedView(final ReqSketch sketch) { + if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + this.totalN = sketch.getN(); + this.maxItem = sketch.getMaxItem(); + this.minItem = sketch.getMinItem(); + buildSortedViewArrays(sketch); + final int len = cumWeights.length; + final double[] normRanks = new double[len]; + for (int i = 0; i < len; i++) { normRanks[i] = (double)cumWeights[i] / totalN; } + this.normRanks = normRanks; } + //end of constructors + @Override public long[] getCumulativeWeights() { return cumWeights.clone(); } + @Override + public float getMaxItem() { + return maxItem; + } + + @Override + public float getMinItem() { + return minItem; + } + + @Override + public long getN() { + return totalN; + } + + @Override + public double[] getNormalizedRanks() { + return normRanks; + } + @Override public float getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { if (isEmpty()) { throw new 
IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } QuantilesUtil.checkNormalizedRankBounds(rank); final int len = cumWeights.length; - final double naturalRank = getNaturalRank(rank, totalN); + final double naturalRank = getNaturalRank(rank, totalN, searchCrit); final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT; final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit); if (index == -1) { - return quantiles[quantiles.length - 1]; ///EXCLUSIVE (GT) case: normRank == 1.0; + return quantiles[len - 1]; ///EXCLUSIVE (GT) case: normRank == 1.0; } return quantiles[index]; } - /** - * Special version of getQuantile to support the getPartitionBoundaries(int) function. - * @param weight ultimately comes from selected integral weights computed by the sketch. - * @param searchCrit If INCLUSIVE, the given rank includes all quantiles ≤ - * the quantile directly corresponding to the given weight internal to the sketch. - * @return the approximate quantile given the weight. - */ - float getQuantile(final long weight, final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } - final int len = cumWeights.length; - final InequalitySearch crit = (searchCrit == INCLUSIVE) ? 
InequalitySearch.GE : InequalitySearch.GT; - final int index = InequalitySearch.find(cumWeights, 0, len - 1, weight, crit); - if (index == -1) { - return quantiles[quantiles.length - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; - } - return quantiles[index]; - } - - - @Override public float[] getQuantiles() { return quantiles.clone(); @@ -123,8 +145,8 @@ public boolean isEmpty() { } @Override - public ReqSketchSortedViewIterator iterator() { - return new ReqSketchSortedViewIterator(quantiles, cumWeights); + public FloatsSortedViewIterator iterator() { + return new FloatsSortedViewIterator(quantiles, cumWeights); } //restricted methods diff --git a/src/main/java/org/apache/datasketches/req/ReqSketchSortedViewIterator.java b/src/main/java/org/apache/datasketches/req/ReqSketchSortedViewIterator.java deleted file mode 100644 index 6dbc63222..000000000 --- a/src/main/java/org/apache/datasketches/req/ReqSketchSortedViewIterator.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.datasketches.req; - -import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; - -import org.apache.datasketches.quantilescommon.FloatsSortedViewIterator; -import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; - -/** - * Iterator over ReqSketchSortedView. - * @author Alexander Saydakov - * @author Lee Rhodes - */ -public final class ReqSketchSortedViewIterator implements FloatsSortedViewIterator { - - private final float[] quantiles; - private final long[] cumWeights; - private final long totalN; - private int index; - - ReqSketchSortedViewIterator(final float[] quantiles, final long[] cumWeights) { - this.quantiles = quantiles; - this.cumWeights = cumWeights; - this.totalN = (cumWeights.length > 0) ? cumWeights[cumWeights.length - 1] : 0; - index = -1; - } - - @Override - public long getCumulativeWeight(final QuantileSearchCriteria searchCrit) { - if (searchCrit == INCLUSIVE) { return cumWeights[index]; } - return (index == 0) ? 
0 : cumWeights[index - 1]; - } - - @Override - public long getN() { - return totalN; - } - - @Override - public double getNormalizedRank(final QuantileSearchCriteria searchCrit) { - return (double) getCumulativeWeight(searchCrit) / totalN; - } - - @Override - public float getQuantile() { - return quantiles[index]; - } - - @Override - public long getWeight() { - if (index == 0) { return cumWeights[0]; } - return cumWeights[index] - cumWeights[index - 1]; - } - - @Override - public boolean next() { - index++; - return index < quantiles.length; - } - -} diff --git a/src/test/java/org/apache/datasketches/common/UtilTest.java b/src/test/java/org/apache/datasketches/common/UtilTest.java index a68671685..50112a315 100644 --- a/src/test/java/org/apache/datasketches/common/UtilTest.java +++ b/src/test/java/org/apache/datasketches/common/UtilTest.java @@ -263,9 +263,14 @@ public void checkZeroPad() { @Test public void checkCharacterPad() { - final String s = "Pad 30, postpend z:"; - final String out = characterPad(s, 30, 'z', true); + String s = "Pad 30, postpend z:"; + String out = characterPad(s, 30, 'z', true); println(out); + assertEquals(out, "Pad 30, postpend z:zzzzzzzzzzz"); + s = "Pad 30, prepend z:"; + out = characterPad(s, 30, 'z', false); + println(out); + assertEquals(out,"zzzzzzzzzzzzPad 30, prepend z:"); } @Test diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectCompactItemsSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectCompactItemsSketchIteratorTest.java index bc7651b14..ccfb52533 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectCompactItemsSketchIteratorTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectCompactItemsSketchIteratorTest.java @@ -96,8 +96,8 @@ public void twoItemSketchForSortedViewIterator() { assertEquals(itr.getQuantile(), "1"); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 0); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 1); + 
assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0); assertEquals(itr.getNormalizedRank(INCLUSIVE), 0.5); @@ -105,8 +105,8 @@ public void twoItemSketchForSortedViewIterator() { assertEquals(itr.getQuantile(), "2"); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 1); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 2); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0.5); assertEquals(itr.getNormalizedRank(INCLUSIVE), 1.0); } diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java index e4e349205..a8ca4145e 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java @@ -21,7 +21,6 @@ import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; -import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -423,21 +422,6 @@ public void nanSplitPoint() { sketch.getCDF(new double[] {Double.NaN}); } - @Test - public void getQuantiles() { - final KllDoublesSketch sketch = getUpdatableDirectDoublesSketch(200, 0); - sketch.update(1); - sketch.update(2); - sketch.update(3); - sketch.update(4); - double[] quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, EXCLUSIVE); - double[] quantiles2 = sketch.getPartitionBoundaries(2, EXCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, 
INCLUSIVE); - quantiles2 = sketch.getPartitionBoundaries(2, INCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - } - @Test public void checkSimpleMergeDirect() { //used for troubleshooting int k = 20; diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java index 6f9ea0ba5..3013e6295 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectFloatsSketchTest.java @@ -21,7 +21,6 @@ import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; -import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; @@ -423,21 +422,6 @@ public void nanSplitPoint() { sketch.getCDF(new float[] {Float.NaN}); } - @Test - public void getQuantiles() { - final KllFloatsSketch sketch = getUpdatableDirectFloatSketch(200, 0); - sketch.update(1); - sketch.update(2); - sketch.update(3); - sketch.update(4); - float[] quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, EXCLUSIVE); - float[] quantiles2 = sketch.getPartitionBoundaries(2, EXCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, INCLUSIVE); - quantiles2 = sketch.getPartitionBoundaries(2, INCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - } - @Test public void checkSimpleMergeDirect() { //used for troubleshooting int k = 20; diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchIteratorTest.java index d428cd259..7a12d8466 100644 --- 
a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchIteratorTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchIteratorTest.java @@ -77,8 +77,8 @@ public void twoItemSketchForSortedViewIterator() { assertEquals(itr.getQuantile(), 1.0); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 0); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0); assertEquals(itr.getNormalizedRank(INCLUSIVE), 0.5); @@ -86,8 +86,8 @@ public void twoItemSketchForSortedViewIterator() { assertEquals(itr.getQuantile(), 2.0); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 1); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 2); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0.5); assertEquals(itr.getNormalizedRank(INCLUSIVE), 1.0); } diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java index ba63e8bef..8aeabb8bf 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java @@ -391,21 +391,6 @@ public void nanSplitPoint() { sketch.getCDF(new double[] {Double.NaN}); } - @Test - public void getQuantiles() { - final KllDoublesSketch sketch = KllDoublesSketch.newHeapInstance(); - sketch.update(1); - sketch.update(2); - sketch.update(3); - sketch.update(4); - double[] quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, EXCLUSIVE); - double[] quantiles2 = sketch.getPartitionBoundaries(2, EXCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, INCLUSIVE); - quantiles2 = 
sketch.getPartitionBoundaries(2, INCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - } - @Test public void checkReset() { KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(20); @@ -456,18 +441,18 @@ public void sortedView() { assertEquals(itr.next(), true); assertEquals(itr.getQuantile(), 1); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 0); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); assertEquals(itr.next(), true); assertEquals(itr.getQuantile(), 2); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 1); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 2); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); assertEquals(itr.next(), true); assertEquals(itr.getQuantile(), 3); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 2); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 3); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 2); + assertEquals(itr.getNaturalRank(INCLUSIVE), 3); assertEquals(itr.next(), false); } diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchIteratorTest.java index e511de562..88003b836 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchIteratorTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchIteratorTest.java @@ -77,8 +77,8 @@ public void twoItemSketchForSortedViewIterator() { assertEquals(itr.getQuantile(), 1.0f); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 0); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0); 
assertEquals(itr.getNormalizedRank(INCLUSIVE), 0.5); @@ -86,8 +86,8 @@ public void twoItemSketchForSortedViewIterator() { assertEquals(itr.getQuantile(), 2.0f); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 1); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 2); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0.5); assertEquals(itr.getNormalizedRank(INCLUSIVE), 1.0); } diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java index 161ee4318..846965cb8 100644 --- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java @@ -391,21 +391,6 @@ public void nanSplitPoint() { sketch.getCDF(new float[] {Float.NaN}); } - @Test - public void getQuantiles() { - final KllFloatsSketch sketch = KllFloatsSketch.newHeapInstance(); - sketch.update(1); - sketch.update(2); - sketch.update(3); - sketch.update(4); - float[] quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, EXCLUSIVE); - float[] quantiles2 = sketch.getPartitionBoundaries(2, EXCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, INCLUSIVE); - quantiles2 = sketch.getPartitionBoundaries(2, INCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - } - @Test public void checkReset() { KllFloatsSketch sk = KllFloatsSketch.newHeapInstance(20); @@ -456,18 +441,18 @@ public void sortedView() { assertEquals(itr.next(), true); assertEquals(itr.getQuantile(), 1); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 0); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); assertEquals(itr.next(), 
true); assertEquals(itr.getQuantile(), 2); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 1); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 2); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); assertEquals(itr.next(), true); assertEquals(itr.getQuantile(), 3); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 2); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 3); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 2); + assertEquals(itr.getNaturalRank(INCLUSIVE), 3); assertEquals(itr.next(), false); } diff --git a/src/test/java/org/apache/datasketches/kll/KllItemsSketchSortedViewString.java b/src/test/java/org/apache/datasketches/kll/KllItemsSketchSortedViewString.java index 5eb513aa8..b0024420c 100644 --- a/src/test/java/org/apache/datasketches/kll/KllItemsSketchSortedViewString.java +++ b/src/test/java/org/apache/datasketches/kll/KllItemsSketchSortedViewString.java @@ -30,8 +30,9 @@ public KllItemsSketchSortedViewString( final String[] quantiles, final long[] cumWeights, final long totalN, - final String minItem, - final Comparator comparator) { - super(quantiles, cumWeights, totalN, minItem, comparator); + final Comparator comparator, + final String maxItem, + final String minItem) { + super(quantiles, cumWeights, totalN, comparator, maxItem, minItem); } } diff --git a/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java index a980841b6..deb3cb9c8 100644 --- a/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java @@ -461,10 +461,10 @@ public void getQuantiles() { sketch.update("C"); sketch.update("D"); String[] quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, EXCLUSIVE); - String[] quantiles2 = sketch.getPartitionBoundaries(2, EXCLUSIVE).boundaries; + String[] 
quantiles2 = sketch.getPartitionBoundaries(2, EXCLUSIVE).getBoundaries(); assertEquals(quantiles1, quantiles2); quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, INCLUSIVE); - quantiles2 = sketch.getPartitionBoundaries(2, INCLUSIVE).boundaries; + quantiles2 = sketch.getPartitionBoundaries(2, INCLUSIVE).getBoundaries(); assertEquals(quantiles1, quantiles2); } @@ -528,18 +528,18 @@ public void sortedView() { assertEquals(itr.next(), true); assertEquals(itr.getQuantile(), "A"); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 0); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); assertEquals(itr.next(), true); assertEquals(itr.getQuantile(), "AB"); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 1); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 2); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); assertEquals(itr.next(), true); assertEquals(itr.getQuantile(), "ABC"); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 2); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 3); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 2); + assertEquals(itr.getNaturalRank(INCLUSIVE), 3); assertEquals(itr.next(), false); } diff --git a/src/test/java/org/apache/datasketches/kll/KllItemsSketchiteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllItemsSketchiteratorTest.java index 0607ff5d7..f97eb2320 100644 --- a/src/test/java/org/apache/datasketches/kll/KllItemsSketchiteratorTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllItemsSketchiteratorTest.java @@ -82,8 +82,8 @@ public void twoItemSketchForSortedViewIterator() { assertEquals(itr.getQuantile(), "1"); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 0); - 
assertEquals(itr.getCumulativeWeight(INCLUSIVE), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0); assertEquals(itr.getNormalizedRank(INCLUSIVE), 0.5); @@ -91,8 +91,8 @@ public void twoItemSketchForSortedViewIterator() { assertEquals(itr.getQuantile(), "2"); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 1); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 2); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0.5); assertEquals(itr.getNormalizedRank(INCLUSIVE), 1.0); } diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscDirectDoublesTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscDirectDoublesTest.java index 45feb7637..28095dda0 100644 --- a/src/test/java/org/apache/datasketches/kll/KllMiscDirectDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMiscDirectDoublesTest.java @@ -58,19 +58,6 @@ public void checkBounds() { assertTrue(rest - restLB < (2 * eps)); } - @Test - public void checkMisc() { - final int k = 8; - final KllDoublesSketch sk = getDirectDoublesSketch(k, 0); - try { sk.getPartitionBoundaries(10); fail(); } catch (SketchesArgumentException e) {} - for (int i = 0; i < 20; i++) { sk.update(i); } - final double[] items = sk.getDoubleItemsArray(); - assertEquals(items.length, 16); - final int[] levels = sk.getLevelsArray(sk.sketchStructure); - assertEquals(levels.length, 3); - assertEquals(sk.getNumLevels(), 2); - } - //@Test //enable static println(..) 
for visual checking public void visualCheckToString() { final int k = 20; diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscDirectFloatsTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscDirectFloatsTest.java index 6f042ce06..5f88baed4 100644 --- a/src/test/java/org/apache/datasketches/kll/KllMiscDirectFloatsTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMiscDirectFloatsTest.java @@ -58,19 +58,6 @@ public void checkBounds() { assertTrue(rest - restLB < (2 * eps)); } - @Test - public void checkMisc() { - final int k = 8; - final KllFloatsSketch sk = getDirectFloatsSketch(k, 0); - try { sk.getPartitionBoundaries(10); fail(); } catch (SketchesArgumentException e) {} - for (int i = 0; i < 20; i++) { sk.update(i); } - final float[] items = sk.getFloatItemsArray(); - assertEquals(items.length, 16); - final int[] levels = sk.getLevelsArray(sk.sketchStructure); - assertEquals(levels.length, 3); - assertEquals(sk.getNumLevels(), 2); - } - //@Test //enable static println(..) 
for visual checking public void visualCheckToString() { final int k = 20; diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscItemsTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscItemsTest.java index 35d73fce3..0524db725 100644 --- a/src/test/java/org/apache/datasketches/kll/KllMiscItemsTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMiscItemsTest.java @@ -30,7 +30,7 @@ import org.apache.datasketches.common.ArrayOfStringsSerDe; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.Util; -import org.apache.datasketches.kll.KllItemsSketchSortedView.KllItemsSketchSortedViewIterator; +import org.apache.datasketches.quantilescommon.GenericSortedViewIterator; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.testng.annotations.Test; @@ -201,7 +201,7 @@ public void viewCompactionAndSortedView() { for (int i = 1; i <= n; i++) { sk.update(Util.intToFixedLengthString(i, digits)); } println(sk.toString(true, true)); KllItemsSketchSortedView sv = sk.getSortedView(); - KllItemsSketchSortedViewIterator itr = sv.iterator(); + GenericSortedViewIterator itr = sv.iterator(); println("### SORTED VIEW"); printf("%12s%12s\n", "Value", "CumWeight"); while (itr.next()) { diff --git a/src/test/java/org/apache/datasketches/partitions/ClassicPartitionsTest.java b/src/test/java/org/apache/datasketches/partitions/ClassicPartitionsTest.java new file mode 100644 index 000000000..f26031465 --- /dev/null +++ b/src/test/java/org/apache/datasketches/partitions/ClassicPartitionsTest.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.partitions; + +import static org.apache.datasketches.common.Util.milliSecToString; +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_BOTH; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; + +import java.util.List; + +import org.apache.datasketches.partitions.Partitioner; +import org.apache.datasketches.partitions.Partitioner.PartitionBoundsRow; +import org.apache.datasketches.quantiles.ItemsSketch; +import org.testng.annotations.Test; + +@SuppressWarnings("unused") +public class ClassicPartitionsTest { + private final int k = 1 << 15; + private final long totalN = 100_000_000L; + private final long tgtPartitionSize = (long)3e6; + private final int maxPartsPerSk = 100; + + //@Test + public void checkClassicPartitioner() { + println("Classic ItemsSketch Partitions Test"); + printf("Sketch K :%,20d\n", k); + printf("Total N :%,20d\n", totalN); + printf("Tgt Partition Size :%,20d\n", tgtPartitionSize); + printf("Max Parts Per Sketch :%20d\n", maxPartsPerSk); + + final long startTime_mS = System.currentTimeMillis(); + final ItemsSketchFillRequestLongAsString fillReq = new ItemsSketchFillRequestLongAsString(k, totalN); + final ItemsSketch sk = fillReq.getRange(1L, totalN, INCLUDE_BOTH); + final long endFillInitialSketchTime_mS = System.currentTimeMillis(); + final Partitioner> partitioner = new 
Partitioner<>( + tgtPartitionSize, + maxPartsPerSk, + fillReq, + INCLUSIVE); + final List> list = partitioner.partition(sk); + outputList(list); + + final long endTime_mS = System.currentTimeMillis(); + final long fillInitialSketchTime_mS = endFillInitialSketchTime_mS - startTime_mS; + final long partitioningTime_mS = endTime_mS - endFillInitialSketchTime_mS; + final long totalTime_mS = endTime_mS - startTime_mS; + println(""); + println("FillInitialSketchTime: " + milliSecToString(fillInitialSketchTime_mS)); + println("PartioningTime : " + milliSecToString(partitioningTime_mS)); + println("Total Time : " + milliSecToString(totalTime_mS)); + } + + private static final String[] hdr = + { "Level.Part", "Partition", "LowerBound", "UpperBound", "ApproxNumItems", "Include Rule" }; + private static final String hdrFmt = "%15s %10s %15s %15s %15s %15s\n"; + private static final String dFmt = "%15s %10d %15s %15s %15d %15s\n"; + + void outputList(final List> list) { + printf(hdrFmt, (Object[]) hdr); + final int numParts = list.size(); + final double meanPartSize = (double)totalN / numParts; + double size = 0; + double sumSizes = 0; + double sumAbsRelErr = 0; + double sumSqErr = 0; + for (int i = 0; i < numParts; i++) { + final PartitionBoundsRow row = list.get(i); + printf(dFmt, row.partId , (i + 1), row.lowerBound, row.upperBound, row.approxNumDeltaItems, row.rule.name()); + size = row.approxNumDeltaItems; + sumSizes += size; + sumAbsRelErr += Math.abs(size / meanPartSize - 1.0); + final double absErr = size - meanPartSize; + sumSqErr += absErr * absErr; + } + final double meanAbsRelErr = sumAbsRelErr / numParts; + final double meanSqErr = sumSqErr / numParts; //intermediate value + final double normMeanSqErr = meanSqErr / (meanPartSize * meanPartSize); //intermediate value + final double rmsRelErr = Math.sqrt(normMeanSqErr); //a.k.a. 
Normalized RMS Error or NRMSE + + printf("Total ApproxNumItems :%,20d\n",(long)sumSizes); + printf("Mean Partition Size :%,20.1f\n",meanPartSize); + printf("Mean Abs Rel Error :%20.3f%%\n",meanAbsRelErr * 100); + printf("Norm RMS Error :%20.3f%%\n",rmsRelErr * 100); + } + + private final static boolean enablePrinting = true; + + /** + * @param o the Object to print + */ + private static final void print(final Object o) { + if (enablePrinting) { System.out.print(o.toString()); } + } + + /** + * @param o the Object to println + */ + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + + /** + * @param format the format + * @param args the args + */ + private static final void printf(final String format, final Object ...args) { + if (enablePrinting) { System.out.printf(format, args); } + } + +} diff --git a/src/test/java/org/apache/datasketches/partitions/ItemsSketchFillRequestLongAsString.java b/src/test/java/org/apache/datasketches/partitions/ItemsSketchFillRequestLongAsString.java new file mode 100644 index 000000000..2b966051f --- /dev/null +++ b/src/test/java/org/apache/datasketches/partitions/ItemsSketchFillRequestLongAsString.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.partitions; + +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_BOTH; +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_UPPER; +import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.digits; +import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.getString; + +import java.util.Comparator; +import java.util.Random; + +import org.apache.datasketches.quantiles.ItemsSketch; + +/** + * This is a simulated data set with a given N used for testing. + * @author Lee Rhodes + */ +public class ItemsSketchFillRequestLongAsString implements SketchFillRequest> { + private int k; + private int numDigits; + private Random rand = new Random(); + + public ItemsSketchFillRequestLongAsString() { + k = 1 << 10; + numDigits = 3; + } + + public ItemsSketchFillRequestLongAsString(final int k, final long totalN) { + this.k = k; + this.numDigits = digits(totalN); + } + + @Override + public ItemsSketch getRange(final String lowerQuantile, final String upperQuantile, + final BoundsRule bounds) { + final ItemsSketch sk = ItemsSketch.getInstance(String.class, k, Comparator.naturalOrder()); + final long lower = Long.parseLong(lowerQuantile.trim()); + final long upper = Long.parseLong(upperQuantile.trim()); + if (bounds == INCLUDE_BOTH) { + for (long i = lower; i <= upper; i++) { sk.update(getString(i, numDigits)); } + } else if (bounds == INCLUDE_UPPER) { + for (long i = lower + 1; i <= upper; i++) { sk.update(getString(i, numDigits)); } + } else { //INCLUDE_LOWER + for (long i = lower; i < upper; i++) { sk.update(getString(i, numDigits)); } + } + return sk; + } + + public ItemsSketch getRange(final long lowerQuantile, final long upperQuantile, final BoundsRule bounds) { + final ItemsSketch sk = ItemsSketch.getInstance(String.class, k, Comparator.naturalOrder()); +
final long lower = lowerQuantile; + final long upper = upperQuantile; + if (bounds == INCLUDE_BOTH) { + for (long i = lower; i <= upper; i++) { sk.update(getString(i, numDigits)); } + } else if (bounds == INCLUDE_UPPER) { + for (long i = lower + 1; i <= upper; i++) { sk.update(getString(i, numDigits)); } + } else { //INCLUDE_LOWER + for (long i = lower; i < upper; i++) { sk.update(getString(i, numDigits)); } + } + return sk; + } + + public ItemsSketch getRangeRandom(final long lowerQuantile, final long upperQuantile, + final BoundsRule bounds) { + final ItemsSketch sk = ItemsSketch.getInstance(String.class, k, Comparator.naturalOrder()); + final long lower = lowerQuantile; + final long upper = upperQuantile; + this.rand = new Random(); + if (bounds == INCLUDE_BOTH) { + for (long i = lower; i <= upper; i++) { + sk.update(getString(randBetween(lowerQuantile, upperQuantile, bounds), numDigits)); + } + } else if (bounds == INCLUDE_UPPER) { + for (long i = lower + 1; i <= upper; i++) { + sk.update(getString(randBetween(lowerQuantile, upperQuantile, bounds), numDigits)); + } + } else { //INCLUDE_LOWER + for (long i = lower; i < upper; i++) { + sk.update(getString(randBetween(lowerQuantile, upperQuantile, bounds), numDigits)); + } + } + return sk; + } + + private final long randBetween(final long lb, final long ub, final BoundsRule bounds) { + final double r = rand.nextDouble(); + final long range; + final long offset; + if (bounds == INCLUDE_BOTH) { + range = ub - lb; + offset = lb; + } + else if (bounds == INCLUDE_UPPER) { + range = ub - lb - 1; + offset = lb + 1; + } else { //INCLUDE_LOWER + range = ub - lb - 1; + offset = lb; + } + return Math.round(r * range + offset); + } + +} diff --git a/src/test/java/org/apache/datasketches/partitions/KllItemsSketchFillRequestLongAsString.java b/src/test/java/org/apache/datasketches/partitions/KllItemsSketchFillRequestLongAsString.java new file mode 100644 index 000000000..53d80190f --- /dev/null +++ 
b/src/test/java/org/apache/datasketches/partitions/KllItemsSketchFillRequestLongAsString.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.partitions; + +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_BOTH; +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_UPPER; +import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.digits; +import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.getString; + +import java.util.Comparator; +import java.util.Random; + +import org.apache.datasketches.common.ArrayOfStringsSerDe; +import org.apache.datasketches.kll.KllItemsSketch; + +/** + * This is a simulated data set with a given N used for testing.
+ * @author Lee Rhodes + */ +public class KllItemsSketchFillRequestLongAsString implements SketchFillRequest> { + private int k; + private int numDigits; + private Random rand = new Random(); + + public KllItemsSketchFillRequestLongAsString() { + k = 1 << 10; + numDigits = 3; + } + + public KllItemsSketchFillRequestLongAsString(final int k, final long totalN) { + this.k = k; + this.numDigits = digits(totalN); + } + + @Override + public KllItemsSketch getRange(final String lowerQuantile, final String upperQuantile, + final BoundsRule bounds) { + KllItemsSketch sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), new ArrayOfStringsSerDe()); + long lower = Long.parseLong(lowerQuantile.trim()); + long upper = Long.parseLong(upperQuantile.trim()); + if (bounds == INCLUDE_BOTH) { + for (long i = lower; i <= upper; i++) { sk.update(getString(i, numDigits)); } + } else if (bounds == INCLUDE_UPPER) { + for (long i = lower + 1; i <= upper; i++) { sk.update(getString(i, numDigits)); } + } else { //INCLUDE_LOWER + for (long i = lower; i < upper; i++) { sk.update(getString(i, numDigits)); } + } + return sk; + } + + public KllItemsSketch getRange(final long lowerQuantile, final long upperQuantile, final BoundsRule bounds) { + KllItemsSketch sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), new ArrayOfStringsSerDe()); + long lower = lowerQuantile; + long upper = upperQuantile; + if (bounds == INCLUDE_BOTH) { + for (long i = lower; i <= upper; i++) { sk.update(getString(i, numDigits)); } + } else if (bounds == INCLUDE_UPPER) { + for (long i = lower + 1; i <= upper; i++) { sk.update(getString(i, numDigits)); } + } else { //INCLUDE_LOWER + for (long i = lower; i < upper; i++) { sk.update(getString(i, numDigits)); } + } + return sk; + } + + public KllItemsSketch getRangeRandom(final long lowerQuantile, final long upperQuantile, final BoundsRule bounds) { + KllItemsSketch sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), new 
ArrayOfStringsSerDe()); + long lower = lowerQuantile; + long upper = upperQuantile; + this.rand = new Random(); + if (bounds == INCLUDE_BOTH) { + for (long i = lower; i <= upper; i++) { + sk.update(getString(randBetween(lowerQuantile, upperQuantile, bounds), numDigits)); + } + } else if (bounds == INCLUDE_UPPER) { + for (long i = lower + 1; i <= upper; i++) { + sk.update(getString(randBetween(lowerQuantile, upperQuantile, bounds), numDigits)); + } + } else { //INCLUDE_LOWER + for (long i = lower; i < upper; i++) { + sk.update(getString(randBetween(lowerQuantile, upperQuantile, bounds), numDigits)); + } + } + return sk; + } + + private final long randBetween(final long lb, final long ub, final BoundsRule bounds) { + double r = rand.nextDouble(); + long range; + long offset; + if (bounds == INCLUDE_BOTH) { + range = ub - lb; + offset = lb; + } + else if (bounds == INCLUDE_UPPER) { + range = ub - lb - 1; + offset = lb + 1; + } else { //INCLUDE_LOWER + range = ub - lb - 1; + offset = lb; + } + return Math.round(r * range + offset); + } + +} diff --git a/src/test/java/org/apache/datasketches/partitions/KllPartitionsTest.java b/src/test/java/org/apache/datasketches/partitions/KllPartitionsTest.java new file mode 100644 index 000000000..3b44d9988 --- /dev/null +++ b/src/test/java/org/apache/datasketches/partitions/KllPartitionsTest.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.partitions; + +import static org.apache.datasketches.common.Util.milliSecToString; +import static org.apache.datasketches.partitions.BoundsRule.INCLUDE_BOTH; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; + +import java.util.List; + +import org.apache.datasketches.partitions.Partitioner; +import org.apache.datasketches.partitions.Partitioner.PartitionBoundsRow; +import org.apache.datasketches.kll.KllItemsSketch; +import org.testng.annotations.Test; + +@SuppressWarnings("unused") +public class KllPartitionsTest { + private final int k = 1 << 15; + private final long totalN = 100_000_000L; + private final long tgtPartitionSize = (long)3e6; + private final int maxPartsPerSk = 100; + + //@Test + public void checkKllPartitioner() { + println("KllItemsSketch Partitions Test"); + printf("Sketch K :%,20d\n", k); + printf("Total N :%,20d\n", totalN); + printf("Tgt Partition Size :%,20d\n", tgtPartitionSize); + printf("Max Parts Per Sketch :%20d\n", maxPartsPerSk); + + final long startTime_mS = System.currentTimeMillis(); + final KllItemsSketchFillRequestLongAsString fillReq = new KllItemsSketchFillRequestLongAsString(k, totalN); + final KllItemsSketch sk = fillReq.getRange(1L, totalN, INCLUDE_BOTH); + final long endFillInitialSketchTime_mS = System.currentTimeMillis(); + final Partitioner> partitioner = new Partitioner<>( + tgtPartitionSize, + maxPartsPerSk, + fillReq, + INCLUSIVE); + final List> list = partitioner.partition(sk); + outputList(list); + + final long endTime_mS = 
System.currentTimeMillis(); + final long fillInitialSketchTime_mS = endFillInitialSketchTime_mS - startTime_mS; + final long partitioningTime_mS = endTime_mS - endFillInitialSketchTime_mS; + final long totalTime_mS = endTime_mS - startTime_mS; + println(""); + println("FillInitialSketchTime: " + milliSecToString(fillInitialSketchTime_mS)); + println("PartioningTime : " + milliSecToString(partitioningTime_mS)); + println("Total Time : " + milliSecToString(totalTime_mS)); + } + + private static final String[] hdr = + { "Level.Part", "Partition", "LowerBound", "UpperBound", "ApproxNumItems", "Include Rule" }; + private static final String hdrFmt = "%15s %10s %15s %15s %15s %15s\n"; + private static final String dFmt = "%15s %10d %15s %15s %15d %15s\n"; + + void outputList(final List> list) { + printf(hdrFmt, (Object[]) hdr); + final int numParts = list.size(); + final double meanPartSize = (double)totalN / numParts; + double size = 0; + double sumSizes = 0; + double sumAbsRelErr = 0; + double sumSqErr = 0; + for (int i = 0; i < numParts; i++) { + final PartitionBoundsRow row = list.get(i); + printf(dFmt, row.partId , (i + 1), row.lowerBound, row.upperBound, row.approxNumDeltaItems, row.rule.name()); + size = row.approxNumDeltaItems; + sumSizes += size; + sumAbsRelErr += Math.abs(size / meanPartSize - 1.0); + final double absErr = size - meanPartSize; + sumSqErr += absErr * absErr; + } + final double meanAbsRelErr = sumAbsRelErr / numParts; + final double meanSqErr = sumSqErr / numParts; //intermediate value + final double normMeanSqErr = meanSqErr / (meanPartSize * meanPartSize); //intermediate value + final double rmsRelErr = Math.sqrt(normMeanSqErr); //a.k.a. 
Normalized RMS Error or NRMSE + + printf("Total ApproxNumItems :%,20d\n",(long)sumSizes); + printf("Mean Partition Size :%,20.1f\n",meanPartSize); + printf("Mean Abs Rel Error :%20.3f%%\n",meanAbsRelErr * 100); + printf("Norm RMS Error :%20.3f%%\n",rmsRelErr * 100); + } + + private final static boolean enablePrinting = true; + + /** + * @param o the Object to print + */ + private static final void print(final Object o) { + if (enablePrinting) { System.out.print(o.toString()); } + } + + /** + * @param o the Object to println + */ + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + + /** + * @param format the format + * @param args the args + */ + private static final void printf(final String format, final Object ...args) { + if (enablePrinting) { System.out.printf(format, args); } + } + +} diff --git a/src/test/java/org/apache/datasketches/quantiles/CustomQuantilesTest.java b/src/test/java/org/apache/datasketches/quantiles/CustomQuantilesTest.java index 216b91f72..d3193883b 100644 --- a/src/test/java/org/apache/datasketches/quantiles/CustomQuantilesTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/CustomQuantilesTest.java @@ -91,7 +91,7 @@ public void checkQuantilesV400() { double qTrue = getTrueDoubleQuantile(cumWtsArr, quantilesArr, normRankIn, EXCLUSIVE); assertEquals(qEst, qTrue); double rawNatRank = normRankIn * N; - double trimNatRank = getNaturalRank(normRankIn, N); + double trimNatRank = getNaturalRank(normRankIn, N, EXCLUSIVE); printf("%22.18f %22.18f %22.18f %13.1f", normRankIn, rawNatRank, trimNatRank, qEst); if (qEst != qTrue) { println(" " + qEst + " != " +qTrue); } else { println(""); } } @@ -120,7 +120,7 @@ public void checkQuantilesV400() { double qTrue = getTrueDoubleQuantile(cumWtsArr, quantilesArr, normRankIn, INCLUSIVE); assertEquals(qEst, qTrue); double rawNatRank = normRankIn * N; - double trimNatRank = getNaturalRank(normRankIn, N); + double trimNatRank = 
getNaturalRank(normRankIn, N, INCLUSIVE); printf("%22.18f %22.18f %22.18f %13.1f", normRankIn, rawNatRank, trimNatRank, qEst); if (qEst != qTrue) { println(" " + qEst + " != " +qTrue); } else { println(""); } } diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java index fdd7918d1..d4f549ebe 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java @@ -134,7 +134,6 @@ public void checkEmptyExceptions() { try { uds.getMaxItem(); fail(); } catch (IllegalArgumentException e) {} try { uds.getMinItem(); fail(); } catch (IllegalArgumentException e) {} try { uds.getRank(1.0); fail(); } catch (IllegalArgumentException e) {} - try { uds.getPartitionBoundaries(5); fail(); } catch (IllegalArgumentException e) {} try { uds.getPMF(new double[] { 0, 0.5, 1.0 }); fail(); } catch (IllegalArgumentException e) {} try { uds.getCDF(new double[] { 0, 0.5, 1.0 }); fail(); } catch (IllegalArgumentException e) {} } @@ -199,15 +198,15 @@ public void sortedView() { Assert.assertEquals(it.next(), true); Assert.assertEquals(it.getQuantile(), 1); Assert.assertEquals(it.getWeight(), 1); - Assert.assertEquals(it.getCumulativeWeight(INCLUSIVE), 1); + Assert.assertEquals(it.getNaturalRank(INCLUSIVE), 1); Assert.assertEquals(it.next(), true); Assert.assertEquals(it.getQuantile(), 2); Assert.assertEquals(it.getWeight(), 1); - Assert.assertEquals(it.getCumulativeWeight(INCLUSIVE), 2); + Assert.assertEquals(it.getNaturalRank(INCLUSIVE), 2); Assert.assertEquals(it.next(), true); Assert.assertEquals(it.getQuantile(), 3); Assert.assertEquals(it.getWeight(), 1); - Assert.assertEquals(it.getCumulativeWeight(INCLUSIVE), 3); + Assert.assertEquals(it.getNaturalRank(INCLUSIVE), 3); Assert.assertEquals(it.next(), false); } } diff --git 
a/src/test/java/org/apache/datasketches/quantiles/HeapUpdateDoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/HeapUpdateDoublesSketchTest.java index b5fd7b2d3..eba9f6b55 100644 --- a/src/test/java/org/apache/datasketches/quantiles/HeapUpdateDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/HeapUpdateDoublesSketchTest.java @@ -782,31 +782,6 @@ public void testIt() { assertTrue(qsk2.isEmpty()); } - @Test - public void checkEvenlySpacedQuantiles() { - DoublesSketch qsk = buildAndLoadQS(32, 1001); - double[] values = qsk.getPartitionBoundaries(10).boundaries; - for (int i = 0; i comparator) { - super(quantiles, cumWeights, totalN, comparator); + final Comparator comparator, + final String maxItem, + final String minItem) { + super(quantiles, cumWeights, totalN, comparator, maxItem, minItem); } } diff --git a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java index f123b01bd..0d8527bbf 100644 --- a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java @@ -599,15 +599,15 @@ public void sortedView() { assertEquals(it.next(), true); assertEquals(it.getQuantile(), 1); assertEquals(it.getWeight(), 1); - assertEquals(it.getCumulativeWeight(INCLUSIVE), 1); + assertEquals(it.getNaturalRank(INCLUSIVE), 1); assertEquals(it.next(), true); assertEquals(it.getQuantile(), 2); assertEquals(it.getWeight(), 1); - assertEquals(it.getCumulativeWeight(INCLUSIVE), 2); + assertEquals(it.getNaturalRank(INCLUSIVE), 2); assertEquals(it.next(), true); assertEquals(it.getQuantile(), 3); assertEquals(it.getWeight(), 1); - assertEquals(it.getCumulativeWeight(INCLUSIVE), 3); + assertEquals(it.getNaturalRank(INCLUSIVE), 3); assertEquals(it.next(), false); } } @@ -617,7 +617,7 @@ public void sortedView2() { Double[] qArr = {8.0, 10.0, 10.0, 20.0}; long[] cwArr = {1, 3, 4, 5}; 
Comparator comp = Comparator.naturalOrder(); - ItemsSketchSortedView sv = new ItemsSketchSortedView<>(qArr, cwArr, 5L, comp); + ItemsSketchSortedView sv = new ItemsSketchSortedView<>(qArr, cwArr, 5L, comp, 20.0, 8.0); double[] ranks = {0, .1, .2, .3, .6, .7, .8, .9, 1.0}; Double[] qOut = new Double[9]; for (int i = 0; i < ranks.length; i++) { @@ -640,10 +640,10 @@ public void getQuantiles() { sketch.update(3); sketch.update(4); Integer[] quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, EXCLUSIVE); - Integer[] quantiles2 = sketch.getPartitionBoundaries(2, EXCLUSIVE).boundaries; + Integer[] quantiles2 = sketch.getPartitionBoundaries(2, EXCLUSIVE).getBoundaries(); assertEquals(quantiles1, quantiles2); quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, INCLUSIVE); - quantiles2 = sketch.getPartitionBoundaries(2, INCLUSIVE).boundaries; + quantiles2 = sketch.getPartitionBoundaries(2, INCLUSIVE).getBoundaries(); assertEquals(quantiles1, quantiles2); } diff --git a/src/test/java/org/apache/datasketches/quantiles/SkewedDataTest.java b/src/test/java/org/apache/datasketches/quantiles/SkewedDataTest.java new file mode 100644 index 000000000..d27911cab --- /dev/null +++ b/src/test/java/org/apache/datasketches/quantiles/SkewedDataTest.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantiles; + +import java.util.Comparator; + +import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.*; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; + +import org.apache.datasketches.quantilescommon.GenericSortedViewIterator; +import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; +import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; +import org.testng.annotations.Test; + +/** + * Visual-only check of the sorted view and partition boundaries of an ItemsSketch loaded with skewed data (many duplicates of one value). + */ +@SuppressWarnings("unused") +public class SkewedDataTest { + static String[] hdr = {"N", "MaxItem", "MinItem", "NumParts", "SearchCriteria"}; + static String hdrfmt = "%6s %10s %10s %10s %15s\n"; + static String hdrdfmt = "%6d %10s %10s %10d %15s\n"; + + static String[] rowhdr = {"Row", "NormRanks", "NatRanks", "Boundaries", "DeltaItems"}; + static String rowhdrfmt = "%5s %12s %12s %12s %12s\n"; + static String rowdfmt = "%5d %12.8f %12d %12s %12d\n"; + + static String[] rowhdr2 = {"Row", "NormRanks", "NatRanks", "Boundaries"}; + static String rowhdrfmt2= "%5s %12s %12s %12s\n"; + static String rowdfmt2 = "%5d %12.8f %12d %12s\n"; + + //@Test //visual only + public void checkWithSkew() { + int n = 2050; + int k = 1 << 15; + int n2 = 200; + int totalN = n + n2; + int numDigits = digits(totalN); + long v2 = 1000L; + int numParts = 22; + QuantileSearchCriteria searchCrit = QuantileSearchCriteria.INCLUSIVE; + ItemsSketch sk = ItemsSketch.getInstance(String.class,k, Comparator.naturalOrder()); + + for (long i = 1; i <= n; i++) { sk.update(getString(i, numDigits)); } + for (long i = 1; i <= n2; i++) { sk.update(getString(v2, numDigits)); } + ItemsSketchSortedView sv = sk.getSortedView(); + GenericSortedViewIterator itr = sv.iterator(); + println("SORTED VIEW:"); + printf(rowhdrfmt2, (Object[])rowhdr2); + int j = 0; + while (itr.next())
{ + printf(rowdfmt2, j++, itr.getNormalizedRank(searchCrit), itr.getNaturalRank(searchCrit), itr.getQuantile()); + } + + GenericPartitionBoundaries gpb = sv.getPartitionBoundaries(numParts, searchCrit); + int arrLen = gpb.getBoundaries().length; + double[] normRanks = gpb.getNormalizedRanks(); + long[] natRanks = gpb.getNaturalRanks(); + String[] boundaries = gpb.getBoundaries(); + long[] numDeltaItems = gpb.getNumDeltaItems(); + println(""); + println("GET PARTITION BOUNDARIES:"); + printf(hdrfmt, (Object[]) hdr); + printf(hdrdfmt, totalN, gpb.getMaxItem(), gpb.getMinItem(), numParts, searchCrit.toString()); + println(""); + printf(rowhdrfmt, (Object[]) rowhdr); + for (int i = 0; i < arrLen; i++) { + printf(rowdfmt, i, normRanks[i], natRanks[i], boundaries[i], numDeltaItems[i]); + } + } + + private final static boolean enablePrinting = true; + + /** + * @param o the Object to print + */ + private static final void print(final Object o) { + if (enablePrinting) { System.out.print(o.toString()); } + } + + /** + * @param o the Object to println + */ + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + + /** + * @param format the format + * @param args the args + */ + private static final void printf(final String format, final Object ...args) { + if (enablePrinting) { System.out.printf(format, args); } + } + + +} diff --git a/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java b/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java index 5f4c4c753..df151c8ce 100644 --- a/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java +++ b/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java @@ -77,7 +77,6 @@ */ public class CrossCheckQuantilesTest { private ArrayOfStringsSerDe serDe = new ArrayOfStringsSerDe(); - private final String minItem = "10"; private final Comparator comparator = 
Comparator.naturalOrder(); private final static int k = 32; //all sketches are in exact mode @@ -121,6 +120,14 @@ public class CrossCheckQuantilesTest { {2,1,2,1,2,1,2,1} }; + final float[] svMaxFValues = { 10, 10, 40, 50, 40 }; + final float[] svMinFValues = { 10, 10, 10, 10, 10 }; + final double[] svMaxDValues = { 10, 10, 40, 50, 40 }; + final double[] svMinDValues = { 10, 10, 10, 10, 10 }; + final String[] svMaxIValues = { "10", "10", "40", "50", "40" }; + final String[] svMinIValues = { "10", "10", "10", "10", "10" }; + + int numSets; long[][] svCumWeights; @@ -329,32 +336,44 @@ private void buildSketches(int set) { /*******BUILD & LOAD SVs***********/ private void buildSVs(int set) throws Exception { - reqFloatsSV = getRawReqSV(svFValues[set], svCumWeights[set], totalN[set]); - kllFloatsSV = getRawKllFloatsSV(svFValues[set], svCumWeights[set], totalN[set]); - kllDoublesSV = getRawKllDoublesSV(svDValues[set], svCumWeights[set], totalN[set]); - classicDoublesSV = getRawClassicDoublesSV(svDValues[set], svCumWeights[set], totalN[set]); - kllItemsSV = new KllItemsSketchSortedViewString(svIValues[set], svCumWeights[set], totalN[set], minItem, comparator); - itemsSV = new ItemsSketchSortedViewString(svIValues[set], svCumWeights[set], totalN[set], comparator); + reqFloatsSV = getRawReqSV(svFValues[set], svCumWeights[set], totalN[set], + svMaxFValues[set], svMinFValues[set]); + kllFloatsSV = getRawKllFloatsSV(svFValues[set], svCumWeights[set], totalN[set], + svMaxFValues[set], svMinFValues[set]); + kllDoublesSV = getRawKllDoublesSV(svDValues[set], svCumWeights[set], totalN[set], + svMaxDValues[set], svMinDValues[set]); + classicDoublesSV = getRawClassicDoublesSV(svDValues[set], svCumWeights[set], totalN[set], + svMaxDValues[set], svMinDValues[set]); + String svImax = svIValues[set][svIValues[set].length - 1]; + String svImin = svIValues[set][0]; + kllItemsSV = new KllItemsSketchSortedViewString(svIValues[set], svCumWeights[set], totalN[set], + comparator, svImax, 
svImin); + itemsSV = new ItemsSketchSortedViewString(svIValues[set], svCumWeights[set], totalN[set], + comparator, svImax, svImin); } private final static ReqSketchSortedView getRawReqSV( - final float[] values, final long[] cumWeights, final long totalN) throws Exception { - return (ReqSketchSortedView) REQ_SV_CTOR.newInstance(values, cumWeights, totalN); + final float[] values, final long[] cumWeights, final long totalN, final float maxItem, final float minItem) + throws Exception { + return (ReqSketchSortedView) REQ_SV_CTOR.newInstance(values, cumWeights, totalN, maxItem, minItem); } private final static KllFloatsSketchSortedView getRawKllFloatsSV( - final float[] values, final long[] cumWeights, final long totalN) throws Exception { - return (KllFloatsSketchSortedView) KLL_FLOATS_SV_CTOR.newInstance(values, cumWeights, totalN); + final float[] values, final long[] cumWeights, final long totalN, final float maxItem, final float minItem) + throws Exception { + return (KllFloatsSketchSortedView) KLL_FLOATS_SV_CTOR.newInstance(values, cumWeights, totalN, maxItem, minItem); } private final static KllDoublesSketchSortedView getRawKllDoublesSV( - final double[] values, final long[] cumWeights, final long totalN) throws Exception { - return (KllDoublesSketchSortedView) KLL_DOUBLES_SV_CTOR.newInstance(values, cumWeights, totalN); + final double[] values, final long[] cumWeights, final long totalN, final double maxItem, final double minItem) + throws Exception { + return (KllDoublesSketchSortedView) KLL_DOUBLES_SV_CTOR.newInstance(values, cumWeights, totalN, maxItem, minItem); } private final static DoublesSketchSortedView getRawClassicDoublesSV( - final double[] values, final long[] cumWeights, final long totalN) throws Exception { - return (DoublesSketchSortedView) CLASSIC_DOUBLES_SV_CTOR.newInstance(values, cumWeights, totalN); + final double[] values, final long[] cumWeights, final long totalN, final double maxItem, final double minItem) + throws Exception { + return 
(DoublesSketchSortedView) CLASSIC_DOUBLES_SV_CTOR.newInstance(values, cumWeights, totalN, maxItem, minItem); } /********BUILD DATA SETS**********/ diff --git a/src/test/java/org/apache/datasketches/quantilescommon/LongsAsOrderableStrings.java b/src/test/java/org/apache/datasketches/quantilescommon/LongsAsOrderableStrings.java new file mode 100644 index 000000000..d8eb60d56 --- /dev/null +++ b/src/test/java/org/apache/datasketches/quantilescommon/LongsAsOrderableStrings.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +import static java.lang.Math.ceil; +import static java.lang.Math.log; +import static org.apache.datasketches.common.Util.characterPad; + +/** + * Creates a string from a positive long value that is orderable in the + * same order as its long value. + */ +public final class LongsAsOrderableStrings { + + /** + * Converts the given long into a string with leading spaces based on the given numDigits. + * This allows the strings to be ordered as if they were longs. + * @param value the value to convert + * @param numDigits the maximum required number of total spaces for digits.
+ * @return the given long into a string with leading spaces + */ + public static String getString(final long value, final int numDigits) { + return characterPad(Long.toString(value), numDigits, ' ', false); + } + + /** + * Converts the given String back to a long by trimming any leading or trailing spaces. + * @param value the given string to convert + * @return the given String back to a long + */ + public static long getLong(final String value) { + return Long.parseLong(value.trim()); + } + + /** + * Computes the number of digits required to display the given positive long value. + * This does not include commas or other digit separators. + * This works with longs less than 1E15. + * @param maxValue the maximum anticipated long value. + * @return the number of required display digits + */ + public static int digits(final long maxValue) { + if (maxValue <= 0) { return 1; } + return (int) ceil(log(maxValue + 1) / log(10.0)); + } + +} diff --git a/src/test/java/org/apache/datasketches/quantilescommon/ReflectUtilityTest.java b/src/test/java/org/apache/datasketches/quantilescommon/ReflectUtilityTest.java index b756c5da1..191629fbe 100644 --- a/src/test/java/org/apache/datasketches/quantilescommon/ReflectUtilityTest.java +++ b/src/test/java/org/apache/datasketches/quantilescommon/ReflectUtilityTest.java @@ -50,10 +50,14 @@ private ReflectUtilityTest() {} KLL_DOUBLES_SV = getClass("org.apache.datasketches.kll.KllDoublesSketchSortedView"); CLASSIC_DOUBLES_SV = getClass("org.apache.datasketches.quantiles.DoublesSketchSortedView"); - REQ_SV_CTOR = getConstructor(REQ_SV, float[].class, long[].class, long.class); - KLL_FLOATS_SV_CTOR = getConstructor(KLL_FLOATS_SV, float[].class, long[].class, long.class); - KLL_DOUBLES_SV_CTOR = getConstructor(KLL_DOUBLES_SV, double[].class, long[].class, long.class); - CLASSIC_DOUBLES_SV_CTOR = getConstructor(CLASSIC_DOUBLES_SV, double[].class, long[].class, long.class); + REQ_SV_CTOR = + getConstructor(REQ_SV, float[].class, 
long[].class, long.class, float.class, float.class); + KLL_FLOATS_SV_CTOR = + getConstructor(KLL_FLOATS_SV, float[].class, long[].class, long.class, float.class, float.class); + KLL_DOUBLES_SV_CTOR = + getConstructor(KLL_DOUBLES_SV, double[].class, long[].class, long.class, double.class, double.class); + CLASSIC_DOUBLES_SV_CTOR = + getConstructor(CLASSIC_DOUBLES_SV, double[].class, long[].class, long.class, double.class, double.class); } @Test //Example @@ -62,7 +66,7 @@ public static void checkCtr() throws Exception { long[] larr = { 1, 2, 3 }; long n = 3; ReqSketchSortedView reqSV = - (ReqSketchSortedView) REQ_SV_CTOR.newInstance(farr, larr, n); + (ReqSketchSortedView) REQ_SV_CTOR.newInstance(farr, larr, n, 10f, 30f); float q = reqSV.getQuantile(1.0, INCLUSIVE); assertEquals(q, 30f); } diff --git a/src/test/java/org/apache/datasketches/req/ReqSketchSortedViewTest.java b/src/test/java/org/apache/datasketches/req/ReqSketchSortedViewTest.java index eb75790e5..003a53c3b 100644 --- a/src/test/java/org/apache/datasketches/req/ReqSketchSortedViewTest.java +++ b/src/test/java/org/apache/datasketches/req/ReqSketchSortedViewTest.java @@ -21,12 +21,12 @@ import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; + import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; import org.apache.datasketches.quantilescommon.FloatsSortedView; import org.apache.datasketches.quantilescommon.FloatsSortedViewIterator; -import org.testng.Assert; import org.testng.annotations.Test; /** @@ -39,13 +39,6 @@ public class ReqSketchSortedViewTest { private final int dup = 2; private final int n = numV * dup; - @Test - public void emptySketch() { - ReqSketch sketch = ReqSketch.builder().build(); - FloatsSortedViewIterator itr = sketch.getSortedView().iterator(); - Assert.assertFalse(itr.next()); - } - @Test public void twoValueSketch() { ReqSketch 
sketch = ReqSketch.builder().build(); @@ -57,8 +50,8 @@ public void twoValueSketch() { assertEquals(itr.getQuantile(), 1f); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 0); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0); assertEquals(itr.getNormalizedRank(INCLUSIVE), 0.5); @@ -66,8 +59,8 @@ public void twoValueSketch() { assertEquals(itr.getQuantile(), 2f); assertEquals(itr.getWeight(), 1); - assertEquals(itr.getCumulativeWeight(EXCLUSIVE), 1); - assertEquals(itr.getCumulativeWeight(INCLUSIVE), 2); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0.5); assertEquals(itr.getNormalizedRank(INCLUSIVE), 1.0); } @@ -111,9 +104,9 @@ private static void printIterator(final FloatsSortedViewIterator itr) { while (itr.next()) { float v = itr.getQuantile(); long wt = itr.getWeight(); - long cumWtNotInc = itr.getCumulativeWeight(EXCLUSIVE); + long cumWtNotInc = itr.getNaturalRank(EXCLUSIVE); double nRankNotInc = itr.getNormalizedRank(EXCLUSIVE); - long cumWtInc = itr.getCumulativeWeight(INCLUSIVE); + long cumWtInc = itr.getNaturalRank(INCLUSIVE); double nRankInc = itr.getNormalizedRank(INCLUSIVE); printf(fmt, v, wt, cumWtNotInc, nRankNotInc, cumWtInc, nRankInc); } diff --git a/src/test/java/org/apache/datasketches/req/ReqSketchTest.java b/src/test/java/org/apache/datasketches/req/ReqSketchTest.java index 4db9112a8..78b321e1d 100644 --- a/src/test/java/org/apache/datasketches/req/ReqSketchTest.java +++ b/src/test/java/org/apache/datasketches/req/ReqSketchTest.java @@ -29,6 +29,7 @@ import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.quantilescommon.FloatsSortedView; +import 
org.apache.datasketches.quantilescommon.FloatsSortedViewIterator; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; import org.apache.datasketches.quantilescommon.QuantilesFloatsSketchIterator; import org.apache.datasketches.quantilescommon.QuantilesUtil; @@ -152,13 +153,13 @@ private static void checkGetRanks(final ReqSketch sk, final int max, final int i private static void checkSortedView(final ReqSketch sk, final int iDebug) { final ReqSketchSortedView sv = new ReqSketchSortedView(sk); - final ReqSketchSortedViewIterator itr = sv.iterator(); + final FloatsSortedViewIterator itr = sv.iterator(); final int retainedCount = sk.getNumRetained(); final long totalN = sk.getN(); int count = 0; long cumWt = 0; while (itr.next()) { - cumWt = itr.getCumulativeWeight(INCLUSIVE); + cumWt = itr.getNaturalRank(INCLUSIVE); count++; } assertEquals(cumWt, totalN); @@ -234,21 +235,6 @@ private static void checkMerge(final ReqSketch sk, final int iDebug) { //specific tests - @Test - public void getQuantiles() { - final ReqSketch sketch = ReqSketch.builder().setK(12).build(); - sketch.update(1); - sketch.update(2); - sketch.update(3); - sketch.update(4); - float[] quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, EXCLUSIVE); - float[] quantiles2 = sketch.getPartitionBoundaries(2, EXCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - quantiles1 = sketch.getQuantiles(new double[] {0.0, 0.5, 1.0}, INCLUSIVE); - quantiles2 = sketch.getPartitionBoundaries(2, INCLUSIVE).boundaries; - assertEquals(quantiles1, quantiles2); - } - @Test public void merge() { final ReqSketch s1 = ReqSketch.builder().setK(12).build();