Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for get partition boundaries #475

Merged
merged 15 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,5 +154,5 @@ In Eclipse, open the project *Properties / Java Build Path / Module Dependencies

#### SpotBugs

* Make sure you configure SpotBugs with the /tools/FindBugsExcludeFilter.xml file. Otherwise, you will get a lot of false positive or low risk issues that we have examined and exliminated with this exclusion file.
* Make sure you configure SpotBugs with the /tools/FindBugsExcludeFilter.xml file. Otherwise, you may get a lot of false positive or low risk issues that we have examined and eliminated with this exclusion file.

7 changes: 7 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,13 @@ under the License.
<version>${testng.version}</version>
<scope>test</scope>
</dependency>
<!--
<dependency>
<groupId>org.apache.datasketches</groupId>
<artifactId>datasketches-java-common</artifactId>
<version>1.0.0</version>
</dependency>
-->
leerho marked this conversation as resolved.
Show resolved Hide resolved
</dependencies>

<build>
Expand Down
81 changes: 33 additions & 48 deletions src/main/java/org/apache/datasketches/common/Util.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import static java.lang.Math.log;
import static java.lang.Math.pow;
import static java.lang.Math.round;
import static java.util.Arrays.fill;

import java.util.Comparator;

Expand Down Expand Up @@ -217,7 +218,7 @@ public static String nanoSecToString(final long nS) {

/**
* Returns the given time in milliseconds formatted as Hours:Min:Sec.mSec
* @param mS the given nanoseconds
* @param mS the given milliseconds
* @return the given time in milliseconds formatted as Hours:Min:Sec.mSec
*/
public static String milliSecToString(final long mS) {
Expand All @@ -244,40 +245,20 @@ public static String zeroPad(final String s, final int fieldLength) {

/**
* Prepend or postpend the given string with the given character to fill the given field length.
* If the given string is equal or greater than the given field length, it will be returned
* without modification.
* If the given string is equal to or greater than the given field length, it will be returned without modification.
* @param s the given string
* @param fieldLength the desired field length
* @param padChar the desired pad character
* @param postpend if true, append the pad characters to the end of the string; otherwise prepend them.
* @return prepended or postpended given string with the given character to fill the given field
* length.
* @return prepended or postpended given string with the given character to fill the given field length.
*/
public static String characterPad(final String s, final int fieldLength, final char padChar,
final boolean postpend) {
final char[] chArr = s.toCharArray();
final int sLen = chArr.length;
public static String characterPad(final String s, final int fieldLength, final char padChar, final boolean postpend) {
final int sLen = s.length();
if (sLen < fieldLength) {
final char[] out = new char[fieldLength];
final int blanks = fieldLength - sLen;

if (postpend) {
for (int i = 0; i < sLen; i++) {
out[i] = chArr[i];
}
for (int i = sLen; i < fieldLength; i++) {
out[i] = padChar;
}
} else { //prepend
for (int i = 0; i < blanks; i++) {
out[i] = padChar;
}
for (int i = blanks; i < fieldLength; i++) {
out[i] = chArr[i - blanks];
}
}

return String.valueOf(out);
final char[] cArr = new char[fieldLength - sLen];
fill(cArr, padChar);
final String addstr = String.valueOf(cArr);
return (postpend) ? s.concat(addstr) : addstr.concat(s);
}
return s;
}
Expand Down Expand Up @@ -376,8 +357,8 @@ public static int ceilingIntPowerOf2(final int n) {
}

/**
* Computes the long ceiling power of 2 within the range [1, 2^30]. This is the smallest positive power
* of 2 that is equal to or greater than the given n and a mathematical integer.
* Computes the long ceiling power of 2 within the range [1, 2^62]. This is the smallest positive power
* of 2 that is equal to or greater than the given n and a mathematical long.
*
* <p>For:
* <ul>
Expand Down Expand Up @@ -550,56 +531,60 @@ public static double powerSeriesNextDouble(final int ppb, final double curPoint,
}

/**
* Computes the ceiling power of given <i>base</i> and <i>n</i> as doubles.
* This is the smallest positive power
* of <i>base</i> that equal to or greater than the given <i>n</i> and equal to a mathematical integer.
* Returns the ceiling of a given <i>n</i> given a <i>base</i>, where the ceiling is an integral power of the base.
* This is the smallest positive power of <i>base</i> that is equal to or greater than the given <i>n</i>
* and equal to a mathematical integer.
* The result of this function is consistent with {@link #ceilingIntPowerOf2(int)} for values
* less than one. I.e., if <i>n &lt; 1,</i> the result is 1.
*
* @param base The base in the expression &#8968;base<sup>n</sup>&#8969;.
* <p>The formula is: <i>base<sup>ceiling(log<sub>base</sub>(x))</sup></i>, where <i>x</i> = max(<i>n</i>, 1).</p>
*
* @param base The number in the expression &#8968;base<sup>n</sup>&#8969;.
* @param n The input argument.
* @return the ceiling power of <i>base</i> as a double and equal to a mathematical integer.
*/
public static double ceilingPowerBaseOfDouble(final double base, final double n) {
final double x = n < 1.0 ? 1.0 : n;
return pow(base, ceil(logBaseOfX(base, x)));
return Math.round(pow(base, ceil(logBaseOfX(base, x))));
}

/**
* Computes the floor power of given <i>base</i> and <i>n</i> as doubles.
* This is the largest positive power
* of <i>base</i> that equal to or less than the given n and equal to a mathematical integer.
* Computes the floor of a given <i>n</i> given <i>base</i>, where the floor is an integral power of the base.
* This is the largest positive power of <i>base</i> that is equal to or less than the given <i>n</i>
* and equal to a mathematical integer.
* The result of this function is consistent with {@link #floorPowerOf2(int)} for values
* less than one. I.e., if <i>n &lt; 1,</i> the result is 1.
*
* @param base The base in the expression &#8970;base<sup>n</sup>&#8971;.
* <p>The formula is: <i>base<sup>floor(log<sub>base</sub>(x))</sup></i>, where <i>x</i> = max(<i>n</i>, 1).</p>
*
* @param base The number in the expression &#8970;base<sup>n</sup>&#8971;.
* @param n The input argument.
* @return the floor power of <i>base</i> as a double and equal to a mathematical integer.
*/
public static double floorPowerBaseOfDouble(final double base, final double n) {
final double x = n < 1.0 ? 1.0 : n;
return pow(base, floor(logBaseOfX(base, x)));
return Math.round(pow(base, floor(logBaseOfX(base, x))));
}

// Logarithm related

/**
* The log base 2 of the value
* The log<sub>2</sub>(value)
* @param value the given value
* @return The log base 2 of the value
* @return log<sub>2</sub>(value)
*/
public static double log2(final double value) {
return log(value) / LOG2;
}

/**
* Returns the logarithm_logBase of x. Example: logB(2.0, x) = log(x) / log(2.0).
* @param logBase the base of the logarithm used
* Returns the log<sub>base</sub>(x). For example, if base = 2.0: logBaseOfX(2.0, x) = log(x) / log(2.0).
* @param base The number in the expression log(x) / log(base).
* @param x the given value
* @return the logarithm_logBase of x: Example: logB(2.0, x) = log(x) / log(2.0).
* @return the log<sub>base</sub>(x)
*/
public static double logBaseOfX(final double logBase, final double x) {
return log(x) / log(logBase);
public static double logBaseOfX(final double base, final double x) {
return log(x) / log(base);
}

/**
Expand Down
16 changes: 0 additions & 16 deletions src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import static org.apache.datasketches.common.ByteArrayUtil.putDoubleLE;
import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE;
import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH;
import static org.apache.datasketches.quantilescommon.QuantilesUtil.equallyWeightedRanks;

import java.util.Objects;

Expand Down Expand Up @@ -175,21 +174,6 @@ public double[] getCDF(final double[] splitPoints, final QuantileSearchCriteria
return kllDoublesSV.getCDF(splitPoints, searchCrit);
}

@Override
public DoublesPartitionBoundaries getPartitionBoundaries(final int numEquallyWeighted,
final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
final double[] ranks = equallyWeightedRanks(numEquallyWeighted);
final double[] boundaries = getQuantiles(ranks, searchCrit);
boundaries[0] = getMinItem();
boundaries[boundaries.length - 1] = getMaxItem();
final DoublesPartitionBoundaries dpb = new DoublesPartitionBoundaries();
dpb.N = this.getN();
dpb.ranks = ranks;
dpb.boundaries = boundaries;
return dpb;
}

@Override
public double[] getPMF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,55 +24,17 @@
/**
* Iterator over KllDoublesSketch. The order is not defined.
*/
public final class KllDoublesSketchIterator implements QuantilesDoublesSketchIterator {
public final class KllDoublesSketchIterator extends KllSketchIterator implements QuantilesDoublesSketchIterator {
private final double[] quantiles;
private final int[] levelsArr;
private final int numLevels;
private int level;
private int index;
private long weight;
private boolean isInitialized;

KllDoublesSketchIterator(final double[] quantiles, final int[] levelsArr, final int numLevels) {
super(levelsArr, numLevels);
this.quantiles = quantiles;
this.levelsArr = levelsArr;
this.numLevels = numLevels;
this.isInitialized = false;
}

@Override
public double getQuantile() {
return quantiles[index];
}

@Override
public long getWeight() {
return weight;
}

@Override
public boolean next() {
if (!isInitialized) {
level = 0;
index = levelsArr[level];
weight = 1;
isInitialized = true;
} else {
index++;
}
if (index < levelsArr[level + 1]) {
return true;
}
// go to the next non-empty level
do {
level++;
if (level == numLevels) {
return false; // run out of levels
}
weight *= 2;
} while (levelsArr[level] == levelsArr[level + 1]);
index = levelsArr[level];
return true;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@

import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG;
import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank;

import java.util.Arrays;

import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.quantilescommon.DoublesSortedView;
import org.apache.datasketches.quantilescommon.DoublesSortedViewIterator;
import org.apache.datasketches.quantilescommon.InequalitySearch;
import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
import org.apache.datasketches.quantilescommon.QuantilesUtil;
Expand All @@ -39,32 +41,40 @@ public final class KllDoublesSketchSortedView implements DoublesSortedView {
private final double[] quantiles;
private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights
private final long totalN;
private final double maxItem;
private final double minItem;

/**
* Construct from elements for testing.
* @param quantiles sorted array of quantiles
* @param cumWeights sorted, monotonically increasing cumulative weights.
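* @param totalN the total number of items presented to the sketch.
* @param maxItem the maximum item, as returned by {@link #getMaxItem()}.
* @param minItem the minimum item, as returned by {@link #getMinItem()}.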
*/
KllDoublesSketchSortedView(final double[] quantiles, final long[] cumWeights, final long totalN) {
KllDoublesSketchSortedView(final double[] quantiles, final long[] cumWeights, final long totalN,
final double maxItem, final double minItem) {
this.quantiles = quantiles;
this.cumWeights = cumWeights;
this.totalN = totalN;
this.maxItem = maxItem;
this.minItem = minItem;
}

/**
* Constructs this Sorted View given the sketch
* @param sk the given KllDoublesSketch.
* @param sketch the given KllDoublesSketch.
*/
public KllDoublesSketchSortedView(final KllDoublesSketch sk) {
this.totalN = sk.getN();
final double[] srcQuantiles = sk.getDoubleItemsArray();
final int[] srcLevels = sk.levelsArr;
final int srcNumLevels = sk.getNumLevels();

if (!sk.isLevelZeroSorted()) {
public KllDoublesSketchSortedView(final KllDoublesSketch sketch) {
if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
this.totalN = sketch.getN();
this.maxItem = sketch.getMaxItem();
this.minItem = sketch.getMinItem();
final double[] srcQuantiles = sketch.getDoubleItemsArray();
final int[] srcLevels = sketch.levelsArr;
final int srcNumLevels = sketch.getNumLevels();

if (!sketch.isLevelZeroSorted()) {
leerho marked this conversation as resolved.
Show resolved Hide resolved
Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1]);
if (!sk.hasMemory()) { sk.setLevelZeroSorted(true); }
if (!sketch.hasMemory()) { sketch.setLevelZeroSorted(true); }
}

final int numQuantiles = srcLevels[srcNumLevels] - srcLevels[0]; //remove garbage
Expand All @@ -78,17 +88,31 @@ public long[] getCumulativeWeights() {
return cumWeights.clone();
}

// Returns the maximum item recorded when this sorted view was constructed.
@Override
public double getMaxItem() {
  return this.maxItem;
}

// Returns the minimum item recorded when this sorted view was constructed.
@Override
public double getMinItem() {
  return this.minItem;
}

// Returns the total item count (totalN) recorded when this sorted view was constructed.
@Override
public long getN() {
  return this.totalN;
}

@Override
public double getQuantile(final double rank, final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
QuantilesUtil.checkNormalizedRankBounds(rank);
final int len = cumWeights.length;
final long naturalRank = (searchCrit == INCLUSIVE)
? (long)Math.ceil(rank * totalN) : (long)Math.floor(rank * totalN);
final double naturalRank = getNaturalRank(rank, totalN, searchCrit);
final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT;
final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit);
if (index == -1) {
return quantiles[quantiles.length - 1]; //EXCLUSIVE (GT) case: normRank == 1.0;
return quantiles[len - 1]; //EXCLUSIVE (GT) case: normRank == 1.0;
}
return quantiles[index];
}
Expand Down Expand Up @@ -116,8 +140,8 @@ public boolean isEmpty() {
}

@Override
public KllDoublesSketchSortedViewIterator iterator() {
return new KllDoublesSketchSortedViewIterator(quantiles, cumWeights);
public DoublesSortedViewIterator iterator() {
return new DoublesSortedViewIterator(quantiles, cumWeights);
}

//restricted methods
Expand Down
Loading