Skip to content

Commit

Permalink
Merge pull request #475 from apache/Fixes_for_getPartitionBoundaries
Browse files Browse the repository at this point in the history
Fixes for get partition boundaries
  • Loading branch information
leerho authored Nov 30, 2023
2 parents 007f35b + 51aef3c commit e06ae7d
Show file tree
Hide file tree
Showing 75 changed files with 2,489 additions and 1,341 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,5 +154,5 @@ In Eclipse, open the project *Properties / Java Build Path / Module Dependencies

#### SpotBugs

* Make sure you configure SpotBugs with the /tools/FindBugsExcludeFilter.xml file. Otherwise, you will get a lot of false positive or low risk issues that we have examined and exliminated with this exclusion file.
* Make sure you configure SpotBugs with the /tools/FindBugsExcludeFilter.xml file. Otherwise, you may get a lot of false positive or low risk issues that we have examined and eliminated with this exclusion file.

7 changes: 7 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,13 @@ under the License.
<version>${testng.version}</version>
<scope>test</scope>
</dependency>
<!--
<dependency>
<groupId>org.apache.datasketches</groupId>
<artifactId>datasketches-java-common</artifactId>
<version>1.0.0</version>
</dependency>
-->
</dependencies>

<build>
Expand Down
81 changes: 33 additions & 48 deletions src/main/java/org/apache/datasketches/common/Util.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import static java.lang.Math.log;
import static java.lang.Math.pow;
import static java.lang.Math.round;
import static java.util.Arrays.fill;

import java.util.Comparator;

Expand Down Expand Up @@ -217,7 +218,7 @@ public static String nanoSecToString(final long nS) {

/**
* Returns the given time in milliseconds formatted as Hours:Min:Sec.mSec
* @param mS the given nanoseconds
* @param mS the given milliseconds
* @return the given time in milliseconds formatted as Hours:Min:Sec.mSec
*/
public static String milliSecToString(final long mS) {
Expand All @@ -244,40 +245,20 @@ public static String zeroPad(final String s, final int fieldLength) {

/**
* Prepend or postpend the given string with the given character to fill the given field length.
* If the given string is equal or greater than the given field length, it will be returned
* without modification.
* If the given string is equal to or greater than the given field length, it will be returned without modification.
* @param s the given string
* @param fieldLength the desired field length
* @param padChar the desired pad character
* @param postpend if true append the pacCharacters to the end of the string.
* @return prepended or postpended given string with the given character to fill the given field
* length.
* @return prepended or postpended given string with the given character to fill the given field length.
*/
public static String characterPad(final String s, final int fieldLength, final char padChar,
final boolean postpend) {
final char[] chArr = s.toCharArray();
final int sLen = chArr.length;
public static String characterPad(final String s, final int fieldLength, final char padChar, final boolean postpend) {
final int sLen = s.length();
if (sLen < fieldLength) {
final char[] out = new char[fieldLength];
final int blanks = fieldLength - sLen;

if (postpend) {
for (int i = 0; i < sLen; i++) {
out[i] = chArr[i];
}
for (int i = sLen; i < fieldLength; i++) {
out[i] = padChar;
}
} else { //prepend
for (int i = 0; i < blanks; i++) {
out[i] = padChar;
}
for (int i = blanks; i < fieldLength; i++) {
out[i] = chArr[i - blanks];
}
}

return String.valueOf(out);
final char[] cArr = new char[fieldLength - sLen];
fill(cArr, padChar);
final String addstr = String.valueOf(cArr);
return (postpend) ? s.concat(addstr) : addstr.concat(s);
}
return s;
}
Expand Down Expand Up @@ -376,8 +357,8 @@ public static int ceilingIntPowerOf2(final int n) {
}

/**
* Computes the long ceiling power of 2 within the range [1, 2^30]. This is the smallest positive power
* of 2 that is equal to or greater than the given n and a mathematical integer.
* Computes the long ceiling power of 2 within the range [1, 2^62]. This is the smallest positive power
* of 2 that is equal to or greater than the given n and a mathematical long.
*
* <p>For:
* <ul>
Expand Down Expand Up @@ -550,56 +531,60 @@ public static double powerSeriesNextDouble(final int ppb, final double curPoint,
}

/**
* Computes the ceiling power of given <i>base</i> and <i>n</i> as doubles.
* This is the smallest positive power
* of <i>base</i> that equal to or greater than the given <i>n</i> and equal to a mathematical integer.
* Returns the ceiling of a given <i>n</i> given a <i>base</i>, where the ceiling is an integral power of the base.
* This is the smallest positive power of <i>base</i> that is equal to or greater than the given <i>n</i>
* and equal to a mathematical integer.
* The result of this function is consistent with {@link #ceilingIntPowerOf2(int)} for values
* less than one. I.e., if <i>n &lt; 1,</i> the result is 1.
*
* @param base The base in the expression &#8968;base<sup>n</sup>&#8969;.
* <p>The formula is: <i>base<sup>ceiling(log<sub>base</sub>(x))</sup></i></p>
*
* @param base The number in the expression &#8968;base<sup>n</sup>&#8969;.
* @param n The input argument.
* @return the ceiling power of <i>base</i> as a double and equal to a mathematical integer.
*/
public static double ceilingPowerBaseOfDouble(final double base, final double n) {
final double x = n < 1.0 ? 1.0 : n;
return pow(base, ceil(logBaseOfX(base, x)));
return Math.round(pow(base, ceil(logBaseOfX(base, x))));
}

/**
* Computes the floor power of given <i>base</i> and <i>n</i> as doubles.
* This is the largest positive power
* of <i>base</i> that equal to or less than the given n and equal to a mathematical integer.
* Computes the floor of a given <i>n</i> given <i>base</i>, where the floor is an integral power of the base.
* This is the largest positive power of <i>base</i> that is equal to or less than the given <i>n</i>
* and equal to a mathematical integer.
* The result of this function is consistent with {@link #floorPowerOf2(int)} for values
* less than one. I.e., if <i>n &lt; 1,</i> the result is 1.
*
* @param base The base in the expression &#8970;base<sup>n</sup>&#8971;.
* <p>The formula is: <i>base<sup>floor(log<sub>base</sub>(x))</sup></i></p>
*
* @param base The number in the expression &#8970;base<sup>n</sup>&#8971;.
* @param n The input argument.
* @return the floor power of 2 and equal to a mathematical integer.
*/
public static double floorPowerBaseOfDouble(final double base, final double n) {
final double x = n < 1.0 ? 1.0 : n;
return pow(base, floor(logBaseOfX(base, x)));
return Math.round(pow(base, floor(logBaseOfX(base, x))));
}

// Logarithm related

/**
* The log base 2 of the value
* The log<sub>2</sub>(value)
* @param value the given value
* @return The log base 2 of the value
* @return log<sub>2</sub>(value)
*/
public static double log2(final double value) {
return log(value) / LOG2;
}

/**
* Returns the logarithm_logBase of x. Example: logB(2.0, x) = log(x) / log(2.0).
* @param logBase the base of the logarithm used
* Returns the log<sub>base</sub>(x). Example, if base = 2.0: logB(2.0, x) = log(x) / log(2.0).
* @param base The number in the expression log(x) / log(base).
* @param x the given value
* @return the logarithm_logBase of x: Example: logB(2.0, x) = log(x) / log(2.0).
* @return the log<sub>base</sub>(x)
*/
public static double logBaseOfX(final double logBase, final double x) {
return log(x) / log(logBase);
public static double logBaseOfX(final double base, final double x) {
return log(x) / log(base);
}

/**
Expand Down
16 changes: 0 additions & 16 deletions src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import static org.apache.datasketches.common.ByteArrayUtil.putDoubleLE;
import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE;
import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH;
import static org.apache.datasketches.quantilescommon.QuantilesUtil.equallyWeightedRanks;

import java.util.Objects;

Expand Down Expand Up @@ -175,21 +174,6 @@ public double[] getCDF(final double[] splitPoints, final QuantileSearchCriteria
return kllDoublesSV.getCDF(splitPoints, searchCrit);
}

@Override
public DoublesPartitionBoundaries getPartitionBoundaries(final int numEquallyWeighted,
final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
final double[] ranks = equallyWeightedRanks(numEquallyWeighted);
final double[] boundaries = getQuantiles(ranks, searchCrit);
boundaries[0] = getMinItem();
boundaries[boundaries.length - 1] = getMaxItem();
final DoublesPartitionBoundaries dpb = new DoublesPartitionBoundaries();
dpb.N = this.getN();
dpb.ranks = ranks;
dpb.boundaries = boundaries;
return dpb;
}

@Override
public double[] getPMF(final double[] splitPoints, final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,55 +24,17 @@
/**
* Iterator over KllDoublesSketch. The order is not defined.
*/
public final class KllDoublesSketchIterator implements QuantilesDoublesSketchIterator {
public final class KllDoublesSketchIterator extends KllSketchIterator implements QuantilesDoublesSketchIterator {
private final double[] quantiles;
private final int[] levelsArr;
private final int numLevels;
private int level;
private int index;
private long weight;
private boolean isInitialized;

KllDoublesSketchIterator(final double[] quantiles, final int[] levelsArr, final int numLevels) {
super(levelsArr, numLevels);
this.quantiles = quantiles;
this.levelsArr = levelsArr;
this.numLevels = numLevels;
this.isInitialized = false;
}

@Override
public double getQuantile() {
return quantiles[index];
}

@Override
public long getWeight() {
return weight;
}

@Override
public boolean next() {
if (!isInitialized) {
level = 0;
index = levelsArr[level];
weight = 1;
isInitialized = true;
} else {
index++;
}
if (index < levelsArr[level + 1]) {
return true;
}
// go to the next non-empty level
do {
level++;
if (level == numLevels) {
return false; // run out of levels
}
weight *= 2;
} while (levelsArr[level] == levelsArr[level + 1]);
index = levelsArr[level];
return true;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@

import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG;
import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank;

import java.util.Arrays;

import org.apache.datasketches.common.SketchesArgumentException;
import org.apache.datasketches.quantilescommon.DoublesSortedView;
import org.apache.datasketches.quantilescommon.DoublesSortedViewIterator;
import org.apache.datasketches.quantilescommon.InequalitySearch;
import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
import org.apache.datasketches.quantilescommon.QuantilesUtil;
Expand All @@ -39,32 +41,40 @@ public final class KllDoublesSketchSortedView implements DoublesSortedView {
private final double[] quantiles;
private final long[] cumWeights; //comes in as individual weights, converted to cumulative natural weights
private final long totalN;
private final double maxItem;
private final double minItem;

/**
* Construct from elements for testing.
* @param quantiles sorted array of quantiles
* @param cumWeights sorted, monotonically increasing cumulative weights.
* @param totalN the total number of items presented to the sketch.
*/
KllDoublesSketchSortedView(final double[] quantiles, final long[] cumWeights, final long totalN) {
KllDoublesSketchSortedView(final double[] quantiles, final long[] cumWeights, final long totalN,
final double maxItem, final double minItem) {
this.quantiles = quantiles;
this.cumWeights = cumWeights;
this.totalN = totalN;
this.maxItem = maxItem;
this.minItem = minItem;
}

/**
* Constructs this Sorted View given the sketch
* @param sk the given KllDoublesSketch.
* @param sketch the given KllDoublesSketch.
*/
public KllDoublesSketchSortedView(final KllDoublesSketch sk) {
this.totalN = sk.getN();
final double[] srcQuantiles = sk.getDoubleItemsArray();
final int[] srcLevels = sk.levelsArr;
final int srcNumLevels = sk.getNumLevels();

if (!sk.isLevelZeroSorted()) {
public KllDoublesSketchSortedView(final KllDoublesSketch sketch) {
if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
this.totalN = sketch.getN();
this.maxItem = sketch.getMaxItem();
this.minItem = sketch.getMinItem();
final double[] srcQuantiles = sketch.getDoubleItemsArray();
final int[] srcLevels = sketch.levelsArr;
final int srcNumLevels = sketch.getNumLevels();

if (!sketch.isLevelZeroSorted()) {
Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1]);
if (!sk.hasMemory()) { sk.setLevelZeroSorted(true); }
if (!sketch.hasMemory()) { sketch.setLevelZeroSorted(true); }
}

final int numQuantiles = srcLevels[srcNumLevels] - srcLevels[0]; //remove garbage
Expand All @@ -78,17 +88,31 @@ public long[] getCumulativeWeights() {
return cumWeights.clone();
}

@Override
public double getMaxItem() {
return maxItem;
}

@Override
public double getMinItem() {
return minItem;
}

@Override
public long getN() {
return totalN;
}

@Override
public double getQuantile(final double rank, final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
QuantilesUtil.checkNormalizedRankBounds(rank);
final int len = cumWeights.length;
final long naturalRank = (searchCrit == INCLUSIVE)
? (long)Math.ceil(rank * totalN) : (long)Math.floor(rank * totalN);
final double naturalRank = getNaturalRank(rank, totalN, searchCrit);
final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT;
final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit);
if (index == -1) {
return quantiles[quantiles.length - 1]; //EXCLUSIVE (GT) case: normRank == 1.0;
return quantiles[len - 1]; //EXCLUSIVE (GT) case: normRank == 1.0;
}
return quantiles[index];
}
Expand Down Expand Up @@ -116,8 +140,8 @@ public boolean isEmpty() {
}

@Override
public KllDoublesSketchSortedViewIterator iterator() {
return new KllDoublesSketchSortedViewIterator(quantiles, cumWeights);
public DoublesSortedViewIterator iterator() {
return new DoublesSortedViewIterator(quantiles, cumWeights);
}

//restricted methods
Expand Down
Loading

0 comments on commit e06ae7d

Please sign in to comment.