Skip to content

Commit

Permalink
prep for 5.0.2, fix KllItemsSketch level 0 sorting issue
Browse files Browse the repository at this point in the history
  • Loading branch information
jmalkin committed Mar 14, 2024
1 parent 4a020cc commit 806bd4b
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 4 deletions.
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ under the License.

<groupId>org.apache.datasketches</groupId>
<artifactId>datasketches-java</artifactId>
<version>5.0.1</version>
<version>5.0.2</version>
<packaging>jar</packaging>

<name>${project.artifactId}</name>
Expand Down Expand Up @@ -734,7 +734,7 @@ under the License.
</pluginManagement>
</build>
</profile>

<profile>
<id>check-cpp-historical-files</id>
<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ private static <T> int[] generalItemsCompress(

// level zero might not be sorted, so we must sort it if we wish to compact it
if ((curLevel == 0) && !isLevelZeroSorted) {
Arrays.sort(inBuf, adjBeg, adjBeg + adjPop);
Arrays.sort((T[])inBuf, adjBeg, adjBeg + adjPop, comp);
}

if (popAbove == 0) { // Level above is empty, so halve up
Expand Down Expand Up @@ -486,4 +486,3 @@ private static <T> void populateItemWorkArrays(
// }

}

31 changes: 31 additions & 0 deletions src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import static java.lang.Math.ceil;
import static org.apache.datasketches.kll.KllSketch.SketchStructure.*;
import static org.apache.datasketches.kll.KllSketch.SketchType.*;
import static org.apache.datasketches.quantilescommon.LongsAsOrderableStrings.getString;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
import static org.testng.Assert.assertEquals;
Expand All @@ -31,6 +32,7 @@
import static org.testng.Assert.fail;

import java.util.Comparator;
import java.util.Random;

import org.apache.datasketches.common.ArrayOfStringsSerDe;
import org.apache.datasketches.common.SketchesArgumentException;
Expand All @@ -42,6 +44,7 @@
import org.apache.datasketches.quantilescommon.DoublesSortedView;
import org.apache.datasketches.quantilescommon.GenericSortedView;
import org.apache.datasketches.quantilescommon.GenericSortedViewIterator;
import org.apache.datasketches.quantilescommon.QuantilesGenericSketchIterator;
import org.testng.annotations.Test;

@SuppressWarnings("unused")
Expand Down Expand Up @@ -751,6 +754,34 @@ public void checkSortedViewAfterReset() {
try { sk.getSortedView(); fail(); } catch (SketchesArgumentException e) { }
}

/**
 * Regression check for sorting of level 0 while merging two items sketches that use a
 * non-natural (reverse) ordering.
 *
 * <p>There is no guarantee that L0 is sorted after a merge. However, during a merge, L0 must be
 * sorted with the sketch's comparator before it is compacted into a higher level; otherwise the
 * higher levels would not be sorted properly.</p>
 *
 * <p>Two sketches (k = 8, reverse-order comparator) are each fed 26 random, fixed-width numeric
 * strings, the second one offset by 100. After merging, consecutive items returned by the
 * iterator are parsed back to integers and asserted to be non-increasing.</p>
 *
 * <p>NOTE(review): the loop bound is the size of level 1, but the iterator is simply advanced
 * from its first item; confirm that the items visited are the ones belonging to level 1.</p>
 *
 * @throws NumberFormatException if an item returned by the iterator is not a parsable integer.
 */
@Test
public void checkL0SortDuringMerge() throws NumberFormatException {
  final int n = 26; //don't change this
  final Random rand = new Random();
  final KllItemsSketch<String> target = KllItemsSketch.newHeapInstance(8, Comparator.reverseOrder(), serDe);
  final KllItemsSketch<String> source = KllItemsSketch.newHeapInstance(8, Comparator.reverseOrder(), serDe);

  // Feed both sketches the same random sequence; the second sketch's values are shifted by 100.
  for (int count = 0; count < n; count++) {
    final int value = rand.nextInt(n) + 1;
    target.update(getString(value, 3));
    source.update(getString(value + 100, 3));
  }

  target.merge(source);
  println(target.toString(true, true)); //L1 and above should be sorted in reverse. Ignore L0.

  // Number of items held between the level-1 and level-2 offsets of the levels array.
  final int lvl1size = target.levelsArr[2] - target.levelsArr[1];

  final QuantilesGenericSketchIterator<String> itr = target.iterator();
  itr.next();
  int previous = Integer.parseInt(itr.getQuantile().trim());

  // The first item was consumed above, so at most (lvl1size - 1) further items are compared.
  for (int remaining = lvl1size - 1; remaining > 0; remaining--) {
    if (!itr.next()) {
      continue; // nothing to compare on this step; keep stepping, as no early exit is intended
    }
    final int current = Integer.parseInt(itr.getQuantile().trim());
    assertTrue(current <= previous);
    previous = current;
  }
}

private final static boolean enablePrinting = false;

Expand Down

0 comments on commit 806bd4b

Please sign in to comment.