From ef5b703e6de4b3564877622bfba5a97414cdeb83 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Thu, 15 Feb 2024 16:42:48 -0800 Subject: [PATCH 1/7] All except 2 of the fixes here were security related fixes to harden our classes against "finalizer attacks". See https://wiki.sei.cmu.edu/confluence/display/java/OBJ11-J.+Be+wary+of+letting+constructors+throw+exceptions --- src/main/java/org/apache/datasketches/fdt/FdtSketch.java | 2 +- .../java/org/apache/datasketches/hll/CouponHashSet.java | 2 +- src/main/java/org/apache/datasketches/hll/CouponList.java | 5 +++++ .../java/org/apache/datasketches/hll/DirectAuxHashMap.java | 2 +- .../org/apache/datasketches/hll/DirectCouponHashSet.java | 2 +- .../java/org/apache/datasketches/hll/DirectCouponList.java | 5 +++++ .../java/org/apache/datasketches/hll/DirectHllArray.java | 5 +++++ .../java/org/apache/datasketches/hll/HeapAuxHashMap.java | 2 +- .../java/org/apache/datasketches/hllmap/UniqueCountMap.java | 2 +- .../apache/datasketches/kll/KllItemsSketchSortedView.java | 5 +++++ .../datasketches/quantiles/DirectDoublesSketchAccessor.java | 2 +- .../datasketches/quantiles/DoublesSketchAccessor.java | 5 +++++ .../datasketches/quantiles/ItemsSketchSortedView.java | 5 +++++ .../quantilescommon/GenericPartitionBoundaries.java | 5 +++++ .../apache/datasketches/theta/DirectQuickSelectSketch.java | 5 +++++ .../java/org/apache/datasketches/tuple/CompactSketch.java | 5 +++++ .../org/apache/datasketches/tuple/QuickSelectSketch.java | 5 +++++ .../DirectArrayOfDoublesQuickSelectSketch.java | 5 +++++ .../datasketches/tuple/strings/ArrayOfStringsSummary.java | 2 +- tools/FindBugsExcludeFilter.xml | 6 ++++++ 20 files changed, 69 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/apache/datasketches/fdt/FdtSketch.java b/src/main/java/org/apache/datasketches/fdt/FdtSketch.java index 8664e559a..e4cac32a1 100644 --- a/src/main/java/org/apache/datasketches/fdt/FdtSketch.java +++ b/src/main/java/org/apache/datasketches/fdt/FdtSketch.java @@ -46,7 +46,7 @@ * * @author Lee Rhodes */ -public class FdtSketch extends ArrayOfStringsSketch { +public final class FdtSketch extends ArrayOfStringsSketch { /** * Create new instance of Frequent Distinct Tuples sketch with the given diff --git a/src/main/java/org/apache/datasketches/hll/CouponHashSet.java b/src/main/java/org/apache/datasketches/hll/CouponHashSet.java index b2e966687..8d0dec9a7 100644 --- a/src/main/java/org/apache/datasketches/hll/CouponHashSet.java +++ b/src/main/java/org/apache/datasketches/hll/CouponHashSet.java @@ -42,7 +42,7 @@ * @author Lee Rhodes * @author Kevin Lang */ -class CouponHashSet extends CouponList { +final class CouponHashSet extends CouponList { /** * Constructs this sketch with the intent of loading it with data diff --git a/src/main/java/org/apache/datasketches/hll/CouponList.java b/src/main/java/org/apache/datasketches/hll/CouponList.java index 96f8e9e2f..45f426569 100644 --- a/src/main/java/org/apache/datasketches/hll/CouponList.java +++ b/src/main/java/org/apache/datasketches/hll/CouponList.java @@ -41,6 +41,11 @@ class CouponList extends AbstractCoupons { int couponCount; int[] couponIntArr; + @Override + protected final void finalize() { + // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + } + /** * New instance constructor for LIST or SET. * @param lgConfigK the configured Lg K diff --git a/src/main/java/org/apache/datasketches/hll/DirectAuxHashMap.java b/src/main/java/org/apache/datasketches/hll/DirectAuxHashMap.java index fe04ec069..98884f5ea 100644 --- a/src/main/java/org/apache/datasketches/hll/DirectAuxHashMap.java +++ b/src/main/java/org/apache/datasketches/hll/DirectAuxHashMap.java @@ -36,7 +36,7 @@ /** * @author Lee Rhodes */ -class DirectAuxHashMap implements AuxHashMap { +final class DirectAuxHashMap implements AuxHashMap { private final DirectHllArray host; //hosts the WritableMemory and read-only Memory private final boolean readOnly; diff --git a/src/main/java/org/apache/datasketches/hll/DirectCouponHashSet.java b/src/main/java/org/apache/datasketches/hll/DirectCouponHashSet.java index e797113cd..52ddf60ba 100644 --- a/src/main/java/org/apache/datasketches/hll/DirectCouponHashSet.java +++ b/src/main/java/org/apache/datasketches/hll/DirectCouponHashSet.java @@ -42,7 +42,7 @@ /** * @author Lee Rhodes */ -class DirectCouponHashSet extends DirectCouponList { +final class DirectCouponHashSet extends DirectCouponList { //Constructs this sketch with data. DirectCouponHashSet(final int lgConfigK, final TgtHllType tgtHllType, diff --git a/src/main/java/org/apache/datasketches/hll/DirectCouponList.java b/src/main/java/org/apache/datasketches/hll/DirectCouponList.java index 008aac1d0..e8b6ad2ee 100644 --- a/src/main/java/org/apache/datasketches/hll/DirectCouponList.java +++ b/src/main/java/org/apache/datasketches/hll/DirectCouponList.java @@ -61,6 +61,11 @@ class DirectCouponList extends AbstractCoupons { Memory mem; final boolean compact; + @Override + protected final void finalize() { + // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + } + //called from newInstance, writableWrap and DirectCouponHashSet DirectCouponList(final int lgConfigK, final TgtHllType tgtHllType, final CurMode curMode, final WritableMemory wmem) { diff --git a/src/main/java/org/apache/datasketches/hll/DirectHllArray.java b/src/main/java/org/apache/datasketches/hll/DirectHllArray.java index 8c87ca4b7..7d4270c15 100644 --- a/src/main/java/org/apache/datasketches/hll/DirectHllArray.java +++ b/src/main/java/org/apache/datasketches/hll/DirectHllArray.java @@ -59,6 +59,11 @@ abstract class DirectHllArray extends AbstractHllArray { long memAdd; final boolean compact; + @Override + protected final void finalize() { + // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + } + //Memory must be already initialized and may have data DirectHllArray(final int lgConfigK, final TgtHllType tgtHllType, final WritableMemory wmem) { super(lgConfigK, tgtHllType, CurMode.HLL); diff --git a/src/main/java/org/apache/datasketches/hll/HeapAuxHashMap.java b/src/main/java/org/apache/datasketches/hll/HeapAuxHashMap.java index ae75d3818..374dbd624 100644 --- a/src/main/java/org/apache/datasketches/hll/HeapAuxHashMap.java +++ b/src/main/java/org/apache/datasketches/hll/HeapAuxHashMap.java @@ -33,7 +33,7 @@ * @author Lee Rhodes * @author Kevin Lang */ -class HeapAuxHashMap implements AuxHashMap { +final class HeapAuxHashMap implements AuxHashMap { private final int lgConfigK; //required for #slot bits private int lgAuxArrInts; private int auxCount; diff --git a/src/main/java/org/apache/datasketches/hllmap/UniqueCountMap.java b/src/main/java/org/apache/datasketches/hllmap/UniqueCountMap.java index f8051d2bc..9a480d940 100644 --- a/src/main/java/org/apache/datasketches/hllmap/UniqueCountMap.java +++ b/src/main/java/org/apache/datasketches/hllmap/UniqueCountMap.java @@ -80,7 +80,7 @@ * @author Alexander Saydakov * @author Kevin Lang */ -public class UniqueCountMap { +public final class UniqueCountMap { private static final String LS = System.getProperty("line.separator"); private static final int NUM_LEVELS = 10; // total of single coupon + traverse + coupon maps + hll private static final int NUM_TRAVERSE_MAPS = 3; diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java index fffb5d704..783ce9bcd 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java @@ -56,6 +56,11 @@ public class KllItemsSketchSortedView implements GenericSortedView, Partit private final T minItem; private final Class clazz; + @Override + protected final void finalize() { + // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + } + /** * Construct from elements for testing only. * @param quantiles sorted array of quantiles diff --git a/src/main/java/org/apache/datasketches/quantiles/DirectDoublesSketchAccessor.java b/src/main/java/org/apache/datasketches/quantiles/DirectDoublesSketchAccessor.java index 49b823354..172ad14b4 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DirectDoublesSketchAccessor.java +++ b/src/main/java/org/apache/datasketches/quantiles/DirectDoublesSketchAccessor.java @@ -26,7 +26,7 @@ /** * @author Jon Malkin */ -class DirectDoublesSketchAccessor extends DoublesSketchAccessor { +final class DirectDoublesSketchAccessor extends DoublesSketchAccessor { DirectDoublesSketchAccessor(final DoublesSketch ds, final boolean forceSize, final int level) { diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketchAccessor.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketchAccessor.java index 7e70c927b..54f83f188 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketchAccessor.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesSketchAccessor.java @@ -39,6 +39,11 @@ abstract class DoublesSketchAccessor extends DoublesBufferAccessor { int numItems_; int offset_; + @Override + protected final void finalize() { + // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + } + DoublesSketchAccessor(final DoublesSketch ds, final boolean forceSize, final int level) { ds_ = ds; forceSize_ = forceSize; diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java index 9638a9a9e..827d9d086 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java @@ -56,6 +56,11 @@ public class ItemsSketchSortedView implements GenericSortedView, Partition private final T minItem; private final Class clazz; + @Override + protected final void finalize() { + // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + } + /** * Construct from elements for testing. * @param quantiles sorted array of quantiles diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java index 5c0098a5e..9cba9fbd6 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java @@ -39,6 +39,11 @@ public class GenericPartitionBoundaries implements PartitionBoundaries { private long[] numDeltaItems; //num of items in each part private int numPartitions; //num of partitions + @Override + protected final void finalize() { + // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + } + public GenericPartitionBoundaries( final long totalN, final T[] boundaries, diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java index ef363d248..27174b522 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java @@ -81,6 +81,11 @@ private DirectQuickSelectSketch( super(seed, wmem); } + @Override + protected final void finalize() { + // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + } + /** * Construct a new sketch instance and initialize the given Memory as its backing store. * diff --git a/src/main/java/org/apache/datasketches/tuple/CompactSketch.java b/src/main/java/org/apache/datasketches/tuple/CompactSketch.java index b7e6935ab..c77a28004 100644 --- a/src/main/java/org/apache/datasketches/tuple/CompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/CompactSketch.java @@ -51,6 +51,11 @@ private enum FlagsLegacy { IS_BIG_ENDIAN, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUD private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED } + @Override + protected final void finalize() { + // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + } + /** * Create a CompactSketch from correct components * @param hashArr compacted hash array diff --git a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java index 640547bae..68d2c44b1 100644 --- a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java @@ -57,6 +57,11 @@ private enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES, private long[] hashTable_; S[] summaryTable_; + @Override + protected final void finalize() { + // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + } + /** * This is to create an instance of a QuickSelectSketch with default resize factor. * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java index 93cd8ebde..8c08b3a2f 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java @@ -45,6 +45,11 @@ class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSke private int keysOffset_; private int valuesOffset_; + @Override + protected final void finalize() { + // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + } + /** * Construct a new sketch using the given Memory as its backing store. * diff --git a/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java b/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java index b0baab011..69ee83e8e 100644 --- a/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java +++ b/src/main/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSummary.java @@ -33,7 +33,7 @@ /** * @author Lee Rhodes */ -public class ArrayOfStringsSummary implements UpdatableSummary { +public final class ArrayOfStringsSummary implements UpdatableSummary { private String[] nodesArr = null; diff --git a/tools/FindBugsExcludeFilter.xml b/tools/FindBugsExcludeFilter.xml index 62cf08f0b..32d176d7f 100644 --- a/tools/FindBugsExcludeFilter.xml +++ b/tools/FindBugsExcludeFilter.xml @@ -79,4 +79,10 @@ under the License. + + + + + + From 765fdf22fb0685b5e73aeb0ee08c4b1c6d0a4538 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 16 Feb 2024 12:20:33 -0800 Subject: [PATCH 2/7] Just want to see if this fixes the CodeQL issue with /tuple/QuickSelectSketch --- .github/workflows/codeql-analysis.yml | 6 +++--- .../org/apache/datasketches/tuple/QuickSelectSketch.java | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 9faa687e4..7cb4c8976 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -32,7 +32,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -46,7 +46,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v2 + uses: github/codeql-action/autobuild@v3 # ℹī¸ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -59,6 +59,6 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@v3 with: category: "/language:${{matrix.language}}" diff --git a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java index 68d2c44b1..2cc0d989e 100644 --- a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java @@ -58,7 +58,8 @@ private enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES, S[] summaryTable_; @Override - protected final void finalize() { + protected final void finalize() throws Throwable { + super.finalize(); // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J } From 6ae6956c4741b30cc02f524d2c8b2f098af3b5b0 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Fri, 23 Feb 2024 10:23:08 -0800 Subject: [PATCH 3/7] Changes required to remove vulnerabilities to Finalize Attacks and satisfy SpotBugs warnings. --- .../apache/datasketches/hll/CouponList.java | 9 +- .../datasketches/hll/DirectCouponList.java | 19 +- .../datasketches/hll/DirectHllArray.java | 9 +- .../datasketches/kll/KllItemsSketch.java | 152 +++++++++- .../kll/KllItemsSketchSortedView.java | 137 --------- .../quantiles/DoublesSketchAccessor.java | 24 +- .../quantiles/HeapDoublesSketchAccessor.java | 2 +- .../datasketches/quantiles/ItemsSketch.java | 91 +++++- .../quantiles/ItemsSketchSortedView.java | 203 +++++++------ .../GenericPartitionBoundaries.java | 7 +- .../theta/DirectQuickSelectSketch.java | 51 +++- .../datasketches/tuple/CompactSketch.java | 20 +- .../datasketches/tuple/QuickSelectSketch.java | 271 +++++++++++------- .../org/apache/datasketches/tuple/Sketch.java | 8 +- .../datasketches/tuple/UpdatableSketch.java | 4 +- .../ArrayOfDoublesQuickSelectSketch.java | 4 +- ...DirectArrayOfDoublesQuickSelectSketch.java | 78 +++-- .../strings/ArrayOfStringsSketchTest.java | 4 +- tools/FindBugsExcludeFilter.xml | 21 +- 19 files changed, 668 insertions(+), 446 deletions(-) diff --git a/src/main/java/org/apache/datasketches/hll/CouponList.java b/src/main/java/org/apache/datasketches/hll/CouponList.java index 45f426569..eb1f0481e 100644 --- a/src/main/java/org/apache/datasketches/hll/CouponList.java +++ b/src/main/java/org/apache/datasketches/hll/CouponList.java @@ -41,9 +41,9 @@ class CouponList extends AbstractCoupons { int couponCount; int[] couponIntArr; - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + private static int checkLgConfigK(final CurMode curMode, final int lgConfigK) { + if (curMode == CurMode.SET) { assert lgConfigK > 7; } + return lgConfigK; } /** @@ -53,12 +53,11 @@ protected final void finalize() { * @param curMode LIST or SET */ CouponList(final int lgConfigK, final TgtHllType tgtHllType, final CurMode curMode) { - super(lgConfigK, tgtHllType, curMode); + super(checkLgConfigK(curMode, lgConfigK), tgtHllType, curMode); if (curMode == CurMode.LIST) { lgCouponArrInts = LG_INIT_LIST_SIZE; } else { //SET lgCouponArrInts = LG_INIT_SET_SIZE; - assert lgConfigK > 7; } couponIntArr = new int[1 << lgCouponArrInts]; couponCount = 0; diff --git a/src/main/java/org/apache/datasketches/hll/DirectCouponList.java b/src/main/java/org/apache/datasketches/hll/DirectCouponList.java index e8b6ad2ee..daf1da41c 100644 --- a/src/main/java/org/apache/datasketches/hll/DirectCouponList.java +++ b/src/main/java/org/apache/datasketches/hll/DirectCouponList.java @@ -61,24 +61,21 @@ class DirectCouponList extends AbstractCoupons { Memory mem; final boolean compact; - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + private static int checkMemCompactFlag(final WritableMemory wmem, final int lgConfigK) { + assert !extractCompactFlag(wmem); + return lgConfigK; } - //called from newInstance, writableWrap and DirectCouponHashSet - DirectCouponList(final int lgConfigK, final TgtHllType tgtHllType, final CurMode curMode, - final WritableMemory wmem) { - super(lgConfigK, tgtHllType, curMode); + //called from newInstance, writableWrap and DirectCouponHashSet, must be compact + DirectCouponList(final int lgConfigK, final TgtHllType tgtHllType, final CurMode curMode, final WritableMemory wmem) { + super(checkMemCompactFlag(wmem, lgConfigK), tgtHllType, curMode); this.wmem = wmem; mem = wmem; compact = extractCompactFlag(wmem); - assert !compact; } - //called from HllSketch.wrap and from DirectCouponHashSet constructor, may be compact - DirectCouponList(final int lgConfigK, final TgtHllType tgtHllType, final CurMode curMode, - final Memory mem) { + //called from HllSketch.wrap and from DirectCouponHashSet constructor, may or may not be compact + DirectCouponList(final int lgConfigK, final TgtHllType tgtHllType, final CurMode curMode, final Memory mem) { super(lgConfigK, tgtHllType, curMode); wmem = null; this.mem = mem; diff --git a/src/main/java/org/apache/datasketches/hll/DirectHllArray.java b/src/main/java/org/apache/datasketches/hll/DirectHllArray.java index 7d4270c15..5b3e1a4ff 100644 --- a/src/main/java/org/apache/datasketches/hll/DirectHllArray.java +++ b/src/main/java/org/apache/datasketches/hll/DirectHllArray.java @@ -59,20 +59,19 @@ abstract class DirectHllArray extends AbstractHllArray { long memAdd; final boolean compact; - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + private static int checkMemCompactFlag(final WritableMemory wmem, final int lgConfigK) { + assert !extractCompactFlag(wmem); + return lgConfigK; } //Memory must be already initialized and may have data DirectHllArray(final int lgConfigK, final TgtHllType tgtHllType, final WritableMemory wmem) { - super(lgConfigK, tgtHllType, CurMode.HLL); + super(checkMemCompactFlag(wmem, lgConfigK), tgtHllType, CurMode.HLL); this.wmem = wmem; mem = wmem; memObj = wmem.getArray(); memAdd = wmem.getCumulativeOffset(0L); compact = extractCompactFlag(mem); - assert !compact; insertEmptyFlag(wmem, false); } diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java index 589c1fa30..6f3fbafc4 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketch.java @@ -25,18 +25,19 @@ import static org.apache.datasketches.kll.KllSketch.SketchType.ITEMS_SKETCH; import java.lang.reflect.Array; +import java.util.Arrays; import java.util.Comparator; import java.util.Objects; import org.apache.datasketches.common.ArrayOfItemsSerDe; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.Util; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; import org.apache.datasketches.quantilescommon.PartitioningFeature; import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; -import org.apache.datasketches.quantilescommon.QuantilesAPI; import org.apache.datasketches.quantilescommon.QuantilesGenericAPI; import org.apache.datasketches.quantilescommon.QuantilesGenericSketchIterator; @@ -154,7 +155,7 @@ public double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria searc @Override public GenericPartitionBoundaries getPartitionBoundaries(final int numEquallySized, final QuantileSearchCriteria searchCrit) { - if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); } + if (isEmpty()) { throw new IllegalArgumentException(EMPTY_MSG); } refreshSortedView(); return kllItemsSV.getPartitionBoundaries(numEquallySized, searchCrit); } @@ -307,13 +308,6 @@ MemoryRequestServer getMemoryRequestServer() { @Override abstract int getMinMaxSizeBytes(); - private final KllItemsSketchSortedView refreshSortedView() { - final KllItemsSketchSortedView sv = (kllItemsSV == null) - ? kllItemsSV = new KllItemsSketchSortedView<>(this) - : kllItemsSV; - return sv; - } - abstract T[] getRetainedItemsArray(); @Override @@ -374,4 +368,144 @@ void setWritableMemory(final WritableMemory wmem) { throw new SketchesArgumentException(UNSUPPORTED_MSG + "Sketch not writable."); } + void updateMinMax(final T item) { + if (isEmpty()) { + setMinItem(item); + setMaxItem(item); + } else { + setMinItem(Util.minT(getMinItem(), item, comparator)); + setMaxItem(Util.maxT(getMaxItem(), item, comparator)); + } + } + + private final KllItemsSketchSortedView refreshSortedView() { + if (kllItemsSV == null) { + final CreateSortedView csv = new CreateSortedView(); + kllItemsSV = csv.getSV(); + } + return kllItemsSV; + } + + private final class CreateSortedView { + T[] quantiles; + long[] cumWeights; + + KllItemsSketchSortedView getSV() { + if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + if (getN() == 0) { throw new SketchesArgumentException(EMPTY_MSG); } + final T[] srcQuantiles = getTotalItemsArray(); + final int[] srcLevels = levelsArr; + final int srcNumLevels = getNumLevels(); + + if (!isLevelZeroSorted()) { + Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1], comparator); + if (!hasMemory()) { setLevelZeroSorted(true); } + } + final int numQuantiles = srcLevels[srcNumLevels] - srcLevels[0]; //remove free space + quantiles = (T[]) Array.newInstance(serDe.getClassOfT(), numQuantiles); + cumWeights = new long[numQuantiles]; + populateFromSketch(srcQuantiles, srcLevels, srcNumLevels, numQuantiles); + return new KllItemsSketchSortedView<>(quantiles, cumWeights, getN(), comparator, getMaxItem(), getMinItem()); + } + + private void populateFromSketch(final Object[] srcQuantiles, final int[] srcLevels, + final int srcNumLevels, final int numItems) { + final int[] myLevels = new int[srcNumLevels + 1]; + final int offset = srcLevels[0]; + System.arraycopy(srcQuantiles, offset, quantiles, 0, numItems); + int srcLevel = 0; + int dstLevel = 0; + long weight = 1; + while (srcLevel < srcNumLevels) { + final int fromIndex = srcLevels[srcLevel] - offset; + final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive + if (fromIndex < toIndex) { // if equal, skip empty level + Arrays.fill(cumWeights, fromIndex, toIndex, weight); + myLevels[dstLevel] = fromIndex; + myLevels[dstLevel + 1] = toIndex; + dstLevel++; + } + srcLevel++; + weight *= 2; + } + final int numLevels = dstLevel; + blockyTandemMergeSort(quantiles, cumWeights, myLevels, numLevels, comparator); //create unit weights + KllHelper.convertToCumulative(cumWeights); + } + } + + private static void blockyTandemMergeSort(final Object[] quantiles, final long[] weights, + final int[] levels, final int numLevels, final Comparator comp) { + if (numLevels == 1) { return; } + + // duplicate the input in preparation for the "ping-pong" copy reduction strategy. + final Object[] quantilesTmp = Arrays.copyOf(quantiles, quantiles.length); + final long[] weightsTmp = Arrays.copyOf(weights, quantiles.length); // don't need the extra one here + + blockyTandemMergeSortRecursion(quantilesTmp, weightsTmp, quantiles, weights, levels, 0, numLevels, comp); + } + + private static void blockyTandemMergeSortRecursion( + final Object[] quantilesSrc, final long[] weightsSrc, + final Object[] quantilesDst, final long[] weightsDst, + final int[] levels, final int startingLevel, final int numLevels, final Comparator comp) { + if (numLevels == 1) { return; } + final int numLevels1 = numLevels / 2; + final int numLevels2 = numLevels - numLevels1; + assert numLevels1 >= 1; + assert numLevels2 >= numLevels1; + final int startingLevel1 = startingLevel; + final int startingLevel2 = startingLevel + numLevels1; + // swap roles of src and dst + blockyTandemMergeSortRecursion( + quantilesDst, weightsDst, + quantilesSrc, weightsSrc, + levels, startingLevel1, numLevels1, comp); + blockyTandemMergeSortRecursion( + quantilesDst, weightsDst, + quantilesSrc, weightsSrc, + levels, startingLevel2, numLevels2, comp); + tandemMerge( + quantilesSrc, weightsSrc, + quantilesDst, weightsDst, + levels, + startingLevel1, numLevels1, + startingLevel2, numLevels2, comp); + } + + private static void tandemMerge( + final Object[] quantilesSrc, final long[] weightsSrc, + final Object[] quantilesDst, final long[] weightsDst, + final int[] levelStarts, + final int startingLevel1, final int numLevels1, + final int startingLevel2, final int numLevels2, final Comparator comp) { + final int fromIndex1 = levelStarts[startingLevel1]; + final int toIndex1 = levelStarts[startingLevel1 + numLevels1]; // exclusive + final int fromIndex2 = levelStarts[startingLevel2]; + final int toIndex2 = levelStarts[startingLevel2 + numLevels2]; // exclusive + int iSrc1 = fromIndex1; + int iSrc2 = fromIndex2; + int iDst = fromIndex1; + + while (iSrc1 < toIndex1 && iSrc2 < toIndex2) { + if (Util.lt((T) quantilesSrc[iSrc1], (T) quantilesSrc[iSrc2], comp)) { + quantilesDst[iDst] = quantilesSrc[iSrc1]; + weightsDst[iDst] = weightsSrc[iSrc1]; + iSrc1++; + } else { + quantilesDst[iDst] = quantilesSrc[iSrc2]; + weightsDst[iDst] = weightsSrc[iSrc2]; + iSrc2++; + } + iDst++; + } + if (iSrc1 < toIndex1) { + System.arraycopy(quantilesSrc, iSrc1, quantilesDst, iDst, toIndex1 - iSrc1); + System.arraycopy(weightsSrc, iSrc1, weightsDst, iDst, toIndex1 - iSrc1); + } else if (iSrc2 < toIndex2) { + System.arraycopy(quantilesSrc, iSrc2, quantilesDst, iDst, toIndex2 - iSrc2); + System.arraycopy(weightsSrc, iSrc2, weightsDst, iDst, toIndex2 - iSrc2); + } + } + } diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java index 783ce9bcd..553b93af4 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java @@ -26,11 +26,9 @@ import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank; import java.lang.reflect.Array; -import java.util.Arrays; import java.util.Comparator; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.Util; import org.apache.datasketches.quantilescommon.GenericInequalitySearch.Inequality; import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; import org.apache.datasketches.quantilescommon.GenericSortedView; @@ -56,11 +54,6 @@ public class KllItemsSketchSortedView implements GenericSortedView, Partit private final T minItem; private final Class clazz; - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J - } - /** * Construct from elements for testing only. * @param quantiles sorted array of quantiles @@ -86,34 +79,6 @@ protected final void finalize() { this.clazz = (Class)quantiles[0].getClass(); } - /** - * Constructs this Sorted View given the sketch - * @param sketch the given KllItemsSketch. - */ - @SuppressWarnings("unchecked") - KllItemsSketchSortedView(final KllItemsSketch sketch) { - if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - this.totalN = sketch.getN(); - final T[] srcQuantiles = sketch.getTotalItemsArray(); - final int[] srcLevels = sketch.levelsArr; - final int srcNumLevels = sketch.getNumLevels(); - this.comparator = sketch.comparator; - this.maxItem = sketch.getMaxItem(); - this.minItem = sketch.getMinItem(); - this.clazz = (Class)sketch.serDe.getClassOfT(); - - if (totalN == 0) { throw new SketchesArgumentException(EMPTY_MSG); } - if (!sketch.isLevelZeroSorted()) { - Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1], comparator); - if (!sketch.hasMemory()) { sketch.setLevelZeroSorted(true); } - } - - final int numQuantiles = srcLevels[srcNumLevels] - srcLevels[0]; //remove garbage - quantiles = (T[]) Array.newInstance(sketch.serDe.getClassOfT(), numQuantiles); - cumWeights = new long[numQuantiles]; - populateFromSketch(srcQuantiles, srcLevels, srcNumLevels, numQuantiles); - } - //end of constructors @Override @@ -253,106 +218,4 @@ public GenericSortedViewIterator iterator() { return new GenericSortedViewIterator<>(quantiles, cumWeights); } - //restricted methods - - private void populateFromSketch(final Object[] srcQuantiles, final int[] srcLevels, - final int srcNumLevels, final int numItems) { - final int[] myLevels = new int[srcNumLevels + 1]; - final int offset = srcLevels[0]; - System.arraycopy(srcQuantiles, offset, quantiles, 0, numItems); - int srcLevel = 0; - int dstLevel = 0; - long weight = 1; - while (srcLevel < srcNumLevels) { - final int fromIndex = srcLevels[srcLevel] - offset; - final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive - if (fromIndex < toIndex) { // if equal, skip empty level - Arrays.fill(cumWeights, fromIndex, toIndex, weight); - myLevels[dstLevel] = fromIndex; - myLevels[dstLevel + 1] = toIndex; - dstLevel++; - } - srcLevel++; - weight *= 2; - } - final int numLevels = dstLevel; - blockyTandemMergeSort(quantiles, cumWeights, myLevels, numLevels, comparator); //create unit weights - KllHelper.convertToCumulative(cumWeights); - } - - private static void blockyTandemMergeSort(final Object[] quantiles, final long[] weights, - final int[] levels, final int numLevels, final Comparator comp) { - if (numLevels == 1) { return; } - - // duplicate the input in preparation for the "ping-pong" copy reduction strategy. - final Object[] quantilesTmp = Arrays.copyOf(quantiles, quantiles.length); - final long[] weightsTmp = Arrays.copyOf(weights, quantiles.length); // don't need the extra one here - - blockyTandemMergeSortRecursion(quantilesTmp, weightsTmp, quantiles, weights, levels, 0, numLevels, comp); - } - - private static void blockyTandemMergeSortRecursion( - final Object[] quantilesSrc, final long[] weightsSrc, - final Object[] quantilesDst, final long[] weightsDst, - final int[] levels, final int startingLevel, final int numLevels, final Comparator comp) { - if (numLevels == 1) { return; } - final int numLevels1 = numLevels / 2; - final int numLevels2 = numLevels - numLevels1; - assert numLevels1 >= 1; - assert numLevels2 >= numLevels1; - final int startingLevel1 = startingLevel; - final int startingLevel2 = startingLevel + numLevels1; - // swap roles of src and dst - blockyTandemMergeSortRecursion( - quantilesDst, weightsDst, - quantilesSrc, weightsSrc, - levels, startingLevel1, numLevels1, comp); - blockyTandemMergeSortRecursion( - quantilesDst, weightsDst, - quantilesSrc, weightsSrc, - levels, startingLevel2, numLevels2, comp); - tandemMerge( - quantilesSrc, weightsSrc, - quantilesDst, weightsDst, - levels, - startingLevel1, numLevels1, - startingLevel2, numLevels2, comp); - } - - @SuppressWarnings("unchecked") - private static void tandemMerge( - final Object[] quantilesSrc, final long[] weightsSrc, - final Object[] quantilesDst, final long[] weightsDst, - final int[] levelStarts, - final int startingLevel1, final int numLevels1, - final int startingLevel2, final int numLevels2, final Comparator comp) { - final int fromIndex1 = levelStarts[startingLevel1]; - final int toIndex1 = levelStarts[startingLevel1 + numLevels1]; // exclusive - final int fromIndex2 = levelStarts[startingLevel2]; - final int toIndex2 = levelStarts[startingLevel2 + numLevels2]; // exclusive - int iSrc1 = fromIndex1; - int iSrc2 = fromIndex2; - int iDst = fromIndex1; - - while (iSrc1 < toIndex1 && iSrc2 < toIndex2) { - if (Util.lt((T) quantilesSrc[iSrc1], (T) quantilesSrc[iSrc2], comp)) { - quantilesDst[iDst] = quantilesSrc[iSrc1]; - weightsDst[iDst] = weightsSrc[iSrc1]; - iSrc1++; - } else { - quantilesDst[iDst] = quantilesSrc[iSrc2]; - weightsDst[iDst] = weightsSrc[iSrc2]; - iSrc2++; - } - iDst++; - } - if (iSrc1 < toIndex1) { - System.arraycopy(quantilesSrc, iSrc1, quantilesDst, iDst, toIndex1 - iSrc1); - System.arraycopy(weightsSrc, iSrc1, weightsDst, iDst, toIndex1 - iSrc1); - } else if (iSrc2 < toIndex2) { - System.arraycopy(quantilesSrc, iSrc2, quantilesDst, iDst, toIndex2 - iSrc2); - System.arraycopy(weightsSrc, iSrc2, weightsDst, iDst, toIndex2 - iSrc2); - } - } - } diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketchAccessor.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketchAccessor.java index 54f83f188..1c5913b75 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketchAccessor.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesSketchAccessor.java @@ -22,6 +22,7 @@ import static org.apache.datasketches.quantiles.PreambleUtil.COMBINED_BUFFER; import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; /** * This allows access to package-private levels and data in whatever quantiles sketch you give @@ -39,18 +40,30 @@ abstract class DoublesSketchAccessor extends DoublesBufferAccessor { int numItems_; int offset_; - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J + DoublesSketchAccessor( + final DoublesSketch ds, + final boolean forceSize, + final int level) { + this(checkLvl(level), ds, forceSize, level); } - DoublesSketchAccessor(final DoublesSketch ds, final boolean forceSize, final int level) { + private DoublesSketchAccessor( + final boolean secure, + final DoublesSketch ds, + final boolean forceSize, + final int level) { ds_ = ds; forceSize_ = forceSize; - setLevel(level); } + private static final boolean checkLvl(final int level) { + if (level != BB_LVL_IDX && level < 0) { + throw new SketchesArgumentException("Parameter level is < 0."); + } + return true; + } + static DoublesSketchAccessor wrap(final DoublesSketch ds) { return wrap(ds, false); } @@ -71,7 +84,6 @@ DoublesSketchAccessor setLevel(final int lvl) { numItems_ = (forceSize_ ? ds_.getK() * 2 : ds_.getBaseBufferCount()); offset_ = (ds_.hasMemory() ? COMBINED_BUFFER : 0); } else { - assert lvl >= 0; if (((ds_.getBitPattern() & (1L << lvl)) > 0) || forceSize_) { numItems_ = ds_.getK(); } else { diff --git a/src/main/java/org/apache/datasketches/quantiles/HeapDoublesSketchAccessor.java b/src/main/java/org/apache/datasketches/quantiles/HeapDoublesSketchAccessor.java index 10fb3f717..87383ae1f 100644 --- a/src/main/java/org/apache/datasketches/quantiles/HeapDoublesSketchAccessor.java +++ b/src/main/java/org/apache/datasketches/quantiles/HeapDoublesSketchAccessor.java @@ -24,7 +24,7 @@ /** * @author Jon Malkin */ -class HeapDoublesSketchAccessor extends DoublesSketchAccessor { +final class HeapDoublesSketchAccessor extends DoublesSketchAccessor { HeapDoublesSketchAccessor(final DoublesSketch ds, final boolean forceSize, final int level) { diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java index 6b247347a..8a6b7831f 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java @@ -36,7 +36,9 @@ import static org.apache.datasketches.quantiles.PreambleUtil.extractN; import static org.apache.datasketches.quantiles.PreambleUtil.extractPreLongs; import static org.apache.datasketches.quantiles.PreambleUtil.extractSerVer; +//import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; +import java.lang.reflect.Array; import java.util.Arrays; import java.util.Comparator; import java.util.Objects; @@ -44,6 +46,7 @@ import org.apache.datasketches.common.ArrayOfItemsSerDe; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; @@ -545,13 +548,6 @@ public void update(final T item) { // Restricted - private final ItemsSketchSortedView refreshSortedView() { - final ItemsSketchSortedView sv = (classicQisSV == null) - ? classicQisSV = new ItemsSketchSortedView<>(this) - : classicQisSV; - return sv; - } - /** * Returns the base buffer count * @return the base buffer count @@ -626,4 +622,85 @@ private static void growBaseBuffer(final ItemsSketch sketch) { sketch.combinedBuffer_ = Arrays.copyOf(baseBuffer, newSize); } + private final ItemsSketchSortedView refreshSortedView() { + if (classicQisSV == null) { + final CreateSortedView csv = new CreateSortedView(); + classicQisSV = csv.getSV(); + } + return classicQisSV; + } + + @SuppressWarnings({"rawtypes", "unchecked"}) + private final class CreateSortedView { + final long n = getN(); + final int numQuantiles = getNumRetained(); + T[] quantiles = (T[]) Array.newInstance(clazz, numQuantiles); + long[] cumWeights = new long[numQuantiles]; + final int k = getK(); + + final T[] combinedBuffer = (T[]) getCombinedBuffer(); + final int baseBufferCount = getBaseBufferCount(); + final Comparator comparator = ItemsSketch.this.comparator_; + + ItemsSketchSortedView getSV() { + long weight = 1; + int index = 0; + long bits = getBitPattern(); + assert bits == (n / (2L * k)); // internal consistency check + for (int lvl = 0; bits != 0L; lvl++, bits >>>= 1) { + weight *= 2; + if ((bits & 1L) > 0L) { + final int offset = (2 + lvl) * k; + for (int i = 0; i < k; i++) { + quantiles[index] = combinedBuffer[i + offset]; + cumWeights[index] = weight; + index++; + } + } + } + + weight = 1; //NOT a mistake! We just copied the highest level; now we need to copy the base buffer + final int startOfBaseBufferBlock = index; + + // Copy BaseBuffer over, along with weight = 1 + for (int i = 0; i < baseBufferCount; i++) { + quantiles[index] = combinedBuffer[i]; + cumWeights[index] = weight; + index++; + } + assert index == numQuantiles; + + // Must sort the items that came from the base buffer. + // Don't need to sort the corresponding weights because they are all the same. + Arrays.sort(quantiles, startOfBaseBufferBlock, numQuantiles, comparator); + + // Sort the first "numSamples" slots of the two arrays in tandem, + // taking advantage of the already sorted blocks of length k + ItemsMergeImpl.blockyTandemMergeSort(quantiles, cumWeights, numQuantiles, k, comparator); + + if (convertToCumulative(cumWeights) != n) { + throw new SketchesStateException("Sorted View is misconfigured. TotalN does not match cumWeights."); + } + + return new ItemsSketchSortedView( + quantiles, cumWeights, getN(), comparator, getMaxItem(), getMinItem(), getK()); + } + + } + + /** + * Convert the individual weights into cumulative weights. + * An array of {1,1,1,1} becomes {1,2,3,4} + * @param array of actual weights from the sketch, none of the weights may be zero + * @return total weight + */ + private static long convertToCumulative(final long[] array) { + long subtotal = 0; + for (int i = 0; i < array.length; i++) { + final long newSubtotal = subtotal + array[i]; + subtotal = array[i] = newSubtotal; + } + return subtotal; + } + } diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java index 827d9d086..e9117b93f 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java @@ -26,11 +26,11 @@ import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank; import java.lang.reflect.Array; -import java.util.Arrays; +//import java.util.Arrays; import java.util.Comparator; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.common.SketchesStateException; +//import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.quantilescommon.GenericInequalitySearch.Inequality; import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; import org.apache.datasketches.quantilescommon.GenericSortedView; @@ -56,13 +56,8 @@ public class ItemsSketchSortedView implements GenericSortedView, Partition private final T minItem; private final Class clazz; - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J - } - /** - * Construct from elements for testing. + * Construct from elements, also used in testing. * @param quantiles sorted array of quantiles * @param cumWeights sorted, monotonically increasing cumulative weights. * @param totalN the total number of items presented to the sketch. @@ -85,39 +80,39 @@ protected final void finalize() { this.clazz = (Class)quantiles[0].getClass(); } - /** - * Constructs this Sorted View given the sketch - * @param sketch the given Classic Quantiles ItemsSketch - */ - @SuppressWarnings("unchecked") - ItemsSketchSortedView(final ItemsSketch sketch) { - if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } - this.totalN = sketch.getN(); - final int k = sketch.getK(); - final int numQuantiles = sketch.getNumRetained(); - this.quantiles = (T[]) Array.newInstance(sketch.clazz, numQuantiles); - this.minItem = sketch.minItem_; - this.maxItem = sketch.maxItem_; - cumWeights = new long[numQuantiles]; - comparator = sketch.getComparator(); - clazz = sketch.clazz; - - final Object[] combinedBuffer = sketch.getCombinedBuffer(); - final int baseBufferCount = sketch.getBaseBufferCount(); - - // Populate from ItemsSketch: - // copy over the "levels" and then the base buffer, all with appropriate weights - populateFromItemsSketch(k, totalN, sketch.getBitPattern(), (T[]) combinedBuffer, baseBufferCount, - numQuantiles, quantiles, cumWeights, sketch.getComparator()); - - // Sort the first "numSamples" slots of the two arrays in tandem, - // taking advantage of the already sorted blocks of length k - ItemsMergeImpl.blockyTandemMergeSort(quantiles, cumWeights, numQuantiles, k, sketch.getComparator()); - - if (convertToCumulative(cumWeights) != totalN) { - throw new SketchesStateException("Sorted View is misconfigured. TotalN does not match cumWeights."); - } - } +// /** +// * Constructs this Sorted View given the sketch +// * @param sketch the given Classic Quantiles ItemsSketch +// */ +// @SuppressWarnings("unchecked") +// ItemsSketchSortedView(final ItemsSketch sketch) { +// if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } +// this.totalN = sketch.getN(); +// final int numQuantiles = sketch.getNumRetained(); +// this.quantiles = (T[]) Array.newInstance(sketch.clazz, numQuantiles); +// this.minItem = sketch.minItem_; +// this.maxItem = sketch.maxItem_; +// this.cumWeights = new long[numQuantiles]; +// this.comparator = sketch.getComparator(); +// this.clazz = sketch.clazz; +// this.k = sketch.getK(); +// +// final Object[] combinedBuffer = sketch.getCombinedBuffer(); +// final int baseBufferCount = sketch.getBaseBufferCount(); +// +// // Populate from ItemsSketch: +// // copy over the "levels" and then the base buffer, all with appropriate weights +// populateFromItemsSketch(k, totalN, sketch.getBitPattern(), (T[]) combinedBuffer, baseBufferCount, +// numQuantiles, quantiles, cumWeights, sketch.getComparator()); +// +// // Sort the first "numSamples" slots of the two arrays in tandem, +// // taking advantage of the already sorted blocks of length k +// ItemsMergeImpl.blockyTandemMergeSort(quantiles, cumWeights, numQuantiles, k, sketch.getComparator()); +// +// if (convertToCumulative(cumWeights) != totalN) { +// throw new SketchesStateException("Sorted View is misconfigured. TotalN does not match cumWeights."); +// } +// } //end of constructors @@ -256,68 +251,68 @@ public GenericSortedViewIterator iterator() { //restricted methods - /** - * Populate the arrays and registers from an ItemsSketch - * @param the data type - * @param k K parameter of sketch - * @param n The current size of the stream - * @param bitPattern the bit pattern for valid log levels - * @param combinedBuffer the combined buffer reference - * @param baseBufferCount the count of the base buffer - * @param numQuantiles number of retained quantiles in the sketch - * @param quantilesArr the consolidated array of all quantiles from the sketch - * @param weightsArr the weights for each item from the sketch - * @param comparator the given comparator for data type T - */ - private final static void populateFromItemsSketch( - final int k, final long n, final long bitPattern, final T[] combinedBuffer, - final int baseBufferCount, final int numQuantiles, final T[] quantilesArr, final long[] weightsArr, - final Comparator comparator) { - long weight = 1; - int nxt = 0; - long bits = bitPattern; - assert bits == (n / (2L * k)); // internal consistency check - for (int lvl = 0; bits != 0L; lvl++, bits >>>= 1) { - weight *= 2; - if ((bits & 1L) > 0L) { - final int offset = (2 + lvl) * k; - for (int i = 0; i < k; i++) { - quantilesArr[nxt] = combinedBuffer[i + offset]; - weightsArr[nxt] = weight; - nxt++; - } - } - } - - weight = 1; //NOT a mistake! We just copied the highest level; now we need to copy the base buffer - final int startOfBaseBufferBlock = nxt; - - // Copy BaseBuffer over, along with weight = 1 - for (int i = 0; i < baseBufferCount; i++) { - quantilesArr[nxt] = combinedBuffer[i]; - weightsArr[nxt] = weight; - nxt++; - } - assert nxt == numQuantiles; - - // Must sort the items that came from the base buffer. - // Don't need to sort the corresponding weights because they are all the same. - Arrays.sort(quantilesArr, startOfBaseBufferBlock, numQuantiles, comparator); - } - - /** - * Convert the individual weights into cumulative weights. - * An array of {1,1,1,1} becomes {1,2,3,4} - * @param array of actual weights from the sketch, none of the weights may be zero - * @return total weight - */ - private static long convertToCumulative(final long[] array) { - long subtotal = 0; - for (int i = 0; i < array.length; i++) { - final long newSubtotal = subtotal + array[i]; - subtotal = array[i] = newSubtotal; - } - return subtotal; - } +// /** +// * Populate the arrays and registers from an ItemsSketch +// * @param the data type +// * @param k K parameter of sketch +// * @param n The current size of the stream +// * @param bitPattern the bit pattern for valid log levels +// * @param combinedBuffer the combined buffer reference +// * @param baseBufferCount the count of the base buffer +// * @param numQuantiles number of retained quantiles in the sketch +// * @param quantilesArr the consolidated array of all quantiles from the sketch +// * @param weightsArr the weights for each item from the sketch +// * @param comparator the given comparator for data type T +// */ +// private final static void populateFromItemsSketch( +// final int k, final long n, final long bitPattern, final T[] combinedBuffer, +// final int baseBufferCount, final int numQuantiles, final T[] quantilesArr, final long[] weightsArr, +// final Comparator comparator) { +// long weight = 1; +// int nxt = 0; +// long bits = bitPattern; +// assert bits == (n / (2L * k)); // internal consistency check +// for (int lvl = 0; bits != 0L; lvl++, bits >>>= 1) { +// weight *= 2; +// if ((bits & 1L) > 0L) { +// final int offset = (2 + lvl) * k; +// for (int i = 0; i < k; i++) { +// quantilesArr[nxt] = combinedBuffer[i + offset]; +// weightsArr[nxt] = weight; +// nxt++; +// } +// } +// } +// +// weight = 1; //NOT a mistake! We just copied the highest level; now we need to copy the base buffer +// final int startOfBaseBufferBlock = nxt; +// +// // Copy BaseBuffer over, along with weight = 1 +// for (int i = 0; i < baseBufferCount; i++) { +// quantilesArr[nxt] = combinedBuffer[i]; +// weightsArr[nxt] = weight; +// nxt++; +// } +// assert nxt == numQuantiles; +// +// // Must sort the items that came from the base buffer. +// // Don't need to sort the corresponding weights because they are all the same. +// Arrays.sort(quantilesArr, startOfBaseBufferBlock, numQuantiles, comparator); +// } +// +// /** +// * Convert the individual weights into cumulative weights. +// * An array of {1,1,1,1} becomes {1,2,3,4} +// * @param array of actual weights from the sketch, none of the weights may be zero +// * @return total weight +// */ +// private static long convertToCumulative(final long[] array) { +// long subtotal = 0; +// for (int i = 0; i < array.length; i++) { +// final long newSubtotal = subtotal + array[i]; +// subtotal = array[i] = newSubtotal; +// } +// return subtotal; +// } } diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java index 9cba9fbd6..c89a4aa25 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericPartitionBoundaries.java @@ -27,7 +27,7 @@ /** * Implements PartitionBoundaries */ -public class GenericPartitionBoundaries implements PartitionBoundaries { +final public class GenericPartitionBoundaries implements PartitionBoundaries { private long totalN; //totalN of source sketch private T[] boundaries; //quantiles at the boundaries private long[] natRanks; //natural ranks at the boundaries @@ -39,11 +39,6 @@ public class GenericPartitionBoundaries implements PartitionBoundaries { private long[] numDeltaItems; //num of items in each part private int numPartitions; //num of partitions - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J - } - public GenericPartitionBoundaries( final long totalN, final T[] boundaries, diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java index 27174b522..33efe064e 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java @@ -56,6 +56,7 @@ import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.MemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; @@ -81,11 +82,6 @@ private DirectQuickSelectSketch( super(seed, wmem); } - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J - } - /** * Construct a new sketch instance and initialize the given Memory as its backing store. * @@ -111,8 +107,29 @@ protected final void finalize() { final MemoryRequestServer memReqSvr, final WritableMemory dstMem, final boolean unionGadget) { - super(seed, dstMem); + this( + checkMemSize(lgNomLongs, rf, dstMem, unionGadget), + //SpotBugs CT_CONSTRUCTOR_THROW is false positive. + //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J + lgNomLongs, + seed, + p, + rf, + memReqSvr, + dstMem, + unionGadget); + } + private DirectQuickSelectSketch( + final boolean secure, + final int lgNomLongs, + final long seed, + final float p, + final ResizeFactor rf, + final MemoryRequestServer memReqSvr, + final WritableMemory dstMem, + final boolean unionGadget) { + super(seed, dstMem); //Choose family, preambleLongs final Family family; final int preambleLongs; @@ -128,14 +145,6 @@ protected final void finalize() { //Choose RF, minReqBytes, lgArrLongs. final int lgRF = rf.lg(); final int lgArrLongs = (lgRF == 0) ? lgNomLongs + 1 : ThetaUtil.MIN_LG_ARR_LONGS; - final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs); - - //Make sure Memory is large enough - final long curMemCapBytes = dstMem.getCapacity(); - if (curMemCapBytes < minReqBytes) { - throw new SketchesArgumentException( - "Memory capacity is too small: " + curMemCapBytes + " < " + minReqBytes); - } //@formatter:off //Build preamble @@ -164,6 +173,20 @@ protected final void finalize() { memReqSvr_ = memReqSvr; } + private static final boolean checkMemSize( + final int lgNomLongs, final ResizeFactor rf, final Memory dstMem, final boolean unionGadget) { + final int preambleLongs = (unionGadget) ? Family.UNION.getMinPreLongs() : Family.QUICKSELECT.getMinPreLongs(); + final int lgRF = rf.lg(); + final int lgArrLongs = (lgRF == 0) ? lgNomLongs + 1 : ThetaUtil.MIN_LG_ARR_LONGS; + final int minReqBytes = getMemBytes(lgArrLongs, preambleLongs); + final long curMemCapBytes = dstMem.getCapacity(); + if (curMemCapBytes < minReqBytes) { + throw new SketchesArgumentException( + "Memory capacity is too small: " + curMemCapBytes + " < " + minReqBytes); + } + return true; + } + /** * Wrap a sketch around the given source Memory containing sketch data that originated from * this sketch. diff --git a/src/main/java/org/apache/datasketches/tuple/CompactSketch.java b/src/main/java/org/apache/datasketches/tuple/CompactSketch.java index c77a28004..ec58a5e76 100644 --- a/src/main/java/org/apache/datasketches/tuple/CompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/CompactSketch.java @@ -39,23 +39,18 @@ * * @param type of Summary */ -public class CompactSketch extends Sketch { +public final class CompactSketch extends Sketch { private static final byte serialVersionWithSummaryClassNameUID = 1; private static final byte serialVersionUIDLegacy = 2; private static final byte serialVersionUID = 3; private static final short defaultSeedHash = (short) 37836; // for compatibility with C++ - private long[] hashArr_; + private final long[] hashArr_; private S[] summaryArr_; private enum FlagsLegacy { IS_BIG_ENDIAN, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED } private enum Flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED } - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J - } - /** * Create a CompactSketch from correct components * @param hashArr compacted hash array @@ -64,10 +59,11 @@ protected final void finalize() { * @param empty empty flag */ CompactSketch(final long[] hashArr, final S[] summaryArr, final long thetaLong, final boolean empty) { + super(thetaLong, empty, null); + super.thetaLong_ = thetaLong; + super.empty_ = empty; hashArr_ = hashArr; summaryArr_ = summaryArr; - thetaLong_ = thetaLong; - empty_ = empty; } /** @@ -77,6 +73,7 @@ protected final void finalize() { * @param deserializer the SummaryDeserializer */ CompactSketch(final Memory mem, final SummaryDeserializer deserializer) { + super(Long.MAX_VALUE, true, null); int offset = 0; final byte preambleLongs = mem.getByte(offset++); final byte version = mem.getByte(offset++); @@ -114,6 +111,7 @@ protected final void finalize() { offset += classNameLength; } hashArr_ = new long[count]; + for (int i = 0; i < count; i++) { hashArr_[i] = mem.getLong(offset); offset += Long.BYTES; @@ -121,6 +119,9 @@ protected final void finalize() { for (int i = 0; i < count; i++) { offset += readSummary(mem, offset, i, count, deserializer); } + } else { + hashArr_ = new long[0]; + summaryArr_ = null; } } else { // current serial format offset++; //skip unused byte @@ -143,6 +144,7 @@ protected final void finalize() { } } hashArr_ = new long[count]; + for (int i = 0; i < count; i++) { hashArr_[i] = mem.getLong(offset); offset += Long.BYTES; diff --git a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java index 2cc0d989e..59e27a429 100644 --- a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java @@ -47,24 +47,18 @@ class QuickSelectSketch extends Sketch { private enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED } - static final int DEFAULT_LG_RESIZE_FACTOR = ResizeFactor.X8.lg(); + private static final int DEFAULT_LG_RESIZE_FACTOR = ResizeFactor.X8.lg(); private final int nomEntries_; - private int lgCurrentCapacity_; private final int lgResizeFactor_; - private int count_; private final float samplingProbability_; + private int lgCurrentCapacity_; + private int retEntries_; private int rebuildThreshold_; private long[] hashTable_; S[] summaryTable_; - @Override - protected final void finalize() throws Throwable { - super.finalize(); - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J - } - /** - * This is to create an instance of a QuickSelectSketch with default resize factor. + * This is to create a new instance of a QuickSelectSketch with default resize factor. * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than * given value. * @param summaryFactory An instance of a SummaryFactory. @@ -76,7 +70,7 @@ protected final void finalize() throws Throwable { } /** - * This is to create an instance of a QuickSelectSketch with custom resize factor + * This is to create a new instance of a QuickSelectSketch with custom resize factor * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than * given value. * @param lgResizeFactor log2(resizeFactor) - value from 0 to 3: @@ -96,7 +90,7 @@ protected final void finalize() throws Throwable { } /** - * This is to create an instance of a QuickSelectSketch with custom resize factor and sampling + * This is to create a new instance of a QuickSelectSketch with custom resize factor and sampling * probability * @param nomEntries Nominal number of entries. Forced to the nearest power of 2 greater than * or equal to the given value. @@ -124,38 +118,50 @@ protected final void finalize() throws Throwable { ); } - QuickSelectSketch( + /** + * Target constructor for above constructors for a new instance. + * @param nomEntries Nominal number of entries. + * @param lgResizeFactor log2(resizeFactor) + * @param samplingProbability the given sampling probability + * @param summaryFactory An instance of a SummaryFactory. + * @param startingSize starting size of the sketch. + */ + private QuickSelectSketch( final int nomEntries, final int lgResizeFactor, final float samplingProbability, final SummaryFactory summaryFactory, final int startingSize) { + super( + (long) (Long.MAX_VALUE * (double) samplingProbability), + true, + summaryFactory); nomEntries_ = ceilingIntPowerOf2(nomEntries); lgResizeFactor_ = lgResizeFactor; samplingProbability_ = samplingProbability; - summaryFactory_ = summaryFactory; //super - thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability); //super lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingSize); - hashTable_ = new long[startingSize]; + retEntries_ = 0; + hashTable_ = new long[startingSize]; //must be before setRebuildThreshold + rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_); summaryTable_ = null; // wait for the first summary to call Array.newInstance() - setRebuildThreshold(); } /** * Copy constructor - * @param sketch the QuickSelectSketch to be copied. + * @param sketch the QuickSelectSketch to be deep copied. */ QuickSelectSketch(final QuickSelectSketch sketch) { + super( + sketch.thetaLong_, + sketch.empty_, + sketch.summaryFactory_); nomEntries_ = sketch.nomEntries_; - lgCurrentCapacity_ = sketch.lgCurrentCapacity_; lgResizeFactor_ = sketch.lgResizeFactor_; - count_ = sketch.count_; samplingProbability_ = sketch.samplingProbability_; - rebuildThreshold_ = sketch.rebuildThreshold_; - thetaLong_ = sketch.thetaLong_; - empty_ = sketch.empty_; - summaryFactory_ = sketch.summaryFactory_; + lgCurrentCapacity_ = sketch.lgCurrentCapacity_; + retEntries_ = sketch.retEntries_; hashTable_ = sketch.hashTable_.clone(); + rebuildThreshold_ = sketch.rebuildThreshold_; summaryTable_ = Util.copySummaryArray(sketch.summaryTable_); } @@ -164,75 +170,128 @@ protected final void finalize() throws Throwable { * @param mem Memory object with serialized QuickSelectSketch * @param deserializer the SummaryDeserializer * @param summaryFactory the SummaryFactory - * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. * This capability will be removed in a future release. * Heapifying a CompactSketch is not deprecated. */ - @Deprecated QuickSelectSketch( final Memory mem, final SummaryDeserializer deserializer, final SummaryFactory summaryFactory) { - Objects.requireNonNull(mem, "SourceMemory must not be null."); - Objects.requireNonNull(deserializer, "Deserializer must not be null."); - checkBounds(0, 8, mem.getCapacity()); - summaryFactory_ = summaryFactory; - int offset = 0; - final byte preambleLongs = mem.getByte(offset++); //byte 0 PreLongs - final byte version = mem.getByte(offset++); //byte 1 SerVer - final byte familyId = mem.getByte(offset++); //byte 2 FamID - SerializerDeserializer.validateFamily(familyId, preambleLongs); - if (version > serialVersionUID) { - throw new SketchesArgumentException( - "Unsupported serial version. Expected: " + serialVersionUID + " or lower, actual: " - + version); - } - SerializerDeserializer.validateType(mem.getByte(offset++), //byte 3 - SerializerDeserializer.SketchType.QuickSelectSketch); - final byte flags = mem.getByte(offset++); //byte 4 - final boolean isBigEndian = (flags & 1 << Flags.IS_BIG_ENDIAN.ordinal()) > 0; - if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { - throw new SketchesArgumentException("Endian byte order mismatch"); - } - nomEntries_ = 1 << mem.getByte(offset++); //byte 5 - lgCurrentCapacity_ = mem.getByte(offset++); //byte 6 - lgResizeFactor_ = mem.getByte(offset++); //byte 7 - - checkBounds(0, preambleLongs * 8L, mem.getCapacity()); - final boolean isInSamplingMode = (flags & 1 << Flags.IS_IN_SAMPLING_MODE.ordinal()) > 0; - samplingProbability_ = isInSamplingMode ? mem.getFloat(offset) : 1f; //bytes 8 - 11 - if (isInSamplingMode) { - offset += Float.BYTES; - } + this(new Validate<>(), mem, deserializer, summaryFactory); + } - final boolean isThetaIncluded = (flags & 1 << Flags.IS_THETA_INCLUDED.ordinal()) > 0; - if (isThetaIncluded) { - thetaLong_ = mem.getLong(offset); - offset += Long.BYTES; - } else { - thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability_); - } + /* + * This private constructor is used to protect against "Finalizer attacks". + * The private static inner class Validate performs validation and deserialization + * from the input Memory and may throw exceptions. In order to protect against the attack, we must + * perform this validation prior to the constructor's super reaches the Object class. + * Making QuickSelectSketch final won't work here because UpdatableSketch is a subclass. + * Using an empty final finalizer() is not recommended and is deprecated as of Java9. + */ + private QuickSelectSketch( + final Validate val, + final Memory mem, + final SummaryDeserializer deserializer, + final SummaryFactory summaryFactory) { + super(val.validate(mem, deserializer), val.myEmpty, summaryFactory); + nomEntries_ = val.myNomEntries; + lgResizeFactor_ = val.myLgResizeFactor; + samplingProbability_ = val.mySamplingProbability; + lgCurrentCapacity_ = val.myLgCurrentCapacity; + retEntries_ = val.myRetEntries; + rebuildThreshold_ = val.myRebuildThreshold; + hashTable_ = val.myHashTable; + summaryTable_ = val.mySummaryTable; + } + + private static final class Validate { + //super fields + long myThetaLong; + boolean myEmpty; + //this fields + int myNomEntries; + int myLgResizeFactor; + float mySamplingProbability; + int myLgCurrentCapacity; + int myRetEntries; + int myRebuildThreshold; + long[] myHashTable; + S[] mySummaryTable; + + @SuppressWarnings("unchecked") + long validate( + final Memory mem, + final SummaryDeserializer deserializer) { + Objects.requireNonNull(mem, "SourceMemory must not be null."); + Objects.requireNonNull(deserializer, "Deserializer must not be null."); + checkBounds(0, 8, mem.getCapacity()); + + int offset = 0; + final byte preambleLongs = mem.getByte(offset++); //byte 0 PreLongs + final byte version = mem.getByte(offset++); //byte 1 SerVer + final byte familyId = mem.getByte(offset++); //byte 2 FamID + SerializerDeserializer.validateFamily(familyId, preambleLongs); + if (version > serialVersionUID) { + throw new SketchesArgumentException( + "Unsupported serial version. Expected: " + serialVersionUID + " or lower, actual: " + + version); + } + SerializerDeserializer.validateType(mem.getByte(offset++), //byte 3 + SerializerDeserializer.SketchType.QuickSelectSketch); + final byte flags = mem.getByte(offset++); //byte 4 + final boolean isBigEndian = (flags & 1 << Flags.IS_BIG_ENDIAN.ordinal()) > 0; + if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { + throw new SketchesArgumentException("Endian byte order mismatch"); + } + myNomEntries = 1 << mem.getByte(offset++); //byte 5 + myLgCurrentCapacity = mem.getByte(offset++); //byte 6 + myLgResizeFactor = mem.getByte(offset++); //byte 7 + + checkBounds(0, preambleLongs * 8L, mem.getCapacity()); + final boolean isInSamplingMode = (flags & 1 << Flags.IS_IN_SAMPLING_MODE.ordinal()) > 0; + mySamplingProbability = isInSamplingMode ? mem.getFloat(offset) : 1f; //bytes 8 - 11 + if (isInSamplingMode) { + offset += Float.BYTES; + } - int count = 0; - final boolean hasEntries = (flags & 1 << Flags.HAS_ENTRIES.ordinal()) > 0; - if (hasEntries) { - count = mem.getInt(offset); - offset += Integer.BYTES; - } - final int currentCapacity = 1 << lgCurrentCapacity_; - hashTable_ = new long[currentCapacity]; - for (int i = 0; i < count; i++) { - final long hash = mem.getLong(offset); - offset += Long.BYTES; - final Memory memRegion = mem.region(offset, mem.getCapacity() - offset); - final DeserializeResult summaryResult = deserializer.heapifySummary(memRegion); - final S summary = summaryResult.getObject(); - offset += summaryResult.getSize(); - insert(hash, summary); + final boolean isThetaIncluded = (flags & 1 << Flags.IS_THETA_INCLUDED.ordinal()) > 0; + if (isThetaIncluded) { + myThetaLong = mem.getLong(offset); + offset += Long.BYTES; + } else { + myThetaLong = (long) (Long.MAX_VALUE * (double) mySamplingProbability); + } + + int count = 0; + final boolean hasEntries = (flags & (1 << Flags.HAS_ENTRIES.ordinal())) > 0; + if (hasEntries) { + count = mem.getInt(offset); + offset += Integer.BYTES; + } + final int currentCapacity = 1 << myLgCurrentCapacity; + myHashTable = new long[currentCapacity]; + for (int i = 0; i < count; i++) { + final long hash = mem.getLong(offset); + offset += Long.BYTES; + final Memory memRegion = mem.region(offset, mem.getCapacity() - offset); + final DeserializeResult summaryResult = deserializer.heapifySummary(memRegion); + final S summary = (S) summaryResult.getObject(); + offset += summaryResult.getSize(); + //in-place equivalent to insert(hash, summary): + final int index = HashOperations.hashInsertOnly(myHashTable, myLgCurrentCapacity, hash); + if (mySummaryTable == null) { + mySummaryTable = (S[]) Array.newInstance(summary.getClass(), myHashTable.length); + } + mySummaryTable[index] = summary; + myRetEntries++; + myEmpty = false; + } + myEmpty = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0; + myRebuildThreshold = setRebuildThreshold(myHashTable, myNomEntries); + return myThetaLong; } - empty_ = (flags & 1 << Flags.IS_EMPTY.ordinal()) > 0; - setRebuildThreshold(); - } + + } //end class Validate /** * @return a deep copy of this sketch @@ -247,7 +306,7 @@ long[] getHashTable() { @Override public int getRetainedEntries() { - return count_; + return retEntries_; } @Override @@ -303,7 +362,7 @@ public ResizeFactor getResizeFactor() { * Rebuilds reducing the actual number of entries to the nominal number of entries if needed */ public void trim() { - if (count_ > nomEntries_) { + if (retEntries_ > nomEntries_) { updateTheta(); resize(hashTable_.length); } @@ -314,13 +373,13 @@ public void trim() { */ public void reset() { empty_ = true; - count_ = 0; + retEntries_ = 0; thetaLong_ = (long) (Long.MAX_VALUE * (double) samplingProbability_); final int startingCapacity = Util.getStartingCapacity(nomEntries_, lgResizeFactor_); lgCurrentCapacity_ = Integer.numberOfTrailingZeros(startingCapacity); hashTable_ = new long[startingCapacity]; summaryTable_ = null; // wait for the first summary to call Array.newInstance() - setRebuildThreshold(); + rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_); } /** @@ -364,8 +423,8 @@ public CompactSketch compact() { public byte[] toByteArray() { byte[][] summariesBytes = null; int summariesBytesLength = 0; - if (count_ > 0) { - summariesBytes = new byte[count_][]; + if (retEntries_ > 0) { + summariesBytes = new byte[retEntries_][]; int i = 0; for (int j = 0; j < summaryTable_.length; j++) { if (summaryTable_[j] != null) { @@ -392,10 +451,10 @@ public byte[] toByteArray() { if (isThetaIncluded) { sizeBytes += Long.BYTES; } - if (count_ > 0) { + if (retEntries_ > 0) { sizeBytes += Integer.BYTES; // count } - sizeBytes += Long.BYTES * count_ + summariesBytesLength; + sizeBytes += Long.BYTES * retEntries_ + summariesBytesLength; final byte[] bytes = new byte[sizeBytes]; int offset = 0; bytes[offset++] = PREAMBLE_LONGS; @@ -407,7 +466,7 @@ public byte[] toByteArray() { (isBigEndian ? 1 << Flags.IS_BIG_ENDIAN.ordinal() : 0) | (isInSamplingMode() ? 1 << Flags.IS_IN_SAMPLING_MODE.ordinal() : 0) | (empty_ ? 1 << Flags.IS_EMPTY.ordinal() : 0) - | (count_ > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0) + | (retEntries_ > 0 ? 1 << Flags.HAS_ENTRIES.ordinal() : 0) | (isThetaIncluded ? 1 << Flags.IS_THETA_INCLUDED.ordinal() : 0) ); bytes[offset++] = (byte) Integer.numberOfTrailingZeros(nomEntries_); @@ -421,11 +480,11 @@ public byte[] toByteArray() { ByteArrayUtil.putLongLE(bytes, offset, thetaLong_); offset += Long.BYTES; } - if (count_ > 0) { - ByteArrayUtil.putIntLE(bytes, offset, count_); + if (retEntries_ > 0) { + ByteArrayUtil.putIntLE(bytes, offset, retEntries_); offset += Integer.BYTES; } - if (count_ > 0) { + if (retEntries_ > 0) { int i = 0; for (int j = 0; j < hashTable_.length; j++) { if (summaryTable_[j] != null) { @@ -473,13 +532,13 @@ void setEmpty(final boolean value) { int findOrInsert(final long hash) { final int index = HashOperations.hashSearchOrInsert(hashTable_, lgCurrentCapacity_, hash); if (index < 0) { - count_++; + retEntries_++; } return index; } boolean rebuildIfNeeded() { - if (count_ <= rebuildThreshold_) { + if (retEntries_ <= rebuildThreshold_) { return false; } if (hashTable_.length > nomEntries_) { @@ -498,12 +557,12 @@ void rebuild() { void insert(final long hash, final S summary) { final int index = HashOperations.hashInsertOnly(hashTable_, lgCurrentCapacity_, hash); insertSummary(index, summary); - count_++; + retEntries_++; empty_ = false; } private void updateTheta() { - final long[] hashArr = new long[count_]; + final long[] hashArr = new long[retEntries_]; int i = 0; //Because of the association of the hashTable with the summaryTable we cannot destroy the // hashTable structure. So we must copy. May as well compact at the same time. @@ -514,7 +573,7 @@ private void updateTheta() { hashArr[i++] = hashTable_[j]; } } - thetaLong_ = QuickSelect.select(hashArr, 0, count_ - 1, nomEntries_); + thetaLong_ = QuickSelect.select(hashArr, 0, retEntries_ - 1, nomEntries_); } private void resize(final int newSize) { @@ -523,20 +582,20 @@ private void resize(final int newSize) { hashTable_ = new long[newSize]; summaryTable_ = Util.newSummaryArray(summaryTable_, newSize); lgCurrentCapacity_ = Integer.numberOfTrailingZeros(newSize); - count_ = 0; + retEntries_ = 0; for (int i = 0; i < oldHashTable.length; i++) { if (oldSummaryTable[i] != null && oldHashTable[i] < thetaLong_) { insert(oldHashTable[i], oldSummaryTable[i]); } } - setRebuildThreshold(); + rebuildThreshold_ = setRebuildThreshold(hashTable_, nomEntries_); } - private void setRebuildThreshold() { - if (hashTable_.length > nomEntries_) { - rebuildThreshold_ = (int) (hashTable_.length * ThetaUtil.REBUILD_THRESHOLD); + private static int setRebuildThreshold(final long[] hashTable, final int nomEntries) { + if (hashTable.length > nomEntries) { + return (int) (hashTable.length * ThetaUtil.REBUILD_THRESHOLD); } else { - rebuildThreshold_ = (int) (hashTable_.length * ThetaUtil.RESIZE_THRESHOLD); + return (int) (hashTable.length * ThetaUtil.RESIZE_THRESHOLD); } } diff --git a/src/main/java/org/apache/datasketches/tuple/Sketch.java b/src/main/java/org/apache/datasketches/tuple/Sketch.java index b4aeb13fb..dcc7a2c16 100644 --- a/src/main/java/org/apache/datasketches/tuple/Sketch.java +++ b/src/main/java/org/apache/datasketches/tuple/Sketch.java @@ -35,9 +35,13 @@ public abstract class Sketch { long thetaLong_; boolean empty_ = true; - protected SummaryFactory summaryFactory_; + protected SummaryFactory summaryFactory_ = null; - Sketch() {} + Sketch(final long thetaLong, final boolean empty, final SummaryFactory summaryFactory) { + this.thetaLong_ = thetaLong; + this.empty_ = empty; + this.summaryFactory_ = summaryFactory; + } /** * Converts this sketch to a CompactSketch on the Java heap. diff --git a/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java b/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java index f49872787..135ce6d01 100644 --- a/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/UpdatableSketch.java @@ -69,7 +69,9 @@ public UpdatableSketch(final int nomEntries, final int lgResizeFactor, * Heapifying a CompactSketch is not deprecated. */ @Deprecated - public UpdatableSketch(final Memory srcMem, final SummaryDeserializer deserializer, + public UpdatableSketch( + final Memory srcMem, + final SummaryDeserializer deserializer, final SummaryFactory summaryFactory) { super(srcMem, deserializer, summaryFactory); } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java index c4a896a25..9a9962b16 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java @@ -57,7 +57,9 @@ abstract class ArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesUpdatableSk int rebuildThreshold_; //absolute value relative to current capacity int lgCurrentCapacity_; - ArrayOfDoublesQuickSelectSketch(final int numValues, final long seed) { + ArrayOfDoublesQuickSelectSketch( + final int numValues, + final long seed) { super(numValues, seed); } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java index 8c08b3a2f..60a3a909a 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java @@ -45,11 +45,6 @@ class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSke private int keysOffset_; private int valuesOffset_; - @Override - protected final void finalize() { - // SpotBugs CT_CONSTUCTOR_THROW, OBJ11-J - } - /** * Construct a new sketch using the given Memory as its backing store. * @@ -66,12 +61,33 @@ protected final void finalize() { * @param seed See seed * @param dstMem See Memory */ - DirectArrayOfDoublesQuickSelectSketch(final int nomEntries, final int lgResizeFactor, - final float samplingProbability, final int numValues, final long seed, final WritableMemory dstMem) { + DirectArrayOfDoublesQuickSelectSketch( + final int nomEntries, + final int lgResizeFactor, + final float samplingProbability, + final int numValues, + final long seed, + final WritableMemory dstMem) { + this(validate1(nomEntries, lgResizeFactor, numValues, dstMem), + nomEntries, + lgResizeFactor, + samplingProbability, + numValues, + seed, + dstMem); + } + + private DirectArrayOfDoublesQuickSelectSketch( + final boolean secure, + final int nomEntries, + final int lgResizeFactor, + final float samplingProbability, + final int numValues, + final long seed, + final WritableMemory dstMem) { super(numValues, seed); mem_ = dstMem; final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor); - checkIfEnoughMemory(dstMem, startingCapacity, numValues); mem_.putByte(PREAMBLE_LONGS_BYTE, (byte) 1); mem_.putByte(SERIAL_VERSION_BYTE, serialVersionUID); mem_.putByte(FAMILY_ID_BYTE, (byte) Family.TUPLE.getID()); @@ -99,19 +115,52 @@ protected final void finalize() { setRebuildThreshold(); } + private static final boolean validate1( + final int nomEntries, + final int lgResizeFactor, + final int numValues, + final WritableMemory dstMem) { + final int startingCapacity = Util.getStartingCapacity(nomEntries, lgResizeFactor); + checkIfEnoughMemory(dstMem, startingCapacity, numValues); + return true; + } + /** * Wraps the given Memory. * @param mem See Memory * @param seed update seed */ - DirectArrayOfDoublesQuickSelectSketch(final WritableMemory mem, final long seed) { + DirectArrayOfDoublesQuickSelectSketch( + final WritableMemory mem, + final long seed) { + this(validate2(mem), mem, seed); + //SpotBugs CT_CONSTRUCTOR_THROW is false positive. + //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J + } + + private DirectArrayOfDoublesQuickSelectSketch( + final boolean secure, + final WritableMemory mem, + final long seed) { super(mem.getByte(NUM_VALUES_BYTE), seed); mem_ = mem; SerializerDeserializer.validateFamily(mem.getByte(FAMILY_ID_BYTE), mem.getByte(PREAMBLE_LONGS_BYTE)); SerializerDeserializer.validateType(mem_.getByte(SKETCH_TYPE_BYTE), SerializerDeserializer.SketchType.ArrayOfDoublesQuickSelectSketch); - final byte version = mem_.getByte(SERIAL_VERSION_BYTE); + + Util.checkSeedHashes(mem.getShort(SEED_HASH_SHORT), Util.computeSeedHash(seed)); + keysOffset_ = ENTRIES_START; + valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * getCurrentCapacity()); + // to do: make parent take care of its own parts + lgCurrentCapacity_ = Integer.numberOfTrailingZeros(getCurrentCapacity()); + thetaLong_ = mem_.getLong(THETA_LONG); + isEmpty_ = (mem_.getByte(FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; + setRebuildThreshold(); + } + + private static final boolean validate2(final Memory mem) { + final byte version = mem.getByte(SERIAL_VERSION_BYTE); if (version != serialVersionUID) { throw new SketchesArgumentException("Serial version mismatch. Expected: " + serialVersionUID + ", actual: " + version); @@ -121,14 +170,7 @@ protected final void finalize() { if (isBigEndian ^ ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { throw new SketchesArgumentException("Byte order mismatch"); } - Util.checkSeedHashes(mem.getShort(SEED_HASH_SHORT), Util.computeSeedHash(seed)); - keysOffset_ = ENTRIES_START; - valuesOffset_ = keysOffset_ + (SIZE_OF_KEY_BYTES * getCurrentCapacity()); - // to do: make parent take care of its own parts - lgCurrentCapacity_ = Integer.numberOfTrailingZeros(getCurrentCapacity()); - thetaLong_ = mem_.getLong(THETA_LONG); - isEmpty_ = (mem_.getByte(FLAGS_BYTE) & (1 << Flags.IS_EMPTY.ordinal())) != 0; - setRebuildThreshold(); + return true; } @Override diff --git a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java index 2253be043..803c2f591 100644 --- a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java +++ b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java @@ -46,11 +46,11 @@ public void checkSketch() { sketch1.update(strArrArr[i], strArrArr[i]); } sketch1.update(strArrArr[0], strArrArr[0]); //insert duplicate - //printSummaries(sketch1.iterator()); + printSummaries(sketch1.iterator()); byte[] array = sketch1.toByteArray(); WritableMemory wmem = WritableMemory.writableWrap(array); ArrayOfStringsSketch sketch2 = new ArrayOfStringsSketch(wmem); - //printSummaries(sketch2.iterator()); + printSummaries(sketch2.iterator()); checkSummaries(sketch2, sketch2); String[] strArr3 = {"g", "h" }; diff --git a/tools/FindBugsExcludeFilter.xml b/tools/FindBugsExcludeFilter.xml index 32d176d7f..993bbbcf4 100644 --- a/tools/FindBugsExcludeFilter.xml +++ b/tools/FindBugsExcludeFilter.xml @@ -79,10 +79,27 @@ under the License. - + - + + + + + + + + + + + + + + + + + + From f268943a2ecb424366491cb2fbf4c9075a5de3c7 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Sat, 24 Feb 2024 09:23:47 -0800 Subject: [PATCH 4/7] Removed commented-out code --- .../kll/KllItemsSketchSortedView.java | 2 +- .../quantiles/ItemsSketchSortedView.java | 100 ------------------ 2 files changed, 1 insertion(+), 101 deletions(-) diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java index 553b93af4..0fc91fb68 100644 --- a/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/kll/KllItemsSketchSortedView.java @@ -55,7 +55,7 @@ public class KllItemsSketchSortedView implements GenericSortedView, Partit private final Class clazz; /** - * Construct from elements for testing only. + * Construct from elements, also used in testing. * @param quantiles sorted array of quantiles * @param cumWeights sorted, monotonically increasing cumulative weights. * @param totalN the total number of items presented to the sketch. diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java index e9117b93f..08df7210c 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java @@ -80,40 +80,6 @@ public class ItemsSketchSortedView implements GenericSortedView, Partition this.clazz = (Class)quantiles[0].getClass(); } -// /** -// * Constructs this Sorted View given the sketch -// * @param sketch the given Classic Quantiles ItemsSketch -// */ -// @SuppressWarnings("unchecked") -// ItemsSketchSortedView(final ItemsSketch sketch) { -// if (sketch.isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } -// this.totalN = sketch.getN(); -// final int numQuantiles = sketch.getNumRetained(); -// this.quantiles = (T[]) Array.newInstance(sketch.clazz, numQuantiles); -// this.minItem = sketch.minItem_; -// this.maxItem = sketch.maxItem_; -// this.cumWeights = new long[numQuantiles]; -// this.comparator = sketch.getComparator(); -// this.clazz = sketch.clazz; -// this.k = sketch.getK(); -// -// final Object[] combinedBuffer = sketch.getCombinedBuffer(); -// final int baseBufferCount = sketch.getBaseBufferCount(); -// -// // Populate from ItemsSketch: -// // copy over the "levels" and then the base buffer, all with appropriate weights -// populateFromItemsSketch(k, totalN, sketch.getBitPattern(), (T[]) combinedBuffer, baseBufferCount, -// numQuantiles, quantiles, cumWeights, sketch.getComparator()); -// -// // Sort the first "numSamples" slots of the two arrays in tandem, -// // taking advantage of the already sorted blocks of length k -// ItemsMergeImpl.blockyTandemMergeSort(quantiles, cumWeights, numQuantiles, k, sketch.getComparator()); -// -// if (convertToCumulative(cumWeights) != totalN) { -// throw new SketchesStateException("Sorted View is misconfigured. TotalN does not match cumWeights."); -// } -// } - //end of constructors @Override @@ -249,70 +215,4 @@ public GenericSortedViewIterator iterator() { return new GenericSortedViewIterator<>(quantiles, cumWeights); } - //restricted methods - -// /** -// * Populate the arrays and registers from an ItemsSketch -// * @param the data type -// * @param k K parameter of sketch -// * @param n The current size of the stream -// * @param bitPattern the bit pattern for valid log levels -// * @param combinedBuffer the combined buffer reference -// * @param baseBufferCount the count of the base buffer -// * @param numQuantiles number of retained quantiles in the sketch -// * @param quantilesArr the consolidated array of all quantiles from the sketch -// * @param weightsArr the weights for each item from the sketch -// * @param comparator the given comparator for data type T -// */ -// private final static void populateFromItemsSketch( -// final int k, final long n, final long bitPattern, final T[] combinedBuffer, -// final int baseBufferCount, final int numQuantiles, final T[] quantilesArr, final long[] weightsArr, -// final Comparator comparator) { -// long weight = 1; -// int nxt = 0; -// long bits = bitPattern; -// assert bits == (n / (2L * k)); // internal consistency check -// for (int lvl = 0; bits != 0L; lvl++, bits >>>= 1) { -// weight *= 2; -// if ((bits & 1L) > 0L) { -// final int offset = (2 + lvl) * k; -// for (int i = 0; i < k; i++) { -// quantilesArr[nxt] = combinedBuffer[i + offset]; -// weightsArr[nxt] = weight; -// nxt++; -// } -// } -// } -// -// weight = 1; //NOT a mistake! We just copied the highest level; now we need to copy the base buffer -// final int startOfBaseBufferBlock = nxt; -// -// // Copy BaseBuffer over, along with weight = 1 -// for (int i = 0; i < baseBufferCount; i++) { -// quantilesArr[nxt] = combinedBuffer[i]; -// weightsArr[nxt] = weight; -// nxt++; -// } -// assert nxt == numQuantiles; -// -// // Must sort the items that came from the base buffer. -// // Don't need to sort the corresponding weights because they are all the same. -// Arrays.sort(quantilesArr, startOfBaseBufferBlock, numQuantiles, comparator); -// } -// -// /** -// * Convert the individual weights into cumulative weights. -// * An array of {1,1,1,1} becomes {1,2,3,4} -// * @param array of actual weights from the sketch, none of the weights may be zero -// * @return total weight -// */ -// private static long convertToCumulative(final long[] array) { -// long subtotal = 0; -// for (int i = 0; i < array.length; i++) { -// final long newSubtotal = subtotal + array[i]; -// subtotal = array[i] = newSubtotal; -// } -// return subtotal; -// } - } From c91c2a66706449352cf06b7d3443d66ffef59e09 Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Mon, 26 Feb 2024 11:21:11 -0800 Subject: [PATCH 5/7] While reviewing these changes again, I found some commented out lines that could be removed and some unnecessary minor changes that could be reverted. --- .../apache/datasketches/quantiles/ItemsSketchSortedView.java | 2 -- .../java/org/apache/datasketches/tuple/QuickSelectSketch.java | 2 ++ .../tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java | 4 +--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java index 08df7210c..b4a8fd98d 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java @@ -26,11 +26,9 @@ import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank; import java.lang.reflect.Array; -//import java.util.Arrays; import java.util.Comparator; import org.apache.datasketches.common.SketchesArgumentException; -//import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.quantilescommon.GenericInequalitySearch.Inequality; import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; import org.apache.datasketches.quantilescommon.GenericSortedView; diff --git a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java index 59e27a429..1759f55a9 100644 --- a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java @@ -170,9 +170,11 @@ private QuickSelectSketch( * @param mem Memory object with serialized QuickSelectSketch * @param deserializer the SummaryDeserializer * @param summaryFactory the SummaryFactory + * @deprecated As of 3.0.0, heapifying an UpdatableSketch is deprecated. * This capability will be removed in a future release. * Heapifying a CompactSketch is not deprecated. */ + @Deprecated QuickSelectSketch( final Memory mem, final SummaryDeserializer deserializer, diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java index 9a9962b16..c4a896a25 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/ArrayOfDoublesQuickSelectSketch.java @@ -57,9 +57,7 @@ abstract class ArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesUpdatableSk int rebuildThreshold_; //absolute value relative to current capacity int lgCurrentCapacity_; - ArrayOfDoublesQuickSelectSketch( - final int numValues, - final long seed) { + ArrayOfDoublesQuickSelectSketch(final int numValues, final long seed) { super(numValues, seed); } From bbcdd154dc261c6eb5f52219897549b27f9e4f9f Mon Sep 17 00:00:00 2001 From: Lee Rhodes Date: Wed, 28 Feb 2024 16:50:30 -0800 Subject: [PATCH 6/7] Fixing issues raised in PR review. --- .../quantiles/DoublesSketchAccessor.java | 4 +++- .../apache/datasketches/quantiles/ItemsSketch.java | 1 - .../theta/DirectQuickSelectSketch.java | 2 +- .../DirectArrayOfDoublesQuickSelectSketch.java | 14 ++++++++------ 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketchAccessor.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketchAccessor.java index 1c5913b75..6195552a8 100644 --- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketchAccessor.java +++ b/src/main/java/org/apache/datasketches/quantiles/DoublesSketchAccessor.java @@ -45,10 +45,12 @@ abstract class DoublesSketchAccessor extends DoublesBufferAccessor { final boolean forceSize, final int level) { this(checkLvl(level), ds, forceSize, level); + //SpotBugs CT_CONSTRUCTOR_THROW is false positive. + //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J } private DoublesSketchAccessor( - final boolean secure, + final boolean secure, //required part of Finalizer Attack prevention final DoublesSketch ds, final boolean forceSize, final int level) { diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java index 8a6b7831f..3d51363fa 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java @@ -36,7 +36,6 @@ import static org.apache.datasketches.quantiles.PreambleUtil.extractN; import static org.apache.datasketches.quantiles.PreambleUtil.extractPreLongs; import static org.apache.datasketches.quantiles.PreambleUtil.extractSerVer; -//import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; import java.lang.reflect.Array; import java.util.Arrays; diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java index 33efe064e..8397c130b 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java @@ -121,7 +121,7 @@ private DirectQuickSelectSketch( } private DirectQuickSelectSketch( - final boolean secure, + final boolean secure, //required part of Finalizer Attack prevention final int lgNomLongs, final long seed, final float p, diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java index 60a3a909a..1b4e86904 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java @@ -68,7 +68,9 @@ class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSke final int numValues, final long seed, final WritableMemory dstMem) { - this(validate1(nomEntries, lgResizeFactor, numValues, dstMem), + this(checkMemory(nomEntries, lgResizeFactor, numValues, dstMem), + //SpotBugs CT_CONSTRUCTOR_THROW is false positive. + //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J nomEntries, lgResizeFactor, samplingProbability, @@ -78,7 +80,7 @@ class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSke } private DirectArrayOfDoublesQuickSelectSketch( - final boolean secure, + final boolean secure, //required part of Finalizer Attack prevention final int nomEntries, final int lgResizeFactor, final float samplingProbability, @@ -115,7 +117,7 @@ private DirectArrayOfDoublesQuickSelectSketch( setRebuildThreshold(); } - private static final boolean validate1( + private static final boolean checkMemory( final int nomEntries, final int lgResizeFactor, final int numValues, @@ -133,13 +135,13 @@ private static final boolean validate1( DirectArrayOfDoublesQuickSelectSketch( final WritableMemory mem, final long seed) { - this(validate2(mem), mem, seed); + this(checkSerVer_Endianness(mem), mem, seed); //SpotBugs CT_CONSTRUCTOR_THROW is false positive. //this construction scheme is compliant with SEI CERT Oracle Coding Standard for Java / OBJ11-J } private DirectArrayOfDoublesQuickSelectSketch( - final boolean secure, + final boolean secure, //required part of Finalizer Attack prevention final WritableMemory mem, final long seed) { super(mem.getByte(NUM_VALUES_BYTE), seed); @@ -159,7 +161,7 @@ private DirectArrayOfDoublesQuickSelectSketch( setRebuildThreshold(); } - private static final boolean validate2(final Memory mem) { + private static final boolean checkSerVer_Endianness(final Memory mem) { final byte version = mem.getByte(SERIAL_VERSION_BYTE); if (version != serialVersionUID) { throw new SketchesArgumentException("Serial version mismatch. Expected: " + serialVersionUID From 14cf83b46032590eb01477baec0f427dc13c5aa3 Mon Sep 17 00:00:00 2001 From: jmalkin <786705+jmalkin@users.noreply.github.com> Date: Thu, 29 Feb 2024 20:29:36 -0800 Subject: [PATCH 7/7] backport security fixes PR so we can create a 5.0.2 release --- .../java/org/apache/datasketches/quantiles/ItemsSketch.java | 2 +- .../apache/datasketches/quantiles/ItemsSketchSortedView.java | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java index 3d51363fa..aa446ccd7 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java @@ -682,7 +682,7 @@ ItemsSketchSortedView getSV() { } return new ItemsSketchSortedView( - quantiles, cumWeights, getN(), comparator, getMaxItem(), getMinItem(), getK()); + quantiles, cumWeights, getN(), comparator, getMaxItem(), getMinItem()); } } diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java index b4a8fd98d..a1935cffc 100644 --- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java +++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java @@ -28,7 +28,6 @@ import java.lang.reflect.Array; import java.util.Comparator; -import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.quantilescommon.GenericInequalitySearch.Inequality; import org.apache.datasketches.quantilescommon.GenericPartitionBoundaries; import org.apache.datasketches.quantilescommon.GenericSortedView;