This aligns 6.1.X with master

apache · Dec 12, 2024 · 9fa8799 · 9fa8799
1 parent 6724a39
commit 9fa8799
Show file tree

Hide file tree

Showing 48 changed files with 173 additions and 93 deletions.
diff --git a/.github/workflows/check_cpp_files.yml b/.github/workflows/check_cpp_files.yml
@@ -12,12 +12,17 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: Checkout C++
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: apache/datasketches-cpp
           path: cpp
+      - name: Setup Java
+        uses: actions/setup-java@v2
+        with:
+          java-version: '11'
+          distribution: 'temurin'
       - name: Configure C++ build
         run: cd cpp/build && cmake .. -DGENERATE=true
       - name: Build C++ unit tests

diff --git a/pom.xml b/pom.xml
@@ -33,7 +33,7 @@ under the License.
 
   <groupId>org.apache.datasketches</groupId>
   <artifactId>datasketches-java</artifactId>
-  <version>6.1.1</version>
+  <version>6.2.0-SNAPSHOT</version>
   <packaging>jar</packaging>
 
   <name>${project.artifactId}</name>

diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java
@@ -33,8 +33,8 @@
 import org.apache.datasketches.memory.XxHash;
 
 /**
- * <p>A Bloom filter is a data structure that can be used for probabilistic
- * set membership.</p>
+ * A Bloom filter is a data structure that can be used for probabilistic
+ * set membership.
  *
  * <p>When querying a Bloom filter, there are no false negatives. Specifically:
  * When querying an item that has already been inserted to the filter, the filter will

diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java
@@ -25,8 +25,8 @@
 import org.apache.datasketches.memory.WritableMemory;
 
 /**
- * <p>This class provides methods to help estimate the correct parameters when
- * creating a Bloom filter, and methods to create the filter using those values.</p>
+ * This class provides methods to help estimate the correct parameters when
+ * creating a Bloom filter, and methods to create the filter using those values.
  *
  * <p>The underlying math is described in the
  * <a href='https://en.wikipedia.org/wiki/Bloom_filter#Optimal_number_of_hash_functions'>

diff --git a/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java b/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java
@@ -55,10 +55,10 @@
 import org.apache.datasketches.memory.WritableMemory;
 
 /**
- * <p>This sketch is useful for tracking approximate frequencies of items of type <i>&lt;T&gt;</i>
+ * This sketch is useful for tracking approximate frequencies of items of type <i>&lt;T&gt;</i>
  * with optional associated counts (<i>&lt;T&gt;</i> item, <i>long</i> count) that are members of a
  * multiset of such items. The true frequency of an item is defined to be the sum of associated
- * counts.</p>
+ * counts.
  *
  * <p>This implementation provides the following capabilities:</p>
  * <ul>

diff --git a/src/main/java/org/apache/datasketches/frequencies/LongsSketch.java b/src/main/java/org/apache/datasketches/frequencies/LongsSketch.java
@@ -54,9 +54,9 @@
 import org.apache.datasketches.memory.WritableMemory;
 
 /**
- * <p>This sketch is useful for tracking approximate frequencies of <i>long</i> items with optional
+ * This sketch is useful for tracking approximate frequencies of <i>long</i> items with optional
  * associated counts (<i>long</i> item, <i>long</i> count) that are members of a multiset of
- * such items. The true frequency of an item is defined to be the sum of associated counts.</p>
+ * such items. The true frequency of an item is defined to be the sum of associated counts.
  *
  * <p>This implementation provides the following capabilities:</p>
  * <ul>

diff --git a/src/main/java/org/apache/datasketches/frequencies/PreambleUtil.java b/src/main/java/org/apache/datasketches/frequencies/PreambleUtil.java
@@ -31,12 +31,11 @@
 /**
  * This class defines the preamble data structure and provides basic utilities for some of the key
  * fields.
- * <p>
- * The intent of the design of this class was to isolate the detailed knowledge of the bit and byte
+ *
+ * <p>The intent of the design of this class was to isolate the detailed knowledge of the bit and byte
  * layout of the serialized form of the sketches derived from the Sketch class into one place. This
  * allows the possibility of the introduction of different serialization schemes with minimal impact
- * on the rest of the library.
- * </p>
+ * on the rest of the library.</p>
  *
  * <p>
  * MAP: Low significance bytes of this <i>long</i> data structure are on the right. However, the

diff --git a/src/main/java/org/apache/datasketches/hash/MurmurHash3.java b/src/main/java/org/apache/datasketches/hash/MurmurHash3.java
@@ -29,10 +29,8 @@
 import org.apache.datasketches.memory.Memory;
 
 /**
- * <p>
  * The MurmurHash3 is a fast, non-cryptographic, 128-bit hash function that has
  * excellent avalanche and 2-way bit independence properties.
- * </p>
  *
  * <p>
  * Austin Appleby's C++

diff --git a/src/main/java/org/apache/datasketches/hash/package-info.java b/src/main/java/org/apache/datasketches/hash/package-info.java
@@ -18,12 +18,11 @@
  */
 
 /**
- * <p>The hash package contains high-performing and extended Java implementations
+ * The hash package contains high-performing and extended Java implementations
  * of Austin Appleby's 128-bit MurmurHash3 hash function originally coded in C.
  * This core MurmurHash3.java class is used throughout many of the sketch classes for consistency
  * and as long as the user specifies the same seed will result in coordinated hash operations.
  * This package also contains an adaptor class that extends the basic class with more functions
  * commonly associated with hashing.
- * </p>
  */
 package org.apache.datasketches.hash;
diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java
@@ -312,6 +312,7 @@ private static void randomlyHalveUpDoubles(final double[] buf, final int start,
 
   /**
    * Compression algorithm used to merge higher levels.
+   *
    * <p>Here is what we do for each level:</p>
    * <ul><li>If it does not need to be compacted, then simply copy it over.</li>
    * <li>Otherwise, it does need to be compacted, so...

diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java
@@ -278,6 +278,7 @@ public final void merge(final KllSketch other) {
 
   /**
    * {@inheritDoc}
+   *
    * <p>The parameter <i>k</i> will not change.</p>
    */
   @Override

diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java
@@ -312,6 +312,7 @@ private static void randomlyHalveUpFloats(final float[] buf, final int start, fi
 
   /**
    * Compression algorithm used to merge higher levels.
+   *
    * <p>Here is what we do for each level:</p>
    * <ul><li>If it does not need to be compacted, then simply copy it over.</li>
    * <li>Otherwise, it does need to be compacted, so...

diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
@@ -278,6 +278,7 @@ public final void merge(final KllSketch other) {
 
   /**
    * {@inheritDoc}
+   *
    * <p>The parameter <i>k</i> will not change.</p>
    */
   @Override

diff --git a/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java b/src/main/java/org/apache/datasketches/kll/KllItemsHelper.java
@@ -346,6 +346,7 @@ static <T> void updateItem(final KllItemsSketch<T> itmSk, final T item, final lo
 
   /**
    * Compression algorithm used to merge higher levels.
+   *
    * <p>Here is what we do for each level:</p>
    * <ul><li>If it does not need to be compacted, then simply copy it over.</li>
    * <li>Otherwise, it does need to be compacted, so...

diff --git a/src/main/java/org/apache/datasketches/kll/KllLongsHelper.java b/src/main/java/org/apache/datasketches/kll/KllLongsHelper.java
@@ -312,6 +312,7 @@ private static void randomlyHalveUpLongs(final long[] buf, final int start, fina
 
   /**
    * Compression algorithm used to merge higher levels.
+   *
    * <p>Here is what we do for each level:</p>
    * <ul><li>If it does not need to be compacted, then simply copy it over.</li>
    * <li>Otherwise, it does need to be compacted, so...

diff --git a/src/main/java/org/apache/datasketches/kll/KllLongsSketch.java b/src/main/java/org/apache/datasketches/kll/KllLongsSketch.java
@@ -278,6 +278,7 @@ public final void merge(final KllSketch other) {
 
   /**
    * {@inheritDoc}
+   *
    * <p>The parameter <i>k</i> will not change.</p>
    */
   @Override

diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java
@@ -506,6 +506,7 @@ public QuantilesDoublesSketchIterator iterator() {
 
   /**
    * {@inheritDoc}
+   *
    * <p>The parameter <i>k</i> will not change.</p>
    */
   @Override

diff --git a/src/main/java/org/apache/datasketches/quantiles/package-info.java b/src/main/java/org/apache/datasketches/quantiles/package-info.java
@@ -18,9 +18,8 @@
  */
 
 /**
- * <p>The quantiles package contains stochastic streaming algorithms that enable single-pass
+ * The quantiles package contains stochastic streaming algorithms that enable single-pass
  * analysis of the distribution of a stream of quantiles.
- * </p>
  *
  * @see org.apache.datasketches.quantiles.DoublesSketch
  * @see org.apache.datasketches.quantiles.ItemsSketch

diff --git a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java
@@ -38,7 +38,7 @@ public interface DoublesSortedView extends SortedView {
    * @param splitPoints an array of <i>m</i> unique, monotonically increasing items
    * (of the same type as the input items)
    * that divide the item input domain into <i>m+1</i> overlapping intervals.
-   *
+   * <blockquote>
    * <p>The start of each interval is below the lowest item retained by the sketch
    * corresponding to a zero rank or zero probability, and the end of the interval
    * is the rank or cumulative probability corresponding to the split point.</p>
@@ -55,7 +55,7 @@ public interface DoublesSortedView extends SortedView {
    * </ul>
    *
    * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p>
-   *
+   * </blockquote>
    * @param searchCrit the desired search criteria.
    * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0].
    * @throws IllegalArgumentException if sketch is empty.
@@ -100,7 +100,7 @@ default double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit)
    * @param splitPoints an array of <i>m</i> unique, monotonically increasing items
    * (of the same type as the input items)
    * that divide the item input domain into <i>m+1</i> consecutive, non-overlapping intervals.
-   *
+   * <blockquote>
    * <p>Each interval except for the end intervals starts with a split point and ends with the next split
    * point in sequence.</p>
    *
@@ -124,7 +124,7 @@ default double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit)
    * </ul>
    *
    * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p>
-   *
+   * </blockquote>
    * @param searchCrit the desired search criteria.
    * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0].
    * @throws IllegalArgumentException if sketch is empty.

diff --git a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java
@@ -38,7 +38,7 @@ public interface FloatsSortedView extends SortedView {
    * @param splitPoints an array of <i>m</i> unique, monotonically increasing items
    * (of the same type as the input items)
    * that divide the item input domain into <i>m+1</i> overlapping intervals.
-   *
+   * <blockquote>
    * <p>The start of each interval is below the lowest item retained by the sketch
    * corresponding to a zero rank or zero probability, and the end of the interval
    * is the rank or cumulative probability corresponding to the split point.</p>
@@ -55,7 +55,7 @@ public interface FloatsSortedView extends SortedView {
    * </ul>
    *
    * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p>
-   *
+   * </blockquote>
    * @param searchCrit the desired search criteria.
    * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0].
    * @throws IllegalArgumentException if sketch is empty.
@@ -100,7 +100,7 @@ default double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit)
    * @param splitPoints an array of <i>m</i> unique, monotonically increasing items
    * (of the same type as the input items)
    * that divide the item input domain into <i>m+1</i> consecutive, non-overlapping intervals.
-   *
+   * <blockquote>
    * <p>Each interval except for the end intervals starts with a split point and ends with the next split
    * point in sequence.</p>
    *
@@ -124,7 +124,7 @@ default double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit)
    * </ul>
    *
    * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p>
-   *
+   * </blockquote>
    * @param searchCrit the desired search criteria.
    * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0].
    * @throws IllegalArgumentException if sketch is empty.

diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java
@@ -47,7 +47,7 @@ public interface GenericSortedView<T>  extends PartitioningFeature<T>, SketchPar
    * @param splitPoints an array of <i>m</i> unique, monotonically increasing items
    * (of the same type as the input items)
    * that divide the item input domain into <i>m+1</i> overlapping intervals.
-   *
+   * <blockquote>
    * <p>The start of each interval is below the lowest item retained by the sketch
    * corresponding to a zero rank or zero probability, and the end of the interval
    * is the rank or cumulative probability corresponding to the split point.</p>
@@ -64,7 +64,7 @@ public interface GenericSortedView<T>  extends PartitioningFeature<T>, SketchPar
    * </ul>
    *
    * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p>
-   *
+   * </blockquote>
    * @param searchCrit the desired search criteria.
    * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0].
    * @throws IllegalArgumentException if sketch is empty.
@@ -116,7 +116,7 @@ default double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria sear
    * @param splitPoints an array of <i>m</i> unique, monotonically increasing items
    * (of the same type as the input items)
    * that divide the item input domain into <i>m+1</i> consecutive, non-overlapping intervals.
-   *
+   * <blockquote>
    * <p>Each interval except for the end intervals starts with a split point and ends with the next split
    * point in sequence.</p>
    *
@@ -140,7 +140,7 @@ default double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria sear
    * </ul>
    *
    * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p>
-   *
+   * </blockquote>
    * @param searchCrit the desired search criteria.
    * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0].
    * @throws IllegalArgumentException if sketch is empty.

diff --git a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java
@@ -38,7 +38,7 @@ public interface LongsSortedView extends SortedView {
    * @param splitPoints an array of <i>m</i> unique, monotonically increasing items
    * (of the same type as the input items)
    * that divide the item input domain into <i>m+1</i> overlapping intervals.
-   *
+   * <blockquote>
    * <p>The start of each interval is below the lowest item retained by the sketch
    * corresponding to a zero rank or zero probability, and the end of the interval
    * is the rank or cumulative probability corresponding to the split point.</p>
@@ -55,7 +55,7 @@ public interface LongsSortedView extends SortedView {
    * </ul>
    *
    * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p>
-   *
+   * </blockquote>
    * @param searchCrit the desired search criteria.
    * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0].
    * @throws IllegalArgumentException if sketch is empty.
@@ -100,7 +100,7 @@ default double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit) {
    * @param splitPoints an array of <i>m</i> unique, monotonically increasing items
    * (of the same type as the input items)
    * that divide the item input domain into <i>m+1</i> consecutive, non-overlapping intervals.
-   *
+   * <blockquote>
    * <p>Each interval except for the end intervals starts with a split point and ends with the next split
    * point in sequence.</p>
    *
@@ -124,7 +124,7 @@ default double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit) {
    * </ul>
    *
    * <p>It is not recommended to include either the minimum or maximum items of the input stream.</p>
-   *
+   * </blockquote>
    * @param searchCrit the desired search criteria.
    * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0].
    * @throws IllegalArgumentException if sketch is empty.

diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java
@@ -20,12 +20,12 @@
 package org.apache.datasketches.quantilescommon;
 
 /**
- * <p>This is a stochastic streaming sketch that enables near-real time analysis of the
+ * This is a stochastic streaming sketch that enables near-real time analysis of the
  * approximate distribution of items from a very large stream in a single pass, requiring only
  * that the items are comparable.
  * The analysis is obtained using the <i>getQuantile()</i> function or the
  * inverse functions getRank(), getPMF() (the Probability Mass Function), and getCDF()
- * (the Cumulative Distribution Function).</p>
+ * (the Cumulative Distribution Function).
  *
  * <p>Given an input stream of <i>N</i> items, the <i>natural rank</i> of any specific
  * item is defined as its index <i>(1 to N)</i> in the hypothetical sorted stream of all
-Original file line number
+Diff line change
@@ Expand Up / @@ -278,6 +278,7 @@ public final void merge(final KllSketch other) { @@
       /**
        * {@inheritDoc}
+       *
        * <p>The parameter <i>k</i> will not change.</p>
        */
       @Override
@@ Expand Down @@