diff --git a/.github/workflows/check_cpp_files.yml b/.github/workflows/auto-check_cpp_files.yml
similarity index 76%
rename from .github/workflows/check_cpp_files.yml
rename to .github/workflows/auto-check_cpp_files.yml
index 59ae5824a..858e760fb 100644
--- a/.github/workflows/check_cpp_files.yml
+++ b/.github/workflows/auto-check_cpp_files.yml
@@ -1,9 +1,9 @@
name: Serialization Compatibility Test
on:
+ pull_request:
push:
- branches:
- - master
+ branches: [ 7.0.X ]
workflow_dispatch:
jobs:
@@ -12,21 +12,34 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
+
- name: Checkout C++
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
repository: apache/datasketches-cpp
path: cpp
+
+ - name: Setup Java
+ uses: actions/setup-java@v4
+ with:
+ java-version: '17'
+ distribution: 'temurin'
+
- name: Configure C++ build
run: cd cpp/build && cmake .. -DGENERATE=true
+
- name: Build C++ unit tests
run: cd cpp && cmake --build build --config Release
+
- name: Run C++ tests
run: cd cpp && cmake --build build --config Release --target test
+
- name: Make dir
run: mkdir -p serialization_test_data/cpp_generated_files
+
- name: Copy files
run: cp cpp/build/*/test/*_cpp.sk serialization_test_data/cpp_generated_files
+
- name: Run Java tests
run: mvn test -P check-cpp-files
diff --git a/.github/workflows/auto-jdk-matrix.yml b/.github/workflows/auto-jdk-matrix.yml
index 83da580ca..49be43018 100644
--- a/.github/workflows/auto-jdk-matrix.yml
+++ b/.github/workflows/auto-jdk-matrix.yml
@@ -1,65 +1,72 @@
name: DataSketches-Java Auto JDK Matrix Test & Install
on:
- pull_request:
- push:
- branches: [ master ]
- workflow_dispatch:
+ pull_request:
+ push:
+ branches: [ 7.0.X ]
+ workflow_dispatch:
env:
- MAVEN_OPTS: -Xmx4g -Xms1g
+ MAVEN_OPTS: -Xmx4g -Xms1g
jobs:
- build:
- name: Build, Test, Install
- runs-on: ubuntu-latest
- strategy:
- fail-fast: false
- matrix:
- jdk: [ 8,11 ]
- env:
- JDK_VERSION: ${{ matrix.jdk }}
+ build:
+ name: Build, Test, Install
+ runs-on: ubuntu-latest
- steps:
- - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
- uses: actions/checkout@v3
- with:
- persist-credentials: false
+ strategy:
+ fail-fast: false
+ matrix:
+ jdk: [ 17 ]
- - name: Cache local Maven repository
- uses: actions/cache@v3
- with:
- path: ~/.m2/repository
- key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: build-${{ runner.os }}-maven-
+ env:
+ JDK_VERSION: ${{ matrix.jdk }}
- - name: Install Matrix JDK
- uses: actions/setup-java@v3
- with:
- java-version: ${{ matrix.jdk }}
- distribution: 'temurin'
- java-package: jdk
- architecture: x64
-# Architecture options: x86, x64, armv7, aarch64, ppc64le
-# setup-java@v3 has a "with cache" option
+ steps:
+ - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
+ uses: actions/checkout@v4
+ with:
+ persist-credentials: false
+
+ - name: Print Current workflow
+ run: >
+ cat .github/workflows/auto-jdk-matrix.yml
+
+ - name: Cache local Maven repository
+ uses: actions/cache@v4
+ with:
+ path: ~/.m2/repository
+ key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
+ restore-keys: build-${{ runner.os }}-maven-
- - name: Echo Java Version
- run: >
- java -version
+ - name: Install Matrix JDK
+ uses: actions/setup-java@v4
+ with:
+ java-version: ${{ matrix.jdk }}
+ distribution: 'temurin'
+ java-package: jdk
+ architecture: x64
- - name: Test
- run: >
- mvn clean test
- -Dmaven.javadoc.skip=true
- -Dgpg.skip=true
+ - name: Echo Java Version
+ run: >
+ java -version
- - name: Install
- run: >
- mvn clean install -B
- -DskipTests=true
- -Dgpg.skip=true
+ # Run the unit tests in batch mode (-B); Javadoc generation and GPG signing are skipped.
+ - name: Test
+ run: >
+ mvn clean test -B
+ -Dmaven.javadoc.skip=true
+ -Dgpg.skip=true
+ # Rebuild and install in batch mode with tests skipped (they ran in the previous step) and GPG signing skipped.
+ - name: Install
+ run: >
+ mvn clean install -B
+ -DskipTests=true
+ -Dgpg.skip=true
+
+# Architecture options: x86, x64, armv7, aarch64, ppc64le
+# setup-java@v4 has a "with cache" option
# Lifecycle: validate, compile, test, package, verify, install, deploy
-# -B batch mode
+# -B batch mode, never stops for user input
# -V show Version without stopping
+# -X debug mode
# -q quiet, only show errors
diff --git a/.github/workflows/auto-os-matrix.yml b/.github/workflows/auto-os-matrix.yml
new file mode 100644
index 000000000..b7d703a08
--- /dev/null
+++ b/.github/workflows/auto-os-matrix.yml
@@ -0,0 +1,81 @@
+name: DataSketches-Java Auto OS Matrix Test & Install
+
+on:
+ pull_request:
+ push:
+ branches: [ 7.0.X ]
+ workflow_dispatch:
+
+env:
+ MAVEN_OPTS: -Xmx1g -Xms1g
+
+jobs:
+ build:
+ name: Build, Test, Install
+
+ strategy:
+ fail-fast: false
+
+ # One job per (jdk, os) combination. Each include entry below matches an existing
+ # os value and adds the skip_javadoc / skip_gpg keys to that combination.
+ matrix:
+ jdk: [ 17 ]
+ os: [ windows-latest, ubuntu-latest, macos-latest ]
+ include:
+ # NOTE(review): the backtick-escaped form is presumably needed so the Windows
+ # runner's default shell passes the dotted -D arguments through unchanged - confirm.
+ - os: windows-latest
+ skip_javadoc: "`-Dmaven`.javadoc`.skip=true"
+ skip_gpg: "`-Dgpg`.skip=true"
+ - os: ubuntu-latest
+ skip_javadoc: -Dmaven.javadoc.skip=true
+ skip_gpg: -Dgpg.skip=true
+ - os: macos-latest
+ skip_javadoc: -Dmaven.javadoc.skip=true
+ skip_gpg: -Dgpg.skip=true
+
+ runs-on: ${{matrix.os}}
+
+ env:
+ JDK_VERSION: ${{ matrix.jdk }}
+
+ steps:
+ - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
+ uses: actions/checkout@v4
+ with:
+ persist-credentials: false
+
+ - name: Cache local Maven repository
+ uses: actions/cache@v4
+ with:
+ path: ~/.m2/repository
+ key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
+ restore-keys: build-${{ runner.os }}-maven-
+
+ - name: Install Matrix JDK
+ uses: actions/setup-java@v4
+ with:
+ java-version: ${{ matrix.jdk }}
+ distribution: 'temurin'
+ java-package: jdk
+ architecture: x64
+
+ - name: Echo Java Version
+ run: >
+ java -version
+
+ # Keys added through strategy.matrix.include are exposed directly on the matrix
+ # context (matrix.skip_javadoc, matrix.skip_gpg). matrix.os is a plain string, so
+ # matrix.os.skip_javadoc expands to nothing and the skip flags would be dropped.
+ # Run the unit tests in batch mode (-B) with the per-OS skip flags.
+ - name: Test
+ run: >
+ mvn clean test -B
+ ${{ matrix.skip_javadoc }}
+ ${{ matrix.skip_gpg }}
+
+ # Build and install in batch mode with tests skipped, using the same per-OS skip flags.
+ - name: Install
+ run: >
+ mvn clean install -B
+ ${{ matrix.skip_javadoc }}
+ -D skipTests=true
+ ${{ matrix.skip_gpg }}
+
+# Architecture options: x86, x64, armv7, aarch64, ppc64le
+# setup-java@v4 has a "with cache" option
+# Lifecycle: validate, compile, test, package, verify, install, deploy
+# -B batch mode
+# -V show Version without stopping
+# -q quiet, only show errors
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
deleted file mode 100644
index 7cb4c8976..000000000
--- a/.github/workflows/codeql-analysis.yml
+++ /dev/null
@@ -1,64 +0,0 @@
-name: "CodeQL"
-
-on:
- push:
- branches: [ 'master' ]
- pull_request:
- # The branches below must be a subset of the branches above
- branches: [ 'master' ]
- schedule:
- - cron: '10 17 * * 4'
- workflow_dispatch:
-
-jobs:
- analyze:
- name: Analyze
- runs-on: ubuntu-latest
- permissions:
- actions: read
- contents: read
- security-events: write
-
- strategy:
- fail-fast: false
- matrix:
- language: [ 'java' ]
- # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
- # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
-
- steps:
- - name: Checkout repository
- uses: actions/checkout@v3
-
- # Initializes the CodeQL tools for scanning.
- - name: Initialize CodeQL
- uses: github/codeql-action/init@v3
- with:
- languages: ${{ matrix.language }}
- # If you wish to specify custom queries, you can do so here or in a config file.
- # By default, queries listed here will override any specified in a config file.
- # Prefix the list here with "+" to use these queries and those in the config file.
-
- # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
- queries: +security-and-quality
-
-
- # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java).
- # If this step fails, then you should remove it and run the build manually (see below)
- - name: Autobuild
- uses: github/codeql-action/autobuild@v3
-
- # âšī¸ Command-line programs to run using the OS shell.
- # đ See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
-
- # If the Autobuild fails above, remove it and uncomment the following three lines.
- # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
-
- # - run: |
- # echo "Run, Build Application using script"
- # ./location_of_script_within_repo/buildscript.sh
-
- - name: Perform CodeQL Analysis
- uses: github/codeql-action/analyze@v3
- with:
- category: "/language:${{matrix.language}}"
diff --git a/.github/workflows/javadoc.yml b/.github/workflows/javadoc.yml
deleted file mode 100644
index 6033d3273..000000000
--- a/.github/workflows/javadoc.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-name: JavaDoc
-
-on:
- push:
- branches:
- - master
- workflow_dispatch:
-
-jobs:
- javadoc:
- runs-on: ubuntu-latest
- steps:
- - name: Checkout
- uses: actions/checkout@v3
- - name: Generate JavaDoc
- run: mvn javadoc:javadoc
- - name: Deploy JavaDoc
- uses: JamesIves/github-pages-deploy-action@5dc1d5a192aeb5ab5b7d5a77b7d36aea4a7f5c92
- with:
- token: ${{ secrets.GITHUB_TOKEN }}
- folder: target/site/apidocs
- target-folder: docs/${{ github.ref_name }}
- branch: gh-pages
diff --git a/.github/workflows/manual-codeql-analysis.yml b/.github/workflows/manual-codeql-analysis.yml
new file mode 100644
index 000000000..a68d4cfff
--- /dev/null
+++ b/.github/workflows/manual-codeql-analysis.yml
@@ -0,0 +1,57 @@
+name: "CodeQL"
+
+on:
+ workflow_dispatch:
+
+jobs:
+ analyze:
+ name: Analyze
+ runs-on: ubuntu-latest
+ permissions:
+ actions: read
+ contents: read
+ security-events: write
+
+ strategy:
+ fail-fast: false
+ matrix:
+ language: [ 'java' ]
+ # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
+ # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ # Initializes the CodeQL tools for scanning.
+ - name: Initialize CodeQL
+ uses: github/codeql-action/init@v4
+ with:
+ languages: ${{ matrix.language }}
+ # If you wish to specify custom queries, you can do so here or in a config file.
+ # By default, queries listed here will override any specified in a config file.
+ # Prefix the list here with "+" to use these queries and those in the config file.
+
+ # For details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
+ queries: +security-and-quality
+
+
+ # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java).
+ # If this step fails, then you should remove it and run the build manually (see below)
+ - name: Autobuild
+ uses: github/codeql-action/autobuild@v4
+
+ # Command-line programs to run using the OS shell.
+ # See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
+
+ # If the Autobuild fails above, remove it and uncomment the following three lines.
+ # Modify them (or add more) to build your project; please refer to the EXAMPLE below for guidance.
+
+ # - run: |
+ # echo "Run, Build Application using script"
+ # ./location_of_script_within_repo/buildscript.sh
+
+ - name: Perform CodeQL Analysis
+ uses: github/codeql-action/analyze@v4
+ with:
+ category: "/language:${{matrix.language}}"
diff --git a/.github/workflows/manual-coverage.yml b/.github/workflows/manual-coverage.yml
deleted file mode 100644
index e7b6ebc81..000000000
--- a/.github/workflows/manual-coverage.yml
+++ /dev/null
@@ -1,71 +0,0 @@
-name: Datasketches-Java Manual Coverage Report
-
-on:
- workflow_dispatch:
-
-env:
- MAVEN_OPTS: -Xmx4g -Xms1g
-
-jobs:
- build:
- name: Build, Test, Coverage
- runs-on: ${{matrix.os}}
- strategy:
- fail-fast: false
- matrix:
- jdk: [ 8 ]
- os: [ ubuntu-latest ]
- include:
-# - os: windows-latest
-# skip_javadoc: "`-Dmaven`.javadoc`.skip=true"
-# skip_gpg: "`-Dgpg`.skip=true"
- - os: ubuntu-latest
- skip_javadoc: -Dmaven.javadoc.skip=true
- skip_gpg: -Dgpg.skip=true
-# - os: macos-latest
-# skip_javadoc: -Dmaven.javadoc.skip=true
-# skip_gpg: -Dgpg.skip=true
-
- env:
- JDK_VERSION: ${{ matrix.jdk }}
-
- steps:
- - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
- uses: actions/checkout@v3
- with:
- persist-credentials: false
-
- - name: Cache local Maven repository
- uses: actions/cache@v3
- with:
- path: ~/.m2/repository
- key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: build-${{ runner.os }}-maven-
-
- - name: Install Matrix JDK
- uses: actions/setup-java@v3
- with:
- java-version: ${{ matrix.jdk }}
- distribution: 'temurin'
- java-package: jdk
- architecture: x64
-# Architecture options: x86, x64, armv7, aarch64, ppc64le
-# setup-java@v3 has a "with cache" option
-
- - name: Echo Java Version
- run: >
- java -version
-
- - name: Test, Package, Verify, Coverage Report
- if: ${{ matrix.jdk == 8 && success() }}
- run:
- mvn verify coveralls:report -B
- -DrepoToken=${{secrets.coveralls_token}}
- ${{matrix.os.skip_javadoc}}
- ${{matrix.os.skip_gpg}}
-
-# Lifecycle: validate, compile, test, package, verify, install, deploy
-# Coverage reports are available after the verify phase
-# -B batch mode
-# -V show Version without stopping
-# -q quiet, only show errors
diff --git a/.github/workflows/manual-javadoc.yml b/.github/workflows/manual-javadoc.yml
new file mode 100644
index 000000000..dfcfb1152
--- /dev/null
+++ b/.github/workflows/manual-javadoc.yml
@@ -0,0 +1,36 @@
+name: JavaDoc
+
+on:
+ workflow_dispatch:
+
+jobs:
+ # Manually triggered job: generates the Javadoc with JDK 17 and publishes it to the
+ # gh-pages branch under docs/<ref name>.
+ javadoc:
+ runs-on: ubuntu-latest
+
+ # Job-level permissions: the deploy step pushes to gh-pages with GITHUB_TOKEN, which
+ # requires write access to repository contents; a read-only default token would fail.
+ permissions:
+ contents: write
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Setup Java
+ uses: actions/setup-java@v4
+ with:
+ java-version: '17'
+ distribution: 'temurin'
+
+ - name: Echo Java Version
+ run: java -version
+
+ # Prints this workflow file into the job log.
+ - name: Print Current workflow
+ run: >
+ cat .github/workflows/manual-javadoc.yml
+
+ - name: Generate JavaDoc
+ run: mvn clean javadoc:javadoc
+
+ # NOTE(review): the workflow this replaces pinned this third-party action to a full
+ # commit SHA; consider pinning the v4.6.8 commit SHA here as well.
+ - name: Deploy JavaDoc
+ uses: JamesIves/github-pages-deploy-action@v4.6.8
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+ folder: target/reports/apidocs
+ target-folder: docs/${{ github.ref_name }}
+ branch: gh-pages
diff --git a/.github/workflows/manual-os-matrix.yml b/.github/workflows/manual-os-matrix.yml
deleted file mode 100644
index 2d5537841..000000000
--- a/.github/workflows/manual-os-matrix.yml
+++ /dev/null
@@ -1,75 +0,0 @@
-name: DataSketches-Java Manual OS Matrix Test & Install
-
-on:
- workflow_dispatch:
-
-env:
- MAVEN_OPTS: -Xmx4g -Xms1g
-
-jobs:
- build:
- name: Build, Test, Install
- runs-on: ${{matrix.os}}
- strategy:
- fail-fast: false
- matrix:
- jdk: [ 8, 11 ]
- os: [ windows-latest, ubuntu-latest, macos-latest ]
- include:
- - os: windows-latest
- skip_javadoc: "`-Dmaven`.javadoc`.skip=true"
- skip_gpg: "`-Dgpg`.skip=true"
- - os: ubuntu-latest
- skip_javadoc: -Dmaven.javadoc.skip=true
- skip_gpg: -Dgpg.skip=true
- - os: macos-latest
- skip_javadoc: -Dmaven.javadoc.skip=true
- skip_gpg: -Dgpg.skip=true
-
- env:
- JDK_VERSION: ${{ matrix.jdk }}
-
- steps:
- - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
- uses: actions/checkout@v3
- with:
- persist-credentials: false
-
- - name: Cache local Maven repository
- uses: actions/cache@v3
- with:
- path: ~/.m2/repository
- key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
- restore-keys: build-${{ runner.os }}-maven-
-
- - name: Install Matrix JDK
- uses: actions/setup-java@v3
- with:
- java-version: ${{ matrix.jdk }}
- distribution: 'temurin'
- java-package: jdk
- architecture: x64
-# Architecture options: x86, x64, armv7, aarch64, ppc64le
-# setup-java@v3 has a "with cache" option
-
- - name: Echo Java Version
- run: >
- java -version
-
- - name: Test
- run: >
- mvn clean test
- ${{matrix.os.skip_javadoc}}
- ${{matrix.os.skip_gpg}}
-
- - name: Install
- run: >
- mvn clean install -B
- ${{matrix.os.skip_javadoc}}
- -D skipTests=true
- ${{matrix.os.skip_gpg}}
-
-# Lifecycle: validate, compile, test, package, verify, install, deploy
-# -B batch mode
-# -V show Version without stopping
-# -q quiet, only show errors
diff --git a/.gitignore b/.gitignore
index 96b25576b..f03c5078f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,9 @@
*.ipr
*.iws
+# VSCode project files
+**/.vscode/
+
# Additional tools
.clover/
diff --git a/README.md b/README.md
index 3190036d1..b6db7f89e 100644
--- a/README.md
+++ b/README.md
@@ -18,8 +18,6 @@
-->
[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.datasketches/datasketches-java/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.apache.datasketches/datasketches-java)
-[![Language grade: Java](https://img.shields.io/lgtm/grade/java/g/apache/datasketches-java.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/apache/datasketches-java/context:java)
-[![Total alerts](https://img.shields.io/lgtm/alerts/g/apache/datasketches-java.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/apache/datasketches-java/alerts/)
[![Coverage Status](https://coveralls.io/repos/github/apache/datasketches-java/badge.svg)](https://coveralls.io/github/apache/datasketches-java)
=================
@@ -27,10 +25,10 @@
# Apache® DataSketches™ Core Java Library Component
This is the core Java component of the DataSketches library. It contains all of the sketching algorithms and can be accessed directly from user applications.
-This component is also a dependency of other components of the library that create adaptors for target systems, such as the [Apache Pig adaptor](https://github.com/apache/datasketches-pig) and the [Apache Hive adaptor](https://github.com/apache/datasketches-hive).
+This component is also a dependency of other components of the library that create adaptors for target systems, such as the [Apache Pig adaptor](https://github.com/apache/datasketches-pig), the [Apache Hive adaptor](https://github.com/apache/datasketches-hive), and others.
Note that we have a parallel core component for C++ and Python implementations of the same sketch algorithms,
-[datasketches-cpp](https://github.com/apache/datasketches-cpp).
+[datasketches-cpp](https://github.com/apache/datasketches-cpp) and [datasketches-python](https://github.com/apache/datasketches-python).
Please visit the main [DataSketches website](https://datasketches.apache.org) for more information.
@@ -41,30 +39,29 @@ If you are interested in making contributions to this site please see our [Commu
## Maven Build Instructions
__NOTE:__ This component accesses resource files for testing. As a result, the directory elements of the full absolute path of the target installation directory must qualify as Java identifiers. In other words, the directory elements must not have any space characters (or non-Java identifier characters) in any of the path elements. This is required by the Oracle Java Specification in order to ensure location-independent access to resources: [See Oracle Location-Independent Access to Resources](https://docs.oracle.com/javase/8/docs/technotes/guides/lang/resources.html)
-### A JDK8 with Hotspot or JDK11 with Hotspot is required to compile
-This component depends on the [datasketches-memory](https://github.com/apache/datasketches-memory) component,
-and, as a result, must be compiled with one of the above JDKs.
-If your application only relies on the APIs of this component no special JVM arguments are required.
-However, if your application also directly relies on the APIs of the *datasketches-memory* component,
-you may need additional JVM arguments.
-Please refer to the [datasketches-memory README](https://github.com/apache/datasketches-memory/blob/master/README.md) for details.
+### JDK17 is required to compile
+This component depends on the [datasketches-memory-4.1.X](https://github.com/apache/datasketches-memory/tree/4.1.X) component,
+and, as a result, must be compiled with JDK17 and this dependency:
-If your application uses Maven, you can also use the *pom.xml* of this component as an example of how to automatically
-configure the JVM arguments for compilation and testing based on the version of the JDK.
+```
+ A Bloom filter is a data structure that can be used for probabilistic
- * set membership. When querying a Bloom filter, there are no false positives. Specifically:
* When querying an item that has already been inserted to the filter, the filter will
diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java
index f865a3350..ee17a9918 100644
--- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java
+++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java
@@ -25,8 +25,8 @@
import org.apache.datasketches.memory.WritableMemory;
/**
- * This class provides methods to help estimate the correct parameters when
- * creating a Bloom filter, and methods to create the filter using those values.
- * MAP: Low significance bytes of this long data structure are on the right. However, the
+ * MAP: Low significance bytes of this long data structure are on the right. However, the
* multi-byte integers (int and long) are stored in native byte order. The byte
- * values are treated as unsigned.
- *
- * An empty FrequentItems only requires 8 bytes. All others require 32 bytes of preamble. - *
+ *An empty FrequentItems only requires 8 bytes. All others require 32 bytes of preamble.
* ** * Long || Start Byte Adr: diff --git a/src/main/java/org/apache/datasketches/hash/MurmurHash3.java b/src/main/java/org/apache/datasketches/hash/MurmurHash3.java index 9fb2ab948..a708e0077 100644 --- a/src/main/java/org/apache/datasketches/hash/MurmurHash3.java +++ b/src/main/java/org/apache/datasketches/hash/MurmurHash3.java @@ -29,10 +29,8 @@ import org.apache.datasketches.memory.Memory; /** - ** The MurmurHash3 is a fast, non-cryptographic, 128-bit hash function that has * excellent avalanche and 2-way bit independence properties. - *
* ** Austin Appleby's C++ diff --git a/src/main/java/org/apache/datasketches/hash/package-info.java b/src/main/java/org/apache/datasketches/hash/package-info.java index 2d97afeb6..5744b2776 100644 --- a/src/main/java/org/apache/datasketches/hash/package-info.java +++ b/src/main/java/org/apache/datasketches/hash/package-info.java @@ -18,12 +18,11 @@ */ /** - *
The hash package contains a high-performing and extended Java implementations + * The hash package contains a high-performing and extended Java implementations * of Austin Appleby's 128-bit MurmurHash3 hash function originally coded in C. * This core MurmurHash3.java class is used throughout many of the sketch classes for consistency * and as long as the user specifies the same seed will result in coordinated hash operations. * This package also contains an adaptor class that extends the basic class with more functions * commonly associated with hashing. - *
*/ package org.apache.datasketches.hash; diff --git a/src/main/java/org/apache/datasketches/hll/DirectHllArray.java b/src/main/java/org/apache/datasketches/hll/DirectHllArray.java index 5b3e1a4ff..e0c66c0aa 100644 --- a/src/main/java/org/apache/datasketches/hll/DirectHllArray.java +++ b/src/main/java/org/apache/datasketches/hll/DirectHllArray.java @@ -56,7 +56,6 @@ abstract class DirectHllArray extends AbstractHllArray { WritableMemory wmem; Memory mem; Object memObj; - long memAdd; final boolean compact; private static int checkMemCompactFlag(final WritableMemory wmem, final int lgConfigK) { @@ -70,7 +69,6 @@ private static int checkMemCompactFlag(final WritableMemory wmem, final int lgCo this.wmem = wmem; mem = wmem; memObj = wmem.getArray(); - memAdd = wmem.getCumulativeOffset(0L); compact = extractCompactFlag(mem); insertEmptyFlag(wmem, false); } @@ -81,7 +79,6 @@ private static int checkMemCompactFlag(final WritableMemory wmem, final int lgCo wmem = null; this.mem = mem; memObj = ((WritableMemory) mem).getArray(); - memAdd = mem.getCumulativeOffset(0L); compact = extractCompactFlag(mem); } @@ -90,7 +87,6 @@ final void updateMemory(final WritableMemory newWmem) { wmem = newWmem; mem = newWmem; memObj = wmem.getArray(); - memAdd = wmem.getCumulativeOffset(0L); } @Override diff --git a/src/main/java/org/apache/datasketches/hll/package-info.java b/src/main/java/org/apache/datasketches/hll/package-info.java index 9ffafa7bd..a17a9f646 100644 --- a/src/main/java/org/apache/datasketches/hll/package-info.java +++ b/src/main/java/org/apache/datasketches/hll/package-info.java @@ -18,7 +18,7 @@ */ /** - *The DataSketches™ HLL sketch family package
+ *The DataSketches™ HLL sketch family package
* {@link org.apache.datasketches.hll.HllSketch HllSketch} and {@link org.apache.datasketches.hll.Union Union} * are the public facing classes of this high performance implementation of Phillipe Flajolet's * HyperLogLog algorithm[1] but with significantly improved error behavior and important features that can be diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectLongsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectLongsSketch.java new file mode 100644 index 000000000..bf91baabd --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/KllDirectLongsSketch.java @@ -0,0 +1,391 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import static org.apache.datasketches.common.ByteArrayUtil.copyBytes; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR; +import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryK; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryLevelZeroSortedFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryM; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryN; +import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryLevelZeroSortedFlag; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryMinK; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryN; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryNumLevels; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts; +import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySerVer; +import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_EMPTY; +import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_FULL; +import static org.apache.datasketches.kll.KllSketch.SketchStructure.COMPACT_SINGLE; +import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE; +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; + +import org.apache.datasketches.common.ByteArrayUtil; +import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.SketchesArgumentException; 
+import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.MemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; + +/** + * This class implements an off-heap, updatable KllLongsSketch using WritableMemory. + * + *Please refer to the documentation in the package-info:
+ * + * @author Lee Rhodes, Kevin Lang + */ +class KllDirectLongsSketch extends KllLongsSketch { + private WritableMemory wmem; + private MemoryRequestServer memReqSvr; + + /** + * Constructs from Memory or WritableMemory already initialized with a sketch image and validated. + * @param wmem the current WritableMemory + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @param memVal the MemoryValadate object + */ + KllDirectLongsSketch( + final SketchStructure sketchStructure, + final WritableMemory wmem, + final MemoryRequestServer memReqSvr, + final KllMemoryValidate memVal) { + super(sketchStructure); + this.wmem = wmem; + this.memReqSvr = memReqSvr; + readOnly = (wmem != null && wmem.isReadOnly()) || sketchStructure != UPDATABLE; + levelsArr = memVal.levelsArr; //always converted to writable form. + } + + /** + * Create a new updatable, direct instance of this sketch. + * @param k parameter that controls size of the sketch and accuracy of estimates + * @param m parameter that controls the minimum level width in items. 
+ * @param dstMem the given destination WritableMemory object for use by the sketch + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @return a new instance of this sketch + */ + static KllDirectLongsSketch newDirectUpdatableInstance( + final int k, + final int m, + final WritableMemory dstMem, + final MemoryRequestServer memReqSvr) { + setMemoryPreInts(dstMem, UPDATABLE.getPreInts()); + setMemorySerVer(dstMem, UPDATABLE.getSerVer()); + setMemoryFamilyID(dstMem, Family.KLL.getID()); + setMemoryK(dstMem, k); + setMemoryM(dstMem, m); + setMemoryN(dstMem, 0); + setMemoryMinK(dstMem, k); + setMemoryNumLevels(dstMem, 1); + int offset = DATA_START_ADR; + //new Levels array + dstMem.putIntArray(offset, new int[] {k, k}, 0, 2); + offset += 2 * Integer.BYTES; + //new min/max array + dstMem.putLongArray(offset, new long[] {Long.MAX_VALUE, Long.MIN_VALUE}, 0, 2); + offset += 2 * ITEM_BYTES; + //new empty items array + dstMem.putLongArray(offset, new long[k], 0, k); + + final KllMemoryValidate memVal = new KllMemoryValidate(dstMem, LONGS_SKETCH, null); + final WritableMemory wMem = dstMem; + return new KllDirectLongsSketch(UPDATABLE, wMem, memReqSvr, memVal); + } + + //End of Constructors + + @Override + String getItemAsString(final int index) { + if (isEmpty()) { return "Null"; } + return Long.toString(getLongItemsArray()[index]); + } + + @Override + public int getK() { + return getMemoryK(wmem); + } + + //MinMax Methods + + @Override + public long getMaxItem() { + if (sketchStructure == COMPACT_EMPTY || isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + if (sketchStructure == COMPACT_SINGLE) { return getLongSingleItem(); } + //either compact-full or updatable + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + ITEM_BYTES; + return wmem.getLong(offset); + } + + @Override + long getMaxItemInternal() { + if (sketchStructure == COMPACT_EMPTY || isEmpty()) { return Long.MAX_VALUE; } + if 
(sketchStructure == COMPACT_SINGLE) { return getLongSingleItem(); } + //either compact-full or updatable + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + ITEM_BYTES; + return wmem.getLong(offset); + } + + @Override + String getMaxItemAsString() { + final long maxItem = getMaxItemInternal(); + return Long.toString(maxItem); + } + + @Override + public long getMinItem() { + if (sketchStructure == COMPACT_EMPTY || isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + if (sketchStructure == COMPACT_SINGLE) { return getLongSingleItem(); } + //either compact-full or updatable + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure); + return wmem.getLong(offset); + } + + @Override + long getMinItemInternal() { + if (sketchStructure == COMPACT_EMPTY || isEmpty()) { return Long.MAX_VALUE; } + if (sketchStructure == COMPACT_SINGLE) { return getLongSingleItem(); } + //either compact-full or updatable + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure); + return wmem.getLong(offset); + } + + @Override + String getMinItemAsString() { + final long minItem = getMinItemInternal(); + return Long.toString(minItem); + } + + @Override + void setMaxItem(final long item) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + ITEM_BYTES; + wmem.putLong(offset, item); + } + + @Override + void setMinItem(final long item) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure); + wmem.putLong(offset, item); + } + + //END MinMax Methods + + @Override + public long getN() { + if (sketchStructure == COMPACT_EMPTY) { return 0; } + else if (sketchStructure == COMPACT_SINGLE) { return 1; } + else { return getMemoryN(wmem); } + } + + //other restricted + + @Override //returns updatable, expanded array 
including free space at bottom
+  long[] getLongItemsArray() {
+    final int k = getK();
+    if (sketchStructure == COMPACT_EMPTY) { return new long[k]; }
+    if (sketchStructure == COMPACT_SINGLE) {
+      //the single item occupies the last slot of a k-sized array
+      final long[] itemsArr = new long[k];
+      itemsArr[k - 1] = getLongSingleItem();
+      return itemsArr;
+    }
+    final int capacityItems = KllHelper.computeTotalItemCapacity(k, getM(), getNumLevels());
+    final long[] longItemsArr = new long[capacityItems];
+    //items start after the levels array and the two min/max items
+    final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + 2 * ITEM_BYTES;
+    //a compact-full image stores only the retained items, so they are copied in starting at levelsArr[0]
+    final int shift = (sketchStructure == COMPACT_FULL) ? levelsArr[0] : 0;
+    final int numItems = (sketchStructure == COMPACT_FULL) ? getNumRetained() : capacityItems;
+    wmem.getLongArray(offset, longItemsArr, shift, numItems);
+    return longItemsArr;
+  }
+
+  @Override //returns compact items array of retained items, no free space.
+  long[] getLongRetainedItemsArray() {
+    if (sketchStructure == COMPACT_EMPTY) { return new long[0]; }
+    if (sketchStructure == COMPACT_SINGLE) { return new long[] { getLongSingleItem() }; }
+    final int numRetained = getNumRetained();
+    final long[] longItemsArr = new long[numRetained];
+    //for the updatable image, skip the levelsArr[0] free slots that precede the retained items
+    final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + 2 * ITEM_BYTES
+        + (sketchStructure == COMPACT_FULL ? 0 : levelsArr[0] * ITEM_BYTES);
+    wmem.getLongArray(offset, longItemsArr, 0, numRetained);
+    return longItemsArr;
+  }
+
+  /**
+   * Reads the single retained item from memory.
+   * @return the single item
+   * @throws SketchesArgumentException if this sketch does not hold exactly one item
+   */
+  @Override
+  long getLongSingleItem() {
+    if (!isSingleItem()) { throw new SketchesArgumentException(NOT_SINGLE_ITEM_MSG); }
+    if (sketchStructure == COMPACT_SINGLE) {
+      return wmem.getLong(DATA_START_ADR_SINGLE_ITEM);
+    }
+    final int offset;
+    if (sketchStructure == COMPACT_FULL) {
+      //first slot of the items region
+      offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + 2 * ITEM_BYTES;
+    } else { //sketchStructure == UPDATABLE
+      //slot k - 1 of the items region
+      offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + (2 + getK() - 1) * ITEM_BYTES;
+    }
+    return wmem.getLong(offset);
+  }
+
+  @Override
+  int getM() {
+    return getMemoryM(wmem);
+  }
+
+  @Override
+  MemoryRequestServer getMemoryRequestServer() { return memReqSvr; }
+
+  @Override
+  int getMinK() {
+    //minK is read from memory only for the compact-full and updatable forms; otherwise k is returned
+    if (sketchStructure == COMPACT_FULL || sketchStructure == UPDATABLE) { return getMemoryMinK(wmem); }
+    return getK();
+  }
+
+  /**
+   * Returns the min item followed by the max item as 2 * ITEM_BYTES bytes.
+   * @return a new byte array holding min then max
+   */
+  @Override
+  byte[] getMinMaxByteArr() {
+    final byte[] bytesOut = new byte[2 * ITEM_BYTES];
+    if (sketchStructure == COMPACT_EMPTY) {
+      //empty: emit the sentinel values, min = Long.MAX_VALUE and max = Long.MIN_VALUE
+      ByteArrayUtil.putLongLE(bytesOut, 0, Long.MAX_VALUE);
+      ByteArrayUtil.putLongLE(bytesOut, ITEM_BYTES, Long.MIN_VALUE);
+      return bytesOut;
+    }
+    final int offset;
+    if (sketchStructure == COMPACT_SINGLE) {
+      //the single item is both min and max: read it once, then duplicate it into the second half
+      offset = DATA_START_ADR_SINGLE_ITEM;
+      wmem.getByteArray(offset, bytesOut, 0, ITEM_BYTES);
+      copyBytes(bytesOut, 0, bytesOut, ITEM_BYTES, ITEM_BYTES);
+      return bytesOut;
+    }
+    //sketchStructure == UPDATABLE OR COMPACT_FULL
+    offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure);
+    wmem.getByteArray(offset, bytesOut, 0, ITEM_BYTES);
+    wmem.getByteArray(offset + ITEM_BYTES, bytesOut, ITEM_BYTES, ITEM_BYTES);
+    return bytesOut;
+  }
+
+  /**
+   * @return the retained items (see getLongRetainedItemsArray()) written into a new byte array.
+   */
+  @Override
+  byte[] getRetainedItemsByteArr() {
+    if (sketchStructure == COMPACT_EMPTY) { return new byte[0]; }
+    final long[] lngArr = getLongRetainedItemsArray();
+    final byte[] lngByteArr = new byte[lngArr.length * ITEM_BYTES];
+    final WritableMemory wmem2 = WritableMemory.writableWrap(lngByteArr);
+    wmem2.putLongArray(0, lngArr, 0, lngArr.length);
+    return lngByteArr;
+  }
+
+  /**
+   * @return the expanded items array (see getLongItemsArray()) written into a new byte array.
+   */
+  @Override
+  byte[] getTotalItemsByteArr() {
+    final long[] lngArr = getLongItemsArray();
+    final byte[] lngByteArr = new byte[lngArr.length * ITEM_BYTES];
+    final WritableMemory wmem2 = WritableMemory.writableWrap(lngByteArr);
+    wmem2.putLongArray(0, lngArr, 0, lngArr.length);
+    return lngByteArr;
+  }
+
+  @Override
+  WritableMemory getWritableMemory() {
+    return wmem;
+  }
+
+  //All mutators below throw SketchesArgumentException when this sketch is read-only.
+
+  @Override
+  void incN(final int increment) {
+    if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+    setMemoryN(wmem, getMemoryN(wmem) + increment);
+  }
+
+  @Override
+  void incNumLevels() {
+    if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+    int numLevels = getMemoryNumLevels(wmem);
+    setMemoryNumLevels(wmem, ++numLevels);
+  }
+
+  @Override
+  boolean isLevelZeroSorted() {
+    return getMemoryLevelZeroSortedFlag(wmem);
+  }
+
+  @Override
+  void setLongItemsArray(final long[] longItems) {
+    if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+    //overwrites the items region starting at its first slot
+    final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + 2 * ITEM_BYTES;
+    wmem.putLongArray(offset, longItems, 0, longItems.length);
+  }
+
+  @Override
+  void setLongItemsArrayAt(final int index, final long item) {
+    if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+    //index + 2 skips the min and max items that precede the items region
+    final int offset =
+        DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + (index + 2) * ITEM_BYTES;
+    wmem.putLong(offset, item);
+  }
+
+  @Override
+  void setLongItemsArrayAt(final int index, final long[] items, final int srcOffset, final int length) {
+    if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+    final int offset = DATA_START_ADR + getLevelsArrSizeBytes(sketchStructure) + (index + 2) * ITEM_BYTES;
+    wmem.putLongArray(offset, items, srcOffset, length);
+  }
+
+  @Override
+  void setLevelZeroSorted(final boolean sorted) {
+    if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+    setMemoryLevelZeroSortedFlag(wmem, sorted);
+  }
+
+  @Override
+  void setMinK(final int minK) {
+    if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+    setMemoryMinK(wmem, minK);
+  }
+
+  @Override
+  void setN(final long n) {
+    if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+    setMemoryN(wmem, n);
+  }
+
+  @Override
+  void setNumLevels(final int numLevels) {
+    if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); }
+    setMemoryNumLevels(wmem, numLevels);
+  }
+
+  @Override
+  void setWritableMemory(final WritableMemory wmem) {
+    this.wmem = wmem;
+  }
+
+  /**
+   * Variant constructed from a Memory image with no MemoryRequestServer.
+   * NOTE(review): the source Memory is cast to WritableMemory; presumably writes are blocked
+   * by the readOnly flag of the sketch - confirm against the superclass constructor.
+   */
+  final static class KllDirectCompactLongsSketch extends KllDirectLongsSketch {
+
+    KllDirectCompactLongsSketch(
+        final SketchStructure sketchStructure,
+        final Memory srcMem,
+        final KllMemoryValidate memVal) {
+      super(sketchStructure, (WritableMemory) srcMem, null, memVal);
+    }
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java
index 67035b45f..acbecdf07 100644
--- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java
+++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java
@@ -312,6 +312,7 @@ private static void randomlyHalveUpDoubles(final double[] buf, final int start,
   /**
    * Compression algorithm used to merge higher levels.
+   *
    *
+ * {@link org.apache.datasketches.kll}Here is what we do for each level:
*
The parameter k will not change.
*/ @Override diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java index 50cadeb3e..69045f78c 100644 --- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java +++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java @@ -312,6 +312,7 @@ private static void randomlyHalveUpFloats(final float[] buf, final int start, fi /** * Compression algorithm used to merge higher levels. + * *Here is what we do for each level:
*The parameter k will not change.
*/ @Override diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java index e58516fd3..87e3a542a 100644 --- a/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java +++ b/src/main/java/org/apache/datasketches/kll/KllHeapItemsSketch.java @@ -36,7 +36,7 @@ import org.apache.datasketches.memory.WritableMemory; /** - * This class implements an on-heap doubles KllSketch. + * This class implements an on-heap items KllSketch. * *Please refer to the documentation in the package-info:
* {@link org.apache.datasketches.kll}
Please refer to the documentation in the package-info:
+ * {@link org.apache.datasketches.kll}
Here is what we do for each level:
*Here is what we do for each level:
+ *It can be proved that generalCompress returns a sketch that satisfies the space constraints + * no matter how much data is passed in. + * We are pretty sure that it works correctly when inBuf and outBuf are the same. + * All levels except for level zero must be sorted before calling this, and will still be + * sorted afterwards. + * Level zero is not required to be sorted before, and may not be sorted afterwards.
+ * + *This trashes inBuf and inLevels and modifies outBuf and outLevels.
+   *
+   * @param k The sketch parameter k
+   * @param m The minimum level size
+   * @param numLevelsIn provisional number of levels = max(this.numLevels, other.numLevels)
+   * @param inBuf work buffer of size = this.getNumRetained() + other.getNumRetainedAboveLevelZero().
+   * This contains the long[] of the other sketch
+   * @param inLevels work levels array size = ubOnNumLevels(this.n + other.n) + 2
+   * @param outBuf the same array as inBuf
+   * @param outLevels the same size as inLevels
+   * @param isLevelZeroSorted true if this.level 0 is sorted
+   * @param random instance of java.util.Random
+   * @return int array of: {numLevels, targetItemCount, currentItemCount}
+   */
+  //
+  private static int[] generalLongsCompress(
+      final int k,
+      final int m,
+      final int numLevelsIn,
+      final long[] inBuf,
+      final int[] inLevels,
+      final long[] outBuf,
+      final int[] outLevels,
+      final boolean isLevelZeroSorted,
+      final Random random) {
+    assert numLevelsIn > 0; // things are too weird if zero levels are allowed
+    int numLevels = numLevelsIn;
+    int currentItemCount = inLevels[numLevels] - inLevels[0]; // decreases with each compaction
+    int targetItemCount = KllHelper.computeTotalItemCapacity(k, m, numLevels); // increases if we add levels
+    boolean doneYet = false;
+    outLevels[0] = 0;
+    int curLevel = -1;
+    while (!doneYet) {
+      curLevel++; // start out at level 0
+
+      // If we are at the current top level, add an empty level above it for convenience,
+      // but do not actually increment numLevels until later
+      if (curLevel == (numLevels - 1)) {
+        inLevels[curLevel + 2] = inLevels[curLevel + 1];
+      }
+
+      // raw (pre-compaction) bounds and population of the current level in inBuf
+      final int rawBeg = inLevels[curLevel];
+      final int rawLim = inLevels[curLevel + 1];
+      final int rawPop = rawLim - rawBeg;
+
+      if ((currentItemCount < targetItemCount) || (rawPop < KllHelper.levelCapacity(k, numLevels, curLevel, m))) {
+        // copy level over as is
+        // because inBuf and outBuf could be the same, make sure we are not moving data upwards!
+        assert (rawBeg >= outLevels[curLevel]);
+        System.arraycopy(inBuf, rawBeg, outBuf, outLevels[curLevel], rawPop);
+        outLevels[curLevel + 1] = outLevels[curLevel] + rawPop;
+      }
+      else {
+        // The sketch is too full AND this level is too full, so we compact it
+        // Note: this can add a level and thus change the sketch's capacity
+
+        final int popAbove = inLevels[curLevel + 2] - rawLim;
+        // only an even number of items is halved; with an odd population the first item stays behind
+        final boolean oddPop = isOdd(rawPop);
+        final int adjBeg = oddPop ? 1 + rawBeg : rawBeg;
+        final int adjPop = oddPop ? rawPop - 1 : rawPop;
+        final int halfAdjPop = adjPop / 2;
+
+        if (oddPop) { // copy one guy over
+          outBuf[outLevels[curLevel]] = inBuf[rawBeg];
+          outLevels[curLevel + 1] = outLevels[curLevel] + 1;
+        } else { // copy zero guys over
+          outLevels[curLevel + 1] = outLevels[curLevel];
+        }
+
+        // level zero might not be sorted, so we must sort it if we wish to compact it
+        if ((curLevel == 0) && !isLevelZeroSorted) {
+          Arrays.sort(inBuf, adjBeg, adjBeg + adjPop);
+        }
+
+        if (popAbove == 0) { // Level above is empty, so halve up
+          randomlyHalveUpLongs(inBuf, adjBeg, adjPop, random);
+        } else { // Level above is nonempty, so halve down, then merge up
+          randomlyHalveDownLongs(inBuf, adjBeg, adjPop, random);
+          mergeSortedLongArrays(inBuf, adjBeg, halfAdjPop, inBuf, rawLim, popAbove, inBuf, adjBeg + halfAdjPop);
+        }
+
+        // track the fact that we just eliminated some data
+        currentItemCount -= halfAdjPop;
+
+        // Adjust the boundaries of the level above
+        inLevels[curLevel + 1] = inLevels[curLevel + 1] - halfAdjPop;
+
+        // Increment numLevels if we just compacted the old top level
+        // This creates some more capacity (the size of the new bottom level)
+        if (curLevel == (numLevels - 1)) {
+          numLevels++;
+          targetItemCount += KllHelper.levelCapacity(k, numLevels, 0, m);
+        }
+      } // end of code for compacting a level
+
+      // determine whether we have processed all levels yet (including any new levels that we created)
+      if (curLevel == (numLevels - 1)) { doneYet = true; }
+    } // end of loop over levels
+
+    assert (outLevels[numLevels] - outLevels[0]) == currentItemCount;
+    return new int[] {numLevels, targetItemCount, currentItemCount};
+  }
+
+  /**
+   * Fills workBuf and workLevels with the combined levels of "self" and "other".
+   * Level zero is copied from self only; for each higher level the items of both sketches are
+   * copied, or merged with mergeSortedLongArrays when both sketches have items at that level.
+   * workLevels[lvl + 1] is set to the end offset of level lvl in workBuf.
+   */
+  private static void populateLongWorkArrays( //workBuf and workLevels are modified
+      final long[] workBuf, final int[] workLevels, final int provisionalNumLevels,
+      final int myCurNumLevels, final int[] myCurLevelsArr, final long[] myCurLongItemsArr,
+      final int otherNumLevels, final int[] otherLevelsArr, final long[] otherLongItemsArr) {
+
+    workLevels[0] = 0;
+
+    // Note: the level zero data from "other" was already inserted into "self".
+    // This copies into workbuf.
+    final int selfPopZero = KllHelper.currentLevelSizeItems(0, myCurNumLevels, myCurLevelsArr);
+    System.arraycopy(myCurLongItemsArr, myCurLevelsArr[0], workBuf, workLevels[0], selfPopZero);
+    workLevels[1] = workLevels[0] + selfPopZero;
+
+    for (int lvl = 1; lvl < provisionalNumLevels; lvl++) {
+      final int selfPop = KllHelper.currentLevelSizeItems(lvl, myCurNumLevels, myCurLevelsArr);
+      final int otherPop = KllHelper.currentLevelSizeItems(lvl, otherNumLevels, otherLevelsArr);
+      // the end offset is recorded first so that empty levels still get a valid boundary
+      workLevels[lvl + 1] = workLevels[lvl] + selfPop + otherPop;
+      assert selfPop >= 0 && otherPop >= 0;
+      if (selfPop == 0 && otherPop == 0) { continue; }
+      if (selfPop > 0 && otherPop == 0) {
+        System.arraycopy(myCurLongItemsArr, myCurLevelsArr[lvl], workBuf, workLevels[lvl], selfPop);
+      }
+      else if (selfPop == 0 && otherPop > 0) {
+        System.arraycopy(otherLongItemsArr, otherLevelsArr[lvl], workBuf, workLevels[lvl], otherPop);
+      }
+      else if (selfPop > 0 && otherPop > 0) {
+        mergeSortedLongArrays( //only workBuf is modified
+            myCurLongItemsArr, myCurLevelsArr[lvl], selfPop,
+            otherLongItemsArr, otherLevelsArr[lvl], otherPop,
+            workBuf, workLevels[lvl]);
+      }
+    }
+  }
+
+  /*
+   * Validation Method.
+   * The following must be enabled for use with the KllDoublesValidationTest,
+   * which is only enabled for manual testing.
In addition, two Validation Methods + * above need to be modified. + */ //NOTE Validation Method: Need to uncomment to use + // static int nextOffset = 0; + // + // private static int deterministicOffset() { + // final int result = nextOffset; + // nextOffset = 1 - nextOffset; + // return result; + // } + +} diff --git a/src/main/java/org/apache/datasketches/kll/KllLongsSketch.java b/src/main/java/org/apache/datasketches/kll/KllLongsSketch.java new file mode 100644 index 000000000..f5688ad70 --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/KllLongsSketch.java @@ -0,0 +1,670 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import static java.lang.Math.max; +import static java.lang.Math.min; +import static org.apache.datasketches.common.ByteArrayUtil.putLongLE; +import static org.apache.datasketches.kll.KllSketch.SketchStructure.UPDATABLE; +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; + +import java.util.Arrays; +import java.util.Objects; + +import org.apache.datasketches.common.ArrayOfItemsSerDe; +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.common.SuppressFBWarnings; +import org.apache.datasketches.kll.KllDirectLongsSketch.KllDirectCompactLongsSketch; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.MemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.quantilescommon.LongsSketchSortedView; +import org.apache.datasketches.quantilescommon.QuantileSearchCriteria; +import org.apache.datasketches.quantilescommon.QuantilesLongsAPI; +import org.apache.datasketches.quantilescommon.QuantilesLongsSketchIterator; + +/** + * This variation of the KllSketch implements primitive longs. + * + * @see org.apache.datasketches.kll.KllSketch + */ +public abstract class KllLongsSketch extends KllSketch implements QuantilesLongsAPI { + private LongsSketchSortedView longsSV = null; + final static int ITEM_BYTES = Long.BYTES; + + KllLongsSketch( + final SketchStructure sketchStructure) { + super(SketchType.LONGS_SKETCH, sketchStructure); + } + + //Factories for new heap instances. + + /** + * Create a new heap instance of this sketch with the default k = 200. + * The default k = 200 results in a normalized rank error of about + * 1.65%. Larger K will have smaller error but the sketch will be larger (and slower). + * @return new KllLongsSketch on the Java heap. 
+ */ + public static KllLongsSketch newHeapInstance() { + return newHeapInstance(DEFAULT_K); + } + + /** + * Create a new heap instance of this sketch with a given parameter k. + * k can be between 8, inclusive, and 65535, inclusive. + * The default k = 200 results in a normalized rank error of about + * 1.65%. Larger K will have smaller error but the sketch will be larger (and slower). + * @param k parameter that controls size of the sketch and accuracy of estimates. + * @return new KllLongsSketch on the Java heap. + */ + public static KllLongsSketch newHeapInstance(final int k) { + return new KllHeapLongsSketch(k, DEFAULT_M); + } + + //Factories for new direct instances. + + /** + * Create a new direct updatable instance of this sketch with the default k. + * The default k = 200 results in a normalized rank error of about + * 1.65%. Larger k will have smaller error but the sketch will be larger (and slower). + * @param dstMem the given destination WritableMemory object for use by the sketch + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @return a new direct instance of this sketch + */ + public static KllLongsSketch newDirectInstance( + final WritableMemory dstMem, + final MemoryRequestServer memReqSvr) { + return newDirectInstance(DEFAULT_K, dstMem, memReqSvr); + } + + /** + * Create a new direct updatable instance of this sketch with a given k. + * @param k parameter that controls size of the sketch and accuracy of estimates. 
+ * @param dstMem the given destination WritableMemory object for use by the sketch + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @return a new direct instance of this sketch + */ + public static KllLongsSketch newDirectInstance( + final int k, + final WritableMemory dstMem, + final MemoryRequestServer memReqSvr) { + Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null"); + Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null"); + return KllDirectLongsSketch.newDirectUpdatableInstance(k, DEFAULT_M, dstMem, memReqSvr); + } + + //Factory to create an heap instance from a Memory image + + /** + * Factory heapify takes a compact sketch image in Memory and instantiates an on-heap sketch. + * The resulting sketch will not retain any link to the source Memory. + * @param srcMem a compact Memory image of a sketch serialized by this sketch. + * See Memory + * @return a heap-based sketch based on the given Memory. + */ + public static KllLongsSketch heapify(final Memory srcMem) { + Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null"); + return KllHeapLongsSketch.heapifyImpl(srcMem); + } + + //Factory to wrap a Read-Only Memory + + /** + * Wrap a sketch around the given read only compact source Memory containing sketch data + * that originated from this sketch. 
+ * @param srcMem the read only source Memory + * @return instance of this sketch + */ + public static KllLongsSketch wrap(final Memory srcMem) { + Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null"); + final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, LONGS_SKETCH, null); + if (memVal.sketchStructure == UPDATABLE) { + final MemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); //dummy + return new KllDirectLongsSketch(memVal.sketchStructure, (WritableMemory)srcMem, memReqSvr, memVal); + } else { + return new KllDirectCompactLongsSketch(memVal.sketchStructure, srcMem, memVal); + } + } + + //Factory to wrap a WritableMemory image + + /** + * Wrap a sketch around the given source Writable Memory containing sketch data + * that originated from this sketch. + * @param srcMem a WritableMemory that contains data. + * @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory + * @return instance of this sketch + */ + public static KllLongsSketch writableWrap( + final WritableMemory srcMem, + final MemoryRequestServer memReqSvr) { + Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null"); + Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null"); + final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, LONGS_SKETCH); + if (memVal.sketchStructure == UPDATABLE) { + return new KllDirectLongsSketch(UPDATABLE, srcMem, memReqSvr, memVal); + } else { + return new KllDirectCompactLongsSketch(memVal.sketchStructure, srcMem, memVal); + } + } + + //END of Constructors + + @Override + public double[] getCDF(final long[] splitPoints, final QuantileSearchCriteria searchCrit) { + if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + refreshSortedView(); + return longsSV.getCDF(splitPoints, searchCrit); + } + + @Override + public double[] getPMF(final long[] splitPoints, final QuantileSearchCriteria searchCrit) { + if (isEmpty()) { throw new 
SketchesArgumentException(EMPTY_MSG); } + refreshSortedView(); + return longsSV.getPMF(splitPoints, searchCrit); + } + + @Override + public long getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { + if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + refreshSortedView(); + return longsSV.getQuantile(rank, searchCrit); + } + + @Override + public long[] getQuantiles(final double[] ranks, final QuantileSearchCriteria searchCrit) { + if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + refreshSortedView(); + final int len = ranks.length; + final long[] quantiles = new long[len]; + for (int i = 0; i < len; i++) { + quantiles[i] = longsSV.getQuantile(ranks[i], searchCrit); + } + return quantiles; + } + + /** + * {@inheritDoc} + * The approximate probability that the true quantile is within the confidence interval + * specified by the upper and lower quantile bounds for this sketch is 0.99. + */ + @Override + public long getQuantileLowerBound(final double rank) { + return getQuantile(max(0, rank - KllHelper.getNormalizedRankError(getMinK(), false))); + } + + /** + * {@inheritDoc} + * The approximate probability that the true quantile is within the confidence interval + * specified by the upper and lower quantile bounds for this sketch is 0.99. + */ + @Override + public long getQuantileUpperBound(final double rank) { + return getQuantile(min(1.0, rank + KllHelper.getNormalizedRankError(getMinK(), false))); + } + + @Override + public double getRank(final long quantile, final QuantileSearchCriteria searchCrit) { + if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + refreshSortedView(); + return longsSV.getRank(quantile, searchCrit); + } + + /** + * {@inheritDoc} + * The approximate probability that the true rank is within the confidence interval + * specified by the upper and lower rank bounds for this sketch is 0.99. 
+ */ + @Override + public double getRankLowerBound(final double rank) { + return max(0.0, rank - KllHelper.getNormalizedRankError(getMinK(), false)); + } + + /** + * {@inheritDoc} + * The approximate probability that the true rank is within the confidence interval + * specified by the upper and lower rank bounds for this sketch is 0.99. + */ + @Override + public double getRankUpperBound(final double rank) { + return min(1.0, rank + KllHelper.getNormalizedRankError(getMinK(), false)); + } + + @Override + public double[] getRanks(final long[] quantiles, final QuantileSearchCriteria searchCrit) { + if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + refreshSortedView(); + final int len = quantiles.length; + final double[] ranks = new double[len]; + for (int i = 0; i < len; i++) { + ranks[i] = longsSV.getRank(quantiles[i], searchCrit); + } + return ranks; + } + + @Override + public QuantilesLongsSketchIterator iterator() { + return new KllLongsSketchIterator( + getLongItemsArray(), getLevelsArray(SketchStructure.UPDATABLE), getNumLevels()); + } + + @Override + public final void merge(final KllSketch other) { + if (readOnly || sketchStructure != UPDATABLE) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + if (this == other) { throw new SketchesArgumentException(SELF_MERGE_MSG); } + final KllLongsSketch otherLngSk = (KllLongsSketch)other; + if (otherLngSk.isEmpty()) { return; } + KllLongsHelper.mergeLongsImpl(this, otherLngSk); + longsSV = null; + } + + /** + * {@inheritDoc} + * + *The parameter k will not change.
+ */ + @Override + public final void reset() { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + final int k = getK(); + setN(0); + setMinK(k); + setNumLevels(1); + setLevelZeroSorted(false); + setLevelsArray(new int[] {k, k}); + setMinItem(Long.MAX_VALUE); + setMaxItem(Long.MIN_VALUE); + setLongItemsArray(new long[k]); + longsSV = null; + } + + @Override + public byte[] toByteArray() { + return KllHelper.toByteArray(this, false); + } + + @Override + public String toString(final boolean withLevels, final boolean withLevelsAndItems) { + KllSketch sketch = this; + if (withLevelsAndItems && sketchStructure != UPDATABLE) { + final Memory mem = getWritableMemory(); + assert mem != null; + sketch = KllLongsSketch.heapify(getWritableMemory()); + } + return KllHelper.toStringImpl(sketch, withLevels, withLevelsAndItems, getSerDe()); + } + + //SINGLE UPDATE + + @Override + public void update(final long item) { + // Align with KllDoublesSketch + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + updateLong(this, item); + longsSV = null; + } + + //Also Called from KllLongsHelper::merge + static void updateLong(final KllLongsSketch lngSk, final long item) { + lngSk.updateMinMax(item); + int freeSpace = lngSk.levelsArr[0]; + assert (freeSpace >= 0); + if (freeSpace == 0) { + KllLongsHelper.compressWhileUpdatingSketch(lngSk); + freeSpace = lngSk.levelsArr[0]; + assert (freeSpace > 0); + } + lngSk.incN(1); + lngSk.setLevelZeroSorted(false); + final int nextPos = freeSpace - 1; + lngSk.setLevelsArrayAt(0, nextPos); + lngSk.setLongItemsArrayAt(nextPos, item); + } + + /** + * Single update of min and max + * @param item the source item, it must not be a NaN. + */ + final void updateMinMax(final long item) { + if (isEmpty()) { + setMinItem(item); + setMaxItem(item); + } else { + setMinItem(min(getMinItemInternal(), item)); + setMaxItem(max(getMaxItemInternal(), item)); + } + } + + //WEIGHTED UPDATE + + /** + * Weighted update. 
Updates this sketch with the given item the number of times specified by the given integer weight. + * @param item the item to be repeated. NaNs are ignored. + * @param weight the number of times the update of item is to be repeated. It must be ≥ one. + */ + public void update(final long item, final long weight) { + // + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + if (weight < 1L) { throw new SketchesArgumentException("Weight is less than one."); } + if (weight == 1L) { updateLong(this, item); } + else { + if (weight < levelsArr[0]) { + for (int i = 0; i < (int)weight; i++) { updateLong(this, item); } + } else { + final KllHeapLongsSketch tmpSk = new KllHeapLongsSketch(getK(), DEFAULT_M, item, weight); + merge(tmpSk); + } + } + longsSV = null; + } + + // VECTOR UPDATE + + /** + * Vector update. Updates this sketch with the given array (vector) of items, starting at the items + * offset for a length number of items. This is not supported for direct sketches. 
+ * @param items the vector of items + * @param offset the starting index of the items[] array + * @param length the number of items + */ + public void update(final long[] items, final int offset, final int length) { + if (readOnly) { throw new SketchesArgumentException(TGT_IS_READ_ONLY_MSG); } + if (length == 0) { return; } + updateLong(items, offset, length); + longsSV = null; + } + /* Align with KllDoublesSketch + + + + + + + + + + + + + */ + private void updateLong(final long[] srcItems, final int srcOffset, final int length) { + if (isEmpty()) { + setMinItem(srcItems[srcOffset]); //initialize with a real value + setMaxItem(srcItems[srcOffset]); + } + + int count = 0; + while (count < length) { + if (levelsArr[0] == 0) { + KllLongsHelper.compressWhileUpdatingSketch(this); + } + final int spaceNeeded = length - count; + final int freeSpace = levelsArr[0]; + assert (freeSpace > 0); + final int numItemsToCopy = min(spaceNeeded, freeSpace); + final int dstOffset = freeSpace - numItemsToCopy; + final int localSrcOffset = srcOffset + count; + setLongItemsArrayAt(dstOffset, srcItems, localSrcOffset, numItemsToCopy); + updateMinMax(srcItems, localSrcOffset, numItemsToCopy); + count += numItemsToCopy; + incN(numItemsToCopy); + setLevelsArrayAt(0, dstOffset); + } + setLevelZeroSorted(false); + } + + /** + * Vector update of min and max. + * @param srcItems the input source array of values, no NaNs allowed. + * @param srcOffset the starting offset in srcItems + * @param length the number of items to update min and max + */ + private void updateMinMax(final long[] srcItems, final int srcOffset, final int length) { + final int end = srcOffset + length; + for (int i = srcOffset; i < end; i++) { + setMinItem(min(getMinItemInternal(), srcItems[i])); + setMaxItem(max(getMaxItemInternal(), srcItems[i])); + } + } + /* Align with KllDoublesSketch + + + + + + + + + */ + // END ALL UPDATE METHODS + + /** + * @return full size of internal items array including empty space at bottom. 
+   */
+  abstract long[] getLongItemsArray();
+
+  /**
+   * @return items array of retained items.
+   */
+  abstract long[] getLongRetainedItemsArray();
+
+  abstract long getLongSingleItem();
+
+  // Min & Max Methods
+
+  abstract long getMaxItemInternal();
+
+  abstract void setMaxItem(long item);
+
+  abstract long getMinItemInternal();
+
+  abstract void setMinItem(long item);
+
+  @Override
+  abstract byte[] getMinMaxByteArr();
+
+  @Override
+  int getMinMaxSizeBytes() {
+    return Long.BYTES * 2;
+  }
+
+  //END Min & Max Methods
+
+  @Override
+  abstract byte[] getRetainedItemsByteArr();
+
+  @Override
+  int getRetainedItemsSizeBytes() {
+    return getNumRetained() * Long.BYTES;
+  }
+
+  //NOTE(review): the generic argument of this return type looks truncated in this copy
+  //(presumably a wildcard) - confirm against the repository.
+  @Override
+  ArrayOfItemsSerDe> getSerDe() { return null; }
+
+  @Override
+  final byte[] getSingleItemByteArr() {
+    final byte[] bytes = new byte[ITEM_BYTES];
+    putLongLE(bytes, 0, getLongSingleItem());
+    return bytes;
+  }
+
+  @Override
+  int getSingleItemSizeBytes() {
+    return Long.BYTES;
+  }
+
+  @Override
+  abstract byte[] getTotalItemsByteArr();
+
+  @Override
+  int getTotalItemsNumBytes() {
+    return levelsArr[getNumLevels()] * Long.BYTES;
+  }
+
+  abstract void setLongItemsArray(long[] longItems);
+
+  abstract void setLongItemsArrayAt(int index, long item);
+
+  abstract void setLongItemsArrayAt(int dstIndex, long[] srcItems, int srcOffset, int length);
+
+  // SORTED VIEW
+
+  @Override
+  @SuppressFBWarnings(value = "EI_EXPOSE_REP", justification = "OK in this case.")
+  public LongsSketchSortedView getSortedView() {
+    refreshSortedView();
+    return longsSV;
+  }
+
+  /**
+   * Builds the sorted view if no cached one exists and returns the cached instance.
+   * The cache (longsSV) is set to null by the update, merge and reset methods.
+   */
+  private final LongsSketchSortedView refreshSortedView() {
+    if (longsSV == null) {
+      final CreateSortedView csv = new CreateSortedView();
+      longsSV = csv.getSV();
+    }
+    return longsSV;
+  }
+
+  /**
+   * Helper that flattens the levels of the enclosing sketch into parallel arrays of
+   * sorted quantiles and cumulative weights.
+   */
+  private final class CreateSortedView {
+    long[] quantiles;
+    long[] cumWeights;
+
+    /**
+     * @return a new sorted view of the enclosing sketch
+     * @throws SketchesArgumentException if the enclosing sketch is empty
+     */
+    LongsSketchSortedView getSV() {
+      if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); }
+      final long[] srcQuantiles = getLongItemsArray();
+      final int[] srcLevels = levelsArr;
+      final int srcNumLevels = getNumLevels();
+
+      if (!isLevelZeroSorted()) {
+        Arrays.sort(srcQuantiles, srcLevels[0], srcLevels[1]);
+        if (!hasMemory()) { setLevelZeroSorted(true); }
+        //we don't sort level0 in Memory, only our copy.
+      }
+      final int numQuantiles = getNumRetained();
+      quantiles = new long[numQuantiles];
+      cumWeights = new long[numQuantiles];
+      populateFromSketch(srcQuantiles, srcLevels, srcNumLevels, numQuantiles);
+      return new LongsSketchSortedView(
+          quantiles, cumWeights, KllLongsSketch.this);
+    }
+
+    /**
+     * Copies the retained items into quantiles, assigns each item the weight of its level
+     * (1 for level zero, doubling with each level), merge-sorts the non-empty levels
+     * and finally converts the weights to cumulative weights.
+     */
+    private void populateFromSketch(final long[] srcQuantiles, final int[] srcLevels,
+        final int srcNumLevels, final int numItems) {
+      final int[] myLevels = new int[srcNumLevels + 1];
+      //offset rebases the level boundaries so that the first retained item is at index 0
+      final int offset = srcLevels[0];
+      System.arraycopy(srcQuantiles, offset, quantiles, 0, numItems);
+      int srcLevel = 0;
+      int dstLevel = 0;
+      long weight = 1;
+      while (srcLevel < srcNumLevels) {
+        final int fromIndex = srcLevels[srcLevel] - offset;
+        final int toIndex = srcLevels[srcLevel + 1] - offset; // exclusive
+        if (fromIndex < toIndex) { // if equal, skip empty level
+          Arrays.fill(cumWeights, fromIndex, toIndex, weight);
+          myLevels[dstLevel] = fromIndex;
+          myLevels[dstLevel + 1] = toIndex;
+          dstLevel++;
+        }
+        srcLevel++;
+        weight *= 2;
+      }
+      final int numLevels = dstLevel;
+      blockyTandemMergeSort(quantiles, cumWeights, myLevels, numLevels); //create unit weights
+      KllHelper.convertToCumulative(cumWeights);
+    }
+  } //End of class CreateSortedView
+
+  /**
+   * Sorts quantiles, and weights in tandem, given that each level (block) delimited by
+   * levels[] is handed to the recursive merge as an already ordered run.
+   */
+  private static void blockyTandemMergeSort(final long[] quantiles, final long[] weights,
+      final int[] levels, final int numLevels) {
+    if (numLevels == 1) { return; }
+
+    // duplicate the input in preparation for the "ping-pong" copy reduction strategy.
+    final long[] quantilesTmp = Arrays.copyOf(quantiles, quantiles.length);
+    final long[] weightsTmp = Arrays.copyOf(weights, quantiles.length); // don't need the extra one
+
+    blockyTandemMergeSortRecursion(quantilesTmp, weightsTmp, quantiles, weights, levels, 0, numLevels);
+  }
+
+  /**
+   * Recursive step: splits the range of levels in two, sorts each half with the roles of the
+   * source and destination arrays swapped, then merges the two halves from source to destination.
+   */
+  private static void blockyTandemMergeSortRecursion(
+      final long[] quantilesSrc, final long[] weightsSrc,
+      final long[] quantilesDst, final long[] weightsDst,
+      final int[] levels, final int startingLevel, final int numLevels) {
+    if (numLevels == 1) { return; }
+    final int numLevels1 = numLevels / 2;
+    final int numLevels2 = numLevels - numLevels1;
+    assert numLevels1 >= 1;
+    assert numLevels2 >= numLevels1;
+    final int startingLevel1 = startingLevel;
+    final int startingLevel2 = startingLevel + numLevels1;
+    // swap roles of src and dst
+    blockyTandemMergeSortRecursion(
+        quantilesDst, weightsDst,
+        quantilesSrc, weightsSrc,
+        levels, startingLevel1, numLevels1);
+    blockyTandemMergeSortRecursion(
+        quantilesDst, weightsDst,
+        quantilesSrc, weightsSrc,
+        levels, startingLevel2, numLevels2);
+    tandemMerge(
+        quantilesSrc, weightsSrc,
+        quantilesDst, weightsDst,
+        levels,
+        startingLevel1, numLevels1,
+        startingLevel2, numLevels2);
+  }
+
+  private static void tandemMerge(
+      final long[] quantilesSrc, final long[] weightsSrc,
+      final long[] quantilesDst, final long[] weightsDst,
+      final int[] levelStarts,
+      final int startingLevel1, final int numLevels1,
+      final int startingLevel2, final int numLevels2) {
+    final int fromIndex1 = levelStarts[startingLevel1];
+    final int toIndex1 = levelStarts[startingLevel1 + numLevels1]; // exclusive
+    final int fromIndex2 = levelStarts[startingLevel2];
+    final int toIndex2 = levelStarts[startingLevel2 + numLevels2]; // exclusive
+    int iSrc1 = fromIndex1;
+    int iSrc2 = fromIndex2;
+    int iDst = fromIndex1;
+
+    while (iSrc1 < toIndex1 && iSrc2 < toIndex2) {
+      if (quantilesSrc[iSrc1] < quantilesSrc[iSrc2]) {
+        quantilesDst[iDst] = quantilesSrc[iSrc1];
+
weightsDst[iDst] = weightsSrc[iSrc1]; + iSrc1++; + } else { + quantilesDst[iDst] = quantilesSrc[iSrc2]; + weightsDst[iDst] = weightsSrc[iSrc2]; + iSrc2++; + } + iDst++; + } + if (iSrc1 < toIndex1) { + System.arraycopy(quantilesSrc, iSrc1, quantilesDst, iDst, toIndex1 - iSrc1); + System.arraycopy(weightsSrc, iSrc1, weightsDst, iDst, toIndex1 - iSrc1); + } else if (iSrc2 < toIndex2) { + System.arraycopy(quantilesSrc, iSrc2, quantilesDst, iDst, toIndex2 - iSrc2); + System.arraycopy(weightsSrc, iSrc2, weightsDst, iDst, toIndex2 - iSrc2); + } + } + + // END SORTED VIEW + +} diff --git a/src/main/java/org/apache/datasketches/kll/KllLongsSketchIterator.java b/src/main/java/org/apache/datasketches/kll/KllLongsSketchIterator.java new file mode 100644 index 000000000..8922ec70e --- /dev/null +++ b/src/main/java/org/apache/datasketches/kll/KllLongsSketchIterator.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import org.apache.datasketches.quantilescommon.QuantilesLongsSketchIterator; + +/** + * Iterator over KllLongsSketch. The order is not defined. 
+ */ +public final class KllLongsSketchIterator extends KllSketchIterator implements QuantilesLongsSketchIterator { + private final long[] quantiles; + + KllLongsSketchIterator(final long[] quantiles, final int[] levelsArr, final int numLevels) { + super(levelsArr, numLevels); + this.quantiles = quantiles; + } + + @Override + public long getQuantile() { + return quantiles[index]; + } + +} diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java index 18e4de3f0..f2c3847ae 100644 --- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java +++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java @@ -28,6 +28,7 @@ import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH; import static org.apache.datasketches.kll.KllSketch.SketchType.ITEMS_SKETCH; +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; import java.util.Objects; @@ -210,7 +211,7 @@ static String toString(final Memory mem, final SketchType sketchType, final bool * Used primarily in testing. * * @param mem the given Memory - * @param sketchType the sketch type: FLOATS_SKETCH, DOUBLES_SKETCH, or ITEMS_SKETCH. + * @param sketchType the sketch type: FLOATS_SKETCH, DOUBLES_SKETCH, LONGS_SKETCH, or ITEMS_SKETCH. * @param includeData if true, includes detail of retained data. * @param serDe must be supplied for KllItemsSketch, otherwise can be null. * @return the summary string. @@ -226,10 +227,9 @@ staticThe parameter k will not change.
*/ @Override diff --git a/src/main/java/org/apache/datasketches/quantiles/UpdateDoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/UpdateDoublesSketch.java index 155001c87..f56c3b352 100644 --- a/src/main/java/org/apache/datasketches/quantiles/UpdateDoublesSketch.java +++ b/src/main/java/org/apache/datasketches/quantiles/UpdateDoublesSketch.java @@ -49,6 +49,13 @@ public static UpdateDoublesSketch wrap(final WritableMemory srcMem) { @Override public abstract void update(double item); + /** + * Factory heapify takes a compact sketch image in Memory and instantiates an on-heap sketch. + * The resulting sketch will not retain any link to the source Memory. + * @param srcMem a compact Memory image of a sketch serialized by this sketch. + * See Memory + * @return a heap-based sketch based on the given Memory. + */ public static UpdateDoublesSketch heapify(final Memory srcMem) { return HeapUpdateDoublesSketch.heapifyInstance(srcMem); } diff --git a/src/main/java/org/apache/datasketches/quantiles/package-info.java b/src/main/java/org/apache/datasketches/quantiles/package-info.java index c44e2e316..8767757b7 100644 --- a/src/main/java/org/apache/datasketches/quantiles/package-info.java +++ b/src/main/java/org/apache/datasketches/quantiles/package-info.java @@ -18,9 +18,8 @@ */ /** - *The quantiles package contains stochastic streaming algorithms that enable single-pass + * The quantiles package contains stochastic streaming algorithms that enable single-pass * analysis of the distribution of a stream of quantiles. - *
* * @see org.apache.datasketches.quantiles.DoublesSketch * @see org.apache.datasketches.quantiles.ItemsSketch diff --git a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java index bdc3cc75c..1427f6279 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/DoublesSortedView.java @@ -38,7 +38,7 @@ public interface DoublesSortedView extends SortedView { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + **The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.
@@ -55,7 +55,7 @@ public interface DoublesSortedView extends SortedView { *
It is not recommended to include either the minimum or maximum items of the input stream.
- * + * * @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -100,7 +100,7 @@ default double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit) * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + **Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.
* @@ -124,7 +124,7 @@ default double[] getCDF(double[] splitPoints, QuantileSearchCriteria searchCrit) *
It is not recommended to include either the minimum or maximum items of the input stream.
- * + * * @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -148,7 +148,7 @@ default double[] getPMF(double[] splitPoints, QuantileSearchCriteria searchCrit * the quantile directly corresponding to the given rank. * @return the approximate quantile given the normalized rank. * @throws IllegalArgumentException if sketch is empty. - * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria + * @see QuantileSearchCriteria */ double getQuantile(double rank, QuantileSearchCriteria searchCrit); @@ -165,7 +165,7 @@ default double[] getPMF(double[] splitPoints, QuantileSearchCriteria searchCrit * @param searchCrit if INCLUSIVE the given quantile is included into the rank. * @return the normalized rank corresponding to the given quantile. * @throws IllegalArgumentException if sketch is empty. - * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria + * @see QuantileSearchCriteria */ double getRank(double quantile, QuantileSearchCriteria searchCrit); @@ -173,4 +173,3 @@ default double[] getPMF(double[] splitPoints, QuantileSearchCriteria searchCrit DoublesSortedViewIterator iterator(); } - diff --git a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java index 0a0c54b5a..eec699d94 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/FloatsSortedView.java @@ -38,7 +38,7 @@ public interface FloatsSortedView extends SortedView { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. 
- * + **The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.
@@ -55,7 +55,7 @@ public interface FloatsSortedView extends SortedView { *
It is not recommended to include either the minimum or maximum items of the input stream.
- * + * * @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -100,7 +100,7 @@ default double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit) * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *** @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericInequalitySearch.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericInequalitySearch.java index e650fef9b..7b9d6d665 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/GenericInequalitySearch.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericInequalitySearch.java @@ -41,7 +41,7 @@ *Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.
* @@ -124,7 +124,7 @@ default double[] getCDF(float[] splitPoints, QuantileSearchCriteria searchCrit) * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
Given a sorted array of values arr[] and a search key value v, the algorithms for * the searching criteria are given with each enum criterion.
* - * @see + * @see * Sketching Quantiles and Ranks Tutorial * @author Lee Rhodes */ diff --git a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java index 0d27ce78c..f0dc81151 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/GenericSortedView.java @@ -47,7 +47,7 @@ public interface GenericSortedView** @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -116,7 +116,7 @@ default double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria sear * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.
@@ -64,7 +64,7 @@ public interface GenericSortedViewextends PartitioningFeature , SketchPar * * * It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
** @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/IncludeMinMax.java b/src/main/java/org/apache/datasketches/quantilescommon/IncludeMinMax.java index 203e338d2..1edfc2054 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/IncludeMinMax.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/IncludeMinMax.java @@ -63,6 +63,24 @@ public FloatsPair(final float[] quantiles, final long[] cumWeights) { } } + /** A simple structure to hold a pair of arrays */ + public static class LongsPair { + /** the array of quantiles */ + public long[] quantiles; + /** the array of associated cumulative weights */ + public long[] cumWeights; + + /** + * Constructor. + * @param quantiles the array of quantiles + * @param cumWeights the array of associated cumulative weights + */ + public LongsPair(final long[] quantiles, final long[] cumWeights) { + this.quantiles = quantiles; + this.cumWeights = cumWeights; + } + } + /** * A simple structure to hold a pair of arrays * @paramEach interval except for the end intervals starts with a split point and ends with the next split * point in sequence.
* @@ -140,7 +140,7 @@ default double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria sear * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
Given a sorted array of values arr[] and a search key value v, the algorithms for * the searching criteria are given with each enum criterion.
* - * @see + * @see * Sketching Quantiles and Ranks Tutorial * @author Lee Rhodes */ diff --git a/src/main/java/org/apache/datasketches/quantilescommon/LongsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/LongsSketchSortedView.java new file mode 100644 index 000000000..efb4006f6 --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/LongsSketchSortedView.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; +import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.quantilescommon.IncludeMinMax.LongsPair; + +/** + * The SortedView of the KllLongsSketch. 
+ * @author Lee Rhodes + * @author Zac Blanco + */ +public final class LongsSketchSortedView implements LongsSortedView { + private final long[] quantiles; + private final long[] cumWeights; //cumulative natural weights + private final long totalN; + + /** + * Construct from elements, also used in testing. + * @param quantiles sorted array of quantiles + * @param cumWeights sorted, monotonically increasing cumulative weights. + * @param sk the underlying quantile sketch. + */ + public LongsSketchSortedView( + final long[] quantiles, + final long[] cumWeights, + final QuantilesLongsAPI sk) { + final LongsPair dPair = + IncludeMinMax.includeLongsMinMax(quantiles, cumWeights, sk.getMaxItem(), sk.getMinItem()); + this.quantiles = dPair.quantiles; + this.cumWeights = dPair.cumWeights; + this.totalN = sk.getN(); + } + + //Used for testing + LongsSketchSortedView( + final long[] quantiles, + final long[] cumWeights, + final long totalN, + final long maxItem, + final long minItem) { + final LongsPair dPair = + IncludeMinMax.includeLongsMinMax(quantiles, cumWeights, maxItem, minItem); + this.quantiles = dPair.quantiles; + this.cumWeights = dPair.cumWeights; + this.totalN = totalN; + } + + @Override + public long[] getCumulativeWeights() { + return cumWeights.clone(); + } + + @Override + public long getMaxItem() { + final int top = quantiles.length - 1; + return quantiles[top]; + } + + @Override + public long getMinItem() { + return quantiles[0]; + } + + @Override + public long getN() { + return totalN; + } + + @Override + public int getNumRetained() { + return quantiles.length; + } + + @Override + public long getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { + if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + QuantilesUtil.checkNormalizedRankBounds(rank); + final int len = cumWeights.length; + final double naturalRank = getNaturalRank(rank, totalN, searchCrit); + final InequalitySearch crit = (searchCrit == INCLUSIVE) ? 
InequalitySearch.GE : InequalitySearch.GT; + final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit); + if (index == -1) { + return quantiles[len - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; + } + return quantiles[index]; + } + + @Override + public long[] getQuantiles() { + return quantiles.clone(); + } + + @Override + public double getRank(final long quantile, final QuantileSearchCriteria searchCrit) { + if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + final int len = quantiles.length; + final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.LE : InequalitySearch.LT; + final int index = InequalitySearch.find(quantiles, 0, len - 1, quantile, crit); + if (index == -1) { + return 0; //EXCLUSIVE (LT) case: quantile <= minQuantile; INCLUSIVE (LE) case: quantile < minQuantile + } + return (double)cumWeights[index] / totalN; + } + + @Override + public boolean isEmpty() { + return totalN == 0; + } + + @Override + public LongsSortedViewIterator iterator() { + return new LongsSortedViewIterator(quantiles, cumWeights); + } + +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java new file mode 100644 index 000000000..e7e3521c7 --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +/** + * The Sorted View for quantile sketches of primitive type long. + * @see SortedView + * @author Lee Rhodes + * @author Zac Blanco + */ +public interface LongsSortedView extends SortedView { + + /** + * Returns an approximation to the Cumulative Distribution Function (CDF) of the input stream + * as a monotonically increasing array of double ranks (or cumulative probabilities) on the interval [0.0, 1.0], + * given a set of splitPoints. + * + *The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(false) function.
+ * + * @param splitPoints an array of m unique, monotonically increasing items + * (of the same type as the input items) + * that divide the item input domain into m+1 overlapping intervals. + *+ *+ * @param searchCrit the desired search criteria. + * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. + * @throws IllegalArgumentException if sketch is empty. + */ + default double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit) { + QuantilesUtil.checkLongsSplitPointsOrder(splitPoints); + final int len = splitPoints.length + 1; + final double[] buckets = new double[len]; + for (int i = 0; i < len - 1; i++) { + buckets[i] = getRank(splitPoints[i], searchCrit); + } + buckets[len - 1] = 1; + return buckets; + } + + /** + * Returns the maximum item of the stream. This may be distinct from the largest item retained by the + * sketch algorithm. + * + * @return the maximum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + long getMaxItem(); + + /** + * Returns the minimum item of the stream. This may be distinct from the smallest item retained by the + * sketch algorithm. + * + * @return the minimum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + long getMinItem(); + + /** + * Returns an approximation to the Probability Mass Function (PMF) of the input stream + * as an array of probability masses as doubles on the interval [0.0, 1.0], + * given a set of splitPoints. + * + *The start of each interval is below the lowest item retained by the sketch + * corresponding to a zero rank or zero probability, and the end of the interval + * is the rank or cumulative probability corresponding to the split point.
+ * + *The (m+1)th interval represents 100% of the distribution represented by the sketch + * and consistent with the definition of a cumulative probability distribution, thus the (m+1)th + * rank or probability in the returned array is always 1.0.
+ * + *If a split point exactly equals a retained item of the sketch and the search criterion is:
+ * + *+ *
+ * + *- INCLUSIVE, the resulting cumulative probability will include that item.
+ *- EXCLUSIVE, the resulting cumulative probability will not include the weight of that split point.
+ *It is not recommended to include either the minimum or maximum items of the input stream.
+ *
The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(true) function.
+ * + * @param splitPoints an array of m unique, monotonically increasing items + * (of the same type as the input items) + * that divide the item input domain into m+1 consecutive, non-overlapping intervals. + *+ *+ * @param searchCrit the desired search criteria. + * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. + * @throws IllegalArgumentException if sketch is empty. + */ + default double[] getPMF(long[] splitPoints, QuantileSearchCriteria searchCrit) { + final double[] buckets = getCDF(splitPoints, searchCrit); + final int len = buckets.length; + for (int i = len; i-- > 1; ) { + buckets[i] -= buckets[i - 1]; + } + return buckets; + } + + /** + * Gets the approximate quantile of the given normalized rank and the given search criterion. + * + * @param rank the given normalized rank, a double in the range [0.0, 1.0]. + * @param searchCrit If INCLUSIVE, the given rank includes all quantiles ≤ + * the quantile directly corresponding to the given rank. + * If EXCLUSIVE, the given rank includes all quantiles < + * the quantile directly corresponding to the given rank. + * @return the approximate quantile given the normalized rank. + * @throws IllegalArgumentException if sketch is empty. + * @see QuantileSearchCriteria + */ + long getQuantile(double rank, QuantileSearchCriteria searchCrit); + + /** + * Returns an array of all retained quantiles by the sketch. + * @return an array of all retained quantiles by the sketch. + */ + long[] getQuantiles(); + + /** + * Gets the normalized rank corresponding to the given quantile. + * + * @param quantile the given quantile + * @param searchCrit if INCLUSIVE the given quantile is included into the rank. + * @return the normalized rank corresponding to the given quantile. + * @throws IllegalArgumentException if sketch is empty. 
+ * @see QuantileSearchCriteria + */ + double getRank(long quantile, QuantileSearchCriteria searchCrit); + + @Override + LongsSortedViewIterator iterator(); + +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedViewIterator.java b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedViewIterator.java new file mode 100644 index 000000000..77510cd5a --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedViewIterator.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +/** + * Iterator over quantile sketches of primitive type long. + */ +public final class LongsSortedViewIterator extends SortedViewIterator { + private final long[] quantiles; + + /** + * Constructor. + * @param quantiles the given array of quantiles, which must be ordered. + * @param cumWeights the given array of cumulative weights, which must be ordered, start with the value one, and + * the last value must be equal to N, the total number of items updated to the sketch. 
+ */ + public LongsSortedViewIterator(final long[] quantiles, final long[] cumWeights) { + super(cumWeights); + this.quantiles = quantiles; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter + } + + /** + * Gets the quantile at the current index. + * + *Each interval except for the end intervals starts with a split point and ends with the next split + * point in sequence.
+ * + *The first interval starts below the lowest item retained by the sketch + * corresponding to a zero rank or zero probability, and ends with the first split point
+ * + *The last (m+1)th interval starts with the last split point and ends after the last + * item retained by the sketch corresponding to a rank or probability of 1.0.
+ * + *The sum of the probability masses of all (m+1) intervals is 1.0.
+ * + *If the search criterion is:
+ * + *+ *
+ * + *- INCLUSIVE, and the upper split point of an interval equals an item retained by the sketch, the interval + * will include that item. If the lower split point equals an item retained by the sketch, the interval will exclude + * that item.
+ *- EXCLUSIVE, and the upper split point of an interval equals an item retained by the sketch, the interval + * will exclude that item. If the lower split point equals an item retained by the sketch, the interval will include + * that item.
+ *It is not recommended to include either the minimum or maximum items of the input stream.
+ *
Don't call this before calling next() for the first time + * or after getting false from next().
+ * + * @return the quantile at the current index. + */ + public long getQuantile() { + return quantiles[index]; + } + +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java index 8ea3c3415..e7b9e6ef6 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java @@ -22,7 +22,7 @@ /** * These search criteria are used by the KLL, REQ and Classic Quantiles sketches in the DataSketches library. * - * @see + * @see * Sketching Quantiles and Ranks Tutorial * * @author Lee Rhodes diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java index b70843bb4..a082fc27a 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java @@ -20,12 +20,12 @@ package org.apache.datasketches.quantilescommon; /** - *This is a stochastic streaming sketch that enables near-real time analysis of the + * This is a stochastic streaming sketch that enables near-real time analysis of the * approximate distribution of items from a very large stream in a single pass, requiring only * that the items are comparable. * The analysis is obtained using the getQuantile() function or the * inverse functions getRank(), getPMF() (the Probability Mass Function), and getCDF() - * (the Cumulative Distribution Function).
+ * (the Cumulative Distribution Function). * *Given an input stream of N items, the natural rank of any specific * item is defined as its index (1 to N) in the hypothetical sorted stream of all @@ -194,7 +194,7 @@ *
[*] Note that obtaining epsilon may require using a similar function but with more parameters * based on the specific sketch implementation.
* - * @see + * @see * Sketching Quantiles and Ranks, Tutorial * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria * @@ -205,11 +205,22 @@ @SuppressWarnings("javadoc") public interface QuantilesAPI { + /** The sketch must not be empty for this operation. */ static String EMPTY_MSG = "The sketch must not be empty for this operation. "; + + /** Unsupported operation for this Sketch Type. */ static String UNSUPPORTED_MSG = "Unsupported operation for this Sketch Type. "; + + /** Sketch does not have just one item. */ static String NOT_SINGLE_ITEM_MSG = "Sketch does not have just one item. "; + + /** MemoryRequestServer must not be null. */ static String MEM_REQ_SVR_NULL_MSG = "MemoryRequestServer must not be null. "; + + /** Target sketch is Read Only, cannot write. */ static String TGT_IS_READ_ONLY_MSG = "Target sketch is Read Only, cannot write. "; + + /** A sketch cannot merge with itself. */ static String SELF_MERGE_MSG = "A sketch cannot merge with itself. "; /** diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java index e8e5310f5..8c4f6620f 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java @@ -50,7 +50,7 @@ default double[] getCDF(double[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *** @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. 
@@ -113,7 +113,7 @@ default double[] getPMF(double[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.
@@ -67,7 +67,7 @@ default double[] getCDF(double[] splitPoints) { * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
** @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java index 986780444..8b8a91bdd 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java @@ -49,7 +49,7 @@ default double[] getCDF(float[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.
* @@ -137,7 +137,7 @@ default double[] getPMF(double[] splitPoints) { * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
** @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -112,7 +112,7 @@ default double[] getPMF(float[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.
@@ -66,7 +66,7 @@ default double[] getCDF(float[] splitPoints) { * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
** @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java index 459e58cdd..bc0881282 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java @@ -53,7 +53,7 @@ default double[] getCDF(T[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.
* @@ -136,7 +136,7 @@ default double[] getPMF(float[] splitPoints) { * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
** @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -132,7 +132,7 @@ default double[] getPMF(T[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.
@@ -70,7 +70,7 @@ default double[] getCDF(T[] splitPoints) { * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
** @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java new file mode 100644 index 000000000..fb1ca5817 --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; + +/** + * The Quantiles API for item type long. + * @see QuantilesAPI + * @author Lee Rhodes + * @author Zac Blanco + */ +public interface QuantilesLongsAPI extends QuantilesAPI { + + /** + * This is equivalent to {@link #getCDF(long[], QuantileSearchCriteria) getCDF(splitPoints, INCLUSIVE)} + * @param splitPoints an array of m unique, monotonically increasing items. + * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. 
+ * @throws IllegalArgumentException if sketch is empty. + */ + default double[] getCDF(long[] splitPoints) { + return getCDF(splitPoints, INCLUSIVE); + } + + /** + * Returns an approximation to the Cumulative Distribution Function (CDF) of the input stream + * as a monotonically increasing array of double ranks (or cumulative probabilities) on the interval [0.0, 1.0], + * given a set of splitPoints. + * + *Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.
* @@ -156,7 +156,7 @@ default double[] getPMF(T[] splitPoints) { * * *It is not recommended to include either the minimum or maximum items of the input stream.
- * + *
The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(false) function.
+ * + * @param splitPoints an array of m unique, monotonically increasing items + * (of the same type as the input items) + * that divide the item input domain into m+1 overlapping intervals. + *+ *+ * @param searchCrit the desired search criteria. + * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. + * @throws IllegalArgumentException if sketch is empty. + */ + double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit); + + /** + * Returns the maximum item of the stream. This is provided for convenience and may be different from the + * item returned by getQuantile(1.0). + * + * @return the maximum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + long getMaxItem(); + + /** + * Returns the minimum item of the stream. This is provided for convenience and may be different from the + * item returned by getQuantile(0.0). + * + * @return the minimum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + long getMinItem(); + + /** + * This is equivalent to {@link #getPMF(long[], QuantileSearchCriteria) getPMF(splitPoints, INCLUSIVE)} + * @param splitPoints an array of m unique, monotonically increasing items. + * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. + * @throws IllegalArgumentException if sketch is empty. + */ + default double[] getPMF(long[] splitPoints) { + return getPMF(splitPoints, INCLUSIVE); + } + + /** + * Returns an approximation to the Probability Mass Function (PMF) of the input stream + * as an array of probability masses as doubles on the interval [0.0, 1.0], + * given a set of splitPoints. + * + *The start of each interval is below the lowest item retained by the sketch + * corresponding to a zero rank or zero probability, and the end of the interval + * is the rank or cumulative probability corresponding to the split point.
+ * + *The (m+1)th interval represents 100% of the distribution represented by the sketch + * and consistent with the definition of a cumulative probability distribution, thus the (m+1)th + * rank or probability in the returned array is always 1.0.
+ * + *If a split point exactly equals a retained item of the sketch and the search criterion is:
+ * + *+ *
+ * + *- INCLUSIVE, the resulting cumulative probability will include that item.
+ *- EXCLUSIVE, the resulting cumulative probability will not include the weight of that split point.
+ *It is not recommended to include either the minimum or maximum items of the input stream.
+ *
The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(true) function.
+ * + * @param splitPoints an array of m unique, monotonically increasing items + * (of the same type as the input items) + * that divide the item input domain into m+1 consecutive, non-overlapping intervals. + *+ *+ * @param searchCrit the desired search criteria. + * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. + * @throws IllegalArgumentException if sketch is empty. + */ + double[] getPMF(long[] splitPoints, QuantileSearchCriteria searchCrit); + + /** + * This is equivalent to {@link #getQuantile(double, QuantileSearchCriteria) getQuantile(rank, INCLUSIVE)} + * @param rank the given normalized rank, a double in the range [0.0, 1.0]. + * @return the approximate quantile given the normalized rank. + * @throws IllegalArgumentException if sketch is empty. + */ + default long getQuantile(double rank) { + return getQuantile(rank, INCLUSIVE); + } + + /** + * Gets the approximate quantile of the given normalized rank and the given search criterion. + * + * @param rank the given normalized rank, a double in the range [0.0, 1.0]. + * @param searchCrit If INCLUSIVE, the given rank includes all quantiles ≤ + * the quantile directly corresponding to the given rank. + * If EXCLUSIVE, the given rank includes all quantiles < + * the quantile directly corresponding to the given rank. + * @return the approximate quantile given the normalized rank. + * @throws IllegalArgumentException if sketch is empty. + * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria + */ + long getQuantile(double rank, QuantileSearchCriteria searchCrit); + + /** + * Gets the lower bound of the quantile confidence interval in which the quantile of the + * given rank exists. + * + *Each interval except for the end intervals starts with a split point and ends with the next split + * point in sequence.
+ * + *The first interval starts below the lowest item retained by the sketch + * corresponding to a zero rank or zero probability, and ends with the first split point
+ * + *The last (m+1)th interval starts with the last split point and ends after the last + * item retained by the sketch corresponding to a rank or probability of 1.0.
+ * + *The sum of the probability masses of all (m+1) intervals is 1.0.
+ * + *If the search criterion is:
+ * + *+ *
+ * + *- INCLUSIVE, and the upper split point of an interval equals an item retained by the sketch, the interval + * will include that item. If the lower split point equals an item retained by the sketch, the interval will exclude + * that item.
+ *- EXCLUSIVE, and the upper split point of an interval equals an item retained by the sketch, the interval + * will exclude that item. If the lower split point equals an item retained by the sketch, the interval will include + * that item.
+ *It is not recommended to include either the minimum or maximum items of the input stream.
+ *
Although it is possible to estimate the probability that the true quantile + * exists within the quantile confidence interval specified by the upper and lower quantile bounds, + * it is not possible to guarantee the width of the quantile confidence interval + * as an additive or multiplicative percent of the true quantile.
+ * + * @param rank the given normalized rank + * @return the lower bound of the quantile confidence interval in which the quantile of the + * given rank exists. + * @throws IllegalArgumentException if sketch is empty. + */ + long getQuantileLowerBound(double rank); + + /** + * Gets the upper bound of the quantile confidence interval in which the true quantile of the + * given rank exists. + * + *Although it is possible to estimate the probability that the true quantile + * exists within the quantile confidence interval specified by the upper and lower quantile bounds, + * it is not possible to guarantee the width of the quantile interval + * as an additive or multiplicative percent of the true quantile.
+ * + * @param rank the given normalized rank + * @return the upper bound of the quantile confidence interval in which the true quantile of the + * given rank exists. + * @throws IllegalArgumentException if sketch is empty. + */ + long getQuantileUpperBound(double rank); + + /** + * This is equivalent to {@link #getQuantiles(double[], QuantileSearchCriteria) getQuantiles(ranks, INCLUSIVE)} + * @param ranks the given array of normalized ranks, each of which must be + * in the interval [0.0,1.0]. + * @return an array of quantiles corresponding to the given array of normalized ranks. + * @throws IllegalArgumentException if sketch is empty. + */ + default long[] getQuantiles(double[] ranks) { + return getQuantiles(ranks, INCLUSIVE); + } + + /** + * Gets an array of quantiles from the given array of normalized ranks. + * + * @param ranks the given array of normalized ranks, each of which must be + * in the interval [0.0,1.0]. + * @param searchCrit if INCLUSIVE, the given ranks include all quantiles ≤ + * the quantile directly corresponding to each rank. + * @return an array of quantiles corresponding to the given array of normalized ranks. + * @throws IllegalArgumentException if sketch is empty. + * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria + */ + long[] getQuantiles(double[] ranks, QuantileSearchCriteria searchCrit); + + /** + * This is equivalent to {@link #getRank(long, QuantileSearchCriteria) getRank(quantile, INCLUSIVE)} + * @param quantile the given quantile + * @return the normalized rank corresponding to the given quantile + * @throws IllegalArgumentException if sketch is empty. + */ + default double getRank(long quantile) { + return getRank(quantile, INCLUSIVE); + } + + /** + * Gets the normalized rank corresponding to the given quantile. + * + * @param quantile the given quantile + * @param searchCrit if INCLUSIVE the given quantile is included into the rank. 
+ * @return the normalized rank corresponding to the given quantile + * @throws IllegalArgumentException if sketch is empty. + * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria + */ + double getRank(long quantile, QuantileSearchCriteria searchCrit); + + /** + * This is equivalent to {@link #getRanks(long[], QuantileSearchCriteria) getRanks(quantiles, INCLUSIVE)} + * @param quantiles the given array of quantiles + * @return an array of normalized ranks corresponding to the given array of quantiles. + * @throws IllegalArgumentException if sketch is empty. + */ + default double[] getRanks(long[] quantiles) { + return getRanks(quantiles, INCLUSIVE); + } + + /** + * Gets an array of normalized ranks corresponding to the given array of quantiles and the given + * search criterion. + * + * @param quantiles the given array of quantiles + * @param searchCrit if INCLUSIVE, the given quantiles include the rank directly corresponding to each quantile. + * @return an array of normalized ranks corresponding to the given array of quantiles. + * @throws IllegalArgumentException if sketch is empty. + * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria + */ + double[] getRanks(long[] quantiles, QuantileSearchCriteria searchCrit); + + /** + * Returns the current number of bytes this Sketch would require if serialized. + * @return the number of bytes this sketch would require if serialized. + */ + int getSerializedSizeBytes(); + + /** + * Gets the sorted view of this sketch + * @return the sorted view of this sketch + */ + LongsSortedView getSortedView(); + + /** + * Gets the iterator for this sketch, which is not sorted. + * @return the iterator for this sketch + */ + QuantilesLongsSketchIterator iterator(); + + /** + * Returns a byte array representation of this sketch. + * @return a byte array representation of this sketch. + */ + byte[] toByteArray(); + + /** + * Updates this sketch with the given item. + * @param item from a stream of items. 
+ */ + void update(long item); + +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsSketchIterator.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsSketchIterator.java new file mode 100644 index 000000000..7ed0d9805 --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsSketchIterator.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +/** + * The quantiles sketch iterator for primitive type long. + * @see QuantilesSketchIterator + * @author Zac Blanco + */ +public interface QuantilesLongsSketchIterator extends QuantilesSketchIterator { + + /** + * Gets the long quantile at the current index. + * + *Don't call this before calling next() for the first time + * or after getting false from next().
+ * + * @return the long quantile at the current index. + */ + long getQuantile(); + +} + diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java index 75798c20f..529fd386d 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java @@ -67,6 +67,21 @@ public static final void checkDoublesSplitPointsOrder(final double[] values) { } } + /** + * Checks the sequential validity of the given array of long values. + * They must be unique and monotonically increasing. + * @param values the given array of long values + */ + public static void checkLongsSplitPointsOrder(final long[] values) { + Objects.requireNonNull(values); + final int len = values.length; + for (int j = 0; j < len - 1; j++) { + if (values[j] < values[j + 1]) { continue; } + throw new SketchesArgumentException( + "Values must be unique and monotonically increasing."); + } + } + /** * Checks the sequential validity of the given array of float values. * They must be unique, monotonically increasing and not NaN. diff --git a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java index e587cd633..feeba7739 100644 --- a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java +++ b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java @@ -28,7 +28,7 @@ * This abstract class provides a single place to define and document the public API * for the Relative Error Quantiles Sketch. * - * @see + * @see * Sketching Quantiles and Ranks Tutorial * * @author Lee Rhodes @@ -89,11 +89,23 @@ public static double getRSE(final int k, final double rank, final boolean hra, f @Override public abstract float getQuantileLowerBound(double rank); + /** + * Gets an approximate lower bound of the quantile associated with the given rank. 
+ * @param rank the given normalized rank, a number between 0 and 1.0. + * @param numStdDev the number of standard deviations. Must be 1, 2, or 3. + * @return an approximate lower bound quantile, if it exists. + */ public abstract float getQuantileLowerBound(double rank, int numStdDev); @Override public abstract float getQuantileUpperBound(double rank); + /** + * Gets an approximate upper bound of the quantile associated with the given rank. + * @param rank the given normalized rank, a number between 0 and 1.0. + * @param numStdDev the number of standard deviations. Must be 1, 2, or 3. + * @return an approximate upper bound quantile, if it exists. + */ public abstract float getQuantileUpperBound(double rank, int numStdDev); @Override @@ -101,7 +113,7 @@ public static double getRSE(final int k, final double rank, final boolean hra, f /** * Gets an approximate lower bound rank of the given normalized rank. - * @param rank the given rank, a number between 0 and 1.0. + * @param rank the given normalized rank, a number between 0 and 1.0. * @param numStdDev the number of standard deviations. Must be 1, 2, or 3. * @return an approximate lower bound rank. */ @@ -160,6 +172,7 @@ public boolean isReadOnly() { /** * {@inheritDoc} + * *The parameters k, highRankAccuracy, and reqDebug will not change.
*/ @Override diff --git a/src/main/java/org/apache/datasketches/req/ReqSerDe.java b/src/main/java/org/apache/datasketches/req/ReqSerDe.java index 52b1371a9..952749deb 100644 --- a/src/main/java/org/apache/datasketches/req/ReqSerDe.java +++ b/src/main/java/org/apache/datasketches/req/ReqSerDe.java @@ -110,7 +110,8 @@ * 0 || (empty)| 0 | K | Flags |FamID=17| SerVer | PreInts = 2 | * *- ** - *Flags:
+ * + * Flags: * Bit 0 : Endianness, reserved * Bit 1 : ReadOnly, reserved * Bit 2 : Empty diff --git a/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java b/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java index c3ef33957..b58317a9a 100644 --- a/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java +++ b/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java @@ -132,10 +132,9 @@ public static ReservoirLongsUnion heapify(final Memory srcMem) { /** * Union the given sketch. - *- * This method can be repeatedly called. If the given sketch is null it is interpreted as an empty - * sketch. - *
+ * + *This method can be repeatedly called. If the given sketch is null it is interpreted as an empty + * sketch.
* * @param sketchIn The incoming sketch. */ diff --git a/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java b/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java index e6f361955..e12d31aa9 100644 --- a/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java +++ b/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java @@ -25,6 +25,7 @@ /** * This class provides a compact representation of reservoir size by encoding it into a * fixed-point 16-bit value. + * *The value itself is a fractional power of 2, with 5 bits of exponent and 11 bits of * mantissa. The exponent allows a choice of anywhere from 0-30, and there are 2048 possible * reservoir size values within each octave. Because reservoir size must be an integer, this diff --git a/src/main/java/org/apache/datasketches/sampling/package-info.java b/src/main/java/org/apache/datasketches/sampling/package-info.java index edfaa20a8..bbe446914 100644 --- a/src/main/java/org/apache/datasketches/sampling/package-info.java +++ b/src/main/java/org/apache/datasketches/sampling/package-info.java @@ -18,8 +18,8 @@ */ /** - *
This package is dedicated to streaming algorithms that enable fixed size, uniform sampling of - * weighted and unweighted items from a stream.
+ * This package is dedicated to streaming algorithms that enable fixed size, uniform sampling of + * weighted and unweighted items from a stream. * *These sketches are mergeable and can be serialized and deserialized to/from a compact * form.
diff --git a/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java b/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java index 1e3408511..951bd7244 100644 --- a/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java +++ b/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java @@ -32,6 +32,7 @@ import org.apache.datasketches.memory.WritableBuffer; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.quantilescommon.QuantilesAPI; +import org.apache.datasketches.quantilescommon.QuantilesUtil; /** * t-Digest for estimating quantiles and ranks. @@ -125,7 +126,7 @@ public void merge(final TDigestDouble other) { /** * Process buffered values and merge centroids if needed */ - public void compress() { + private void compress() { if (numBuffered_ == 0) { return; } final int num = numBuffered_ + numCentroids_; final double[] values = new double[num]; @@ -277,6 +278,51 @@ public double getQuantile(final double rank) { return weightedAverage(centroidWeights_[numCentroids_ - 1], w1, maxValue_, w2); } + /** + * Returns an approximation to the Probability Mass Function (PMF) of the input stream + * given a set of split points. + * + * @param splitPoints an array of m unique, monotonically increasing values + * that divide the input domain into m+1 consecutive disjoint intervals (bins). + * + * @return an array of m+1 doubles each of which is an approximation + * to the fraction of the input stream values (the mass) that fall into one of those intervals. + * @throws SketchesStateException if sketch is empty. + */ + public double[] getPMF(final double[] splitPoints) { + final double[] buckets = getCDF(splitPoints); + for (int i = buckets.length; i-- > 1; ) { + buckets[i] -= buckets[i - 1]; + } + return buckets; + } + + /** + * Returns an approximation to the Cumulative Distribution Function (CDF), which is the + * cumulative analog of the PMF, of the input stream given a set of split points. 
+ * + * @param splitPoints an array of m unique, monotonically increasing values + * that divide the input domain into m+1 consecutive disjoint intervals. + * + * @return an array of m+1 doubles, which are a consecutive approximation to the CDF + * of the input stream given the splitPoints. The value at array position j of the returned + * CDF array is the sum of the returned values in positions 0 through j of the returned PMF + * array. This can be viewed as array of ranks of the given split points plus one more value + * that is always 1. + * @throws SketchesStateException if sketch is empty. + */ + public double[] getCDF(final double[] splitPoints) { + if (isEmpty()) { throw new SketchesStateException(QuantilesAPI.EMPTY_MSG); } + QuantilesUtil.checkDoublesSplitPointsOrder(splitPoints); + final int len = splitPoints.length + 1; + final double[] ranks = new double[len]; + for (int i = 0; i < len - 1; i++) { + ranks[i] = getRank(splitPoints[i]); + } + ranks[len - 1] = 1.0; + return ranks; + } + /** * Computes size needed to serialize the current state. 
* @return size in bytes needed to serialize this tdigest diff --git a/src/main/java/org/apache/datasketches/theta/AnotBimpl.java b/src/main/java/org/apache/datasketches/theta/AnotBimpl.java index d2161c995..e7b2c99eb 100644 --- a/src/main/java/org/apache/datasketches/theta/AnotBimpl.java +++ b/src/main/java/org/apache/datasketches/theta/AnotBimpl.java @@ -26,7 +26,6 @@ import java.util.Arrays; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -148,11 +147,6 @@ int getRetainedEntries() { return curCount_; } - @Override - public boolean isSameResource(final Memory that) { - return false; - } - //restricted private static long[] getHashArrA(final Sketch skA) { //returns a new array diff --git a/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java b/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java index 741db2f72..e7b3ddaac 100644 --- a/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java +++ b/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java @@ -94,7 +94,7 @@ private static int computeLogBufferSize(final int lgNomLongs, final long exactSi * @param hash to be propagated */ private boolean propagateToSharedSketch(final long hash) { - //noinspection StatementWithEmptyBody + //no inspection StatementWithEmptyBody while (localPropagationInProgress.get()) { } //busy wait until previous propagation completed localPropagationInProgress.set(true); @@ -108,7 +108,7 @@ private boolean propagateToSharedSketch(final long hash) { * Propagates the content of the buffer as a sketch to the shared sketch */ private void propagateToSharedSketch() { - //noinspection StatementWithEmptyBody + //no inspection StatementWithEmptyBody while (localPropagationInProgress.get()) { } //busy wait until previous propagation completed 
diff --git a/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java b/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java index 49a8140c3..cdc843f8b 100644 --- a/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java +++ b/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java @@ -21,6 +21,7 @@ import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.datasketches.common.MemoryStatus; import org.apache.datasketches.memory.WritableMemory; /** @@ -30,7 +31,7 @@ * * @author eshcar */ -interface ConcurrentSharedThetaSketch { +interface ConcurrentSharedThetaSketch extends MemoryStatus { long NOT_SINGLE_HASH = -1L; double MIN_ERROR = 0.0000001; @@ -63,8 +64,8 @@ static long computeExactLimit(long k, double error) { void endPropagation(AtomicBoolean localPropagationInProgress, boolean isEager); /** - * Returns the value of the volatile theta manged by the shared sketch - * @return the value of the volatile theta manged by the shared sketch + * Returns the value of the volatile theta managed by the shared sketch + * @return the value of the volatile theta managed by the shared sketch */ long getVolatileTheta(); @@ -124,10 +125,10 @@ boolean propagate(final AtomicBoolean localPropagationInProgress, final Sketch s // //For the external user all of the below methods can be obtained by casting the shared //sketch to UpdateSketch. However, these methods here also act as an alias so that an - //attempt to access these methods from the local buffer will be divered to the shared + //attempt to access these methods from the local buffer will be diverted to the shared //sketch. 
- //From Sketch + //From Sketch and MemoryStatus int getCompactBytes(); @@ -139,10 +140,6 @@ boolean propagate(final AtomicBoolean localPropagationInProgress, final Sketch s double getUpperBound(int numStdDev); - boolean hasMemory(); - - boolean isDirect(); - boolean isEmpty(); boolean isEstimationMode(); diff --git a/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java b/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java index de0e6e43b..0f69ec3c2 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java @@ -109,12 +109,12 @@ public long getThetaLong() { @Override public boolean hasMemory() { - return true; + return mem_ != null; } @Override public boolean isDirect() { - return mem_.isDirect(); + return hasMemory() ? mem_.isDirect() : false; } @Override @@ -132,7 +132,7 @@ public boolean isOrdered() { @Override public boolean isSameResource(final Memory that) { - return mem_.isSameResource(that); + return hasMemory() ? mem_.isSameResource(that) : false; } @Override diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java index a1ac53c6d..ad9051a08 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java @@ -323,6 +323,10 @@ UpdateReturnState hashUpdate(final long hash) { memReqSvr_ = (memReqSvr_ == null) ? wmem_.getMemoryRequestServer() : memReqSvr_; + if (memReqSvr_ == null) { //in case the MRS is not enabled or null. 
+ throw new SketchesArgumentException("Out of Memory, MemoryRequestServer is null, cannot expand."); + } + final WritableMemory newDstMem = memReqSvr_.request(wmem_,reqBytes); moveAndResize(wmem_, preambleLongs, lgArrLongs, newDstMem, tgtLgArrLongs, thetaLong); diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java index a3ffebc14..fb2aed2a5 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java @@ -144,12 +144,12 @@ public long getThetaLong() { @Override public boolean hasMemory() { - return true; + return wmem_ != null; } @Override public boolean isDirect() { - return wmem_.isDirect(); + return hasMemory() ? wmem_.isDirect() : false; } @Override @@ -159,7 +159,7 @@ public boolean isEmpty() { @Override public boolean isSameResource(final Memory that) { - return wmem_.isSameResource(that); + return hasMemory() ? 
wmem_.isSameResource(that) : false; } @Override diff --git a/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java b/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java index ae481a425..8f6e4972a 100644 --- a/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java @@ -91,16 +91,6 @@ public long getThetaLong() { return Long.MAX_VALUE; } - @Override - public boolean hasMemory() { - return false; - } - - @Override - public boolean isDirect() { - return false; - } - @Override public boolean isEmpty() { return true; diff --git a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java index 479aa3eeb..f394e9303 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java @@ -102,16 +102,6 @@ public long getThetaLong() { return thetaLong_; } - @Override - public boolean hasMemory() { - return false; - } - - @Override - public boolean isDirect() { - return false; - } - @Override public boolean isEmpty() { return empty_; diff --git a/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java b/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java index 1cc6d75cd..49734a9e8 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java @@ -66,16 +66,6 @@ public int getCurrentBytes() { return (preLongs + dataLongs) << 3; } - @Override - public boolean isDirect() { - return false; - } - - @Override - public boolean hasMemory() { - return false; - } - //UpdateSketch @Override diff --git a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java index 509ec2f93..fc81d1124 100644 --- 
a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java +++ b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java @@ -336,14 +336,24 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds dstMem, compactCache); } + @Override + public boolean hasMemory() { + return wmem_ != null; + } + @Override public boolean hasResult() { - return wmem_ != null ? wmem_.getInt(RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0; + return hasMemory() ? wmem_.getInt(RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0; + } + + @Override + public boolean isDirect() { + return hasMemory() ? wmem_.isDirect() : false; } @Override public boolean isSameResource(final Memory that) { - return wmem_ != null ? wmem_.isSameResource(that) : false; + return hasMemory() ? wmem_.isSameResource(that) : false; } @Override diff --git a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java index 562be982c..e1d9262e6 100644 --- a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java @@ -37,6 +37,7 @@ /** * This class defines the preamble data structure and provides basic utilities for some of the key * fields. + * *The intent of the design of this class was to isolate the detailed knowledge of the bit and * byte layout of the serialized form of the sketches derived from the Sketch class into one place. * This allows the possibility of the introduction of different serialization @@ -126,7 +127,7 @@ * 3 ||----------------------Start of Hash Table of longs---------------------------------| *
Union objects require 32 bytes of preamble plus a non-compact array of longs representing a + *
Union objects require 32 bytes of preamble plus a non-compact array of longs representing a * hash table.
* *diff --git a/src/main/java/org/apache/datasketches/theta/Rebuilder.java b/src/main/java/org/apache/datasketches/theta/Rebuilder.java index 07093f652..b6e3de342 100644 --- a/src/main/java/org/apache/datasketches/theta/Rebuilder.java +++ b/src/main/java/org/apache/datasketches/theta/Rebuilder.java @@ -84,7 +84,7 @@ static final void quickSelectAndRebuild(final WritableMemory mem, final int prea } /** - * Moves me (the entire sketch) to a new larger Memory location and rebuilds the hash table. + * Moves me (the entire updatable sketch) to a new larger Memory location and rebuilds the hash table. * This assumes a Memory preamble of standard form with the correct value of thetaLong. * Afterwards, the caller must update the local Memory reference, lgArrLongs * and hashTableThreshold from the dstMemory and free the source Memory. diff --git a/src/main/java/org/apache/datasketches/theta/SetOperation.java b/src/main/java/org/apache/datasketches/theta/SetOperation.java index 4d8ebf0c4..b89ca9703 100644 --- a/src/main/java/org/apache/datasketches/theta/SetOperation.java +++ b/src/main/java/org/apache/datasketches/theta/SetOperation.java @@ -25,6 +25,7 @@ import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE; import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.MemoryStatus; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -35,7 +36,7 @@ * * @author Lee Rhodes */ -public abstract class SetOperation { +public abstract class SetOperation implements MemoryStatus { static final int CONST_PREAMBLE_LONGS = 3; SetOperation() {} @@ -237,20 +238,6 @@ public static int getMaxAnotBResultBytes(final int nomEntries) { */ public abstract Family getFamily(); - /** - * Returns true if the backing resource of this is identical with the backing resource - * of that. The capacities must be the same. 
If this is a region, - * the region offset must also be the same. - * - *Note: Only certain set operators during stateful operations can be serialized. - * Only when they are stored into Memory will this be relevant.
- * - * @param that A different non-null object - * @return true if the backing resource of this is the same as the backing resource - * of that. - */ - public abstract boolean isSameResource(Memory that); - //restricted /** diff --git a/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java b/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java index d788418dc..4a35cf67d 100644 --- a/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java +++ b/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java @@ -66,7 +66,7 @@ public SetOperationBuilder() { * Sets the Maximum Nominal Entries (max K) for this set operation. The effective value of K of the result of a * Set Operation can be less than max K, but never greater. * The minimum value is 16 and the maximum value is 67,108,864, which is 2^26. - * @param nomEntries Nominal Entres + * @param nomEntries Nominal Entries * This will become the ceiling power of 2 if it is not a power of 2. 
* @return this SetOperationBuilder */ diff --git a/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java b/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java index a4bac21c9..3cfc13b1e 100644 --- a/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java +++ b/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java @@ -343,16 +343,6 @@ public double getUpperBound(final int numStdDev) { return 1.0; } - @Override - public boolean hasMemory() { - return false; - } - - @Override - public boolean isDirect() { - return false; - } - @Override public boolean isEmpty() { return false; diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java index cc1fd4d23..89618bc23 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta/Sketch.java @@ -32,6 +32,7 @@ import static org.apache.datasketches.thetacommon.HashOperations.count; import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.MemoryStatus; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -44,7 +45,7 @@ * * @author Lee Rhodes */ -public abstract class Sketch { +public abstract class Sketch implements MemoryStatus { static final int DEFAULT_LG_RESIZE_FACTOR = 3; //Unique to Heap Sketch() {} @@ -292,14 +293,11 @@ public double getLowerBound(final int numStdDev) { /** * Returns the maximum number of storage bytes required for a CompactSketch with the given - * number of actual entries. Note that this assumes the worse case of the sketch in - * estimation mode, which requires storing theta and count. - * @param numberOfEntries the actual number of entries stored with the CompactSketch. + * number of actual entries. + * @param numberOfEntries the actual number of retained entries stored in the sketch. 
* @return the maximum number of storage bytes required for a CompactSketch with the given number - * of entries. - * @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int) instead} + * of retained entries. */ - @Deprecated public static int getMaxCompactSketchBytes(final int numberOfEntries) { if (numberOfEntries == 0) { return 8; } if (numberOfEntries == 1) { return 16; } @@ -311,11 +309,11 @@ public static int getMaxCompactSketchBytes(final int numberOfEntries) { * log_base2 of the number of nominal entries, which is a power of 2. * @param lgNomEntries Nominal Entries * @return the maximum number of storage bytes required for a CompactSketch with the given - * nomEntries. + * lgNomEntries. */ public static int getCompactSketchMaxBytes(final int lgNomEntries) { - return (int)((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD) - + Family.QUICKSELECT.getMaxPreLongs() * Long.BYTES; + return (int)((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD + + Family.QUICKSELECT.getMaxPreLongs()) * Long.BYTES; } /** @@ -386,26 +384,12 @@ public double getUpperBound(final int numStdDev) { : getRetainedEntries(true); } - /** - * Returns true if this sketch's data structure is backed by Memory or WritableMemory. - * @return true if this sketch's data structure is backed by Memory or WritableMemory. - */ - public abstract boolean hasMemory(); - /** * Returns true if this sketch is in compact form. * @return true if this sketch is in compact form. */ public abstract boolean isCompact(); - /** - * Returns true if the this sketch's internal data structure is backed by direct (off-heap) - * Memory. - * @return true if the this sketch's internal data structure is backed by direct (off-heap) - * Memory. - */ - public abstract boolean isDirect(); - /** * See Empty * @return true if empty. 
@@ -427,18 +411,6 @@ public boolean isEstimationMode() { */ public abstract boolean isOrdered(); - /** - * Returns true if the backing resource of this is identical with the backing resource - * of that. The capacities must be the same. If this is a region, - * the region offset must also be the same. - * @param that A different non-null object - * @return true if the backing resource of this is the same as the backing resource - * of that. - */ - public boolean isSameResource(final Memory that) { - return false; - } - /** * Returns a HashIterator that can be used to iterate over the retained hash values of the * Theta sketch. @@ -605,8 +577,8 @@ public static String toString(final Memory mem) { abstract int getCurrentPreambleLongs(); /** - * Returns the Memory object if it exists, otherwise null. - * @return the Memory object if it exists, otherwise null. + * Returns the backing Memory object if it exists, otherwise null. + * @return the backing Memory object if it exists, otherwise null. */ abstract Memory getMemory(); diff --git a/src/main/java/org/apache/datasketches/theta/Sketches.java b/src/main/java/org/apache/datasketches/theta/Sketches.java index 4b1461876..2e7fa0915 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketches.java +++ b/src/main/java/org/apache/datasketches/theta/Sketches.java @@ -80,29 +80,25 @@ public static int getMaxAnotBResultBytes(final int maxNomEntries) { /** * Returns the maximum number of storage bytes required for a CompactSketch with the given - * number of actual entries. Note that this assumes the worse case of the sketch in - * estimation mode, which requires storing theta and count. - * @param numberOfEntries the actual number of entries stored with the CompactSketch. + * number of actual entries. + * @param numberOfEntries the actual number of retained entries stored in the sketch. * @return the maximum number of storage bytes required for a CompactSketch with the given number - * of entries. 
- * @see Sketch#getMaxCompactSketchBytes(int) - * @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int) instead} + * of retained entries. */ - @Deprecated public static int getMaxCompactSketchBytes(final int numberOfEntries) { return Sketch.getMaxCompactSketchBytes(numberOfEntries); } /** * Returns the maximum number of storage bytes required for a CompactSketch given the configured - * number of nominal entries (power of 2). - * @param nomEntries Nominal Entries + * log_base2 of the number of nominal entries, which is a power of 2. + * @param lgNomEntries log_base2 of the number of nominal entries * @return the maximum number of storage bytes required for a CompactSketch with the given - * nomEntries. + * lgNomEntries. * @see Sketch#getCompactSketchMaxBytes(int) */ - public static int getCompactSketchMaxBytes(final int nomEntries) { - return Sketch.getCompactSketchMaxBytes(nomEntries); + public static int getCompactSketchMaxBytes(final int lgNomEntries) { + return Sketch.getCompactSketchMaxBytes(lgNomEntries); } /** diff --git a/src/main/java/org/apache/datasketches/theta/UnionImpl.java b/src/main/java/org/apache/datasketches/theta/UnionImpl.java index 3ea5ca557..bac05de74 100644 --- a/src/main/java/org/apache/datasketches/theta/UnionImpl.java +++ b/src/main/java/org/apache/datasketches/theta/UnionImpl.java @@ -262,10 +262,22 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds minThetaLong, curCountOut, seedHash, empty, true, dstOrdered, dstOrdered, dstMem, compactCacheOut); } + @Override + public boolean hasMemory() { + return gadget_ instanceof DirectQuickSelectSketchR + ? gadget_.hasMemory() : false; + } + + @Override + public boolean isDirect() { + return gadget_ instanceof DirectQuickSelectSketchR + ? gadget_.isDirect() : false; + } + @Override public boolean isSameResource(final Memory that) { return gadget_ instanceof DirectQuickSelectSketchR - ? gadget_.getMemory().isSameResource(that) : false; + ? 
gadget_.isSameResource(that) : false; } @Override @@ -321,7 +333,7 @@ public void union(final Sketch sketchIn) { if (sketchIn.isOrdered() && (sketchIn instanceof CompactSketch)) { //Use early stop //Ordered, thus compact if (sketchIn.hasMemory()) { - final Memory skMem = ((CompactSketch) sketchIn).getMemory(); + final Memory skMem = sketchIn.getMemory(); final int preambleLongs = skMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; for (int i = 0; i < curCountIn; i++ ) { final int offsetBytes = preambleLongs + i << 3; diff --git a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java index 882c5e2e9..cb6854b02 100644 --- a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java @@ -343,6 +343,7 @@ public UpdateReturnState update(final long[] data) { /** * All potential updates converge here. + * *Don't ever call this unless you really know what you are doing!
* * @param hash the given input hash value. A hash of zero or Long.MAX_VALUE is ignored. diff --git a/src/main/java/org/apache/datasketches/thetacommon/SetOperationCornerCases.java b/src/main/java/org/apache/datasketches/thetacommon/SetOperationCornerCases.java index 20dd6ee7d..d9fda48bb 100644 --- a/src/main/java/org/apache/datasketches/thetacommon/SetOperationCornerCases.java +++ b/src/main/java/org/apache/datasketches/thetacommon/SetOperationCornerCases.java @@ -34,8 +34,11 @@ public class SetOperationCornerCases { /** Intersection actions */ public enum IntersectAction { + /** Degenerate{MinTheta, 0, F} */ DEGEN_MIN_0_F("D", "Degenerate{MinTheta, 0, F}"), + /** Empty{1.0, 0, T} */ EMPTY_1_0_T("E", "Empty{1.0, 0, T}"), + /** Full Intersect */ FULL_INTERSECT("I", "Full Intersect"); private String actionId; @@ -46,10 +49,18 @@ private IntersectAction(final String actionId, final String actionDescription) { this.actionDescription = actionDescription; } + /** + * Gets the Action ID + * @return the actionId + */ public String getActionId() { return actionId; } + /** + * Gets the Action Description + * @return the actionDescription + */ public String getActionDescription() { return actionDescription; } @@ -57,11 +68,17 @@ public String getActionDescription() { /** A not B actions */ public enum AnotbAction { + /** Sketch A Exactly */ SKETCH_A("A", "Sketch A Exactly"), + /** Trim Sketch A by MinTheta */ TRIM_A("TA", "Trim Sketch A by MinTheta"), + /** Degenerate{MinTheta, 0, F} */ DEGEN_MIN_0_F("D", "Degenerate{MinTheta, 0, F}"), + /** Degenerate{ThetaA, 0, F} */ DEGEN_THA_0_F("DA", "Degenerate{ThetaA, 0, F}"), + /** Empty{1.0, 0, T} */ EMPTY_1_0_T("E", "Empty{1.0, 0, T}"), + /** Full AnotB */ FULL_ANOTB("N", "Full AnotB"); private String actionId; @@ -72,24 +89,42 @@ private AnotbAction(final String actionId, final String actionDescription) { this.actionDescription = actionDescription; } + /** + * Gets the Action ID + * @return the actionId + */ public String 
getActionId() { return actionId; } + /** + * Gets the action description + * @return the action description + */ public String getActionDescription() { return actionDescription; } } + /** List of union actions */ public enum UnionAction { + /** Sketch A Exactly */ SKETCH_A("A", "Sketch A Exactly"), + /** Trim Sketch A by MinTheta */ TRIM_A("TA", "Trim Sketch A by MinTheta"), + /** Sketch B Exactly */ SKETCH_B("B", "Sketch B Exactly"), + /** Trim Sketch B by MinTheta */ TRIM_B("TB", "Trim Sketch B by MinTheta"), + /** Degenerate{MinTheta, 0, F} */ DEGEN_MIN_0_F("D", "Degenerate{MinTheta, 0, F}"), + /** Degenerate{ThetaA, 0, F} */ DEGEN_THA_0_F("DA", "Degenerate{ThetaA, 0, F}"), + /** Degenerate{ThetaB, 0, F} */ DEGEN_THB_0_F("DB", "Degenerate{ThetaB, 0, F}"), + /** Empty{1.0, 0, T} */ EMPTY_1_0_T("E", "Empty{1.0, 0, T}"), + /** Full Union */ FULL_UNION("N", "Full Union"); private String actionId; @@ -100,49 +135,74 @@ private UnionAction(final String actionId, final String actionDescription) { this.actionDescription = actionDescription; } + /** + * Gets the action ID + * @return the actionId + */ public String getActionId() { return actionId; } + /** + * Gets the action description + * @return the actionDescription + */ public String getActionDescription() { return actionDescription; } } + /** List of corner cases */ public enum CornerCase { + /** Empty Empty */ Empty_Empty(055, "A{ 1.0, 0, T} ; B{ 1.0, 0, T}", IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.EMPTY_1_0_T), + /** Empty Exact */ Empty_Exact(056, "A{ 1.0, 0, T} ; B{ 1.0,>0, F}", IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.SKETCH_B), + /** Empty Estimation */ Empty_Estimation(052, "A{ 1.0, 0, T} ; B{<1.0,>0, F", IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.SKETCH_B), + /** Empty Degen */ Empty_Degen(050, "A{ 1.0, 0, T} ; B{<1.0, 0, F}", IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.DEGEN_THB_0_F), + /** Exact Empty */ 
Exact_Empty(065, "A{ 1.0,>0, F} ; B{ 1.0, 0, T}", IntersectAction.EMPTY_1_0_T, AnotbAction.SKETCH_A, UnionAction.SKETCH_A), + /** Exact Exact */ Exact_Exact(066, "A{ 1.0,>0, F} ; B{ 1.0,>0, F}", IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION), + /** Exact Estimation */ Exact_Estimation(062, "A{ 1.0,>0, F} ; B{<1.0,>0, F}", IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION), + /** Exact Degen */ Exact_Degen(060, "A{ 1.0,>0, F} ; B{<1.0, 0, F}", IntersectAction.DEGEN_MIN_0_F, AnotbAction.TRIM_A, UnionAction.TRIM_A), + /** Estimation_Empty */ Estimation_Empty(025, "A{<1.0,>0, F} ; B{ 1.0, 0, T}", IntersectAction.EMPTY_1_0_T, AnotbAction.SKETCH_A, UnionAction.SKETCH_A), + /** Estimation_Exact */ Estimation_Exact(026, "A{<1.0,>0, F} ; B{ 1.0,>0, F}", IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION), + /** Estimation_Estimation */ Estimation_Estimation(022, "A{<1.0,>0, F} ; B{<1.0,>0, F}", IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION), + /** Estimation_Degen */ Estimation_Degen(020, "A{<1.0,>0, F} ; B{<1.0, 0, F}", IntersectAction.DEGEN_MIN_0_F, AnotbAction.TRIM_A, UnionAction.TRIM_A), + /** Degen_Empty */ Degen_Empty(005, "A{<1.0, 0, F} ; B{ 1.0, 0, T}", IntersectAction.EMPTY_1_0_T, AnotbAction.DEGEN_THA_0_F, UnionAction.DEGEN_THA_0_F), + /** Degen_Exact */ Degen_Exact(006, "A{<1.0, 0, F} ; B{ 1.0,>0, F}", IntersectAction.DEGEN_MIN_0_F, AnotbAction.DEGEN_THA_0_F, UnionAction.TRIM_B), + /** Degen_Estimation */ Degen_Estimation(002, "A{<1.0, 0, F} ; B{<1.0,>0, F}", IntersectAction.DEGEN_MIN_0_F, AnotbAction.DEGEN_MIN_0_F, UnionAction.TRIM_B), + /** Degen_Degen */ Degen_Degen(000, "A{<1.0, 0, F} ; B{<1.0, 0, F}", IntersectAction.DEGEN_MIN_0_F, AnotbAction.DEGEN_MIN_0_F, UnionAction.DEGEN_MIN_0_F); @@ -168,27 +228,52 @@ private CornerCase(final int caseId, final String caseDescription, this.unionAction = unionAction; } + /** + * Gets the case ID + * 
@return the caseId + */ public int getId() { return caseId; } + /** + * Gets the case description + * @return the caseDescription + */ public String getCaseDescription() { return caseDescription; } + /** + * Gets the intersect action + * @return the intersectAction + */ public IntersectAction getIntersectAction() { return intersectAction; } + /** + * Gets the AnotB action + * @return the anotbAction + */ public AnotbAction getAnotbAction() { return anotbAction; } + /** + * Gets the union action + * @return the unionAction + */ public UnionAction getUnionAction() { return unionAction; } //See checkById test in /tuple/MiscTest. + /** + * Converts caseId to CornerCaseId + * @param id the case ID + * @return the Corner Case ID + */ public static CornerCase caseIdToCornerCase(final int id) { final CornerCase cc = caseIdToCornerCaseMap.get(id); if (cc == null) { @@ -198,12 +283,29 @@ public static CornerCase caseIdToCornerCase(final int id) { } } //end of enum CornerCase + /** + * Creates the CornerCase ID + * @param thetaLongA the theta of A as a long + * @param countA the count of A + * @param emptyA true if A is empty + * @param thetaLongB the theta of B as a long + * @param countB the count of B + * @param emptyB true if B is empty + * @return the Corner Case ID + */ public static int createCornerCaseId( final long thetaLongA, final int countA, final boolean emptyA, final long thetaLongB, final int countB, final boolean emptyB) { return (sketchStateId(emptyA, countA, thetaLongA) << 3) | sketchStateId(emptyB, countB, thetaLongB); } + /** + * Returns the sketch state ID + * @param isEmpty true if empty + * @param numRetained the number of items retained + * @param thetaLong the value of theta as a long + * @return the sketch state ID + */ public static int sketchStateId(final boolean isEmpty, final int numRetained, final long thetaLong) { // assume thetaLong = MAX if empty return (((thetaLong == MAX) || isEmpty) ? 4 : 0) | ((numRetained > 0) ? 2 : 0) | (isEmpty ? 
1 : 0); diff --git a/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java b/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java index 44d1d9cc0..9b0ca33cb 100644 --- a/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java +++ b/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java @@ -32,8 +32,17 @@ public final class SerializerDeserializer { * Defines the sketch classes that this SerializerDeserializer can handle. */ @SuppressWarnings("javadoc") - public static enum SketchType { QuickSelectSketch, CompactSketch, ArrayOfDoublesQuickSelectSketch, - ArrayOfDoublesCompactSketch, ArrayOfDoublesUnion } + public static enum SketchType { + /** QuickSelectSketch */ + QuickSelectSketch, + /** CompactSketch */ + CompactSketch, + /** ArrayOfDoublesQuickSelectSketch */ + ArrayOfDoublesQuickSelectSketch, + /** ArrayOfDoublesCompactSketch */ + ArrayOfDoublesCompactSketch, + /** ArrayOfDoublesUnion */ + ArrayOfDoublesUnion } static final int TYPE_BYTE_OFFSET = 3; diff --git a/src/main/java/org/apache/datasketches/tuple/Union.java b/src/main/java/org/apache/datasketches/tuple/Union.java index 653312fa0..acefa2ab5 100644 --- a/src/main/java/org/apache/datasketches/tuple/Union.java +++ b/src/main/java/org/apache/datasketches/tuple/Union.java @@ -100,8 +100,7 @@ public CompactSketchunion(final SketchtupleSketch, /** * Performs a stateful union of the internal set with the given tupleSketch. * @param tupleSketch input tuple sketch to merge with the internal set. - * - *Nulls and empty sketches are ignored.
+ * Nulls and empty sketches are ignored. */ public void union(final SketchtupleSketch) { if (tupleSketch == null || tupleSketch.isEmpty()) { return; } diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java index e7abae0d4..a54c11afc 100644 --- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java +++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java @@ -41,24 +41,28 @@ public enum Mode { /** * The aggregation mode is the summation function. + * *New retained value = previous retained value + incoming value
*/ Sum, /** * The aggregation mode is the minimum function. + * *New retained value = min(previous retained value, incoming value)
*/ Min, /** * The aggregation mode is the maximum function. + * *New retained value = max(previous retained value, incoming value)
*/ Max, /** * The aggregation mode is always one. + * *New retained value = 1.0
*/ AlwaysOne diff --git a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java index 4c04fa2c2..72695355e 100644 --- a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java +++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java @@ -41,24 +41,28 @@ public enum Mode { /** * The aggregation mode is the summation function. + * *New retained value = previous retained value + incoming value
*/ Sum, /** * The aggregation mode is the minimum function. + * *New retained value = min(previous retained value, incoming value)
*/ Min, /** * The aggregation mode is the maximum function. + * *New retained value = max(previous retained value, incoming value)
*/ Max, /** * The aggregation mode is always one. + * *New retained value = 1
*/ AlwaysOne diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java index 91d4eade4..52f827149 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java @@ -30,6 +30,7 @@ /** * Direct Compact Sketch of type ArrayOfDoubles. + * *This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.
diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java index 3dd019d74..7c1b1bf07 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java @@ -23,6 +23,7 @@ /** * Direct Intersection operation for tuple sketches of type ArrayOfDoubles. + * *This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.
@@ -43,7 +44,7 @@ final class DirectArrayOfDoublesIntersection extends ArrayOfDoublesIntersection } @Override - protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, + protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, final long seed) { return new DirectArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed, mem_); } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java index 1b4e86904..ae1aa3dc0 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java @@ -33,6 +33,7 @@ /** * Direct QuickSelect tuple sketch of type ArrayOfDoubles. + * *This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.
diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java index 15503fc04..dcdab1313 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java @@ -23,6 +23,7 @@ /** * Iterator over the off-heap, Direct tuple sketch of type ArrayOfDoubles (compact or hash table). + * *This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.
diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java index 00310f534..734019632 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java @@ -27,6 +27,7 @@ /** * Direct Union operation for tuple sketches of type ArrayOfDoubles. + * *This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.
diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterCrossLanguageTest.java b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterCrossLanguageTest.java new file mode 100644 index 000000000..12c6f20df --- /dev/null +++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterCrossLanguageTest.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.filters.bloomfilter; + +import static org.apache.datasketches.common.TestUtil.CHECK_CPP_FILES; +import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES; +import static org.apache.datasketches.common.TestUtil.cppPath; +import static org.apache.datasketches.common.TestUtil.javaPath; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import java.io.IOException; +import java.nio.file.Files; + +import org.apache.datasketches.memory.Memory; +import org.testng.annotations.Test; + +/** + * Serialize binary sketches to be tested by C++ code. + * Test deserialization of binary sketches serialized by C++ code. 
+ */ +public class BloomFilterCrossLanguageTest { + + @Test(groups = {GENERATE_JAVA_FILES}) + public void generatBloomFilterBinariesForCompatibilityTesting() throws IOException { + final int[] nArr = {0, 10_000, 2_000_000, 300_000_00}; + final short[] hArr = {3, 5}; + for (int n : nArr) { + for (short numHashes : hArr) { + final long configBits = Math.max(n, 1000L); // so empty still has valid bit size + BloomFilter bf = BloomFilterBuilder.createBySize(configBits, numHashes); + for (int i = 0; i < n / 10; ++i) { + bf.update(i); + } + if (n > 0) { bf.update(Float.NaN); } + assertEquals(bf.isEmpty(), n == 0); + assertTrue(bf.isEmpty() || (bf.getBitsUsed() > (n / 10))); + Files.newOutputStream(javaPath.resolve("bf_n" + n + "_h" + numHashes + "_java.sk")).write(bf.toByteArray()); + } + } + } + + @Test(groups = {CHECK_CPP_FILES}) + public void readBloomFilterBinariesForCompatibilityTesting() throws IOException { + final int[] nArr = {0, 10_000, 2_000_000, 300_000_00}; + final short[] hArr = {3, 5}; + for (int n : nArr) { + for (short numHashes : hArr) { + final byte[] bytes = Files.readAllBytes(cppPath.resolve("bf_n" + n + "_h" + numHashes + "_cpp.sk")); + final BloomFilter bf = BloomFilter.heapify(Memory.wrap(bytes)); + assertEquals(bf.isEmpty(), n == 0); + assertTrue(bf.isEmpty() || (bf.getBitsUsed() > (n / 10))); + + for (int i = 0; i < n / 10; ++i) { + assertTrue(bf.query(i)); + } + if (n > 0) { + assert(bf.query(Double.NaN)); + } + } + } + } +} diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java index dedaf9db3..25bef3643 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java +++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java @@ -24,14 +24,18 @@ import static org.testng.Assert.assertThrows; import static org.testng.Assert.assertTrue; +import java.nio.ByteOrder; + import 
org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; + public class BloomFilterTest { @Test @@ -51,8 +55,8 @@ public void createNewFilterTest() throws Exception { assertFalse(bf1.isDirect()); assertFalse(bf1.isReadOnly()); - try (WritableHandle wh = WritableMemory.allocateDirect(sizeBytes)) { - final WritableMemory wmem = wh.getWritable(); + WritableMemory wmem; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(sizeBytes)).scope()) { final BloomFilter bf2 = new BloomFilter(numBits, numHashes, seed, wmem); assertTrue(bf2.isEmpty()); assertTrue(bf2.hasMemory()); diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java b/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java index 521019e62..bbedd2fb7 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java +++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java @@ -142,8 +142,8 @@ public void bitAddressOutOfBoundsNonEmptyTest() { final Memory mem = bitArrayToMemory(hba); DirectBitArrayR dba = DirectBitArrayR.wrap(mem, hba.isEmpty()); - assertThrows(AssertionError.class, () -> dba.getBit(-10)); - assertThrows(AssertionError.class, () -> dba.getBit(2048)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getBit(-10)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getBit(2048)); } @Test diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java 
b/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java index a45bcbb82..8327a0d5e 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java +++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java @@ -139,7 +139,7 @@ public void basicWritableWrapTest() { @Test public void countWritableWrappedBitsWhenDirty() { // like basicOperationTest but with setBit which does - // not neecssarily track numBitsSet_ + // not necessarily track numBitsSet_ final HeapBitArray hba = new HeapBitArray(128); assertFalse(hba.getAndSetBit(1)); assertFalse(hba.getAndSetBit(2)); @@ -172,12 +172,12 @@ public void bitAddresOutOfBoundsNonEmptyTest() { dba.getAndSetBit(i); } - assertThrows(AssertionError.class, () -> dba.getBit(-10)); - assertThrows(AssertionError.class, () -> dba.getBit(2048)); - assertThrows(AssertionError.class, () -> dba.setBit(-20)); - assertThrows(AssertionError.class, () -> dba.setBit(4096)); - assertThrows(AssertionError.class, () -> dba.getAndSetBit(-30)); - assertThrows(AssertionError.class, () -> dba.getAndSetBit(8192)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getBit(-10)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getBit(2048)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.setBit(-20)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.setBit(4096)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getAndSetBit(-30)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getAndSetBit(8192)); } @Test diff --git a/src/test/java/org/apache/datasketches/hash/MurmurHash3v2Test.java b/src/test/java/org/apache/datasketches/hash/MurmurHash3v3Test.java similarity index 82% rename from src/test/java/org/apache/datasketches/hash/MurmurHash3v2Test.java rename to src/test/java/org/apache/datasketches/hash/MurmurHash3v3Test.java index 23f369e63..8699a091a 100644 --- 
a/src/test/java/org/apache/datasketches/hash/MurmurHash3v2Test.java +++ b/src/test/java/org/apache/datasketches/hash/MurmurHash3v3Test.java @@ -28,13 +28,13 @@ import org.testng.annotations.Test; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.MurmurHash3v2; +import org.apache.datasketches.memory.internal.MurmurHash3v3; import org.apache.datasketches.memory.WritableMemory; /** * @author Lee Rhodes */ -public class MurmurHash3v2Test { +public class MurmurHash3v3Test { private Random rand = new Random(); private static final int trials = 1 << 20; @@ -154,33 +154,33 @@ private static final long[] hashV1(byte[] key, long seed) { } private static final long[] hashV2(long[] key, long seed) { - return MurmurHash3v2.hash(key, seed); + return MurmurHash3v3.hash(key, seed); } private static final long[] hashV2(int[] key2, long seed) { - return MurmurHash3v2.hash(key2, seed); + return MurmurHash3v3.hash(key2, seed); } private static final long[] hashV2(char[] key, long seed) { - return MurmurHash3v2.hash(key, seed); + return MurmurHash3v3.hash(key, seed); } private static final long[] hashV2(byte[] key, long seed) { - return MurmurHash3v2.hash(key, seed); + return MurmurHash3v3.hash(key, seed); } //V2 single primitives private static final long[] hashV2(long key, long seed, long[] out) { - return MurmurHash3v2.hash(key, seed, out); + return MurmurHash3v3.hash(key, seed, out); } // private static final long[] hashV2(double key, long seed, long[] out) { -// return MurmurHash3v2.hash(key, seed, out); +// return MurmurHash3v3.hash(key, seed, out); // } // private static final long[] hashV2(String key, long seed, long[] out) { -// return MurmurHash3v2.hash(key, seed, out); +// return MurmurHash3v3.hash(key, seed, out); // } @@ -199,7 +199,7 @@ public void offsetChecks() { for (int offset = 0; offset < 16; offset++) { int arrLen = cap - offset; - hash1 = MurmurHash3v2.hash(wmem, offset, arrLen, seed, hash1); + hash1 = MurmurHash3v3.hash(wmem, 
offset, arrLen, seed, hash1); byte[] byteArr2 = new byte[arrLen]; wmem.getByteArray(offset, byteArr2, 0, arrLen); hash2 = MurmurHash3.hash(byteArr2, seed); @@ -222,8 +222,8 @@ public void byteArrChecks() { for (int i = 0; i < j; i++) { wmem.putByte(i, (byte) (-128 + i)); } long[] hash1 = MurmurHash3.hash(in, 0); - hash2 = MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(in, seed); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(in, seed); assertEquals(hash1, hash2); assertEquals(hash1, hash3); @@ -246,8 +246,8 @@ public void charArrChecks() { for (int i = 0; i < j; i++) { wmem.putInt(i, i); } long[] hash1 = MurmurHash3.hash(in, 0); - hash2 = MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(in, seed); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(in, seed); assertEquals(hash1, hash2); assertEquals(hash1, hash3); @@ -270,8 +270,8 @@ public void intArrChecks() { for (int i = 0; i < j; i++) { wmem.putInt(i, i); } long[] hash1 = MurmurHash3.hash(in, 0); - hash2 = MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(in, seed); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(in, seed); assertEquals(hash1, hash2); assertEquals(hash1, hash3); @@ -294,8 +294,8 @@ public void longArrChecks() { for (int i = 0; i < j; i++) { wmem.putLong(i, i); } long[] hash1 = MurmurHash3.hash(in, 0); - hash2 = MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(in, seed); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(in, seed); assertEquals(hash1, hash2); assertEquals(hash1, hash3); @@ -313,8 +313,8 @@ public void longCheck() { WritableMemory wmem = WritableMemory.writableWrap(in); long[] hash1 = MurmurHash3.hash(in, 0); - hash2 = 
MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(in, seed); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(in, seed); assertEquals(hash1, hash2); assertEquals(hash1, hash3); @@ -325,62 +325,57 @@ public void checkEmptiesNulls() { long seed = 123; long[] hashOut = new long[2]; try { - MurmurHash3v2.hash(Memory.wrap(new long[0]), 0, 0, seed, hashOut); //mem empty - fail(); - } catch (final IllegalArgumentException e) { } //OK - try { - Memory mem = null; - MurmurHash3v2.hash(mem, 0, 0, seed, hashOut); //mem null + MurmurHash3v3.hash(Memory.wrap(new long[0]), 0, 0, seed, hashOut); //mem empty fail(); } catch (final IllegalArgumentException e) { } //OK try { String s = ""; - MurmurHash3v2.hash(s, seed, hashOut); //string empty + MurmurHash3v3.hash(s, seed, hashOut); //string empty fail(); } catch (final IllegalArgumentException e) { } //OK try { String s = null; - MurmurHash3v2.hash(s, seed, hashOut); //string null + MurmurHash3v3.hash(s, seed, hashOut); //string null fail(); } catch (final IllegalArgumentException e) { } //OK try { byte[] barr = new byte[0]; - MurmurHash3v2.hash(barr, seed); //byte[] empty + MurmurHash3v3.hash(barr, seed); //byte[] empty fail(); } catch (final IllegalArgumentException e) { } //OK try { byte[] barr = null; - MurmurHash3v2.hash(barr, seed); //byte[] null + MurmurHash3v3.hash(barr, seed); //byte[] null fail(); } catch (final IllegalArgumentException e) { } //OK try { char[] carr = new char[0]; - MurmurHash3v2.hash(carr, seed); //char[] empty + MurmurHash3v3.hash(carr, seed); //char[] empty fail(); } catch (final IllegalArgumentException e) { } //OK try { char[] carr = null; - MurmurHash3v2.hash(carr, seed); //char[] null + MurmurHash3v3.hash(carr, seed); //char[] null fail(); } catch (final IllegalArgumentException e) { } //OK try { int[] iarr = new int[0]; - MurmurHash3v2.hash(iarr, seed); //int[] empty + MurmurHash3v3.hash(iarr, seed); //int[] 
empty fail(); } catch (final IllegalArgumentException e) { } //OK try { int[] iarr = null; - MurmurHash3v2.hash(iarr, seed); //int[] null + MurmurHash3v3.hash(iarr, seed); //int[] null fail(); } catch (final IllegalArgumentException e) { } //OK try { long[] larr = new long[0]; - MurmurHash3v2.hash(larr, seed); //long[] empty + MurmurHash3v3.hash(larr, seed); //long[] empty fail(); } catch (final IllegalArgumentException e) { } //OK try { long[] larr = null; - MurmurHash3v2.hash(larr, seed); //long[] null + MurmurHash3v3.hash(larr, seed); //long[] null fail(); } catch (final IllegalArgumentException e) { } //OK } @@ -390,9 +385,9 @@ public void checkStringLong() { long seed = 123; long[] hashOut = new long[2]; String s = "123"; - assertTrue(MurmurHash3v2.hash(s, seed, hashOut)[0] != 0); + assertTrue(MurmurHash3v3.hash(s, seed, hashOut)[0] != 0); long v = 123; - assertTrue(MurmurHash3v2.hash(v, seed, hashOut)[0] != 0); + assertTrue(MurmurHash3v3.hash(v, seed, hashOut)[0] != 0); } @Test @@ -420,8 +415,8 @@ private static long[] checkDouble(double dbl) { WritableMemory wmem = WritableMemory.writableWrap(dataArr); long[] hash1 = MurmurHash3.hash(dataArr, 0); - hash2 = MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(dbl, seed, hash2); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(dbl, seed, hash2); assertEquals(hash1, hash2); assertEquals(hash1, hash3); diff --git a/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java b/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java index 78b18c1cc..c913af378 100644 --- a/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java +++ b/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java @@ -31,12 +31,11 @@ import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.memory.DefaultMemoryRequestServer; -import org.testng.annotations.Test; - import 
org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; /** * @author Lee Rhodes @@ -48,53 +47,50 @@ public void checkGrow() { int lgConfigK = 4; TgtHllType tgtHllType = TgtHllType.HLL_4; int n = 8; //put lgConfigK == 4 into HLL mode - int bytes = HllSketch.getMaxUpdatableSerializationBytes(lgConfigK, tgtHllType); + long bytes = HllSketch.getMaxUpdatableSerializationBytes(lgConfigK, tgtHllType); HllSketch hllSketch; - try (WritableHandle handle = WritableMemory.allocateDirect(bytes, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - WritableMemory wmem = handle.getWritable(); - hllSketch = new HllSketch(lgConfigK, tgtHllType, wmem); - for (int i = 0; i < n; i++) { - hllSketch.update(i); - } - hllSketch.couponUpdate(HllUtil.pair(7, 15)); //mock extreme values - hllSketch.couponUpdate(HllUtil.pair(8, 15)); - hllSketch.couponUpdate(HllUtil.pair(9, 15)); - //println(hllSketch.toString(true, true, true, true)); - DirectHllArray dha = (DirectHllArray) hllSketch.hllSketchImpl; - assertEquals(dha.getAuxHashMap().getLgAuxArrInts(), 2); - assertTrue(hllSketch.isMemory()); - assertTrue(hllSketch.isOffHeap()); - assertTrue(hllSketch.isSameResource(wmem)); - - //Check heapify - byte[] byteArray = hllSketch.toCompactByteArray(); - HllSketch hllSketch2 = HllSketch.heapify(byteArray); - HllArray ha = (HllArray) hllSketch2.hllSketchImpl; - assertEquals(ha.getAuxHashMap().getLgAuxArrInts(), 2); - assertEquals(ha.getAuxHashMap().getAuxCount(), 3); - - //Check wrap - byteArray = hllSketch.toUpdatableByteArray(); - WritableMemory wmem2 = WritableMemory.writableWrap(byteArray); - hllSketch2 = HllSketch.writableWrap(wmem2); - //println(hllSketch2.toString(true, true, true, true)); - DirectHllArray dha2 = (DirectHllArray) hllSketch2.hllSketchImpl; - assertEquals(dha2.getAuxHashMap().getLgAuxArrInts(), 
2); - assertEquals(dha2.getAuxHashMap().getAuxCount(), 3); - - //Check grow to on-heap - hllSketch.couponUpdate(HllUtil.pair(10, 15)); //puts it over the edge, must grow - //println(hllSketch.toString(true, true, true, true)); - dha = (DirectHllArray) hllSketch.hllSketchImpl; - assertEquals(dha.getAuxHashMap().getLgAuxArrInts(), 3); - assertEquals(dha.getAuxHashMap().getAuxCount(), 4); - assertTrue(hllSketch.isMemory()); - assertFalse(hllSketch.isOffHeap()); - assertFalse(hllSketch.isSameResource(wmem)); - } catch (final Exception e) { - throw new RuntimeException(e); + WritableMemory wmem = WritableMemory.allocateDirect(bytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + + hllSketch = new HllSketch(lgConfigK, tgtHllType, wmem); + for (int i = 0; i < n; i++) { + hllSketch.update(i); } + hllSketch.couponUpdate(HllUtil.pair(7, 15)); //mock extreme values + hllSketch.couponUpdate(HllUtil.pair(8, 15)); + hllSketch.couponUpdate(HllUtil.pair(9, 15)); + //println(hllSketch.toString(true, true, true, true)); + DirectHllArray dha = (DirectHllArray) hllSketch.hllSketchImpl; + assertEquals(dha.getAuxHashMap().getLgAuxArrInts(), 2); + assertTrue(hllSketch.isMemory()); + assertTrue(hllSketch.isOffHeap()); + assertTrue(hllSketch.isSameResource(wmem)); + + //Check heapify + byte[] byteArray = hllSketch.toCompactByteArray(); + HllSketch hllSketch2 = HllSketch.heapify(byteArray); + HllArray ha = (HllArray) hllSketch2.hllSketchImpl; + assertEquals(ha.getAuxHashMap().getLgAuxArrInts(), 2); + assertEquals(ha.getAuxHashMap().getAuxCount(), 3); + + //Check wrap + byteArray = hllSketch.toUpdatableByteArray(); + WritableMemory wmem2 = WritableMemory.writableWrap(byteArray); + hllSketch2 = HllSketch.writableWrap(wmem2); + //println(hllSketch2.toString(true, true, true, true)); + DirectHllArray dha2 = (DirectHllArray) hllSketch2.hllSketchImpl; + assertEquals(dha2.getAuxHashMap().getLgAuxArrInts(), 2); + assertEquals(dha2.getAuxHashMap().getAuxCount(), 3); + + //Check 
grow to on-heap + hllSketch.couponUpdate(HllUtil.pair(10, 15)); //puts it over the edge, must grow + //println(hllSketch.toString(true, true, true, true)); + dha = (DirectHllArray) hllSketch.hllSketchImpl; + assertEquals(dha.getAuxHashMap().getLgAuxArrInts(), 3); + assertEquals(dha.getAuxHashMap().getAuxCount(), 4); + assertTrue(hllSketch.isMemory()); + assertFalse(hllSketch.isOffHeap()); + assertFalse(hllSketch.isSameResource(wmem)); + assertFalse(wmem.isAlive()); } @Test diff --git a/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java b/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java index 38cbc4977..09eebabf7 100644 --- a/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java +++ b/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java @@ -27,8 +27,10 @@ import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; + +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; /** @@ -69,11 +71,8 @@ private static void promotions(int lgConfigK, int n, TgtHllType tgtHllType, bool //println("DIRECT"); byte[] barr1; - WritableMemory wmem = null; - try (WritableHandle hand = WritableMemory.allocateDirect(bytes)) { - wmem = hand.getWritable(); - //byte[] byteArr = new byte[bytes]; - //WritableMemory wmem = WritableMemory.wrap(byteArr); + WritableMemory wmem; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(bytes)).scope()) { hllSketch = new HllSketch(lgConfigK, tgtHllType, wmem); assertTrue(hllSketch.isEmpty()); diff --git a/src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java b/src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java index cd1e0cbcf..17f3d0d0f 100644 --- a/src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java +++ 
b/src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java @@ -109,7 +109,6 @@ public void checkCorruptMemoryInput() { HllSketch sk = new HllSketch(12); byte[] memObj = sk.toCompactByteArray(); WritableMemory wmem = WritableMemory.writableWrap(memObj); - long memAdd = wmem.getCumulativeOffset(0); HllSketch bad; //checkFamily @@ -148,7 +147,6 @@ public void checkCorruptMemoryInput() { for (int i = 1; i <= 15; i++) { sk.update(i); } memObj = sk.toCompactByteArray(); wmem = WritableMemory.writableWrap(memObj); - memAdd = wmem.getCumulativeOffset(0); //check wrong PreInts and SET try { @@ -162,7 +160,6 @@ public void checkCorruptMemoryInput() { for (int i = 15; i <= 1000; i++) { sk.update(i); } memObj = sk.toCompactByteArray(); wmem = WritableMemory.writableWrap(memObj); - memAdd = wmem.getCumulativeOffset(0); //check wrong PreInts and HLL try { @@ -179,7 +176,6 @@ public void checkExtractFlags() { int bytes = HllSketch.getMaxUpdatableSerializationBytes(4, TgtHllType.HLL_4); WritableMemory wmem = WritableMemory.allocate(bytes); Object memObj = wmem.getArray(); - long memAdd = wmem.getCumulativeOffset(0L); HllSketch sk = new HllSketch(4, TgtHllType.HLL_4, wmem); int flags = extractFlags(wmem); assertEquals(flags, EMPTY_FLAG_MASK); diff --git a/src/test/java/org/apache/datasketches/kll/KllCrossLanguageTest.java b/src/test/java/org/apache/datasketches/kll/KllCrossLanguageTest.java index 078f3503b..53b422b7c 100644 --- a/src/test/java/org/apache/datasketches/kll/KllCrossLanguageTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllCrossLanguageTest.java @@ -67,6 +67,16 @@ public void generateKllFloatsSketchBinaries() throws IOException { } } + @Test(groups = {GENERATE_JAVA_FILES}) + public void generateKllLongsSketchBinaries() throws IOException { + final int[] nArr = {0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000}; + for (int n: nArr) { + final KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + for (int i = 1; i <= n; i++) { sk.update(i); } + 
Files.write(javaPath.resolve("kll_long_n" + n + "_java.sk"), sk.toByteArray()); //Files.write closes the file; newOutputStream(..).write(..) left the stream open + } + } + @Test(groups = {GENERATE_JAVA_FILES}) public void generateKllItemsSketchBinaries() throws IOException { final int[] nArr = {0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000}; diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketchTest.java index 9831c2f57..7a4d061ad 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketchTest.java @@ -110,13 +110,13 @@ public void checkDirectCompactGetDoubleItemsArray() { KllDoublesSketch sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray())); double[] itemsArr = sk2.getDoubleItemsArray(); - for (int i = 0; i < 20; i++) { assertEquals(itemsArr[i], 0F); } + for (int i = 0; i < 20; i++) { assertEquals(itemsArr[i], 0.0); } sk.update(1); sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray())); itemsArr = sk2.getDoubleItemsArray(); - for (int i = 0; i < 19; i++) { assertEquals(itemsArr[i], 0F); } - assertEquals(itemsArr[19], 1F); + for (int i = 0; i < 19; i++) { assertEquals(itemsArr[i], 0.0); } + assertEquals(itemsArr[19], 1.0); for (int i = 2; i <= 21; i++) { sk.update(i); } sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray())); @@ -169,12 +169,12 @@ public void checkMinAndMax() { try { sk2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} sk.update(1); sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray())); - assertEquals(sk2.getMaxItem(),1.0F); - assertEquals(sk2.getMinItem(),1.0F); + assertEquals(sk2.getMaxItem(),1.0); + assertEquals(sk2.getMinItem(),1.0); for (int i = 2; i <= 21; i++) { sk.update(i); } sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray())); - assertEquals(sk2.getMaxItem(),21.0F); - assertEquals(sk2.getMinItem(),1.0F); +
assertEquals(sk2.getMaxItem(),21.0); + assertEquals(sk2.getMinItem(),1.0); } @Test diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectCompactLongsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectCompactLongsSketchTest.java new file mode 100644 index 000000000..6b57fccac --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllDirectCompactLongsSketchTest.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.kll.KllDirectLongsSketch.KllDirectCompactLongsSketch; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +public class KllDirectCompactLongsSketchTest { + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void checkRODirectUpdatable_ROandWritable() { + int k = 20; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + byte[] byteArr = KllHelper.toByteArray(sk, true); //request updatable + Memory srcMem = Memory.wrap(byteArr); //cast to Memory -> read only + KllLongsSketch sk2 = KllLongsSketch.wrap(srcMem); + assertTrue(sk2 instanceof KllDirectLongsSketch); + + assertTrue(sk2.isMemoryUpdatableFormat()); + assertTrue(sk2.isReadOnly()); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 21L); + + WritableMemory srcWmem = WritableMemory.writableWrap(byteArr); + KllLongsSketch sk3 = KllLongsSketch.writableWrap(srcWmem, memReqSvr); + assertTrue(sk3 instanceof KllDirectLongsSketch); + println(sk3.toString(true, false)); + assertFalse(sk3.isReadOnly()); + sk3.update(22); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 22L); + } + + @Test + public void checkRODirectCompact() { + int k = 20; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + Memory srcMem = Memory.wrap(sk.toByteArray()); //compact RO fmt + KllLongsSketch sk2 = KllLongsSketch.wrap(srcMem); + assertTrue(sk2 instanceof 
KllDirectCompactLongsSketch); + //println(sk2.toString(true, false)); + assertFalse(sk2.isMemoryUpdatableFormat()); + assertTrue(sk2.isReadOnly()); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 21L); + Memory srcMem2 = Memory.wrap(sk2.toByteArray()); + KllLongsSketch sk3 = KllLongsSketch.writableWrap((WritableMemory)srcMem2, memReqSvr); + assertTrue(sk3 instanceof KllDirectCompactLongsSketch); + assertFalse(sk3.isMemoryUpdatableFormat()); //check the re-wrapped sketch sk3, not sk2 again + //println(sk3.toString(true, false)); + assertTrue(sk3.isReadOnly()); + assertEquals(sk3.getMinItem(), 1L); + assertEquals(sk3.getMaxItem(), 21L); + } + + @Test + public void checkDirectCompactSingleItem() { + int k = 20; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + + sk.update(1); + KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + assertTrue(sk2 instanceof KllDirectCompactLongsSketch); + //println(sk2.toString(true, false)); + assertTrue(sk2.isReadOnly()); + assertEquals(sk2.getLongSingleItem(), 1L); + + sk.update(2); + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + assertEquals(sk2.getN(), 2); + try { + sk2.getLongSingleItem(); + fail(); + } catch (SketchesArgumentException e) { } + } + + @Test + public void checkDirectCompactGetLongItemsArray() { + int k = 20; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + + KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + long[] itemsArr = sk2.getLongItemsArray(); + for (int i = 0; i < 20; i++) { assertEquals(itemsArr[i], 0); } + + sk.update(1); + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + itemsArr = sk2.getLongItemsArray(); + for (int i = 0; i < 19; i++) { assertEquals(itemsArr[i], 0); } + assertEquals(itemsArr[19], 1L); + + for (int i = 2; i <= 21; i++) { sk.update(i); } + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + itemsArr = sk2.getLongItemsArray(); + assertEquals(itemsArr.length, 33); + assertEquals(itemsArr[22], 21); + } + + @Test + public
void checkHeapAndDirectCompactGetRetainedItemsArray() { + int k = 20; + + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + long[] retArr = sk.getLongRetainedItemsArray(); + assertEquals(retArr.length, 0); + + KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + retArr = sk2.getLongRetainedItemsArray(); + assertEquals(retArr.length, sk.getNumRetained()); + assertEquals(retArr.length, 0); + + sk.update(1); + retArr = sk.getLongRetainedItemsArray(); + assertEquals(retArr.length, sk.getNumRetained()); + assertEquals(retArr.length, 1); + assertEquals(retArr[0], 1L); + + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + retArr = sk2.getLongRetainedItemsArray(); + assertEquals(retArr.length, sk.getNumRetained()); + assertEquals(retArr.length, 1); + assertEquals(retArr[0], 1L); + + for (int i = 2; i <= 21; i++) { sk.update(i); } + retArr = sk.getLongRetainedItemsArray(); + assertEquals(retArr.length, sk.getNumRetained()); + assertEquals(retArr.length, 11); + + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + assertEquals(retArr.length, sk2.getNumRetained()); + assertEquals(retArr.length, 11); + } + + @Test + public void checkMinAndMax() { + int k = 20; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + try { sk2.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sk2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + sk.update(1); + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + assertEquals(sk2.getMaxItem(),1L); + assertEquals(sk2.getMinItem(),1L); + for (int i = 2; i <= 21; i++) { sk.update(i); } + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + assertEquals(sk2.getMaxItem(),21L); + assertEquals(sk2.getMinItem(),1L); + } + + @Test + public void checkQuantile() { + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(); + for (int i = 1; i <= 1000; i++) { sk1.update(i); } + 
KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk1.toByteArray())); + long med2 = sk2.getQuantile(0.5); + long med1 = sk1.getQuantile(0.5); + assertEquals(med1, med2); + println("Med1: " + med1); + println("Med2: " + med2); + } + + @Test + public void checkCompactSingleItemMerge() { + int k = 20; + KllLongsSketch skH1 = KllLongsSketch.newHeapInstance(k); //Heap with 1 (single) + skH1.update(21); + KllLongsSketch skDC1 = KllLongsSketch.wrap(Memory.wrap(skH1.toByteArray())); //Direct Compact with 1 (single) + KllLongsSketch skH20 = KllLongsSketch.newHeapInstance(k); //Heap with 20 + for (int i = 1; i <= 20; i++) { skH20.update(i); } + skH20.merge(skDC1); + assertEquals(skH20.getN(), 21); + + WritableMemory wmem = WritableMemory.allocate(1000); + KllLongsSketch skDU20 = KllLongsSketch.newDirectInstance(k, wmem, memReqSvr);//Direct Updatable with 20 + for (int i = 1; i <= 20; i++) { skDU20.update(i); } + skDU20.merge(skDC1); + assertEquals(skDU20.getN(), 21); + } + + @Test + public void printlnTest() { + println("PRINTING: " + this.getClass().getName()); + } + + private final static boolean enablePrinting = false; + + /** + * @param o the Object to println + */ + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java index 78a3b9cd5..4bfdfa4fc 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java @@ -41,7 +41,7 @@ public void oneItemSketch() { sketch.update(0); QuantilesDoublesSketchIterator it = sketch.iterator(); Assert.assertTrue(it.next()); - Assert.assertEquals(it.getQuantile(), 0f); + Assert.assertEquals(it.getQuantile(), 0); Assert.assertEquals(it.getWeight(), 1);
Assert.assertFalse(it.next()); } diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java index 33219a806..6342ac33d 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java @@ -189,11 +189,11 @@ public void mergeLowerK() { sketch2.update(2 * n - i - 1); } - assertEquals(sketch1.getMinItem(), 0.0f); - assertEquals(sketch1.getMaxItem(), n - 1f); + assertEquals(sketch1.getMinItem(), 0.0); + assertEquals(sketch1.getMaxItem(), n - 1.0); assertEquals(sketch2.getMinItem(), n); - assertEquals(sketch2.getMaxItem(), 2f * n - 1f); + assertEquals(sketch2.getMaxItem(), 2.0 * n - 1.0); assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false)); assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true)); @@ -613,7 +613,7 @@ public void checkWritableWrapOfCompactForm() { public void checkReadOnlyExceptions() { int k = 20; double[] dblArr = new double[0]; - double dblV = 1.0f; + double dblV = 1.0; int idx = 1; boolean bool = true; KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(k); diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchIteratorTest.java new file mode 100644 index 000000000..8be509f10 --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchIteratorTest.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.quantilescommon.QuantilesLongsSketchIterator; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class KllDirectLongsSketchIteratorTest { + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void emptySketch() { + final KllLongsSketch sketch = getDLSketch(200, 0); + QuantilesLongsSketchIterator it = sketch.iterator(); + Assert.assertFalse(it.next()); + } + + @Test + public void oneItemSketch() { + final KllLongsSketch sketch = getDLSketch(200, 0); + sketch.update(0); + QuantilesLongsSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getQuantile(), 0); + Assert.assertEquals(it.getWeight(), 1); + Assert.assertFalse(it.next()); + } + + @Test + public void bigSketches() { + for (int n = 1000; n < 100000; n += 2000) { + final KllLongsSketch sketch = getDLSketch(200, 0); + for (int i = 0; i < n; i++) { + sketch.update(i); + } + QuantilesLongsSketchIterator it = sketch.iterator(); + int count = 0; + int weight = 0; + while (it.next()) { + count++; + weight += (int)it.getWeight(); + } + Assert.assertEquals(count, sketch.getNumRetained()); + Assert.assertEquals(weight, n); + } + } + + private static KllLongsSketch getDLSketch(final int k, final int n) { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= 
n; i++) { sk.update(i); } + byte[] byteArr = KllHelper.toByteArray(sk, true); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + + KllLongsSketch dlsk = KllLongsSketch.writableWrap(wmem, memReqSvr); + return dlsk; + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchTest.java new file mode 100644 index 000000000..f1784b7ce --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchTest.java @@ -0,0 +1,686 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.kll.KllSketch.SketchStructure; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +public class KllDirectLongsSketchTest { + + private static final double PMF_EPS_FOR_K_8 = 0.35; // PMF rank error (epsilon) for k=8 + private static final double PMF_EPS_FOR_K_128 = 0.025; // PMF rank error (epsilon) for k=128 + private static final double PMF_EPS_FOR_K_256 = 0.013; // PMF rank error (epsilon) for k=256 + private static final double NUMERIC_NOISE_TOLERANCE = 1E-6; + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void empty() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + assertTrue(sketch.isEmpty()); + assertEquals(sketch.getN(), 0); + assertEquals(sketch.getNumRetained(), 0); + try { sketch.getRank(0); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getQuantile(0.5); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getQuantiles(new double[] {0.0, 1.0}); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getPMF(new long[] {0}); fail(); } catch (SketchesArgumentException e) {} + try { 
sketch.getCDF(new long[0]); fail(); } catch (SketchesArgumentException e) {} + assertNotNull(sketch.toString(true, true)); + assertNotNull(sketch.toString()); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantileInvalidArg() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + sketch.update(1); + sketch.getQuantile(-1.0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantilesInvalidArg() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + sketch.update(1); + sketch.getQuantiles(new double[] {2.0}); + } + + @Test + public void oneValue() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + sketch.update(1); + assertFalse(sketch.isEmpty()); + assertEquals(sketch.getN(), 1); + assertEquals(sketch.getNumRetained(), 1); + assertEquals(sketch.getRank(1, EXCLUSIVE), 0.0); + assertEquals(sketch.getRank(2, EXCLUSIVE), 1.0); + assertEquals(sketch.getMinItem(), 1L); + assertEquals(sketch.getMaxItem(), 1L); + assertEquals(sketch.getQuantile(0.5, EXCLUSIVE), 1L); + } + + @Test + public void manyValuesEstimationMode() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + final int n = 1_000_000; + + for (int i = 0; i < n; i++) { + sketch.update(i); + } + assertEquals(sketch.getN(), n); + + // test getRank + for (int i = 0; i < n; i++) { + final double trueRank = (double) i / n; + assertEquals(sketch.getRank(i), trueRank, PMF_EPS_FOR_K_256, "for value " + i); + } + + // test getPMF + final double[] pmf = sketch.getPMF(new long[] {n / 2}); // split at median + assertEquals(pmf.length, 2); + assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256); + assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256); + assertEquals(sketch.getMinItem(), 0); // min value is exact + assertEquals(sketch.getMaxItem(), n - 1L); // max value is exact + + // check at every 0.1 percentage point + final double[] ranks = new double[1001]; + final double[] 
reverseRanks = new double[1001]; // check that ordering doesn't matter + for (int i = 0; i <= 1000; i++) { + ranks[i] = (double) i / 1000; + reverseRanks[1000 - i] = ranks[i]; + } + final long[] quantiles = sketch.getQuantiles(ranks); + final long[] reverseQuantiles = sketch.getQuantiles(reverseRanks); + long previousQuantile = 0; + for (int i = 0; i <= 1000; i++) { + final long quantile = sketch.getQuantile(ranks[i]); + assertEquals(quantile, quantiles[i]); + assertEquals(quantile, reverseQuantiles[1000 - i]); + assertTrue(previousQuantile <= quantile); + previousQuantile = quantile; + } + } + + @Test + public void getRankGetCdfGetPmfConsistency() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + final int n = 1000; + final long[] values = new long[n]; + for (int i = 0; i < n; i++) { + sketch.update(i); + values[i] = i; + } + final double[] ranks = sketch.getCDF(values); + final double[] pmf = sketch.getPMF(values); + double sumPmf = 0; + for (int i = 0; i < n; i++) { + assertEquals(ranks[i], sketch.getRank(values[i]), NUMERIC_NOISE_TOLERANCE, + "rank vs CDF for value " + i); + sumPmf += pmf[i]; + assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF for value " + i); + } + sumPmf += pmf[n]; + assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE); + assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE); + } + + @Test + public void merge() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(200, 0); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + sketch2.update(2 * n - i - 1); + } + + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + + assertEquals(sketch2.getMinItem(), n); + assertEquals(sketch2.getMaxItem(), 2 * n - 1); + + sketch1.merge(sketch2); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2L * n); + assertEquals(sketch1.getMinItem(), 0); + 
assertEquals(sketch1.getMaxItem(), 2 * n - 1L); + assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeLowerK() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(256, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(128, 0); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + sketch2.update(2 * n - i - 1); + } + + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + + assertEquals(sketch2.getMinItem(), n); + assertEquals(sketch2.getMaxItem(), 2 * n - 1); + + assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false)); + assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true)); + sketch1.merge(sketch2); + + // sketch1 must get "contaminated" by the lower K in sketch2 + assertEquals(sketch1.getNormalizedRankError(false), sketch2.getNormalizedRankError(false)); + assertEquals(sketch1.getNormalizedRankError(true), sketch2.getNormalizedRankError(true)); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2 * n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), 2 * n - 1); + assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_128); + } + + @Test + public void mergeEmptyLowerK() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(256, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(128, 0); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + + // rank error should not be affected by a merge with an empty sketch with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + 
assertEquals(sketch1.getQuantile(0.5), n / 2, n / 2 * PMF_EPS_FOR_K_256); + + //merge the other way + sketch2.merge(sketch1); + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + assertEquals(sketch1.getQuantile(0.5), n / 2, n / 2 * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeExactModeLowerK() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(256, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(128, 0); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + sketch2.update(1); + + // rank error should not be affected by a merge with a sketch in exact mode with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + } + + @Test + public void mergeMinMinValueFromOther() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(200, 0); + sketch1.update(1); + sketch2.update(2); + sketch2.merge(sketch1); + assertEquals(sketch2.getMinItem(), 1L); + } + + @Test + public void mergeMinAndMaxFromOther() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(200, 0); + int n = 1_000_000; + for (int i = 1; i <= n; i++) { + sketch1.update(i); + } + sketch2.merge(sketch1); + assertEquals(sketch2.getMinItem(), 1L); + assertEquals(sketch2.getMaxItem(), 1_000_000L); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void kTooSmall() { + getUpdatableDirectLongSketch(KllSketch.DEFAULT_M - 1, 0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void kTooLarge() { + getUpdatableDirectLongSketch(KllSketch.MAX_K + 1, 0); + } + + @Test + public void minK() { + final KllLongsSketch 
sketch = getUpdatableDirectLongSketch(KllSketch.DEFAULT_M, 0); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), KllSketch.DEFAULT_M); + assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); + } + + @Test + public void maxK() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(KllSketch.MAX_K, 0); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), KllSketch.MAX_K); + assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_256); + } + + @Test + public void serializeDeserializeEmptyViaCompactHeapify() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final byte[] bytes = sketch1.toByteArray(); //compact + final KllLongsSketch sketch2 = KllLongsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(false)); + assertTrue(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + try { sketch2.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + assertEquals(sketch2.currentSerializedSizeBytes(false), + sketch1.currentSerializedSizeBytes(false)); + } + + @Test + public void serializeDeserializeEmptyViaUpdatableWritableWrap() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final byte[] bytes = KllHelper.toByteArray(sketch1, true); + final KllLongsSketch sketch2 = + KllLongsSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); + assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(true)); + assertTrue(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), 
sketch1.getNormalizedRankError(false)); + try { sketch2.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + assertEquals(sketch2.currentSerializedSizeBytes(true), + sketch1.currentSerializedSizeBytes(true)); + } + + @Test + public void serializeDeserializeOneValueViaCompactHeapify() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + sketch1.update(1); + final byte[] bytes = sketch1.toByteArray(); + final KllLongsSketch sketch2 = KllLongsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(false)); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), 1); + assertEquals(sketch2.getN(), 1); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertTrue(sketch2.getMinItem() < Long.MAX_VALUE); + assertTrue(sketch2.getMaxItem() > Long.MIN_VALUE); + assertEquals(sketch2.currentSerializedSizeBytes(false), 8 + Long.BYTES); + } + + @Test + public void serializeDeserializeOneValueViaUpdatableWritableWrap() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + sketch1.update(1); + final byte[] bytes = KllHelper.toByteArray(sketch1, true); + final KllLongsSketch sketch2 = + KllLongsSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); + assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(true)); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), 1); + assertEquals(sketch2.getN(), 1); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertEquals(sketch2.getMinItem(), 1L); + assertEquals(sketch2.getMaxItem(), 1L); + assertEquals(sketch2.currentSerializedSizeBytes(false), 8 + Long.BYTES); + assertEquals(sketch2.currentSerializedSizeBytes(true), bytes.length); + } + + @Test + public void serializeDeserializeFullViaCompactHeapify() { + final 
KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 1000); + final byte[] byteArr1 = sketch1.toByteArray(); //compact + final KllLongsSketch sketch2 = KllLongsSketch.heapify(Memory.wrap(byteArr1)); + assertEquals(byteArr1.length, sketch1.currentSerializedSizeBytes(false)); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertEquals(sketch2.getMinItem(), sketch1.getMinItem()); + assertEquals(sketch2.getMaxItem(), sketch1.getMaxItem()); + assertEquals(sketch2.currentSerializedSizeBytes(false), sketch1.currentSerializedSizeBytes(false)); + } + + @Test + public void serializeDeserializeFullViaUpdatableWritableWrap() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final int n = 1000; + for (int i = 1; i <= n; i++) { + sketch1.update(i); + } + final byte[] bytes = KllHelper.toByteArray(sketch1, true); //updatable + final KllLongsSketch sketch2 = + KllLongsSketch.writableWrap(WritableMemory.writableWrap(bytes), memReqSvr); + assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(true)); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertEquals(sketch2.getMinItem(), sketch1.getMinItem()); + assertEquals(sketch2.getMaxItem(), sketch1.getMaxItem()); + assertEquals(sketch2.currentSerializedSizeBytes(true), sketch1.currentSerializedSizeBytes(true)); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void outOfOrderSplitPoints() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + sketch.update(0); + sketch.getCDF(new long[] {1, 0}); + } + + @Test + public void checkSimpleMergeDirect() { //used for 
troubleshooting + int k = 20; + int n1 = 21; + int n2 = 43; + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(k); + KllLongsSketch sk2 = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println("SK1:"); + println(sk1.toString(true, true)); + println("SK2:"); + println(sk2.toString(true, true)); + WritableMemory wmem1 = WritableMemory.writableWrap(KllHelper.toByteArray(sk1, true)); + WritableMemory wmem2 = WritableMemory.writableWrap(KllHelper.toByteArray(sk2, true)); + KllLongsSketch dsk1 = KllLongsSketch.writableWrap(wmem1, memReqSvr); + KllLongsSketch dsk2 = KllLongsSketch.writableWrap(wmem2, memReqSvr); + println("BEFORE MERGE"); + println(dsk1.toString(true, true)); + dsk1.merge(dsk2); + println("AFTER MERGE"); + println(dsk1.toString(true, true)); + } + + @Test + public void checkSketchInitializeDirectLongUpdatableMem() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: LONG FULL DIRECT FROM UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(compBytes, LONGS_SKETCH, true)); + sk = KllLongsSketch.writableWrap(wmem, memReqSvr); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21); + assertEquals(sk.getMinItem(), 1); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG EMPTY HEAPIFIED FROM 
UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(compBytes, LONGS_SKETCH, true)); + sk = KllLongsSketch.writableWrap(wmem, memReqSvr); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(compBytes, LONGS_SKETCH, true)); + sk = KllLongsSketch.writableWrap(wmem, memReqSvr); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkGetWritableMemory() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 200); + assertEquals(sketch.getK(), 200); + assertEquals(sketch.getN(), 200); + assertFalse(sketch.isEmpty()); + assertTrue(sketch.isMemoryUpdatableFormat()); + 
assertFalse(sketch.isEstimationMode()); + assertTrue(sketch.isLongsSketch()); + assertFalse(sketch.isLevelZeroSorted()); + assertFalse(sketch.isDoublesSketch()); + + final WritableMemory wmem = sketch.getWritableMemory(); + final KllLongsSketch sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), 200); + assertEquals(sk.getN(), 200); + assertFalse(sk.isEmpty()); + assertFalse(sk.isMemoryUpdatableFormat()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isLongsSketch()); + assertFalse(sk.isLevelZeroSorted()); + assertFalse(sk.isDoublesSketch()); + } + + @Test + public void checkReset() { + WritableMemory dstMem = WritableMemory.allocate(3000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n1 = sk.getN(); + long min1 = sk.getMinItem(); + long max1 = sk.getMaxItem(); + sk.reset(); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n2 = sk.getN(); + long min2 = sk.getMinItem(); + long max2 = sk.getMaxItem(); + assertEquals(n2, n1); + assertEquals(min2, min1); + assertEquals(max2, max1); + } + + @Test + public void checkHeapify() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 100; i++) { sk.update(i); } + KllLongsSketch sk2 = KllHeapLongsSketch.heapifyImpl(dstMem); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 100L); + } + + @Test + public void checkMergeKllLongsSketch() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 21; i++) { sk.update(i); } + KllLongsSketch sk2 = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++ ) { sk2.update(i + 100); } + sk.merge(sk2); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getMaxItem(), 121L); + } + + @Test + public void 
checkReverseMergeKllLongsSketch() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 21; i++) { sk.update(i); } + KllLongsSketch sk2 = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++ ) { sk2.update(i + 100); } + sk2.merge(sk); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 121L); + } + + @Test + public void checkWritableWrapOfCompactForm() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++ ) { sk.update(i); } + WritableMemory srcMem = WritableMemory.writableWrap(sk.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.writableWrap(srcMem, memReqSvr); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 21L); + } + + @Test + public void checkReadOnlyExceptions() { //each mutator call below is expected to throw on the wrapped sketch + int k = 20; + long[] longArr = new long[0]; //dummy items array argument + long longV = 1; //dummy item argument + int idx = 1; + boolean bool = true; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); //wrapped over non-writable Memory + try { sk2.incN(1); fail(); } catch (SketchesArgumentException e) { } + try { sk2.incNumLevels(); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setLongItemsArray(longArr); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setLongItemsArrayAt(idx, longV); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setLevelZeroSorted(bool); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setMaxItem(longV); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setMinItem(longV); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setMinK(idx); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setN(idx); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setNumLevels(idx); fail(); } catch (SketchesArgumentException e) { } + } + + @Test(expectedExceptions = SketchesArgumentException.class) +
public void checkMergeExceptions() { + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(20); + WritableMemory srcMem1 = WritableMemory.writableWrap(sk1.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.writableWrap(srcMem1, memReqSvr); + sk2.merge(sk1); + } + + @Test + public void checkVectorUpdate() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(20, dstMem, memReqSvr); + long[] v = new long[21]; + for (int i = 0; i < 21; i++) { v[i] = i + 1; } + sk.update(v, 0, 21); + println(sk.toString(true, true)); + int[] levelsArr = sk.getLevelsArray(SketchStructure.UPDATABLE); + assertEquals(levelsArr[0], 22); + long[] longsArr = sk.getLongItemsArray(); + assertEquals(longsArr[22], 21); + } + + @Test + public void checkWeightedUpdate() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(8, dstMem, memReqSvr); + for (int i = 0; i < 16; i++) { + sk.update(i + 1, 16); + } + println(sk.toString(true, true)); + assertEquals(sk.getN(), 256); + assertEquals(sk.getMaxItem(), 16L); + assertEquals(sk.getMinItem(), 1L); + } + + private static KllLongsSketch getUpdatableDirectLongSketch(int k, int n) { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = KllHelper.toByteArray(sk, true); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + KllLongsSketch dfsk = KllLongsSketch.writableWrap(wmem, memReqSvr); + return dfsk; + } + + @Test + public void checkMergeExceptionsWrongType() { + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(20); + KllDoublesSketch sk2 = KllDoublesSketch.newHeapInstance(20); + try { sk1.merge(sk2); fail(); } catch (ClassCastException e) { } + try { sk2.merge(sk1); fail(); } catch (ClassCastException e) { } + } + + private final static boolean enablePrinting = false; + + /** + * @param o the Object to println + */ + private static final void 
println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchSerDeTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchSerDeTest.java index e07a395da..007cc8370 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchSerDeTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchSerDeTest.java @@ -64,7 +64,7 @@ public void serializeDeserializeEmpty() { @Test public void serializeDeserializeOneValue() { final KllDoublesSketch sk1 = KllDoublesSketch.newHeapInstance(); - sk1.update(1); + sk1.update(1.0); //from heap -> byte[] -> heap final byte[] bytes = sk1.toByteArray(); diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java index 0b3818f1f..e143577f4 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java @@ -165,8 +165,8 @@ public void manyValuesEstimationMode() { assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256); assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256); - assertEquals(sketch.getMinItem(), 0f); // min value is exact - assertEquals(sketch.getMaxItem(), n - 1f); // max value is exact + assertEquals(sketch.getMinItem(), 0.0); // min value is exact + assertEquals(sketch.getMaxItem(), n - 1.0); // max value is exact // check at every 0.1 percentage point final double[] fractions = new double[1001]; @@ -261,11 +261,11 @@ public void mergeLowerK() { sketch2.update(2 * n - i - 1); } - assertEquals(sketch1.getMinItem(), 0.0f); - assertEquals(sketch1.getMaxItem(), n - 1f); + assertEquals(sketch1.getMinItem(), 0.0); + assertEquals(sketch1.getMaxItem(), n - 1); assertEquals(sketch2.getMinItem(), n); - assertEquals(sketch2.getMaxItem(), 2f * n - 1.0); + assertEquals(sketch2.getMaxItem(), 2.0 * n - 1.0); 
assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false)); assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true)); @@ -306,7 +306,7 @@ public void mergeEmptyLowerK() { sketch2.merge(sketch1); assertFalse(sketch1.isEmpty()); assertEquals(sketch1.getN(), n); - assertEquals(sketch1.getMinItem(), 0f); + assertEquals(sketch1.getMinItem(), 0.0); assertEquals(sketch1.getMaxItem(), n - 1.0); assertEquals(sketch1.getQuantile(0.5), n / 2.0, n * PMF_EPS_FOR_K_256); } @@ -424,7 +424,7 @@ public void checkNewDirectInstanceAndSize() { KllDoublesSketch.newDirectInstance(wmem, memReqSvr); try { KllDoublesSketch.newDirectInstance(null, memReqSvr); fail(); } catch (NullPointerException e) { } - try { KllFloatsSketch.newDirectInstance(wmem, null); fail(); } + try { KllDoublesSketch.newDirectInstance(wmem, null); fail(); } catch (NullPointerException e) { } int updateSize = KllSketch.getMaxSerializedSizeBytes(200, 0, DOUBLES_SKETCH, true); int compactSize = KllSketch.getMaxSerializedSizeBytes(200, 0, DOUBLES_SKETCH, false); diff --git a/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java index 00028e341..9fc74d97b 100644 --- a/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java @@ -570,7 +570,7 @@ public void checkCDF_PDF() { } @Test - public void checkWrapCase1Floats() { + public void checkWrapCase1Items() { KllItemsSketchsk = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe); final int n = 21; final int digits = Util.numDigits(n); diff --git a/src/test/java/org/apache/datasketches/kll/KllLongsSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllLongsSketchIteratorTest.java new file mode 100644 index 000000000..a98c32c9e --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllLongsSketchIteratorTest.java @@ 
-0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import org.apache.datasketches.quantilescommon.LongsSortedViewIterator; +import org.apache.datasketches.quantilescommon.QuantilesLongsSketchIterator; +import org.testng.Assert; +import org.testng.annotations.Test; + +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +public class KllLongsSketchIteratorTest { + + @Test + public void emptySketch() { + KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + QuantilesLongsSketchIterator it = sketch.iterator(); + Assert.assertFalse(it.next()); + } + + @Test + public void oneItemSketch() { + KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + QuantilesLongsSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getQuantile(), 1L); + Assert.assertEquals(it.getWeight(), 1); + Assert.assertFalse(it.next()); + } + + @Test + public void twoItemSketchForIterator() { + 
KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + sketch.update(2); + QuantilesLongsSketchIterator itr = sketch.iterator(); + assertTrue(itr.next()); + + assertEquals(itr.getQuantile(), 2L); + assertEquals(itr.getWeight(), 1); + + assertTrue(itr.next()); + + assertEquals(itr.getQuantile(), 1L); + assertEquals(itr.getWeight(), 1); + } + + @Test + public void twoItemSketchForSortedViewIterator() { + KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + sketch.update(2); + LongsSortedViewIterator itr = sketch.getSortedView().iterator(); + + assertTrue(itr.next()); + + assertEquals(itr.getQuantile(), 1L); + assertEquals(itr.getWeight(), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); + assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0); + assertEquals(itr.getNormalizedRank(INCLUSIVE), 0.5); + + assertTrue(itr.next()); + + assertEquals(itr.getQuantile(), 2L); + assertEquals(itr.getWeight(), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); + assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0.5); + assertEquals(itr.getNormalizedRank(INCLUSIVE), 1.0); + } + + @Test + public void bigSketches() { + for (int n = 1000; n < 100000; n += 2000) { + KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + for (int i = 0; i < n; i++) { + sketch.update(i); + } + QuantilesLongsSketchIterator it = sketch.iterator(); + int count = 0; + int weight = 0; + while (it.next()) { + count++; + weight += (int)it.getWeight(); + } + Assert.assertEquals(count, sketch.getNumRetained()); + Assert.assertEquals(weight, n); + } + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllLongsSketchSerDeTest.java b/src/test/java/org/apache/datasketches/kll/KllLongsSketchSerDeTest.java new file mode 100644 index 000000000..b9b0f800d --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllLongsSketchSerDeTest.java @@ -0,0 
+1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.Memory; +import org.testng.annotations.Test; + +public class KllLongsSketchSerDeTest { + + @Test + public void serializeDeserializeEmpty() { + final int N = 20; + + final KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(N); + //Empty: from heap -> byte[] -> heap + final byte[] bytes = sk1.toByteArray(); + final KllLongsSketch sk2 = KllLongsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sk1.getSerializedSizeBytes()); + assertTrue(sk2.isEmpty()); + assertEquals(sk2.getNumRetained(), sk1.getNumRetained()); + assertEquals(sk2.getN(), sk1.getN()); + assertEquals(sk2.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + try { sk2.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sk2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + 
assertEquals(sk2.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); + + //Empty: from heap -> byte[] -> off heap + final KllLongsSketch sk3 = KllLongsSketch.wrap(Memory.wrap(bytes)); + assertTrue(sk3.isEmpty()); + assertEquals(sk3.getNumRetained(), sk1.getNumRetained()); + assertEquals(sk3.getN(), sk1.getN()); + assertEquals(sk3.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + try { sk3.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sk3.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + assertEquals(sk3.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); + //from heap -> byte[] -> off heap -> byte[] -> compare byte[] + final byte[] bytes2 = sk3.toByteArray(); + assertEquals(bytes, bytes2); + } + + @Test + public void serializeDeserializeOneValue() { + final KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(); + sk1.update(1); + + //from heap -> byte[] -> heap + final byte[] bytes = sk1.toByteArray(); + final KllLongsSketch sk2 = KllLongsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sk1.getSerializedSizeBytes()); + assertFalse(sk2.isEmpty()); + assertEquals(sk2.getNumRetained(), 1); + assertEquals(sk2.getN(), 1); + assertEquals(sk2.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 1L); + assertEquals(sk2.getSerializedSizeBytes(), Long.BYTES + Long.BYTES); + + //from heap -> byte[] -> off heap + final KllLongsSketch sk3 = KllLongsSketch.wrap(Memory.wrap(bytes)); + assertFalse(sk3.isEmpty()); + assertEquals(sk3.getNumRetained(), 1); + assertEquals(sk3.getN(), 1); + assertEquals(sk3.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + assertEquals(sk3.getMinItem(), 1L); + assertEquals(sk3.getMaxItem(), 1L); + assertEquals(sk3.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); + //from heap -> byte[] -> off heap -> byte[] -> compare byte[] + final byte[] bytes2 = 
sk3.toByteArray(); + assertEquals(bytes, bytes2); + } + + @Test + public void serializeDeserializeMultipleValues() { + final KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(); + final int n = 1000; + for (int i = 0; i < n; i++) { + sk1.update(i); + } + assertEquals(sk1.getMinItem(), 0); + assertEquals(sk1.getMaxItem(), 999L); + + //from heap -> byte[] -> heap + final byte[] bytes = sk1.toByteArray(); + final KllLongsSketch sk2 = KllLongsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sk1.getSerializedSizeBytes()); + assertFalse(sk2.isEmpty()); + assertEquals(sk2.getNumRetained(), sk1.getNumRetained()); + assertEquals(sk2.getN(), sk1.getN()); + assertEquals(sk2.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + assertEquals(sk2.getMinItem(), sk1.getMinItem()); + assertEquals(sk2.getMaxItem(), sk1.getMaxItem()); + assertEquals(sk2.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); + + //from heap -> byte[] -> off heap + final KllLongsSketch sk3 = KllLongsSketch.wrap(Memory.wrap(bytes)); + assertFalse(sk3.isEmpty()); + assertEquals(sk3.getNumRetained(), sk1.getNumRetained()); + assertEquals(sk3.getN(), sk1.getN()); + assertEquals(sk3.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + assertEquals(sk3.getMinItem(), sk1.getMinItem()); + assertEquals(sk3.getMaxItem(), sk1.getMaxItem()); + assertEquals(sk3.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); + //from heap -> byte[] -> off heap -> byte[] -> compare byte[] + final byte[] bytes2 = sk3.toByteArray(); + assertEquals(bytes, bytes2); + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllLongsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllLongsSketchTest.java new file mode 100644 index 000000000..1e4c1004a --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllLongsSketchTest.java @@ -0,0 +1,719 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static java.lang.Math.min; +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.quantilescommon.LongsSortedView; +import org.apache.datasketches.quantilescommon.LongsSortedViewIterator; +import org.testng.annotations.Test; + +public class KllLongsSketchTest { + private static final String LS = System.getProperty("line.separator"); + private static final double PMF_EPS_FOR_K_8 = KllSketch.getNormalizedRankError(8, true); + private static final double PMF_EPS_FOR_K_128 = KllSketch.getNormalizedRankError(128, true); + private static final 
double PMF_EPS_FOR_K_256 = KllSketch.getNormalizedRankError(256, true); + private static final double NUMERIC_NOISE_TOLERANCE = 1E-6; + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void empty() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + assertTrue(sketch.isEmpty()); + assertEquals(sketch.getN(), 0); + assertEquals(sketch.getNumRetained(), 0); + try { sketch.getRank(0); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getQuantile(0.5); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getQuantiles(new double[] {0}); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getPMF(new long[] {0}); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getCDF(new long[] {0}); fail(); } catch (SketchesArgumentException e) {} + assertNotNull(sketch.toString(true, true)); + assertNotNull(sketch.toString()); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantileInvalidArg() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + sketch.getQuantile(-1.0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantilesInvalidArg() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + sketch.getQuantiles(new double[] {2.0}); + } + + @Test + public void oneValue() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + assertFalse(sketch.isEmpty()); + assertEquals(sketch.getN(), 1); + assertEquals(sketch.getNumRetained(), 1); + assertEquals(sketch.getRank(0L, EXCLUSIVE), 0.0); + assertEquals(sketch.getRank(1L, EXCLUSIVE), 0.0); + assertEquals(sketch.getRank(2L, EXCLUSIVE), 1.0); + assertEquals(sketch.getRank(0L, 
INCLUSIVE), 0.0); + assertEquals(sketch.getRank(1L, INCLUSIVE), 1.0); + assertEquals(sketch.getRank(2L, INCLUSIVE), 1.0); + assertEquals(sketch.getMinItem(), 1L); + assertEquals(sketch.getMaxItem(), 1L); + assertEquals(sketch.getQuantile(0.5, EXCLUSIVE), 1L); + assertEquals(sketch.getQuantile(0.5, INCLUSIVE), 1L); + } + + @Test + public void tenValues() { //ranks and quantiles of a sketch holding the items 1..10 + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 10; i++) { sketch.update(i); } + assertFalse(sketch.isEmpty()); + assertEquals(sketch.getN(), 10); + assertEquals(sketch.getNumRetained(), 10); + for (int i = 1; i <= 10; i++) { + assertEquals(sketch.getRank(i, EXCLUSIVE), (i - 1) / 10.0); + assertEquals(sketch.getRank(i, INCLUSIVE), i / 10.0); + } + final long[] qArr = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + double[] rOut = sketch.getRanks(qArr); //inclusive + for (int i = 0; i < qArr.length; i++) { + assertEquals(rOut[i], (i + 1) / 10.0); + } + rOut = sketch.getRanks(qArr, EXCLUSIVE); //exclusive + for (int i = 0; i < qArr.length; i++) { + assertEquals(rOut[i], i / 10.0); + } + + for (int i = 0; i <= 10; i++) { //visits ranks 0.0, 0.1, ..., 1.0 + double rank = i/10.0; + double q = rank == 1.0 ? i : i + 1; //expected EXCLUSIVE quantile + assertEquals(sketch.getQuantile(rank, EXCLUSIVE), q); + q = rank == 0 ?
i + 1 : i; //expected INCLUSIVE quantile + assertEquals(sketch.getQuantile(rank, INCLUSIVE), q); + } + + { + // getQuantile() and getQuantiles() equivalence EXCLUSIVE + final long[] quantiles = + sketch.getQuantiles(new double[] {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, EXCLUSIVE); + for (int i = 0; i <= 10; i++) { + assertEquals(sketch.getQuantile(i / 10.0, EXCLUSIVE), quantiles[i]); + } + } + { + // getQuantile() and getQuantiles() equivalence INCLUSIVE + final long[] quantiles = + sketch.getQuantiles(new double[] {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, INCLUSIVE); + for (int i = 0; i <= 10; i++) { + assertEquals(sketch.getQuantile(i / 10.0, INCLUSIVE), quantiles[i]); + } + } + } + + @Test + public void manyValuesEstimationMode() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + final int n = 1_000_000; + + for (int i = 0; i < n; i++) { + sketch.update(i); + } + assertEquals(sketch.getN(), n); + + // test getRank + for (int i = 0; i < n; i++) { + final double trueRank = (double) i / n; + assertEquals(sketch.getRank(i), trueRank, PMF_EPS_FOR_K_256, "for value " + i); + } + + // test getPMF + final double[] pmf = sketch.getPMF(new long[] {n / 2}); // split at median + assertEquals(pmf.length, 2); + assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256); + assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256); + + assertEquals(sketch.getMinItem(), 0); // min value is exact + assertEquals(sketch.getMaxItem(), n - 1); // max value is exact + + // check at every 0.1 percentage point + final double[] fractions = new double[1001]; + final double[] reverseFractions = new double[1001]; // check that ordering doesn't matter + for (int i = 0; i <= 1000; i++) { + fractions[i] = (double) i / 1000; + reverseFractions[1000 - i] = fractions[i]; + } + final long[] quantiles = sketch.getQuantiles(fractions); + final long[] reverseQuantiles = sketch.getQuantiles(reverseFractions); + double previousQuantile = 0; + for (int i = 0; i <= 1000; i++) { + final double quantile = 
sketch.getQuantile(fractions[i]); + assertEquals(quantile, quantiles[i]); + assertEquals(quantile, reverseQuantiles[1000 - i]); + assertTrue(previousQuantile <= quantile); + previousQuantile = quantile; + } + } + + @Test + public void getRankGetCdfGetPmfConsistency() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + final int n = 1000; + final long[] values = new long[n]; + for (int i = 0; i < n; i++) { + sketch.update(i); + values[i] = i; + } + { // inclusive = false (default) + final double[] ranks = sketch.getCDF(values); + final double[] pmf = sketch.getPMF(values); + double sumPmf = 0; + for (int i = 0; i < n; i++) { + assertEquals(ranks[i], sketch.getRank(values[i]), NUMERIC_NOISE_TOLERANCE, + "rank vs CDF for value " + i); + sumPmf += pmf[i]; + assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF for value " + i); + } + sumPmf += pmf[n]; + assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE); + assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE); + } + { // inclusive = true + final double[] ranks = sketch.getCDF(values, INCLUSIVE); + final double[] pmf = sketch.getPMF(values, INCLUSIVE); + double sumPmf = 0; + for (int i = 0; i < n; i++) { + assertEquals(ranks[i], sketch.getRank(values[i], INCLUSIVE), NUMERIC_NOISE_TOLERANCE, + "rank vs CDF for value " + i); + sumPmf += pmf[i]; + assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF for value " + i); + } + sumPmf += pmf[n]; + assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE); + assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE); + } + } + + @Test + public void merge() { + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(); + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + sketch2.update(2 * n - i - 1); + } + + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), (n - 1)); + + assertEquals(sketch2.getMinItem(), n); + 
assertEquals(sketch2.getMaxItem(), (2 * n - 1)); + + sketch1.merge(sketch2); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2L * n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), (2 * n - 1)); + assertEquals(sketch1.getQuantile(0.5), n, 2 * n * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeLowerK() { + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(256); + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(128); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + sketch2.update(2 * n - i - 1); + } + + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1L); + + assertEquals(sketch2.getMinItem(), n); + assertEquals(sketch2.getMaxItem(), 2L * n - 1L); + + assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false)); + assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true)); + sketch1.merge(sketch2); + + // sketch1 must get "contaminated" by the lower K in sketch2 + assertEquals(sketch1.getNormalizedRankError(false), sketch2.getNormalizedRankError(false)); + assertEquals(sketch1.getNormalizedRankError(true), sketch2.getNormalizedRankError(true)); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2 * n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), 2L * n - 1L); + assertEquals(sketch1.getQuantile(0.5), n, 2L * n * PMF_EPS_FOR_K_128); + } + + @Test + public void mergeEmptyLowerK() { + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(256); + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(128); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + + // rank error should not be affected by a merge with an empty sketch with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + 
assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + assertEquals(sketch1.getQuantile(0.5), n / 2, n * PMF_EPS_FOR_K_256); + + //merge the other way + sketch2.merge(sketch1); + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + assertEquals(sketch1.getQuantile(0.5), n / 2, n * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeExactModeLowerK() { + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(256); + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(128); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + sketch2.update(1); + + // rank error should not be affected by a merge with a sketch in exact mode with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + } + + @Test + public void mergeMinMinValueFromOther() { + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(); + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(); + sketch1.update(1); + sketch2.update(2); + sketch2.merge(sketch1); + assertEquals(sketch2.getMinItem(), 1); + } + + @Test + public void mergeMinAndMaxFromOther() { + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(); + for (int i = 1; i <= 1_000_000; i++) { + sketch1.update(i); + } + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(10); + sketch2.merge(sketch1); + assertEquals(sketch2.getMinItem(), 1); + assertEquals(sketch2.getMaxItem(), 1_000_000); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void kTooSmall() { + KllLongsSketch.newHeapInstance(KllSketch.DEFAULT_M - 1); + } + + 
@Test(expectedExceptions = SketchesArgumentException.class) + public void kTooLarge() { + KllLongsSketch.newHeapInstance(KllSketch.MAX_K + 1); + } + + @Test + public void minK() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(KllSketch.DEFAULT_M); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), KllSketch.DEFAULT_M); + assertEquals(sketch.getQuantile(0.5), 500, 1000 * PMF_EPS_FOR_K_8); + } + + @Test + public void maxK() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(KllSketch.MAX_K); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), KllSketch.MAX_K); + assertEquals(sketch.getQuantile(0.5), 500, 1000 * PMF_EPS_FOR_K_256); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void outOfOrderSplitPoints() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(0); + sketch.getCDF(new long[] {1L, 0L}); + } + + @Test + public void checkReset() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n1 = sk.getN(); + double min1 = sk.getMinItem(); + double max1 = sk.getMaxItem(); + sk.reset(); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n2 = sk.getN(); + double min2 = sk.getMinItem(); + double max2 = sk.getMaxItem(); + assertEquals(n2, n1); + assertEquals(min2, min1); + assertEquals(max2, max1); + } + + @Test + public void checkReadOnlyUpdate() { + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(20); + Memory mem = Memory.wrap(sk1.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.wrap(mem); + try { sk2.update(1); fail(); } catch (SketchesArgumentException e) { } + } + + @Test + public void checkNewDirectInstanceAndSize() { + WritableMemory wmem = WritableMemory.allocate(3000); + KllLongsSketch.newDirectInstance(wmem, memReqSvr); + try { KllLongsSketch.newDirectInstance(null, memReqSvr); fail(); } + catch (NullPointerException 
e) { } + try { KllLongsSketch.newDirectInstance(wmem, null); fail(); } + catch (NullPointerException e) { } + int updateSize = KllSketch.getMaxSerializedSizeBytes(200, 0, LONGS_SKETCH, true); + int compactSize = KllSketch.getMaxSerializedSizeBytes(200, 0, LONGS_SKETCH, false); + assertTrue(compactSize < updateSize); + } + + @Test + public void sortedView() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + sk.update(3); + sk.update(1); + sk.update(2); + + LongsSortedView view = sk.getSortedView(); + LongsSortedViewIterator itr = view.iterator(); + assertEquals(itr.next(), true); + assertEquals(itr.getQuantile(), 1); + assertEquals(itr.getWeight(), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); + assertEquals(itr.next(), true); + assertEquals(itr.getQuantile(), 2); + assertEquals(itr.getWeight(), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); + assertEquals(itr.next(), true); + assertEquals(itr.getQuantile(), 3); + assertEquals(itr.getWeight(), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 2); + assertEquals(itr.getNaturalRank(INCLUSIVE), 3); + assertEquals(itr.next(), false); + } + + @Test //also visual + public void checkCDF_PDF() { + final double[] cdfI = {.25, .50, .75, 1.0, 1.0 }; + final double[] cdfE = {0.0, .25, .50, .75, 1.0 }; + final double[] pmfI = {.25, .25, .25, .25, 0.0 }; + final double[] pmfE = {0.0, .25, .25, .25, .25 }; + final double toll = 1E-10; + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + final long[] doublesIn = {10, 20, 30, 40}; + for (int i = 0; i < doublesIn.length; i++) { sketch.update(doublesIn[i]); } + long[] sp = new long[] { 10, 20, 30, 40 }; + println("SplitPoints:"); + for (int i = 0; i < sp.length; i++) { + printf("%10d", sp[i]); + } + println(""); + println("INCLUSIVE:"); + double[] cdf = sketch.getCDF(sp, INCLUSIVE); + double[] pmf = sketch.getPMF(sp, INCLUSIVE); + printf("%10s%10s" 
+ LS, "CDF", "PMF"); + for (int i = 0; i < cdf.length; i++) { + printf("%10.2f%10.2f" + LS, cdf[i], pmf[i]); + assertEquals(cdf[i], cdfI[i], toll); + assertEquals(pmf[i], pmfI[i], toll); + } + println("EXCLUSIVE"); + cdf = sketch.getCDF(sp, EXCLUSIVE); + pmf = sketch.getPMF(sp, EXCLUSIVE); + printf("%10s%10s" + LS, "CDF", "PMF"); + for (int i = 0; i < cdf.length; i++) { + printf("%10.2f%10.2f" + LS, cdf[i], pmf[i]); + assertEquals(cdf[i], cdfE[i], toll); + assertEquals(pmf[i], pmfE[i], toll); + } + } + + @Test + public void checkWrapCase1Doubles() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + + Memory mem = Memory.wrap(sk.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.wrap(mem); + + assertTrue(mem.isReadOnly()); + assertTrue(sk2.isReadOnly()); + assertFalse(sk2.isDirect()); + } + + @Test + public void checkWritableWrapCase6And2Doubles() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + + WritableMemory wmem = WritableMemory.writableWrap(KllHelper.toByteArray(sk, true)); + KllLongsSketch sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + + assertFalse(wmem.isReadOnly()); + assertFalse(sk2.isReadOnly()); + assertFalse(sk2.isDirect()); + } + + @Test + public void checkKllSketchCase5Doubles() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + + assertFalse(wmem.isReadOnly()); + assertTrue(sk2.isReadOnly()); + assertFalse(sk2.isDirect()); + } + + @Test + public void checkKllSketchCase3Doubles() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + + Memory mem = Memory.wrap(KllHelper.toByteArray(sk, true)); + WritableMemory wmem = (WritableMemory) mem; + KllLongsSketch sk2 = 
KllLongsSketch.writableWrap(wmem, memReqSvr); + + assertTrue(wmem.isReadOnly()); + assertTrue(sk2.isReadOnly()); + assertFalse(sk2.isDirect()); + } + + @Test + public void checkKllSketchCase7Doubles() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + + Memory mem = Memory.wrap(KllHelper.toByteArray(sk, true)); + WritableMemory wmem = (WritableMemory) mem; + KllLongsSketch sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + + assertTrue(wmem.isReadOnly()); + assertTrue(sk2.isReadOnly()); + assertFalse(sk2.isDirect()); + } + + @Test + public void checkReadOnlyExceptions() { + int[] intArr = new int[0]; + int intV = 2; + int idx = 1; + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(20); + Memory mem = Memory.wrap(sk1.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.wrap(mem); + try { sk2.setLevelsArray(intArr); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setLevelsArrayAt(idx,intV); fail(); } catch (SketchesArgumentException e) { } + } + + @Test + public void checkIsSameResource() { + int cap = 128; + WritableMemory wmem = WritableMemory.allocate(cap); + WritableMemory reg1 = wmem.writableRegion(0, 64); + WritableMemory reg2 = wmem.writableRegion(64, 64); + assertFalse(reg1 == reg2); + assertFalse(reg1.isSameResource(reg2)); + + WritableMemory reg3 = wmem.writableRegion(0, 64); + assertFalse(reg1 == reg3); + assertTrue(reg1.isSameResource(reg3)); + + byte[] byteArr1 = KllLongsSketch.newHeapInstance(20).toByteArray(); + reg1.putByteArray(0, byteArr1, 0, byteArr1.length); + KllLongsSketch sk1 = KllLongsSketch.wrap(reg1); + + byte[] byteArr2 = KllLongsSketch.newHeapInstance(20).toByteArray(); + reg2.putByteArray(0, byteArr2, 0, byteArr2.length); + assertFalse(sk1.isSameResource(reg2)); + + byte[] byteArr3 = KllLongsSketch.newHeapInstance(20).toByteArray(); + reg3.putByteArray(0, byteArr3, 0, byteArr3.length); + assertTrue(sk1.isSameResource(reg3)); + } + + @Test + public void 
checkSortedViewAfterReset() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + sk.update(1L); + LongsSortedView sv = sk.getSortedView(); + long dsv = sv.getQuantile(1.0, INCLUSIVE); + assertEquals(dsv, 1L); + sk.reset(); + try { sk.getSortedView(); fail(); } catch (SketchesArgumentException e) { } + } + + @Test + public void checkVectorUpdate() { + boolean withLevels = false; + boolean withLevelsAndItems = true; + int k = 20; + int n = 108; + int maxVsz = 40; //max vector size + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + int j = 1; + int rem; + while ((rem = n - j + 1) > 0) { + int vecSz = min(rem, maxVsz); + long[] v = new long[vecSz]; + for (int i = 0; i < vecSz; i++) { v[i] = j++; } + sk.update(v, 0, vecSz); + } + println(LS + "#<<< END STATE # >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + println(""); + assertEquals(sk.getN(), 108); + assertEquals(sk.getMaxItem(), 108L); + assertEquals(sk.getMinItem(), 1L); + } + + @Test + public void vectorizedUpdates() { + final int trials = 1; + final int M = 1; //number of vectors + final int N = 1000; //vector size + final int K = 256; + final long[] values = new long[N]; + long vIn = 1L; + long totN = 0; + final long startTime = System.nanoTime(); + for (int t = 0; t < trials; t++) { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(K); + for (int m = 0; m < M; m++) { + for (int n = 0; n < N; n++) { + values[n] = vIn++; //fill vector + } + sketch.update(values, 0, N); //vector input + } + totN = sketch.getN(); + assertEquals(totN, M * N); + assertEquals(sketch.getMinItem(), 1L); + assertEquals(sketch.getMaxItem(), totN); + assertEquals(sketch.getQuantile(0.5), totN / 2, totN * PMF_EPS_FOR_K_256 * 2.0); //wider tolerance + } + final long runTime = System.nanoTime() - startTime; + println("Vectorized Updates"); + printf(" Vector size : %,12d" + LS, N); + printf(" Num Vectors : %,12d" + LS, M); + printf(" Total Input : %,12d" + LS, totN); + printf(" Run Time mS : 
%,12.3f" + LS, runTime / 1e6); + final double trialTime = runTime / (1e6 * trials); + printf(" mS / Trial : %,12.3f" + LS, trialTime); + final double updateTime = runTime / (1.0 * totN * trials); + printf(" nS / Update : %,12.3f" + LS, updateTime); + } + + @Test + public void nonVectorizedUpdates() { + final int trials = 1; + final int M = 1; //number of vectors + final int N = 1000; //vector size + final int K = 256; + final long[] values = new long[N]; + long vIn = 1L; + long totN = 0; + final long startTime = System.nanoTime(); + for (int t = 0; t < trials; t++) { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(K); + for (int m = 0; m < M; m++) { + for (int n = 0; n < N; n++) { + values[n] = vIn++; //fill vector + } + for (int i = 0; i < N; i++) { + sketch.update(values[i]); //single item input + } + } + totN = sketch.getN(); + assertEquals(totN, M * N); + assertEquals(sketch.getMinItem(), 1L); + assertEquals(sketch.getMaxItem(), totN); + assertEquals(sketch.getQuantile(0.5), totN / 2, totN * PMF_EPS_FOR_K_256 * 2.0); //wider tolerance + } + final long runTime = System.nanoTime() - startTime; + println("Vectorized Updates"); + printf(" Vector size : %,12d" + LS, N); + printf(" Num Vectors : %,12d" + LS, M); + printf(" Total Input : %,12d" + LS, totN); + printf(" Run Time mS : %,12.3f" + LS, runTime / 1e6); + final double trialTime = runTime / (1e6 * trials); + printf(" mS / Trial : %,12.3f" + LS, trialTime); + final double updateTime = runTime / (1.0 * totN * trials); + printf(" nS / Update : %,12.3f" + LS, updateTime); + } + + private final static boolean enablePrinting = false; + + /** + * @param format the format + * @param args the args + */ + private static final void printf(final String format, final Object ...args) { + if (enablePrinting) { System.out.printf(format, args); } + } + + /** + * @param o the Object to println + */ + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + 
} +} diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscDirectLongsTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscDirectLongsTest.java new file mode 100644 index 000000000..366f93f7d --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllMiscDirectLongsTest.java @@ -0,0 +1,459 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.quantilescommon.LongsSortedView; +import org.apache.datasketches.quantilescommon.LongsSortedViewIterator; +import org.testng.annotations.Test; + +public class KllMiscDirectLongsTest { + static final String LS = System.getProperty("line.separator"); + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void checkBounds() { + final KllLongsSketch kll = getDirectLongsSketch(200, 0); + for (int i = 0; i < 1000; i++) { + kll.update(i); + } + final double eps = kll.getNormalizedRankError(false); + final long est = kll.getQuantile(0.5); + final long ub = kll.getQuantileUpperBound(0.5); + final long lb = kll.getQuantileLowerBound(0.5); + assertEquals(ub, kll.getQuantile(.5 + eps)); + assertEquals(lb, kll.getQuantile(0.5 - eps)); + println("Ext : " + est); + println("UB : " + ub); + println("LB : " + lb); + final double rest = kll.getRank(est); + final double restUB = kll.getRankUpperBound(rest); + final double restLB = kll.getRankLowerBound(rest); + assertTrue(restUB - rest < (2 * eps)); + assertTrue(rest - restLB < (2 * eps)); + } + + //@Test //enable static println(..) 
for visual checking + public void visualCheckToString() { + final int k = 20; + final KllLongsSketch sk = getDirectLongsSketch(k, 0); + for (int i = 0; i < 10; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + + final KllLongsSketch sk2 = getDirectLongsSketch(k, 0); + for (int i = 0; i < 400; i++) { sk2.update(i + 1); } + println("\n" + sk2.toString(true, true)); + + sk2.merge(sk); + final String s2 = sk2.toString(true, true); + println(LS + s2); + } + + @Test + public void viewDirectCompactions() { + int k = 20; + int u = 108; + KllLongsSketch sk = getDirectLongsSketch(k, 0); + for (int i = 1; i <= u; i++) { + sk.update(i); + if (sk.levelsArr[0] == 0) { + println(sk.toString(true, true)); + sk.update(++i); + println(sk.toString(true, true)); + assertEquals(sk.getLongItemsArray()[sk.levelsArr[0]], i); + } + } + } + + @Test + public void viewCompactionAndSortedView() { + int k = 20; + KllLongsSketch sk = getDirectLongsSketch(k, 0); + show(sk, 20); + LongsSortedView sv = sk.getSortedView(); + LongsSortedViewIterator itr = sv.iterator(); + printf("%12s%12s\n", "Value", "CumWeight"); + while (itr.next()) { + long v = itr.getQuantile(); + long wt = itr.getWeight(); + printf("%12d%12d\n", v, wt); + } + } + + private static void show(final KllLongsSketch sk, int limit) { + int i = (int) sk.getN(); + for ( ; i < limit; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + } + + @Test + public void checkSketchInitializeLongHeap() { + int k = 20; //don't change this + KllLongsSketch sk; + + //println("#### CASE: LONG FULL HEAP"); + sk = getDirectLongsSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + 
assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: LONG HEAP EMPTY"); + sk = getDirectLongsSketch(k, 0); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: LONG HEAP SINGLE"); + sk = getDirectLongsSketch(k, 0); + sk.update(1); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeLongHeapifyCompactMem() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + //println("#### CASE: LONG FULL HEAPIFIED FROM COMPACT"); + sk2 = getDirectLongsSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = 
KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: LONG EMPTY HEAPIFIED FROM COMPACT"); + sk2 = getDirectLongsSketch(k, 0); + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: LONG SINGLE HEAPIFIED FROM COMPACT"); + sk2 = getDirectLongsSketch(k, 0); + sk2.update(1); + //println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + 
assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeLongHeapifyUpdatableMem() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + //println("#### CASE: LONG FULL HEAPIFIED FROM UPDATABLE"); + sk2 = getDirectLongsSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2,true); + wmem = WritableMemory.writableWrap(compBytes); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + // println("#### CASE: LONG EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = getDirectLongsSketch(k, 0); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + 
assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: LONG SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = getDirectLongsSketch(k, 0); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2,true); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkMemoryToStringLongUpdatable() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] upBytes; + byte[] upBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: LONG FULL UPDATABLE"); + sk = getDirectLongsSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: LONG EMPTY UPDATABLE"); + sk = getDirectLongsSketch(k, 0); + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: LONG SINGLE UPDATABL"); + sk = getDirectLongsSketch(k, 0); + sk.update(1); + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + } + + @Test + public void checkSimpleMerge() { + int k = 20; + int n1 = 21; + int n2 = 21; + KllLongsSketch sk1 = getDirectLongsSketch(k, 0); + KllLongsSketch sk2 = getDirectLongsSketch(k, 0); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println(sk1.toString(true, true)); + println(sk2.toString(true, true)); + sk1.merge(sk2); + println(sk1.toString(true, true)); + assertEquals(sk1.getMaxItem(), 121L); + assertEquals(sk1.getMinItem(), 1L); + } + + @Test + public void checkSizes() { + KllLongsSketch sk = getDirectLongsSketch(20, 0); + for (int i = 1; i <= 21; i++) { sk.update(i); } + //println(sk.toString(true, true)); + byte[] byteArr1 = KllHelper.toByteArray(sk, true); + int size1 = sk.currentSerializedSizeBytes(true); + assertEquals(size1, byteArr1.length); + byte[] byteArr2 = sk.toByteArray(); + int size2 = sk.currentSerializedSizeBytes(false); + assertEquals(size2, byteArr2.length); + } + + @Test + public void checkNewInstance() { + int k = 200; + WritableMemory dstMem = WritableMemory.allocate(3000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(k, dstMem, memReqSvr); + for (int i = 1; i <= 10_000; i++) {sk.update(i); } + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getMaxItem(), 10000L); + //println(sk.toString(true, true)); + } + + @Test + public void checkDifferentM() { + int k = 20; + int m = 4; + WritableMemory dstMem = WritableMemory.allocate(1000); + KllLongsSketch sk = KllDirectLongsSketch.newDirectUpdatableInstance(k, m, dstMem, memReqSvr); + for (int i = 1; i <= 200; i++) {sk.update(i); } + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getMaxItem(), 200L); + } + + private static KllLongsSketch getDirectLongsSketch(final int k, final int n) { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = 
KllHelper.toByteArray(sk, true); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + KllLongsSketch dfsk = KllLongsSketch.writableWrap(wmem, memReqSvr); + return dfsk; + } + + @Test + public void printlnTest() { + String s = "PRINTING: printf in " + this.getClass().getName(); + println(s); + printf("%s\n", s); + } + + private final static boolean enablePrinting = false; + + /** + * @param format the format + * @param args the args + */ + private static final void printf(final String format, final Object ...args) { + if (enablePrinting) { System.out.printf(format, args); } + } + + /** + * @param o the Object to println + */ + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java index e58c27419..4ce988d22 100644 --- a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java @@ -100,8 +100,8 @@ public void checkHeapifyExceptions2() { @Test(expectedExceptions = SketchesArgumentException.class) public void checkHeapifyExceptions3() { KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(); - sk.update(1.0f); - sk.update(2.0f); + sk.update(1.0); + sk.update(2.0); WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); wmem.putByte(0, (byte) 1); //corrupt preamble ints, should be 5 KllDoublesSketch.heapify(wmem); diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscItemsTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscItemsTest.java index acf3343d9..5f51a7f1a 100644 --- a/src/test/java/org/apache/datasketches/kll/KllMiscItemsTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMiscItemsTest.java @@ -293,7 +293,7 @@ public void checkSketchInitializeItemsHeap() { final int digits = Util.numDigits(n); KllItemsSketch sk; - 
println("#### CASE: FLOAT FULL HEAP"); + println("#### CASE: ITEM FULL HEAP"); sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); for (int i = 1; i <= n; i++) { sk.update(Util.longToFixedLengthString(i, digits)); } println(sk.toString(true, true)); @@ -310,7 +310,7 @@ public void checkSketchInitializeItemsHeap() { assertEquals(sk.getNumLevels(), 2); assertFalse(sk.isLevelZeroSorted()); - println("#### CASE: FLOAT HEAP EMPTY"); + println("#### CASE: ITEM HEAP EMPTY"); sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); println(sk.toString(true, true)); assertEquals(sk.getK(), k); @@ -326,7 +326,7 @@ public void checkSketchInitializeItemsHeap() { assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); - println("#### CASE: FLOAT HEAP SINGLE"); + println("#### CASE: ITEM HEAP SINGLE"); sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); sk.update("1"); println(sk.toString(true, true)); @@ -354,7 +354,7 @@ public void checkSketchInitializeItemsHeapifyCompactMem() { byte[] compBytes; Memory mem; - println("#### CASE: FLOAT FULL HEAPIFIED FROM COMPACT"); + println("#### CASE: ITEM FULL HEAPIFIED FROM COMPACT"); sk2 = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); for (int i = 1; i <= n; i++) { sk2.update(Util.longToFixedLengthString(i, digits)); } println(sk2.toString(true, true)); @@ -375,7 +375,7 @@ public void checkSketchInitializeItemsHeapifyCompactMem() { assertEquals(sk.getNumLevels(), 2); assertFalse(sk.isLevelZeroSorted()); - println("#### CASE: FLOAT EMPTY HEAPIFIED FROM COMPACT"); + println("#### CASE: ITEM EMPTY HEAPIFIED FROM COMPACT"); sk2 = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); //println(sk.toString(true, true)); compBytes = sk2.toByteArray(); @@ -395,7 +395,7 @@ public void checkSketchInitializeItemsHeapifyCompactMem() { assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); - println("#### CASE: FLOAT 
SINGLE HEAPIFIED FROM COMPACT"); + println("#### CASE: ITEM SINGLE HEAPIFIED FROM COMPACT"); sk2 = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); sk2.update("1"); //println(sk2.toString(true, true)); @@ -417,7 +417,7 @@ public void checkSketchInitializeItemsHeapifyCompactMem() { assertFalse(sk.isLevelZeroSorted()); } - //public void checkSketchInitializeFloatHeapifyUpdatableMem() Not Supported + //public void checkSketchInitializeItemHeapifyUpdatableMem() Not Supported @Test //set static enablePrinting = true for visual checking public void checkMemoryToStringItemsCompact() { @@ -431,7 +431,7 @@ public void checkMemoryToStringItemsCompact() { Memory mem; String s; - println("#### CASE: FLOAT FULL COMPACT"); + println("#### CASE: ITEM FULL COMPACT"); sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); for (int i = 1; i <= n; i++) { sk.update(Util.longToFixedLengthString(i, digits)); } compBytes = sk.toByteArray(); @@ -447,7 +447,7 @@ public void checkMemoryToStringItemsCompact() { println(s); assertEquals(compBytes, compBytes2); - println("#### CASE: FLOAT EMPTY COMPACT"); + println("#### CASE: ITEM EMPTY COMPACT"); sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); compBytes = sk.toByteArray(); mem = Memory.wrap(compBytes); @@ -462,7 +462,7 @@ public void checkMemoryToStringItemsCompact() { println(s); assertEquals(compBytes, compBytes2); - println("#### CASE: FLOAT SINGLE COMPACT"); + println("#### CASE: ITEM SINGLE COMPACT"); sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); sk.update("1"); compBytes = sk.toByteArray(); diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscLongsTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscLongsTest.java new file mode 100644 index 000000000..018ad91db --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllMiscLongsTest.java @@ -0,0 +1,790 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one 
+ * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.kll.KllDirectLongsSketch.KllDirectCompactLongsSketch; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.MemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.quantilescommon.LongsSortedView; +import org.apache.datasketches.quantilescommon.LongsSortedViewIterator; +import org.testng.annotations.Test; + +import static org.apache.datasketches.common.Util.LS; +import static org.apache.datasketches.common.Util.bitAt; +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +/** + * @author Lee Rhodes + */ +public class KllMiscLongsTest { + private final MemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void 
checkSortedViewConstruction() { + final KllLongsSketch kll = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 20; i++) { kll.update(i); } + LongsSortedView fsv = kll.getSortedView(); + long[] cumWeights = fsv.getCumulativeWeights(); + long[] values = fsv.getQuantiles(); + assertEquals(cumWeights.length, 20); + assertEquals(values.length, 20); + for (int i = 0; i < 20; i++) { + assertEquals(cumWeights[i], i + 1); + assertEquals(values[i], i + 1); + } + } + + @Test //set static enablePrinting = true for visual checking + public void checkBounds() { + final KllLongsSketch kll = KllLongsSketch.newHeapInstance(); //default k = 200 + for (int i = 0; i < 1000; i++) { + kll.update(i); + } + final double eps = kll.getNormalizedRankError(false); + final long est = kll.getQuantile(0.5); + final long ub = kll.getQuantileUpperBound(0.5); + final long lb = kll.getQuantileLowerBound(0.5); + assertEquals(ub, kll.getQuantile(.5 + eps)); + assertEquals(lb, kll.getQuantile(0.5 - eps)); + println("Ext : " + est); + println("UB : " + ub); + println("LB : " + lb); + final double rest = kll.getRank(est); + final double restUB = kll.getRankUpperBound(rest); + final double restLB = kll.getRankLowerBound(rest); + assertTrue(restUB - rest < (2 * eps)); + assertTrue(rest - restLB < (2 * eps)); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyExceptions1() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + wmem.putByte(6, (byte) 3); //corrupt with odd M + KllLongsSketch.heapify(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyExceptions2() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + wmem.putByte(0, (byte) 1); //corrupt preamble ints, should be 2 + KllLongsSketch.heapify(wmem); + } + + @Test(expectedExceptions = 
SketchesArgumentException.class) + public void checkHeapifyExceptions3() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + sk.update(1); + sk.update(2); + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + wmem.putByte(0, (byte) 1); //corrupt preamble ints, should be 5 + KllLongsSketch.heapify(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyExceptions4() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + wmem.putByte(1, (byte) 0); //corrupt SerVer, should be 1 or 2 + KllLongsSketch.heapify(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyExceptions5() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + wmem.putByte(2, (byte) 0); //corrupt FamilyID, should be 15 + KllLongsSketch.heapify(wmem); + } + + @Test //set static enablePrinting = true for visual checking + public void checkMisc() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(8); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} //empty + println(sk.toString(true, true)); + for (int i = 0; i < 20; i++) { sk.update(i); } + println(sk.toString(true, true)); + sk.toByteArray(); + final long[] items = sk.getLongItemsArray(); + assertEquals(items.length, 16); + final int[] levels = sk.getLevelsArray(sk.sketchStructure); + assertEquals(levels.length, 3); + assertEquals(sk.getNumLevels(), 2); + } + + @Test //set static enablePrinting = true for visual checking + public void visualCheckToString() { + final KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + int n = 21; + for (int i = 1; i <= n; i++) { sk.update(i); } + println(sk.toString(true, true)); + assertEquals(sk.getNumLevels(), 2); + assertEquals(sk.getMinItem(), 1); + assertEquals(sk.getMaxItem(), 21); + 
assertEquals(sk.getNumRetained(), 11); + + final KllLongsSketch sk2 = KllLongsSketch.newHeapInstance(20); + n = 400; + for (int i = 101; i <= n + 100; i++) { sk2.update(i); } + println(LS + sk2.toString(true, true)); + assertEquals(sk2.getNumLevels(), 5); + assertEquals(sk2.getMinItem(), 101); + assertEquals(sk2.getMaxItem(), 500); + assertEquals(sk2.getNumRetained(), 52); + + sk2.merge(sk); + println(LS + sk2.toString(true, true)); + assertEquals(sk2.getNumLevels(), 5); + assertEquals(sk2.getMinItem(), 1); + assertEquals(sk2.getMaxItem(), 500); + assertEquals(sk2.getNumRetained(), 56); + } + + @Test //set static enablePrinting = true for visual checking + public void viewHeapCompactions() { + int k = 20; + int n = 108; + boolean withLevels = false; + boolean withLevelsAndItems = true; + int compaction = 0; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= n; i++) { + sk.update(i); + if (sk.levelsArr[0] == 0) { + println(LS + "#<<< BEFORE COMPACTION # " + (++compaction) + " >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + sk.update(++i); + println(LS + "#<<< AFTER COMPACTION # " + (compaction) + " >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + assertEquals(sk.getLongItemsArray()[sk.levelsArr[0]], i); + } + } + println(LS + "#<<< END STATE # >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + println(""); + } + + @Test //set static enablePrinting = true for visual checking + public void viewDirectCompactions() { + int k = 20; + int n = 108; + boolean withLevels = false; + boolean withLevelsAndItems = true; + int compaction = 0; + int sizeBytes = KllSketch.getMaxSerializedSizeBytes(k, n, LONGS_SKETCH, true); + WritableMemory wmem = WritableMemory.allocate(sizeBytes); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(k, wmem, memReqSvr); + for (int i = 1; i <= n; i++) { + sk.update(i); + if (sk.levelsArr[0] == 0) { + println(LS + "#<<< BEFORE COMPACTION # " + (++compaction) + " >>>"); + 
println(sk.toString(withLevels, withLevelsAndItems)); + sk.update(++i); + println(LS + "#<<< AFTER COMPACTION # " + (compaction) + " >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + assertEquals(sk.getLongItemsArray()[sk.levelsArr[0]], i); + } + } + println(LS + "#<<< END STATE # >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + println(""); + } + + @Test //set static enablePrinting = true for visual checking + public void viewCompactionAndSortedView() { + int n = 43; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= n; i++) { sk.update(i); } + println(sk.toString(true, true)); + LongsSortedView sv = sk.getSortedView(); + LongsSortedViewIterator itr = sv.iterator(); + println("### SORTED VIEW"); + printf("%6s %12s %12s" + LS, "Idx", "Value", "Weight"); + int i = 0; + while (itr.next()) { + long v = itr.getQuantile(); + long wt = itr.getWeight(); + printf("%6d %12d %12d" + LS, i, v, wt); + i++; + } + assertEquals(sv.getMinItem(), 1L); + assertEquals(sv.getMaxItem(), n); + } + + @Test //set static enablePrinting = true for visual checking + public void checkWeightedUpdates1() { + int k = 20; + int weight = 127; + long item = 10; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + println(sk.toString(true, true)); + sk.update(item, weight); + println(sk.toString(true, true)); + assertEquals(sk.getNumRetained(), 7); + assertEquals(sk.getN(), weight); + sk.update(item, weight); + println(sk.toString(true, true)); + assertEquals(sk.getNumRetained(), 14); + assertEquals(sk.getN(), 254); + } + + @Test //set static enablePrinting = true for visual checking + public void checkWeightedUpdates2() { + int k = 20; + int initial = 1000; + int weight = 127; + long item = 10; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= initial; i++) { sk.update(i + 1000); } + println(sk.toString(true, true)); + sk.update(item, weight); + println(sk.toString(true, true)); + 
assertEquals(sk.getNumRetained(), 65); + assertEquals(sk.getN(), 1127); + + LongsSortedViewIterator itr = sk.getSortedView().iterator(); + println("### SORTED VIEW"); + printf("%12s %12s %12s" + LS, "Value", "Weight", "NaturalRank"); + long cumWt = 0; + while (itr.next()) { + long v = itr.getQuantile(); + long wt = itr.getWeight(); + long natRank = itr.getNaturalRank(INCLUSIVE); + cumWt += wt; + assertEquals(cumWt, natRank); + printf("%12d %12d %12d" + LS, v, wt, natRank); + } + assertEquals(cumWt, sk.getN()); + } + + @Test //set static enablePrinting = true for visual checking + public void checkCreateItemsArray() { //used with weighted updates + long item = 10; + int weight = 108; + long[] itemsArr = KllLongsHelper.createItemsArray(item, weight); + assertEquals(itemsArr.length, 4); + for (int i = 0; i < itemsArr.length; i++) { itemsArr[i] = item; } + outputItems(itemsArr); + } + + private static void outputItems(long[] itemsArr) { + String[] hdr2 = {"Index", "Value"}; + String hdr2fmt = "%6s %15s" + LS; + String d2fmt = "%6d %15d" + LS; + println("ItemsArr"); + printf(hdr2fmt, (Object[]) hdr2); + for (int i = 0; i < itemsArr.length; i++) { + printf(d2fmt, i, itemsArr[i]); + } + println(""); + } + + @Test //set static enablePrinting = true for visual checking + public void checkCreateLevelsArray() { //used with weighted updates + int weight = 108; + int[] levelsArr = KllHelper.createLevelsArray(weight); + assertEquals(levelsArr.length, 8); + int[] correct = {0,0,0,1,2,2,3,4}; + for (int i = 0; i < levelsArr.length; i++) { + assertEquals(levelsArr[i], correct[i]); + } + outputLevels(weight, levelsArr); + } + + private static void outputLevels(int weight, int[] levelsArr) { + String[] hdr = {"Lvl", "StartAdr", "BitPattern", "Weight"}; + String hdrfmt = "%3s %9s %10s %s" + LS; + String dfmt = "%3d %9d %10d %d" + LS; + String dfmt_2 = "%3d %9d %s" + LS; + println("Count = " + weight + " => " + (Integer.toBinaryString(weight))); + println("LevelsArr"); + printf(hdrfmt, 
(Object[]) hdr); + for (int i = 0; i < levelsArr.length; i++) { + if (i == levelsArr.length - 1) { printf(dfmt_2, i, levelsArr[i], "ItemsArr.length"); } + else { + int j = bitAt(weight, i); + printf(dfmt, i, levelsArr[i], j, 1 << (i)); + } + } + println(""); + } + + @Test + public void viewMemorySketchData() { + int k = 20; + int n = 109; + boolean withLevels = true; + boolean withLevelsAndItems = true; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = sk.toByteArray(); + Memory mem = Memory.wrap(byteArr); + KllLongsSketch fltSk = KllLongsSketch.wrap(mem); + println(fltSk.toString(withLevels, withLevelsAndItems)); + assertEquals(fltSk.getN(), n); + } + + @Test //set static enablePrinting = true for visual checking + public void checkIntCapAux() { + String[] hdr = {"level", "depth", "wt", "cap", "(end)", "MaxN"}; + String hdrFmt = "%6s %6s %28s %10s %10s %34s" + LS; + String dataFmt = "%6d %6d %,28d %,10d %,10d %,34d" + LS; + int k = 1000; + int m = 8; + int numLevels = 20; + println("k=" + k + ", m=" + m + ", numLevels=" + numLevels); + printf(hdrFmt, (Object[]) hdr); + long maxN = 0; + long[] correct = {0,1,1,2,2,3,5,8,12,17,26,39,59,88,132,198,296,444,667,1000}; + for (int i = 0; i < numLevels; i++) { + int depth = numLevels - i - 1; + long cap = KllHelper.intCapAux(k, depth); + long end = Math.max(m, cap); + long wt = 1L << i; + maxN += wt * end; + printf(dataFmt, i, depth, wt, cap, end, maxN); + assertEquals(cap, correct[i]); + } + } + + @Test //set static enablePrinting = true for visual checking + public void checkIntCapAuxAux() { + String[] hdr = {"d","twoK","2k*2^d","3^d","tmp=2k*2^d/3^d","(tmp + 1)/2", "(end)"}; + String hdrFmt = "%6s %10s %20s %20s %15s %12s %10s" + LS; + String dataFmt = "%6d %10d %,20d %,20d %15d %12d %10d" + LS; + long k = (1L << 16) - 1L; + long m = 8; + println("k = " + k + ", m = " + m); + printf(hdrFmt, (Object[]) hdr); + long[] correct = + 
{65535,43690,29127,19418,12945,8630,5753,3836,2557,1705,1136,758,505,337,224,150,100,67,44,30,20,13,9,6,4,3,2,1,1,1,0}; + for (int i = 0; i < 31; i++) { + long twoK = k << 1; + long twoKxtwoD = twoK << i; + long threeToD = KllHelper.powersOfThree[i]; + long tmp = twoKxtwoD / threeToD; + long result = (tmp + 1L) >>> 1; + long end = Math.max(m, result); //performed later + printf(dataFmt, i, twoK, twoKxtwoD, threeToD, tmp, result, end); + assertEquals(result,correct[i]); + assertEquals(result, KllHelper.intCapAuxAux(k, i)); + } + } + + @Test + public void checkGrowLevels() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + assertEquals(sk.getNumLevels(), 2); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure)[2], 33); + } + + @Test //set static enablePrinting = true for visual checking + public void checkSketchInitializeLongHeap() { + int k = 20; //don't change this + KllLongsSketch sk; + + println("#### CASE: LONG FULL HEAP"); + sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG HEAP EMPTY"); + sk = KllLongsSketch.newHeapInstance(k); + println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + 
assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG HEAP SINGLE"); + sk = KllLongsSketch.newHeapInstance(k); + sk.update(1); + println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test //set static enablePrinting = true for visual checking + public void checkSketchInitializeLongHeapifyCompactMem() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: LONG FULL HEAPIFIED FROM COMPACT"); + sk2 = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21); + assertEquals(sk.getMinItem(), 1); + 
assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG EMPTY HEAPIFIED FROM COMPACT"); + sk2 = KllLongsSketch.newHeapInstance(k); + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG SINGLE HEAPIFIED FROM COMPACT"); + sk2 = KllLongsSketch.newHeapInstance(k); + sk2.update(1); + //println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test //set static enablePrinting = true for visual checking + public void checkSketchInitializeLongHeapifyUpdatableMem() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + 
WritableMemory wmem; + + println("#### CASE: LONG FULL HEAPIFIED FROM UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21); + assertEquals(sk.getMinItem(), 1); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = 
WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test //set static enablePrinting = true for visual checking + public void checkMemoryToStringLongCompact() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + byte[] compBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: LONG FULL COMPACT"); + sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(compBytes, compBytes2); + + println("#### CASE: LONG EMPTY COMPACT"); + sk = KllLongsSketch.newHeapInstance(k); + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(compBytes, compBytes2); + + println("#### CASE: LONG SINGLE COMPACT"); + sk = KllLongsSketch.newHeapInstance(k); + sk.update(1); + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(compBytes, compBytes2); + } + + @Test //set static enablePrinting = true for visual checking + public void checkMemoryToStringLongUpdatable() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] upBytes; + byte[] upBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: LONG FULL UPDATABLE"); + sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllHeapLongsSketch.heapifyImpl(wmem); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); //note: heapify does not copy free space, while toUpdatableByteArray does + assertEquals(sk.getN(), sk2.getN()); + assertEquals(sk.getMinItem(), sk2.getMinItem()); + assertEquals(sk.getMaxItem(), sk2.getMaxItem()); + assertEquals(sk.getNumRetained(), sk2.getNumRetained()); + + println("#### CASE: LONG EMPTY UPDATABLE"); + sk = KllLongsSketch.newHeapInstance(k); + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllHeapLongsSketch.heapifyImpl(wmem); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: LONG SINGLE UPDATABLE"); + sk = KllLongsSketch.newHeapInstance(k); + sk.update(1); + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllHeapLongsSketch.heapifyImpl(wmem); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + } + + @Test + public void checkSimpleMerge() { + int k = 20; + int m = 8; + int n1 = 21; + int n2 = 43; + WritableMemory wmem = WritableMemory.allocate(3000); + WritableMemory wmem2 = WritableMemory.allocate(3000); + + KllLongsSketch sk1 = KllDirectLongsSketch.newDirectUpdatableInstance(k, m, wmem, memReqSvr); + KllLongsSketch sk2 = KllDirectLongsSketch.newDirectUpdatableInstance(k, m, wmem2, memReqSvr); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + sk1.merge(sk2); + assertEquals(sk1.getMinItem(), 1L); + assertEquals(sk1.getMaxItem(), 143L); + } + + @Test + public void checkGetSingleItem() { + int k = 20; + KllLongsSketch skHeap = KllLongsSketch.newHeapInstance(k); + skHeap.update(1); + assertTrue(skHeap instanceof KllHeapLongsSketch); + assertEquals(skHeap.getLongSingleItem(), 1L); + + WritableMemory srcMem = WritableMemory.writableWrap(KllHelper.toByteArray(skHeap, true)); + KllLongsSketch skDirect = KllLongsSketch.writableWrap(srcMem, memReqSvr); + assertTrue(skDirect instanceof KllDirectLongsSketch); + assertEquals(skDirect.getLongSingleItem(), 1L); + + Memory srcMem2 = Memory.wrap(skHeap.toByteArray()); + KllLongsSketch skCompact = 
KllLongsSketch.wrap(srcMem2); + assertTrue(skCompact instanceof KllDirectCompactLongsSketch); + assertEquals(skCompact.getLongSingleItem(), 1L); + } + + @Test + public void printlnTest() { + String s = "PRINTING: printf in " + this.getClass().getName(); + println(s); + printf("%s" + LS, s); + } + + private final static boolean enablePrinting = false; + + /** + * @param format the format + * @param args the args + */ + private static final void printf(final String format, final Object ... args) { + if (enablePrinting) { System.out.printf(format, args); } + } + + /** + * @param o the Object to println + */ + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + +} diff --git a/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java b/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java index bbf89f338..636105ef8 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java @@ -23,10 +23,14 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; +import java.nio.ByteOrder; import java.util.HashSet; import org.testng.annotations.Test; -import org.apache.datasketches.memory.WritableHandle; + +import jdk.incubator.foreign.ResourceScope; + +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.quantilescommon.QuantilesDoublesSketchIterator; @@ -61,8 +65,8 @@ public void test() { DoublesSketch.setRandom(1); //make deterministic for test DoublesUnion dUnion; DoublesSketch dSketch; - try ( WritableHandle wdh = WritableMemory.allocateDirect(10_000_000) ) { - WritableMemory wmem = wdh.getWritable(); + WritableMemory wmem; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(10_000_000)).scope()) { dUnion = 
DoublesUnion.builder().setMaxK(8).build(wmem); for (int s = 0; s < numSketches; s++) { dUnion.union(sketchArr[s]); } dSketch = dUnion.getResult(); //result is on heap diff --git a/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java b/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java index 5c2882c5e..d896bbefa 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java @@ -30,9 +30,10 @@ import org.testng.annotations.Test; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; +import jdk.incubator.foreign.ResourceScope; + /** * The concept for these tests is that the "MemoryManager" classes below are proxies for the * implementation that owns the native memory allocations, thus is responsible for @@ -44,38 +45,35 @@ public class DirectQuantilesMemoryRequestTest { public void checkLimitedMemoryScenarios() { //Requesting application final int k = 128; final int u = 40 * k; - final int initBytes = ((2 * k) + 4) << 3; //just the BB + final int initBytes = ((2 * k) + 4) << 3; //just the BaseBuffer //########## Owning Implementation - // This part would actually be part of the Memory owning implemention so it is faked here - try (WritableHandle wdh = WritableMemory.allocateDirect(initBytes, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - final WritableMemory wmem = wdh.getWritable(); - println("Initial mem size: " + wmem.getCapacity()); - - //########## Receiving Application - // The receiving application has been given wmem to use for a sketch, - // but alas, it is not ultimately large enough. 
- final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem); - assertTrue(usk1.isEmpty()); - - //Load the sketch - for (int i = 0; i < u; i++) { - // The sketch uses The MemoryRequest, acquired from wmem, to acquire more memory as - // needed, and requests via the MemoryRequest to free the old allocations. - usk1.update(i); - } - final double result = usk1.getQuantile(0.5); - println("Result: " + result); - assertEquals(result, u / 2.0, 0.05 * u); //Success - - //########## Owning Implementation - //The actual Memory has been re-allocated several times, - // so the above wmem reference is invalid. - println("\nFinal mem size: " + wmem.getCapacity()); - } catch (Exception e) { - throw new RuntimeException(e); + // This part would actually be part of the Memory owning implementation so it is faked here + WritableMemory wmem = WritableMemory.allocateDirect(initBytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + WritableMemory wmem2 = wmem; + println("Initial mem size: " + wmem.getCapacity()); + + //########## Receiving Application + // The receiving application has been given wmem to use for a sketch, + // but alas, it is not ultimately large enough. + final UpdateDoublesSketch usk = DoublesSketch.builder().setK(k).build(wmem); + assertTrue(usk.isEmpty()); + + //Load the sketch + for (int i = 0; i < u; i++) { + // The sketch uses The MemoryRequest, acquired from wmem, to acquire more memory as + // needed, and requests via the MemoryRequest to free the old allocations. + usk.update(i); } + final double result = usk.getQuantile(0.5); + println("Result: " + result); + assertEquals(result, u / 2.0, 0.05 * u); //Success + + //########## Owning Implementation + //The actual Memory has been re-allocated several times, + // so the above wmem reference is invalid. 
+ println("\nFinal mem size: " + wmem.getCapacity()); + assertFalse(wmem2.isAlive()); } @Test @@ -84,22 +82,17 @@ public void checkGrowBaseBuf() { final int u = 32; // don't need the BB to fill here final int initBytes = (4 + (u / 2)) << 3; // not enough to hold everything - try (WritableHandle memHandler = WritableMemory.allocateDirect(initBytes, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - //final MemoryManager memMgr = new MemoryManager(); - //final WritableMemory mem1 = memMgr.request(initBytes); - final WritableMemory mem1 = memHandler.getWritable(); - println("Initial mem size: " + mem1.getCapacity()); - final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(mem1); - for (int i = 1; i <= u; i++) { - usk1.update(i); - } - final int currentSpace = usk1.getCombinedBufferItemCapacity(); - println("curCombBufItemCap: " + currentSpace); - assertEquals(currentSpace, 2 * k); - } catch (final Exception e) { - throw new RuntimeException(e); + WritableMemory wmem = WritableMemory.allocateDirect(initBytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + WritableMemory wmem2 = wmem; + println("Initial mem size: " + wmem.getCapacity()); + final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem); + for (int i = 1; i <= u; i++) { + usk1.update(i); } + final int currentSpace = usk1.getCombinedBufferItemCapacity(); + println("curCombBufItemCap: " + currentSpace); + assertEquals(currentSpace, 2 * k); + assertFalse(wmem2.isAlive()); } @Test @@ -108,26 +101,20 @@ public void checkGrowCombBuf() { final int u = (2 * k) - 1; //just to fill the BB final int initBytes = ((2 * k) + 4) << 3; //just room for BB - try (WritableHandle memHandler = WritableMemory.allocateDirect(initBytes, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - //final MemoryManager memMgr = new MemoryManager(); - //final WritableMemory mem1 = memMgr.request(initBytes); - final WritableMemory mem1 = memHandler.getWritable(); 
- println("Initial mem size: " + mem1.getCapacity()); - final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(mem1); - for (int i = 1; i <= u; i++) { - usk1.update(i); - } - final int currentSpace = usk1.getCombinedBufferItemCapacity(); - println("curCombBufItemCap: " + currentSpace); - final double[] newCB = usk1.growCombinedBuffer(currentSpace, 3 * k); - final int newSpace = usk1.getCombinedBufferItemCapacity(); - println("newCombBurItemCap: " + newSpace); - assertEquals(newCB.length, 3 * k); - //memMgr.free(mem1); - } catch (final Exception e) { - throw new RuntimeException(e); + WritableMemory wmem = WritableMemory.allocateDirect(initBytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + WritableMemory wmem2 = wmem; + println("Initial mem size: " + wmem.getCapacity()); + final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem); + for (int i = 1; i <= u; i++) { + usk1.update(i); } + final int currentSpace = usk1.getCombinedBufferItemCapacity(); + println("curCombBufItemCap: " + currentSpace); + final double[] newCB = usk1.growCombinedBuffer(currentSpace, 3 * k); + final int newSpace = usk1.getCombinedBufferItemCapacity(); + println("newCombBurItemCap: " + newSpace); + assertEquals(newCB.length, 3 * k); + assertFalse(wmem2.isAlive()); } @Test @@ -138,28 +125,25 @@ public void checkGrowFromWrappedEmptySketch() { final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(); final Memory origSketchMem = Memory.wrap(usk1.toByteArray()); - try (WritableHandle memHandle = WritableMemory.allocateDirect(initBytes, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - WritableMemory mem = memHandle.getWritable(); - origSketchMem.copyTo(0, mem, 0, initBytes); - UpdateDoublesSketch usk2 = DirectUpdateDoublesSketch.wrapInstance(mem); - assertTrue(mem.isSameResource(usk2.getMemory())); - assertEquals(mem.getCapacity(), initBytes); - assertTrue(mem.isDirect()); - assertTrue(usk2.isEmpty()); - - 
//update the sketch forcing it to grow on-heap - for (int i = 1; i <= 5; i++) { usk2.update(i); } - assertEquals(usk2.getN(), 5); - WritableMemory mem2 = usk2.getMemory(); - assertFalse(mem.isSameResource(mem2)); - assertFalse(mem2.isDirect()); //should now be on-heap - - final int expectedSize = COMBINED_BUFFER + ((2 * k) << 3); - assertEquals(mem2.getCapacity(), expectedSize); - } catch (final Exception e) { - throw new RuntimeException(e); - } + WritableMemory wmem = WritableMemory.allocateDirect(initBytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + WritableMemory wmem2 = wmem; + origSketchMem.copyTo(0, wmem, 0, initBytes); + UpdateDoublesSketch usk2 = DirectUpdateDoublesSketch.wrapInstance(wmem); + assertTrue(wmem.isSameResource(usk2.getMemory())); + assertEquals(wmem.getCapacity(), initBytes); + assertTrue(wmem.isDirect()); + assertTrue(usk2.isEmpty()); + + //update the sketch forcing it to grow on-heap + for (int i = 1; i <= 5; i++) { usk2.update(i); } + assertEquals(usk2.getN(), 5); + WritableMemory mem2 = usk2.getMemory(); + assertFalse(wmem.isSameResource(mem2)); + assertFalse(mem2.isDirect()); //should now be on-heap + + final int expectedSize = COMBINED_BUFFER + ((2 * k) << 3); + assertEquals(mem2.getCapacity(), expectedSize); + assertFalse(wmem2.isAlive()); } @Test diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java index 5bc5b4eaa..8cdc7bf71 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java @@ -28,13 +28,14 @@ import java.nio.ByteOrder; import org.apache.datasketches.memory.DefaultMemoryRequestServer; -import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.quantilescommon.DoublesSortedView; import 
org.apache.datasketches.quantilescommon.DoublesSortedViewIterator; import org.testng.Assert; import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; + public class DoublesSketchTest { @Test @@ -140,46 +141,42 @@ public void checkEmptyExceptions() { @Test public void directSketchShouldMoveOntoHeapEventually() { - try (WritableHandle wdh = WritableMemory.allocateDirect(1000, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - WritableMemory mem = wdh.getWritable(); - UpdateDoublesSketch sketch = DoublesSketch.builder().build(mem); - Assert.assertTrue(sketch.isSameResource(mem)); - for (int i = 0; i < 1000; i++) { - sketch.update(i); - } - Assert.assertFalse(sketch.isSameResource(mem)); - } catch (final Exception e) { - throw new RuntimeException(e); + WritableMemory wmem = WritableMemory.allocateDirect(1000, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + WritableMemory wmem2 = wmem; + UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem); + Assert.assertTrue(sketch.isSameResource(wmem)); + for (int i = 0; i < 1000; i++) { + sketch.update(i); } + Assert.assertFalse(sketch.isSameResource(wmem)); + Assert.assertFalse(wmem2.isAlive()); } @Test public void directSketchShouldMoveOntoHeapEventually2() { int i = 0; - try (WritableHandle wdh = - WritableMemory.allocateDirect(50, ByteOrder.LITTLE_ENDIAN, new DefaultMemoryRequestServer())) { - WritableMemory mem = wdh.getWritable(); - UpdateDoublesSketch sketch = DoublesSketch.builder().build(mem); - Assert.assertTrue(sketch.isSameResource(mem)); - for (; i < 1000; i++) { - if (sketch.isSameResource(mem)) { - sketch.update(i); - } else { - //println("MOVED OUT at i = " + i); - break; - } + WritableMemory wmem = WritableMemory.allocateDirect(50, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + WritableMemory wmem2 = wmem; + UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem); + Assert.assertTrue(sketch.isSameResource(wmem)); + for (; 
i < 1000; i++) { + if (sketch.isSameResource(wmem)) { + sketch.update(i); + } else { + //println("MOVED OUT at i = " + i); + break; } - } catch (final Exception e) { - throw new RuntimeException(e); } + Assert.assertFalse(wmem2.isAlive()); } @Test public void checkEmptyDirect() { - try (WritableHandle wdh = WritableMemory.allocateDirect(1000)) { - WritableMemory mem = wdh.getWritable(); - UpdateDoublesSketch sketch = DoublesSketch.builder().build(mem); + WritableMemory wmem ; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(1000, 1, + ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { + + UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem); sketch.toByteArray(); //exercises a specific path } catch (final Exception e) { throw new RuntimeException(e); diff --git a/src/test/java/org/apache/datasketches/quantiles/PreambleUtilTest.java b/src/test/java/org/apache/datasketches/quantiles/PreambleUtilTest.java index be2f328b2..adf916ef3 100644 --- a/src/test/java/org/apache/datasketches/quantiles/PreambleUtilTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/PreambleUtilTest.java @@ -37,19 +37,24 @@ import static org.apache.datasketches.quantiles.PreambleUtil.insertSerVer; import static org.testng.Assert.assertEquals; -import org.testng.annotations.Test; +import java.nio.ByteOrder; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +import jdk.incubator.foreign.ResourceScope; public class PreambleUtilTest { @Test public void checkInsertsAndExtracts() { final int bytes = 32; - try (WritableHandle offHeapMemHandler = WritableMemory.allocateDirect(bytes)) { - final WritableMemory offHeapMem = offHeapMemHandler.getWritable(); + WritableMemory offHeapMem; + try (ResourceScope scope = (offHeapMem = 
WritableMemory.allocateDirect(bytes, 1, + ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { + final WritableMemory onHeapMem = WritableMemory.writableWrap(new byte[bytes]); onHeapMem.clear(); diff --git a/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java b/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java index 8c80f4399..68347ffb8 100644 --- a/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java @@ -249,7 +249,7 @@ private static void getAndCheck(String ver, int n, double quantile) { Assert.assertEquals(q2, quantile, 0.0); // same thing with compact sketch - qs2 = CompactDoublesSketch.heapify(srcMem); + qs2 = HeapCompactDoublesSketch.heapifyInstance(srcMem); //Test the quantile q2 = qs2.getQuantile(nf, EXCLUSIVE); println("New Median: " + q2); diff --git a/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java b/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java index db043cff6..e1c6914c7 100644 --- a/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java +++ b/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java @@ -41,6 +41,8 @@ public void empty() { assertThrows(SketchesStateException.class, () -> td.getMaxValue()); assertThrows(SketchesStateException.class, () -> td.getRank(0)); assertThrows(SketchesStateException.class, () -> td.getQuantile(0.5)); + assertThrows(SketchesStateException.class, () -> td.getPMF(new double[]{0})); + assertThrows(SketchesStateException.class, () -> td.getCDF(new double[]{0})); } @Test @@ -82,6 +84,14 @@ public void manyValues() { assertEquals(td.getQuantile(0.9), n * 0.9, n * 0.9 * 0.01); assertEquals(td.getQuantile(0.95), n * 0.95, n * 0.95 * 0.01); assertEquals(td.getQuantile(1), n - 1); + final double[] pmf = td.getPMF(new double[] {n / 2}); + 
assertEquals(pmf.length, 2); + assertEquals(pmf[0], 0.5, 0.0001); + assertEquals(pmf[1], 0.5, 0.0001); + final double[] cdf = td.getCDF(new double[] {n / 2}); + assertEquals(cdf.length, 2); + assertEquals(cdf[0], 0.5, 0.0001); + assertEquals(cdf[1], 1.0); } @Test diff --git a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java index 42b6069da..188dbf427 100644 --- a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java @@ -26,13 +26,17 @@ import static org.testng.Assert.assertNull; import static org.testng.Assert.assertTrue; +import java.nio.ByteOrder; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; + /** * @author Lee Rhodes */ @@ -78,8 +82,8 @@ public void checkHeapifyWrap(int k, int u, boolean ordered) { //Prepare Memory for direct int bytes = usk.getCompactBytes(); //for Compact - try (WritableHandle wdh = WritableMemory.allocateDirect(bytes)) { - WritableMemory directMem = wdh.getWritable(); + WritableMemory directMem; + try (ResourceScope scope = (directMem = WritableMemory.allocateDirect(bytes)).scope()) { /**Via CompactSketch.compact**/ refSk = usk.compact(ordered, directMem); diff --git a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java index e4c112281..6d6af7047 100644 --- a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java +++ 
b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java @@ -119,7 +119,6 @@ public void checkHeapifyByteArrayExact() { // That is, this is being run for its side-effect of accessing things. // If something is wonky, it will generate an exception and fail the test. local2.toString(true, true, 8, true); - } @Test diff --git a/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java index 2261edc3b..84ddcb80e 100644 --- a/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java @@ -114,7 +114,7 @@ public void checkIllegalSketchID_UpdateSketch() { WritableMemory mem = WritableMemory.writableWrap(byteArray); mem.putByte(FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte - //try to heapify the corruped mem + //try to heapify the corrupted mem Sketch.heapify(mem, sl.seed); } diff --git a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java index 5191c7c68..f36597b7c 100644 --- a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java @@ -38,19 +38,22 @@ import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; +import java.nio.ByteOrder; import java.util.Arrays; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; import 
org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; + /** * @author Lee Rhodes */ @@ -59,10 +62,10 @@ public class DirectQuickSelectSketchTest { @Test//(expectedExceptions = SketchesArgumentException.class) public void checkBadSerVer() { int k = 512; - try (WritableHandle h = makeNativeMemory(k)) { - WritableMemory mem = h.getWritable(); + WritableMemory wmem; + try (ResourceScope scope = (wmem = makeNativeMemory(k)).scope()) { - UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(mem); + UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wmem); DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks assertTrue(usk.isEmpty()); @@ -73,33 +76,33 @@ public void checkBadSerVer() { assertEquals(usk.getEstimate(), k, 0.0); assertEquals(sk1.getRetainedEntries(false), k); - mem.putByte(SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte + wmem.putByte(SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte - Sketch.wrap(mem); + Sketch.wrap(wmem); } catch (final Exception e) { if (e instanceof SketchesArgumentException) {} else { throw new RuntimeException(e); } } } - @Test//(expectedExceptions = SketchesArgumentException.class) + @Test public void checkConstructorKtooSmall() { int k = 8; - try (WritableHandle h = makeNativeMemory(k)) { - WritableMemory mem = h.getWritable(); - UpdateSketch.builder().setNominalEntries(k).build(mem); + WritableMemory wmem; + try (ResourceScope scope = (wmem = makeNativeMemory(k)).scope()) { + UpdateSketch.builder().setNominalEntries(k).build(wmem); } catch (final Exception e) { if (e instanceof SketchesArgumentException) {} else { throw new RuntimeException(e); } } } - @Test//(expectedExceptions = SketchesArgumentException.class) + @Test public void checkConstructorMemTooSmall() { int k = 16; - try 
(WritableHandle h = makeNativeMemory(k/2)) { - WritableMemory mem = h.getWritable(); - UpdateSketch.builder().setNominalEntries(k).build(mem); + WritableMemory wmem; + try (ResourceScope scope = (wmem = makeNativeMemory(k/2)).scope()) { + UpdateSketch.builder().setNominalEntries(k).build(wmem); } catch (final Exception e) { if (e instanceof SketchesArgumentException) {} else { throw new RuntimeException(e); } @@ -124,10 +127,10 @@ public void checkHeapifyMemoryEstimating() { int k = 512; int u = 2*k; //thus estimating - try (WritableHandle h = makeNativeMemory(k)) { - WritableMemory mem = h.getWritable(); + WritableMemory wmem; + try (ResourceScope scope = (wmem = makeNativeMemory(k)).scope()) { - UpdateSketch sk1 = UpdateSketch.builder().setNominalEntries(k).build(mem); + UpdateSketch sk1 = UpdateSketch.builder().setNominalEntries(k).build(wmem); for (int i=0; i + diff --git a/tools/SketchesCheckstyle.xml b/tools/SketchesCheckstyle.xml index 873a878a0..184a05ad3 100644 --- a/tools/SketchesCheckstyle.xml +++ b/tools/SketchesCheckstyle.xml @@ -230,7 +230,7 @@ under the License. - +