diff --git a/.github/workflows/check_cpp_files.yml b/.github/workflows/auto-check_cpp_files.yml similarity index 76% rename from .github/workflows/check_cpp_files.yml rename to .github/workflows/auto-check_cpp_files.yml index 59ae5824a..858e760fb 100644 --- a/.github/workflows/check_cpp_files.yml +++ b/.github/workflows/auto-check_cpp_files.yml @@ -1,9 +1,9 @@ name: Serialization Compatibility Test on: + pull_request: push: - branches: - - master + branches: [ 7.0.X ] workflow_dispatch: jobs: @@ -12,21 +12,34 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 + - name: Checkout C++ - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: apache/datasketches-cpp path: cpp + + - name: Setup Java + uses: actions/setup-java@v4 + with: + java-version: '17' + distribution: 'temurin' + - name: Configure C++ build run: cd cpp/build && cmake .. -DGENERATE=true + - name: Build C++ unit tests run: cd cpp && cmake --build build --config Release + - name: Run C++ tests run: cd cpp && cmake --build build --config Release --target test + - name: Make dir run: mkdir -p serialization_test_data/cpp_generated_files + - name: Copy files run: cp cpp/build/*/test/*_cpp.sk serialization_test_data/cpp_generated_files + - name: Run Java tests run: mvn test -P check-cpp-files diff --git a/.github/workflows/auto-jdk-matrix.yml b/.github/workflows/auto-jdk-matrix.yml index 83da580ca..49be43018 100644 --- a/.github/workflows/auto-jdk-matrix.yml +++ b/.github/workflows/auto-jdk-matrix.yml @@ -1,65 +1,72 @@ name: DataSketches-Java Auto JDK Matrix Test & Install on: - pull_request: - push: - branches: [ master ] - workflow_dispatch: + pull_request: + push: + branches: [ 7.0.X ] + workflow_dispatch: env: - MAVEN_OPTS: -Xmx4g -Xms1g + MAVEN_OPTS: -Xmx4g -Xms1g jobs: - build: - name: Build, Test, Install - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - jdk: [ 8,11 ] - env: - JDK_VERSION: ${{ matrix.jdk 
}} + build: + name: Build, Test, Install + runs-on: ubuntu-latest - steps: - - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v3 - with: - persist-credentials: false + strategy: + fail-fast: false + matrix: + jdk: [ 17 ] - - name: Cache local Maven repository - uses: actions/cache@v3 - with: - path: ~/.m2/repository - key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: build-${{ runner.os }}-maven- + env: + JDK_VERSION: ${{ matrix.jdk }} - - name: Install Matrix JDK - uses: actions/setup-java@v3 - with: - java-version: ${{ matrix.jdk }} - distribution: 'temurin' - java-package: jdk - architecture: x64 -# Architecture options: x86, x64, armv7, aarch64, ppc64le -# setup-java@v3 has a "with cache" option + steps: + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Print Current workflow + run: > + cat .github/workflows/auto-jdk-matrix.yml + + - name: Cache local Maven repository + uses: actions/cache@v4 + with: + path: ~/.m2/repository + key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: build-${{ runner.os }}-maven- - - name: Echo Java Version - run: > - java -version + - name: Install Matrix JDK + uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.jdk }} + distribution: 'temurin' + java-package: jdk + architecture: x64 - - name: Test - run: > - mvn clean test - -Dmaven.javadoc.skip=true - -Dgpg.skip=true + - name: Echo Java Version + run: > + java -version - - name: Install - run: > - mvn clean install -B - -DskipTests=true - -Dgpg.skip=true + - name: Test + run: > + mvn clean test -B + -Dmaven.javadoc.skip=true + -Dgpg.skip=true + - name: Install + run: > + mvn clean install -B + -DskipTests=true + -Dgpg.skip=true + +# Architecture options: x86, x64, armv7, aarch64, ppc64le +# setup-java@v4 has a "with cache" option # Lifecycle: validate, compile, test, package, verify, 
install, deploy -# -B batch mode +# -B batch mode, never stops for user input # -V show Version without stopping +# -X debug mode # -q quiet, only show errors diff --git a/.github/workflows/auto-os-matrix.yml b/.github/workflows/auto-os-matrix.yml new file mode 100644 index 000000000..b7d703a08 --- /dev/null +++ b/.github/workflows/auto-os-matrix.yml @@ -0,0 +1,81 @@ +name: DataSketches-Java Auto OS Matrix Test & Install + +on: + pull_request: + push: + branches: [ 7.0.X ] + workflow_dispatch: + +env: + MAVEN_OPTS: -Xmx1g -Xms1g + +jobs: + build: + name: Build, Test, Install + + strategy: + fail-fast: false + + matrix: + jdk: [ 17 ] + os: [ windows-latest, ubuntu-latest, macos-latest ] + include: + - os: windows-latest + skip_javadoc: "`-Dmaven`.javadoc`.skip=true" + skip_gpg: "`-Dgpg`.skip=true" + - os: ubuntu-latest + skip_javadoc: -Dmaven.javadoc.skip=true + skip_gpg: -Dgpg.skip=true + - os: macos-latest + skip_javadoc: -Dmaven.javadoc.skip=true + skip_gpg: -Dgpg.skip=true + + runs-on: ${{matrix.os}} + + env: + JDK_VERSION: ${{ matrix.jdk }} + + steps: + - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" + uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Cache local Maven repository + uses: actions/cache@v4 + with: + path: ~/.m2/repository + key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: build-${{ runner.os }}-maven- + + - name: Install Matrix JDK + uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.jdk }} + distribution: 'temurin' + java-package: jdk + architecture: x64 + + - name: Echo Java Version + run: > + java -version + + - name: Test + run: > + mvn clean test + ${{matrix.skip_javadoc}} + ${{matrix.skip_gpg}} + + - name: Install + run: > + mvn clean install -B + ${{matrix.skip_javadoc}} + -D skipTests=true + ${{matrix.skip_gpg}} + +# Architecture options: x86, x64, armv7, aarch64, ppc64le +# setup-java@v4 has a "with cache" option +# Lifecycle: validate, compile, 
test, package, verify, install, deploy +# -B batch mode +# -V show Version without stopping +# -q quiet, only show errors diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml deleted file mode 100644 index 7cb4c8976..000000000 --- a/.github/workflows/codeql-analysis.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: "CodeQL" - -on: - push: - branches: [ 'master' ] - pull_request: - # The branches below must be a subset of the branches above - branches: [ 'master' ] - schedule: - - cron: '10 17 * * 4' - workflow_dispatch: - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - permissions: - actions: read - contents: read - security-events: write - - strategy: - fail-fast: false - matrix: - language: [ 'java' ] - # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] - # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - # Initializes the CodeQL tools for scanning. - - name: Initialize CodeQL - uses: github/codeql-action/init@v3 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - - # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs - queries: +security-and-quality - - - # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v3 - - # ℹī¸ Command-line programs to run using the OS shell. 
- # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun - - # If the Autobuild fails above, remove it and uncomment the following three lines. - # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. - - # - run: | - # echo "Run, Build Application using script" - # ./location_of_script_within_repo/buildscript.sh - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 - with: - category: "/language:${{matrix.language}}" diff --git a/.github/workflows/javadoc.yml b/.github/workflows/javadoc.yml deleted file mode 100644 index 6033d3273..000000000 --- a/.github/workflows/javadoc.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: JavaDoc - -on: - push: - branches: - - master - workflow_dispatch: - -jobs: - javadoc: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Generate JavaDoc - run: mvn javadoc:javadoc - - name: Deploy JavaDoc - uses: JamesIves/github-pages-deploy-action@5dc1d5a192aeb5ab5b7d5a77b7d36aea4a7f5c92 - with: - token: ${{ secrets.GITHUB_TOKEN }} - folder: target/site/apidocs - target-folder: docs/${{ github.ref_name }} - branch: gh-pages diff --git a/.github/workflows/manual-codeql-analysis.yml b/.github/workflows/manual-codeql-analysis.yml new file mode 100644 index 000000000..a68d4cfff --- /dev/null +++ b/.github/workflows/manual-codeql-analysis.yml @@ -0,0 +1,57 @@ +name: "CodeQL" + +on: + workflow_dispatch: + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'java' ] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] + # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Initializes the CodeQL tools for 
scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v4 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + queries: +security-and-quality + + + # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v4 + + # Command-line programs to run using the OS shell. + # See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + + # If the Autobuild fails above, remove it and uncomment the following three lines. + # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 
+ + # - run: | + # echo "Run, Build Application using script" + # ./location_of_script_within_repo/buildscript.sh + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v4 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/manual-coverage.yml b/.github/workflows/manual-coverage.yml deleted file mode 100644 index e7b6ebc81..000000000 --- a/.github/workflows/manual-coverage.yml +++ /dev/null @@ -1,71 +0,0 @@ -name: Datasketches-Java Manual Coverage Report - -on: - workflow_dispatch: - -env: - MAVEN_OPTS: -Xmx4g -Xms1g - -jobs: - build: - name: Build, Test, Coverage - runs-on: ${{matrix.os}} - strategy: - fail-fast: false - matrix: - jdk: [ 8 ] - os: [ ubuntu-latest ] - include: -# - os: windows-latest -# skip_javadoc: "`-Dmaven`.javadoc`.skip=true" -# skip_gpg: "`-Dgpg`.skip=true" - - os: ubuntu-latest - skip_javadoc: -Dmaven.javadoc.skip=true - skip_gpg: -Dgpg.skip=true -# - os: macos-latest -# skip_javadoc: -Dmaven.javadoc.skip=true -# skip_gpg: -Dgpg.skip=true - - env: - JDK_VERSION: ${{ matrix.jdk }} - - steps: - - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v3 - with: - persist-credentials: false - - - name: Cache local Maven repository - uses: actions/cache@v3 - with: - path: ~/.m2/repository - key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: build-${{ runner.os }}-maven- - - - name: Install Matrix JDK - uses: actions/setup-java@v3 - with: - java-version: ${{ matrix.jdk }} - distribution: 'temurin' - java-package: jdk - architecture: x64 -# Architecture options: x86, x64, armv7, aarch64, ppc64le -# setup-java@v3 has a "with cache" option - - - name: Echo Java Version - run: > - java -version - - - name: Test, Package, Verify, Coverage Report - if: ${{ matrix.jdk == 8 && success() }} - run: - mvn verify coveralls:report -B - -DrepoToken=${{secrets.coveralls_token}} - ${{matrix.os.skip_javadoc}} - ${{matrix.os.skip_gpg}} - -# Lifecycle: 
validate, compile, test, package, verify, install, deploy -# Coverage reports are available after the verify phase -# -B batch mode -# -V show Version without stopping -# -q quiet, only show errors diff --git a/.github/workflows/manual-javadoc.yml b/.github/workflows/manual-javadoc.yml new file mode 100644 index 000000000..dfcfb1152 --- /dev/null +++ b/.github/workflows/manual-javadoc.yml @@ -0,0 +1,36 @@ +name: JavaDoc + +on: + workflow_dispatch: + +jobs: + javadoc: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Java + uses: actions/setup-java@v4 + with: + java-version: '17' + distribution: 'temurin' + + - name: Echo Java Version + run: java -version + + - name: Print Current workflow + run: > + cat .github/workflows/manual-javadoc.yml + + - name: Generate JavaDoc + run: mvn clean javadoc:javadoc + + - name: Deploy JavaDoc + uses: JamesIves/github-pages-deploy-action@v4.6.8 + with: + token: ${{ secrets.GITHUB_TOKEN }} + folder: target/reports/apidocs + target-folder: docs/${{ github.ref_name }} + branch: gh-pages diff --git a/.github/workflows/manual-os-matrix.yml b/.github/workflows/manual-os-matrix.yml deleted file mode 100644 index 2d5537841..000000000 --- a/.github/workflows/manual-os-matrix.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: DataSketches-Java Manual OS Matrix Test & Install - -on: - workflow_dispatch: - -env: - MAVEN_OPTS: -Xmx4g -Xms1g - -jobs: - build: - name: Build, Test, Install - runs-on: ${{matrix.os}} - strategy: - fail-fast: false - matrix: - jdk: [ 8, 11 ] - os: [ windows-latest, ubuntu-latest, macos-latest ] - include: - - os: windows-latest - skip_javadoc: "`-Dmaven`.javadoc`.skip=true" - skip_gpg: "`-Dgpg`.skip=true" - - os: ubuntu-latest - skip_javadoc: -Dmaven.javadoc.skip=true - skip_gpg: -Dgpg.skip=true - - os: macos-latest - skip_javadoc: -Dmaven.javadoc.skip=true - skip_gpg: -Dgpg.skip=true - - env: - JDK_VERSION: ${{ matrix.jdk }} - - steps: - - name: "Checkout ${{ github.ref }} ( 
${{ github.sha }} )" - uses: actions/checkout@v3 - with: - persist-credentials: false - - - name: Cache local Maven repository - uses: actions/cache@v3 - with: - path: ~/.m2/repository - key: build-${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: build-${{ runner.os }}-maven- - - - name: Install Matrix JDK - uses: actions/setup-java@v3 - with: - java-version: ${{ matrix.jdk }} - distribution: 'temurin' - java-package: jdk - architecture: x64 -# Architecture options: x86, x64, armv7, aarch64, ppc64le -# setup-java@v3 has a "with cache" option - - - name: Echo Java Version - run: > - java -version - - - name: Test - run: > - mvn clean test - ${{matrix.os.skip_javadoc}} - ${{matrix.os.skip_gpg}} - - - name: Install - run: > - mvn clean install -B - ${{matrix.os.skip_javadoc}} - -D skipTests=true - ${{matrix.os.skip_gpg}} - -# Lifecycle: validate, compile, test, package, verify, install, deploy -# -B batch mode -# -V show Version without stopping -# -q quiet, only show errors diff --git a/.gitignore b/.gitignore index 96b25576b..f03c5078f 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,9 @@ *.ipr *.iws +# VSCode project files +**/.vscode/ + # Additional tools .clover/ diff --git a/README.md b/README.md index 3190036d1..b6db7f89e 100644 --- a/README.md +++ b/README.md @@ -18,8 +18,6 @@ --> [![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.datasketches/datasketches-java/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.apache.datasketches/datasketches-java) -[![Language grade: Java](https://img.shields.io/lgtm/grade/java/g/apache/datasketches-java.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/apache/datasketches-java/context:java) -[![Total alerts](https://img.shields.io/lgtm/alerts/g/apache/datasketches-java.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/apache/datasketches-java/alerts/) [![Coverage 
Status](https://coveralls.io/repos/github/apache/datasketches-java/badge.svg)](https://coveralls.io/github/apache/datasketches-java) ================= @@ -27,10 +25,10 @@ # Apache® DataSketches™ Core Java Library Component This is the core Java component of the DataSketches library. It contains all of the sketching algorithms and can be accessed directly from user applications. -This component is also a dependency of other components of the library that create adaptors for target systems, such as the [Apache Pig adaptor](https://github.com/apache/datasketches-pig) and the [Apache Hive adaptor](https://github.com/apache/datasketches-hive). +This component is also a dependency of other components of the library that create adaptors for target systems, such as the [Apache Pig adaptor](https://github.com/apache/datasketches-pig), the [Apache Hive adaptor](https://github.com/apache/datasketches-hive), and others. Note that we have a parallel core component for C++ and Python implementations of the same sketch algorithms, -[datasketches-cpp](https://github.com/apache/datasketches-cpp). +[datasketches-cpp](https://github.com/apache/datasketches-cpp) and [datasketches-python](https://github.com/apache/datasketches-python). Please visit the main [DataSketches website](https://datasketches.apache.org) for more information. @@ -41,30 +39,29 @@ If you are interested in making contributions to this site please see our [Commu ## Maven Build Instructions __NOTE:__ This component accesses resource files for testing. As a result, the directory elements of the full absolute path of the target installation directory must qualify as Java identifiers. In other words, the directory elements must not have any space characters (or non-Java identifier characters) in any of the path elements. 
This is required by the Oracle Java Specification in order to ensure location-independent access to resources: [See Oracle Location-Independent Access to Resources](https://docs.oracle.com/javase/8/docs/technotes/guides/lang/resources.html) -### A JDK8 with Hotspot or JDK11 with Hotspot is required to compile -This component depends on the [datasketches-memory](https://github.com/apache/datasketches-memory) component, -and, as a result, must be compiled with one of the above JDKs. -If your application only relies on the APIs of this component no special JVM arguments are required. -However, if your application also directly relies on the APIs of the *datasketches-memory* component, -you may need additional JVM arguments. -Please refer to the [datasketches-memory README](https://github.com/apache/datasketches-memory/blob/master/README.md) for details. +### JDK17 is required to compile +This component depends on the [datasketches-memory-4.1.X](https://github.com/apache/datasketches-memory/tree/4.1.X) component, +and, as a result, must be compiled with JDK17 and this dependency: -If your application uses Maven, you can also use the *pom.xml* of this component as an example of how to automatically -configure the JVM arguments for compilation and testing based on the version of the JDK. +``` +<dependency> + <groupId>org.apache.datasketches</groupId> + <artifactId>datasketches-memory</artifactId> + <version>4.1.0</version> +</dependency> +``` + +If your application only relies on the APIs of datasketches-java no special JVM arguments are required. +However, if your application also directly relies on the APIs of the *datasketches-memory* component, +you may need the additional JVM argument **--add-modules=jdk.incubator.foreign**. ### Recommended Build Tool This DataSketches component is structured as a Maven project and Maven is the recommended Build Tool. -There are two types of tests: normal unit tests and tests run by the strict profile. 
- To run normal unit tests: $ mvn clean test -To run the strict profile tests (only supported in Java 8): - - $ mvn clean test -P strict - To install jars built from the downloaded source: $ mvn clean install -DskipTests=true @@ -82,74 +79,11 @@ This will create the following jars: #### Run-time There is one run-time dependency: -* org.apache.datasketches : datasketches-memory +* [datasketches-memory-4.1.X](https://github.com/apache/datasketches-memory/tree/4.1.X) #### Testing See the pom.xml file for test dependencies. -## Special Build / Test Instructions for Eclipse - -Building and running tests using JDK 8 should not be a problem. - -However, with JDK 9+, and Eclipse versions up to and including 4.22.0 (2021-12), Eclipse fails to translate the required JPMS JVM arguments specified in the POM compiler or surefire plugins into the *.classpath* file, causing illegal reflection access errors -[eclipse-m2e/m2e-core Bug 543631](https://github.com/eclipse-m2e/m2e-core/issues/129). - -There are two ways to fix this: - -#### Method 1: Manually update *.classpath* file: -Open the *.classpath* file in a text editor and find the following *classpathentry* element (this assumes JDK11, change to suit): - -``` - - - - - - -``` -Then edit it as follows: - -``` - - - - - - - - -``` -Finally, *refresh*. 
- -#### Method 2: Manually update *Module Dependencies* - -In Eclipse, open the project *Properties / Java Build Path / Module Dependencies ...* - -* Select *java.base* -* Select *Configured details* -* Select *Expose Package...* - * Enter *Package* = java.nio - * Enter *Target module* = ALL-UNNAMED - * Select button: *opens* - * Hit *OK* -* Select *Expose Package...* - * Enter *Package* = jdk.internal.misc - * Enter *Target module* = ALL-UNNAMED - * Select button: *exports* - * Hit *OK* -* Select *Expose Package...* - * Enter *Package* = jdk.internal.ref - * Enter *Target module* = ALL-UNNAMED - * Select button: *exports* - * Hit *OK* -* Select *Expose Package...* - * Enter *Package* = sun.nio.ch - * Enter *Target module* = ALL-UNNAMED - * Select button: *opens* - * Hit *OK* - -**NOTE:** If you execute *Maven/Update Project...* from Eclipse with the option *Update project configuration from pom.xml* checked, all of the above will be erased, and you will have to redo it. - ## Known Issues #### SpotBugs diff --git a/pom.xml b/pom.xml index 35107f24c..35022e3b1 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ under the License. org.apache.datasketches datasketches-java - 6.1.0-SNAPSHOT + 7.0.0 jar ${project.artifactId} @@ -83,64 +83,63 @@ under the License. 
- - 2.2.0 - + 4.1.0 - 7.5.1 + 7.10.2 generate_java_files check_cpp_files check_cpp_historical_files - 3.6.3 - 1.8 + 3.6.3 + 17 + --add-modules=jdk.incubator.foreign ${java.version} ${java.version} - -Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 + -Xmx4g -Duser.language=en -Duser.country=US -Dfile.encoding=UTF-8 ${add-modules} UTF-8 ${charset.encoding} ${charset.encoding} ${charset.encoding} yyyy-MM-dd'T'HH-mm-ss'Z' - - 2.16.2 - - - 3.7.1 - 3.13.0 - 3.1.1 - 3.4.1 - 3.2.3 - 3.4.0 - 3.6.3 - 3.0.1 - 3.2.0 - 3.3.1 - 3.2.5 - - 0.16.1 - - 0.8.12 - + + 3.7.1 + 3.13.0 + 3.1.3 + 3.5.0 + 3.2.7 + 3.4.2 + 3.11.2 + 3.1.1 + 3.2.0 + 3.3.1 + 3.5.2 + 3.2.0 + + 4.9.10 + + 0.16.1 + 4.3.0 + + 0.8.12 + + 2.18.0 - 1.0.0 - 4.9.10 + 1.0.0 - org.apache.datasketches datasketches-memory ${datasketches-memory.version} - + org.testng @@ -148,13 +147,6 @@ under the License. ${testng.version} test - @@ -170,6 +162,11 @@ under the License. org.apache.maven.plugins maven-compiler-plugin ${maven-compiler-plugin.version} + + + ${add-modules} + + @@ -193,10 +190,10 @@ under the License. - [1.8.0,9.0),[11.0,12.0) + ${java.version} - [${maven.version},) + [${maven.version},4.0.0) @@ -212,7 +209,6 @@ under the License. - org.apache.maven.plugins maven-jar-plugin ${maven-jar-plugin.version} @@ -235,12 +231,15 @@ under the License. - org.apache.maven.plugins maven-javadoc-plugin ${maven-javadoc-plugin.version} true + public + + ${add-modules} + @@ -253,14 +252,12 @@ under the License. - org.apache.maven.plugins maven-release-plugin ${maven-release-plugin.version} - org.apache.maven.plugins maven-source-plugin ${maven-source-plugin.version} @@ -283,11 +280,11 @@ under the License. - org.apache.maven.plugins maven-surefire-plugin ${maven-surefire-failsafe-plugins.version} + ${add-modules} false false true @@ -297,7 +294,26 @@ under the License. 
- + org.apache.maven.plugins + maven-toolchains-plugin + ${maven-toolchains-plugin.version} + + + + toolchain + + + + + + + ${java.version} + + + + + + org.apache.rat apache-rat-plugin ${apache-rat-plugin.version} @@ -401,6 +417,10 @@ under the License. org.apache.maven.plugins maven-surefire-plugin + + org.apache.maven.plugins + maven-toolchains-plugin + org.apache.rat apache-rat-plugin @@ -423,44 +443,8 @@ under the License. - - - - only-eclipse - - - m2e.version - - - - - - - org.apache.maven.plugins - maven-remote-resources-plugin - ${maven-remote-resources-plugin.version} - - - process-resource-bundles - none - - - - - - - - org.apache.maven.plugins - maven-remote-resources-plugin - - - - + + This is performed from a script outside Maven apache-release @@ -624,44 +609,7 @@ under the License. - - - - java8only - - [1.8,1.9),8 - - - - - java11plus - - [11,14) - - - 8 - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - - @{argLine} - --add-exports java.base/jdk.internal.misc=ALL-UNNAMED - --add-exports java.base/jdk.internal.ref=ALL-UNNAMED - --add-opens java.base/java.nio=ALL-UNNAMED - --add-opens java.base/sun.nio.ch=ALL-UNNAMED - - ${testng.generate-java-files},${testng.check-cpp-files} - - - - - - + --> generate-java-files diff --git a/src/main/java/org/apache/datasketches/common/MemoryStatus.java b/src/main/java/org/apache/datasketches/common/MemoryStatus.java new file mode 100644 index 000000000..564797e32 --- /dev/null +++ b/src/main/java/org/apache/datasketches/common/MemoryStatus.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.common; + +import org.apache.datasketches.memory.Memory; + +/** + * Methods for inquiring the status of a backing Memory object. + */ +public interface MemoryStatus { + + /** + * Returns true if this object's internal data is backed by a Memory object, + * which may be on-heap or off-heap. + * @return true if this object's internal data is backed by a Memory object. + */ + default boolean hasMemory() { return false; } + + /** + * Returns true if this object's internal data is backed by direct (off-heap) Memory. + * @return true if this object's internal data is backed by direct (off-heap) Memory. + */ + default boolean isDirect() { return false; } + + /** + * Returns true if the backing resource of this is identical with the backing resource + * of that. The capacities must be the same. If this is a region, + * the region offset must also be the same. + * + * @param that A different non-null and alive Memory object. + * @return true if the backing resource of this is identical with the backing resource + * of that. + * @throws SketchesArgumentException if that is not alive (already closed). 
+ */ + default boolean isSameResource(final Memory that) { return false; } + +} diff --git a/src/main/java/org/apache/datasketches/common/Util.java b/src/main/java/org/apache/datasketches/common/Util.java index 729b92f3f..19a8ee614 100644 --- a/src/main/java/org/apache/datasketches/common/Util.java +++ b/src/main/java/org/apache/datasketches/common/Util.java @@ -245,7 +245,8 @@ public static String zeroPad(final String s, final int fieldLength) { /** * Prepend or postpend the given string with the given character to fill the given field length. - * If the given string is equal to or greater than the given field length, it will be returned without modification. + * If the given string is equal to or greater than the given field length, it will be returned + * without modification. * @param s the given string * @param fieldLength the desired field length * @param padChar the desired pad character diff --git a/src/main/java/org/apache/datasketches/cpc/CpcSketch.java b/src/main/java/org/apache/datasketches/cpc/CpcSketch.java index a374b5745..4ed89dd19 100644 --- a/src/main/java/org/apache/datasketches/cpc/CpcSketch.java +++ b/src/main/java/org/apache/datasketches/cpc/CpcSketch.java @@ -300,7 +300,7 @@ public byte[] toByteArray() { final long cap = state.getRequiredSerializedBytes(); final WritableMemory wmem = WritableMemory.allocate((int) cap); state.exportToMemory(wmem); - return (byte[]) wmem.getArray(); + return wmem.getArray(); } /** diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java index eafe88340..171fc2cfb 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java +++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java @@ -33,8 +33,8 @@ import org.apache.datasketches.memory.XxHash; /** - *

A Bloom filter is a data structure that can be used for probabilistic - * set membership.

+ * A Bloom filter is a data structure that can be used for probabilistic + * set membership. * *

When querying a Bloom filter, there are no false positives. Specifically: * When querying an item that has already been inserted to the filter, the filter will diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java index f865a3350..ee17a9918 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java +++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java @@ -25,8 +25,8 @@ import org.apache.datasketches.memory.WritableMemory; /** - *

This class provides methods to help estimate the correct parameters when - * creating a Bloom filter, and methods to create the filter using those values.

+ * This class provides methods to help estimate the correct parameters when + * creating a Bloom filter, and methods to create the filter using those values. * *

The underlying math is described in the * diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/package-info.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/package-info.java index 8a752e168..5cc6f28fe 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/package-info.java +++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/package-info.java @@ -16,5 +16,7 @@ * specific language governing permissions and limitations * under the License. */ - +/** + * BloomFilter package + */ package org.apache.datasketches.filters.bloomfilter; diff --git a/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java b/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java index 7c5bc770d..b2648b943 100644 --- a/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java +++ b/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java @@ -55,10 +55,10 @@ import org.apache.datasketches.memory.WritableMemory; /** - *

This sketch is useful for tracking approximate frequencies of items of type <T> + * This sketch is useful for tracking approximate frequencies of items of type <T> * with optional associated counts (<T> item, long count) that are members of a * multiset of such items. The true frequency of an item is defined to be the sum of associated - * counts.

+ * counts. * *

This implementation provides the following capabilities:

*
* *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + * * @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -116,7 +116,7 @@ default double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria sear * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *
*

Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.

* @@ -140,7 +140,7 @@ default double[] getCDF(final T[] splitPoints, final QuantileSearchCriteria sear * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/IncludeMinMax.java b/src/main/java/org/apache/datasketches/quantilescommon/IncludeMinMax.java index 203e338d2..1edfc2054 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/IncludeMinMax.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/IncludeMinMax.java @@ -63,6 +63,24 @@ public FloatsPair(final float[] quantiles, final long[] cumWeights) { } } + /** A simple structure to hold a pair of arrays */ + public static class LongsPair { + /** the array of quantiles */ + public long[] quantiles; + /** the array of associated cumulative weights */ + public long[] cumWeights; + + /** + * Constructor. + * @param quantiles the array of quantiles + * @param cumWeights the array of associated cumulative weights + */ + public LongsPair(final long[] quantiles, final long[] cumWeights) { + this.quantiles = quantiles; + this.cumWeights = cumWeights; + } + } + /** * A simple structure to hold a pair of arrays * @param the item class type @@ -131,6 +149,53 @@ public static DoublesPair includeDoublesMinMax( return new DoublesPair(adjQuantiles, adjCumWeights); } + /** + * The logic to include the min and max of type long.
+ * @param quantilesIn The array of quantiles + * @param cumWeightsIn The array of associated cumulative weights + * @param maxItem the maximum item of the stream + * @param minItem the minimum item of the stream + * @return a LongsPair + */ + public static LongsPair includeLongsMinMax( + final long[] quantilesIn, + final long[] cumWeightsIn, + final long maxItem, + final long minItem) { + final int lenIn = cumWeightsIn.length; + final boolean adjLow = quantilesIn[0] != minItem; //if true, adjust the low end + final boolean adjHigh = quantilesIn[lenIn - 1] != maxItem; //if true, adjust the high end + int adjLen = lenIn; //this will be the length of the local copies of quantiles and cumWeights + adjLen += adjLow ? 1 : 0; + adjLen += adjHigh ? 1 : 0; + final long[] adjQuantiles; + final long[] adjCumWeights; + if (adjLen > lenIn) { //is any adjustment required at all? + adjQuantiles = new long[adjLen]; + adjCumWeights = new long[adjLen]; + final int offset = adjLow ? 1 : 0; + System.arraycopy(quantilesIn, 0, adjQuantiles, offset, lenIn); + System.arraycopy(cumWeightsIn,0, adjCumWeights, offset, lenIn); + + //Adjust the low end if required. Don't need to adjust weight of next one because it is cumulative. + if (adjLow) { + adjQuantiles[0] = minItem; + adjCumWeights[0] = 1; + } + + if (adjHigh) { + adjQuantiles[adjLen - 1] = maxItem; + adjCumWeights[adjLen - 1] = cumWeightsIn[lenIn - 1]; + adjCumWeights[adjLen - 2] = cumWeightsIn[lenIn - 1] - 1; + } + } else { //both min and max are already in place, no adjustments are required. + adjQuantiles = quantilesIn; + adjCumWeights = cumWeightsIn; + + } //END of Adjust End Points + return new LongsPair(adjQuantiles, adjCumWeights); + } + /** * The logic to include the min and max of type float.
* @param quantilesIn The array of quantiles diff --git a/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java b/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java index 51b013573..21af2b531 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/InequalitySearch.java @@ -40,7 +40,7 @@ *

Given a sorted array of values arr[] and a search key value v, the algorithms for * the searching criteria are given with each enum criterion.

* - * @see + * @see * Sketching Quantiles and Ranks Tutorial * @author Lee Rhodes */ diff --git a/src/main/java/org/apache/datasketches/quantilescommon/LongsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/LongsSketchSortedView.java new file mode 100644 index 000000000..efb4006f6 --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/LongsSketchSortedView.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG; +import static org.apache.datasketches.quantilescommon.QuantilesUtil.getNaturalRank; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.quantilescommon.IncludeMinMax.LongsPair; + +/** + * The SortedView of the KllLongsSketch. 
+ * @author Lee Rhodes + * @author Zac Blanco + */ +public final class LongsSketchSortedView implements LongsSortedView { + private final long[] quantiles; + private final long[] cumWeights; //cumulative natural weights + private final long totalN; + + /** + * Construct from elements, also used in testing. + * @param quantiles sorted array of quantiles + * @param cumWeights sorted, monotonically increasing cumulative weights. + * @param sk the underlying quantile sketch. + */ + public LongsSketchSortedView( + final long[] quantiles, + final long[] cumWeights, + final QuantilesLongsAPI sk) { + final LongsPair dPair = + IncludeMinMax.includeLongsMinMax(quantiles, cumWeights, sk.getMaxItem(), sk.getMinItem()); + this.quantiles = dPair.quantiles; + this.cumWeights = dPair.cumWeights; + this.totalN = sk.getN(); + } + + //Used for testing + LongsSketchSortedView( + final long[] quantiles, + final long[] cumWeights, + final long totalN, + final long maxItem, + final long minItem) { + final LongsPair dPair = + IncludeMinMax.includeLongsMinMax(quantiles, cumWeights, maxItem, minItem); + this.quantiles = dPair.quantiles; + this.cumWeights = dPair.cumWeights; + this.totalN = totalN; + } + + @Override + public long[] getCumulativeWeights() { + return cumWeights.clone(); + } + + @Override + public long getMaxItem() { + final int top = quantiles.length - 1; + return quantiles[top]; + } + + @Override + public long getMinItem() { + return quantiles[0]; + } + + @Override + public long getN() { + return totalN; + } + + @Override + public int getNumRetained() { + return quantiles.length; + } + + @Override + public long getQuantile(final double rank, final QuantileSearchCriteria searchCrit) { + if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + QuantilesUtil.checkNormalizedRankBounds(rank); + final int len = cumWeights.length; + final double naturalRank = getNaturalRank(rank, totalN, searchCrit); + final InequalitySearch crit = (searchCrit == INCLUSIVE) ? 
InequalitySearch.GE : InequalitySearch.GT; + final int index = InequalitySearch.find(cumWeights, 0, len - 1, naturalRank, crit); + if (index == -1) { + return quantiles[len - 1]; //EXCLUSIVE (GT) case: normRank == 1.0; + } + return quantiles[index]; + } + + @Override + public long[] getQuantiles() { + return quantiles.clone(); + } + + @Override + public double getRank(final long quantile, final QuantileSearchCriteria searchCrit) { + if (isEmpty()) { throw new SketchesArgumentException(EMPTY_MSG); } + final int len = quantiles.length; + final InequalitySearch crit = (searchCrit == INCLUSIVE) ? InequalitySearch.LE : InequalitySearch.LT; + final int index = InequalitySearch.find(quantiles, 0, len - 1, quantile, crit); + if (index == -1) { + return 0; //EXCLUSIVE (LT) case: quantile <= minQuantile; INCLUSIVE (LE) case: quantile < minQuantile + } + return (double)cumWeights[index] / totalN; + } + + @Override + public boolean isEmpty() { + return totalN == 0; + } + + @Override + public LongsSortedViewIterator iterator() { + return new LongsSortedViewIterator(quantiles, cumWeights); + } + +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java new file mode 100644 index 000000000..e7e3521c7 --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedView.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +/** + * The Sorted View for quantile sketches of primitive type long. + * @see SortedView + * @author Lee Rhodes + * @author Zac Blanco + */ +public interface LongsSortedView extends SortedView { + + /** + * Returns an approximation to the Cumulative Distribution Function (CDF) of the input stream + * as a monotonically increasing array of double ranks (or cumulative probabilities) on the interval [0.0, 1.0], + * given a set of splitPoints. + * + *

The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(false) function.

+ * + * @param splitPoints an array of m unique, monotonically increasing items + * (of the same type as the input items) + * that divide the item input domain into m+1 overlapping intervals. + *
+ *

The start of each interval is below the lowest item retained by the sketch + * corresponding to a zero rank or zero probability, and the end of the interval + * is the rank or cumulative probability corresponding to the split point.

+ * + *

The (m+1)th interval represents 100% of the distribution represented by the sketch + * and consistent with the definition of a cumulative probability distribution, thus the (m+1)th + * rank or probability in the returned array is always 1.0.

+ * + *

If a split point exactly equals a retained item of the sketch and the search criterion is:

+ * + *
    + *
  • INCLUSIVE, the resulting cumulative probability will include that item.
  • + *
  • EXCLUSIVE, the resulting cumulative probability will not include the weight of that split point.
  • + *
+ * + *

It is not recommended to include either the minimum or maximum items of the input stream.

+ *
+ * @param searchCrit the desired search criteria. + * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. + * @throws IllegalArgumentException if sketch is empty. + */ + default double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit) { + QuantilesUtil.checkLongsSplitPointsOrder(splitPoints); + final int len = splitPoints.length + 1; + final double[] buckets = new double[len]; + for (int i = 0; i < len - 1; i++) { + buckets[i] = getRank(splitPoints[i], searchCrit); + } + buckets[len - 1] = 1; + return buckets; + } + + /** + * Returns the maximum item of the stream. This may be distinct from the largest item retained by the + * sketch algorithm. + * + * @return the maximum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + long getMaxItem(); + + /** + * Returns the minimum item of the stream. This may be distinct from the smallest item retained by the + * sketch algorithm. + * + * @return the minimum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + long getMinItem(); + + /** + * Returns an approximation to the Probability Mass Function (PMF) of the input stream + * as an array of probability masses as doubles on the interval [0.0, 1.0], + * given a set of splitPoints. + * + *

The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(true) function.

+ * + * @param splitPoints an array of m unique, monotonically increasing items + * (of the same type as the input items) + * that divide the item input domain into m+1 consecutive, non-overlapping intervals. + *
+ *

Each interval except for the end intervals starts with a split point and ends with the next split + * point in sequence.

+ * + *

The first interval starts below the lowest item retained by the sketch + * corresponding to a zero rank or zero probability, and ends with the first split point

+ * + *

The last (m+1)th interval starts with the last split point and ends after the last + * item retained by the sketch corresponding to a rank or probability of 1.0.

+ * + *

The sum of the probability masses of all (m+1) intervals is 1.0.

+ * + *

If the search criterion is:

+ * + *
    + *
  • INCLUSIVE, and the upper split point of an interval equals an item retained by the sketch, the interval + * will include that item. If the lower split point equals an item retained by the sketch, the interval will exclude + * that item.
  • + *
  • EXCLUSIVE, and the upper split point of an interval equals an item retained by the sketch, the interval + * will exclude that item. If the lower split point equals an item retained by the sketch, the interval will include + * that item.
  • + *
+ * + *

It is not recommended to include either the minimum or maximum items of the input stream.

+ *
+ * @param searchCrit the desired search criteria. + * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. + * @throws IllegalArgumentException if sketch is empty. + */ + default double[] getPMF(long[] splitPoints, QuantileSearchCriteria searchCrit) { + final double[] buckets = getCDF(splitPoints, searchCrit); + final int len = buckets.length; + for (int i = len; i-- > 1; ) { + buckets[i] -= buckets[i - 1]; + } + return buckets; + } + + /** + * Gets the approximate quantile of the given normalized rank and the given search criterion. + * + * @param rank the given normalized rank, a double in the range [0.0, 1.0]. + * @param searchCrit If INCLUSIVE, the given rank includes all quantiles ≤ + * the quantile directly corresponding to the given rank. + * If EXCLUSIVE, the given rank includes all quantiles < + * the quantile directly corresponding to the given rank. + * @return the approximate quantile given the normalized rank. + * @throws IllegalArgumentException if sketch is empty. + * @see QuantileSearchCriteria + */ + long getQuantile(double rank, QuantileSearchCriteria searchCrit); + + /** + * Returns an array of all retained quantiles by the sketch. + * @return an array of all retained quantiles by the sketch. + */ + long[] getQuantiles(); + + /** + * Gets the normalized rank corresponding to the given quantile. + * + * @param quantile the given quantile + * @param searchCrit if INCLUSIVE the given quantile is included into the rank. + * @return the normalized rank corresponding to the given quantile. + * @throws IllegalArgumentException if sketch is empty.
+ * @see QuantileSearchCriteria + */ + double getRank(long quantile, QuantileSearchCriteria searchCrit); + + @Override + LongsSortedViewIterator iterator(); + +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedViewIterator.java b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedViewIterator.java new file mode 100644 index 000000000..77510cd5a --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/LongsSortedViewIterator.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +/** + * Iterator over quantile sketches of primitive type long. + */ +public final class LongsSortedViewIterator extends SortedViewIterator { + private final long[] quantiles; + + /** + * Constructor. + * @param quantiles the given array of quantiles, which must be ordered. + * @param cumWeights the given array of cumulative weights, which must be ordered, start with the value one, and + * the last value must be equal to N, the total number of items updated to the sketch. 
+ */ + public LongsSortedViewIterator(final long[] quantiles, final long[] cumWeights) { + super(cumWeights); + this.quantiles = quantiles; //SpotBugs EI_EXPOSE_REP2 suppressed by FindBugsExcludeFilter + } + + /** + * Gets the quantile at the current index. + * + *

Don't call this before calling next() for the first time + * or after getting false from next().

+ * + * @return the quantile at the current index. + */ + public long getQuantile() { + return quantiles[index]; + } + +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java index 8ea3c3415..e7b9e6ef6 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantileSearchCriteria.java @@ -22,7 +22,7 @@ /** * These search criteria are used by the KLL, REQ and Classic Quantiles sketches in the DataSketches library. * - * @see + * @see * Sketching Quantiles and Ranks Tutorial * * @author Lee Rhodes diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java index b70843bb4..a082fc27a 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesAPI.java @@ -20,12 +20,12 @@ package org.apache.datasketches.quantilescommon; /** - *

This is a stochastic streaming sketch that enables near-real time analysis of the + * This is a stochastic streaming sketch that enables near-real time analysis of the * approximate distribution of items from a very large stream in a single pass, requiring only * that the items are comparable. * The analysis is obtained using the getQuantile() function or the * inverse functions getRank(), getPMF() (the Probability Mass Function), and getCDF() - * (the Cumulative Distribution Function).

+ * (the Cumulative Distribution Function). * *

Given an input stream of N items, the natural rank of any specific * item is defined as its index (1 to N) in the hypothetical sorted stream of all @@ -194,7 +194,7 @@ *

[*] Note that obtaining epsilon may require using a similar function but with more parameters * based on the specific sketch implementation.

* - * @see + * @see * Sketching Quantiles and Ranks, Tutorial * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria * @@ -205,11 +205,22 @@ @SuppressWarnings("javadoc") public interface QuantilesAPI { + /** The sketch must not be empty for this operation. */ static String EMPTY_MSG = "The sketch must not be empty for this operation. "; + + /** Unsupported operation for this Sketch Type. */ static String UNSUPPORTED_MSG = "Unsupported operation for this Sketch Type. "; + + /** Sketch does not have just one item. */ static String NOT_SINGLE_ITEM_MSG = "Sketch does not have just one item. "; + + /** MemoryRequestServer must not be null. */ static String MEM_REQ_SVR_NULL_MSG = "MemoryRequestServer must not be null. "; + + /** Target sketch is Read Only, cannot write. */ static String TGT_IS_READ_ONLY_MSG = "Target sketch is Read Only, cannot write. "; + + /** A sketch cannot merge with itself. */ static String SELF_MERGE_MSG = "A sketch cannot merge with itself. "; /** diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java index e8e5310f5..8c4f6620f 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesDoublesAPI.java @@ -50,7 +50,7 @@ default double[] getCDF(double[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *
*

The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.

@@ -67,7 +67,7 @@ default double[] getCDF(double[] splitPoints) { * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -113,7 +113,7 @@ default double[] getPMF(double[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *
*

Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.

* @@ -137,7 +137,7 @@ default double[] getPMF(double[] splitPoints) { * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java index 986780444..8b8a91bdd 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesFloatsAPI.java @@ -49,7 +49,7 @@ default double[] getCDF(float[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *
*

The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.

@@ -66,7 +66,7 @@ default double[] getCDF(float[] splitPoints) { * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -112,7 +112,7 @@ default double[] getPMF(float[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *
*

Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.

* @@ -136,7 +136,7 @@ default double[] getPMF(float[] splitPoints) { * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java index 459e58cdd..bc0881282 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesGenericAPI.java @@ -53,7 +53,7 @@ default double[] getCDF(T[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 overlapping intervals. - * + *
*

The start of each interval is below the lowest item retained by the sketch * corresponding to a zero rank or zero probability, and the end of the interval * is the rank or cumulative probability corresponding to the split point.

@@ -70,7 +70,7 @@ default double[] getCDF(T[] splitPoints) { * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. @@ -132,7 +132,7 @@ default double[] getPMF(T[] splitPoints) { * @param splitPoints an array of m unique, monotonically increasing items * (of the same type as the input items) * that divide the item input domain into m+1 consecutive, non-overlapping intervals. - * + *
*

Each interval except for the end intervals starts with a split point and ends with the next split * point in sequence.

* @@ -156,7 +156,7 @@ default double[] getPMF(T[] splitPoints) { * * *

It is not recommended to include either the minimum or maximum items of the input stream.

- * + *
* @param searchCrit the desired search criteria. * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. * @throws IllegalArgumentException if sketch is empty. diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java new file mode 100644 index 000000000..fb1ca5817 --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsAPI.java @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; + +/** + * The Quantiles API for item type long. + * @see QuantilesAPI + * @author Lee Rhodes + * @author Zac Blanco + */ +public interface QuantilesLongsAPI extends QuantilesAPI { + + /** + * This is equivalent to {@link #getCDF(long[], QuantileSearchCriteria) getCDF(splitPoints, INCLUSIVE)} + * @param splitPoints an array of m unique, monotonically increasing items. + * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. 
+ * @throws IllegalArgumentException if sketch is empty. + */ + default double[] getCDF(long[] splitPoints) { + return getCDF(splitPoints, INCLUSIVE); + } + + /** + * Returns an approximation to the Cumulative Distribution Function (CDF) of the input stream + * as a monotonically increasing array of double ranks (or cumulative probabilities) on the interval [0.0, 1.0], + * given a set of splitPoints. + * + *

The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(false) function.

+ * + * @param splitPoints an array of m unique, monotonically increasing items + * (of the same type as the input items) + * that divide the item input domain into m+1 overlapping intervals. + *
+ *

The start of each interval is below the lowest item retained by the sketch + * corresponding to a zero rank or zero probability, and the end of the interval + * is the rank or cumulative probability corresponding to the split point.

+ * + *

The (m+1)th interval represents 100% of the distribution represented by the sketch + * and consistent with the definition of a cumulative probability distribution, thus the (m+1)th + * rank or probability in the returned array is always 1.0.

+ * + *

If a split point exactly equals a retained item of the sketch and the search criterion is:

+ * + *
    + *
  • INCLUSIVE, the resulting cumulative probability will include that item.
  • + *
  • EXCLUSIVE, the resulting cumulative probability will not include the weight of that split point.
  • + *
+ * + *

It is not recommended to include either the minimum or maximum items of the input stream.

+ *
+ * @param searchCrit the desired search criteria. + * @return a discrete CDF array of m+1 double ranks (or cumulative probabilities) on the interval [0.0, 1.0]. + * @throws IllegalArgumentException if sketch is empty. + */ + double[] getCDF(long[] splitPoints, QuantileSearchCriteria searchCrit); + + /** + * Returns the maximum item of the stream. This is provided for convenience and may be different from the + * item returned by getQuantile(1.0). + * + * @return the maximum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + long getMaxItem(); + + /** + * Returns the minimum item of the stream. This is provided for convenience and may be different from the + * item returned by getQuantile(0.0). + * + * @return the minimum item of the stream + * @throws IllegalArgumentException if sketch is empty. + */ + long getMinItem(); + + /** + * This is equivalent to {@link #getPMF(long[], QuantileSearchCriteria) getPMF(splitPoints, INCLUSIVE)} + * @param splitPoints an array of m unique, monotonically increasing items. + * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. + * @throws IllegalArgumentException if sketch is empty. + */ + default double[] getPMF(long[] splitPoints) { + return getPMF(splitPoints, INCLUSIVE); + } + + /** + * Returns an approximation to the Probability Mass Function (PMF) of the input stream + * as an array of probability masses as doubles on the interval [0.0, 1.0], + * given a set of splitPoints. + * + *

The resulting approximations have a probabilistic guarantee that can be obtained from the + * getNormalizedRankError(true) function.

+ * + * @param splitPoints an array of m unique, monotonically increasing items + * (of the same type as the input items) + * that divide the item input domain into m+1 consecutive, non-overlapping intervals. + *
+ *

Each interval except for the end intervals starts with a split point and ends with the next split + * point in sequence.

+ * + *

The first interval starts below the lowest item retained by the sketch + * corresponding to a zero rank or zero probability, and ends with the first split point.

+ * + *

The last (m+1)th interval starts with the last split point and ends after the last + * item retained by the sketch corresponding to a rank or probability of 1.0.

+ * + *

The sum of the probability masses of all (m+1) intervals is 1.0.

+ * + *

If the search criterion is:

+ * + *
    + *
  • INCLUSIVE, and the upper split point of an interval equals an item retained by the sketch, the interval + * will include that item. If the lower split point equals an item retained by the sketch, the interval will exclude + * that item.
  • + *
  • EXCLUSIVE, and the upper split point of an interval equals an item retained by the sketch, the interval + * will exclude that item. If the lower split point equals an item retained by the sketch, the interval will include + * that item.
  • + *
+ * + *

It is not recommended to include either the minimum or maximum items of the input stream.

+ *
+ * @param searchCrit the desired search criteria. + * @return a PMF array of m+1 probability masses as doubles on the interval [0.0, 1.0]. + * @throws IllegalArgumentException if sketch is empty. + */ + double[] getPMF(long[] splitPoints, QuantileSearchCriteria searchCrit); + + /** + * This is equivalent to {@link #getQuantile(double, QuantileSearchCriteria) getQuantile(rank, INCLUSIVE)} + * @param rank the given normalized rank, a double in the range [0.0, 1.0]. + * @return the approximate quantile given the normalized rank. + * @throws IllegalArgumentException if sketch is empty. + */ + default long getQuantile(double rank) { + return getQuantile(rank, INCLUSIVE); + } + + /** + * Gets the approximate quantile of the given normalized rank and the given search criterion. + * + * @param rank the given normalized rank, a double in the range [0.0, 1.0]. + * @param searchCrit If INCLUSIVE, the given rank includes all quantiles ≤ + * the quantile directly corresponding to the given rank. + * If EXCLUSIVE, the given rank includes all quantiles < + * the quantile directly corresponding to the given rank. + * @return the approximate quantile given the normalized rank. + * @throws IllegalArgumentException if sketch is empty. + * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria + */ + long getQuantile(double rank, QuantileSearchCriteria searchCrit); + + /** + * Gets the lower bound of the quantile confidence interval in which the quantile of the + * given rank exists. + * + *

Although it is possible to estimate the probability that the true quantile + * exists within the quantile confidence interval specified by the upper and lower quantile bounds, + * it is not possible to guarantee the width of the quantile confidence interval + * as an additive or multiplicative percent of the true quantile.

+ * + * @param rank the given normalized rank + * @return the lower bound of the quantile confidence interval in which the quantile of the + * given rank exists. + * @throws IllegalArgumentException if sketch is empty. + */ + long getQuantileLowerBound(double rank); + + /** + * Gets the upper bound of the quantile confidence interval in which the true quantile of the + * given rank exists. + * + *

Although it is possible to estimate the probability that the true quantile + * exists within the quantile confidence interval specified by the upper and lower quantile bounds, + * it is not possible to guarantee the width of the quantile confidence interval + * as an additive or multiplicative percent of the true quantile.

+ * + * @param rank the given normalized rank + * @return the upper bound of the quantile confidence interval in which the true quantile of the + * given rank exists. + * @throws IllegalArgumentException if sketch is empty. + */ + long getQuantileUpperBound(double rank); + + /** + * This is equivalent to {@link #getQuantiles(double[], QuantileSearchCriteria) getQuantiles(ranks, INCLUSIVE)} + * @param ranks the given array of normalized ranks, each of which must be + * in the interval [0.0,1.0]. + * @return an array of quantiles corresponding to the given array of normalized ranks. + * @throws IllegalArgumentException if sketch is empty. + */ + default long[] getQuantiles(double[] ranks) { + return getQuantiles(ranks, INCLUSIVE); + } + + /** + * Gets an array of quantiles from the given array of normalized ranks. + * + * @param ranks the given array of normalized ranks, each of which must be + * in the interval [0.0,1.0]. + * @param searchCrit if INCLUSIVE, the given ranks include all quantiles ≤ + * the quantile directly corresponding to each rank. + * @return an array of quantiles corresponding to the given array of normalized ranks. + * @throws IllegalArgumentException if sketch is empty. + * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria + */ + long[] getQuantiles(double[] ranks, QuantileSearchCriteria searchCrit); + + /** + * This is equivalent to {@link #getRank(long, QuantileSearchCriteria) getRank(quantile, INCLUSIVE)} + * @param quantile the given quantile + * @return the normalized rank corresponding to the given quantile + * @throws IllegalArgumentException if sketch is empty. + */ + default double getRank(long quantile) { + return getRank(quantile, INCLUSIVE); + } + + /** + * Gets the normalized rank corresponding to the given quantile. + * + * @param quantile the given quantile + * @param searchCrit if INCLUSIVE the given quantile is included into the rank.
+ * @return the normalized rank corresponding to the given quantile + * @throws IllegalArgumentException if sketch is empty. + * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria + */ + double getRank(long quantile, QuantileSearchCriteria searchCrit); + + /** + * This is equivalent to {@link #getRanks(long[], QuantileSearchCriteria) getRanks(quantiles, INCLUSIVE)} + * @param quantiles the given array of quantiles + * @return an array of normalized ranks corresponding to the given array of quantiles. + * @throws IllegalArgumentException if sketch is empty. + */ + default double[] getRanks(long[] quantiles) { + return getRanks(quantiles, INCLUSIVE); + } + + /** + * Gets an array of normalized ranks corresponding to the given array of quantiles and the given + * search criterion. + * + * @param quantiles the given array of quantiles + * @param searchCrit if INCLUSIVE, the given quantiles include the rank directly corresponding to each quantile. + * @return an array of normalized ranks corresponding to the given array of quantiles. + * @throws IllegalArgumentException if sketch is empty. + * @see org.apache.datasketches.quantilescommon.QuantileSearchCriteria + */ + double[] getRanks(long[] quantiles, QuantileSearchCriteria searchCrit); + + /** + * Returns the current number of bytes this Sketch would require if serialized. + * @return the number of bytes this sketch would require if serialized. + */ + int getSerializedSizeBytes(); + + /** + * Gets the sorted view of this sketch + * @return the sorted view of this sketch + */ + LongsSortedView getSortedView(); + + /** + * Gets the iterator for this sketch, which is not sorted. + * @return the iterator for this sketch + */ + QuantilesLongsSketchIterator iterator(); + + /** + * Returns a byte array representation of this sketch. + * @return a byte array representation of this sketch. + */ + byte[] toByteArray(); + + /** + * Updates this sketch with the given item. + * @param item from a stream of items. 
NaNs are ignored. + */ + void update(long item); + +} diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsSketchIterator.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsSketchIterator.java new file mode 100644 index 000000000..7ed0d9805 --- /dev/null +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesLongsSketchIterator.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.quantilescommon; + +/** + * The quantiles sketch iterator for primitive type long. + * @see QuantilesSketchIterator + * @author Zac Blanco + */ +public interface QuantilesLongsSketchIterator extends QuantilesSketchIterator { + + /** + * Gets the long quantile at the current index. + * + *

Don't call this before calling next() for the first time + * or after getting false from next().

+ * + * @return the long quantile at the current index. + */ + long getQuantile(); + +} + diff --git a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java index 75798c20f..529fd386d 100644 --- a/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java +++ b/src/main/java/org/apache/datasketches/quantilescommon/QuantilesUtil.java @@ -67,6 +67,21 @@ public static final void checkDoublesSplitPointsOrder(final double[] values) { } } + /** + * Checks the sequential validity of the given array of long values. + * They must be unique and monotonically increasing. + * @param values the given array of long values + */ + public static void checkLongsSplitPointsOrder(final long[] values) { + Objects.requireNonNull(values); + final int len = values.length; + for (int j = 0; j < len - 1; j++) { + if (values[j] < values[j + 1]) { continue; } + throw new SketchesArgumentException( + "Values must be unique and monotonically increasing."); + } + } + /** * Checks the sequential validity of the given array of float values. * They must be unique, monotonically increasing and not NaN. diff --git a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java index e587cd633..feeba7739 100644 --- a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java +++ b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java @@ -28,7 +28,7 @@ * This abstract class provides a single place to define and document the public API * for the Relative Error Quantiles Sketch. * - * @see + * @see * Sketching Quantiles and Ranks Tutorial * * @author Lee Rhodes @@ -89,11 +89,23 @@ public static double getRSE(final int k, final double rank, final boolean hra, f @Override public abstract float getQuantileLowerBound(double rank); + /** + * Gets an approximate lower bound of the quantile associated with the given rank.
+ * @param rank the given normalized rank, a number between 0 and 1.0. + * @param numStdDev the number of standard deviations. Must be 1, 2, or 3. + * @return an approximate lower bound quantile, if it exists. + */ public abstract float getQuantileLowerBound(double rank, int numStdDev); @Override public abstract float getQuantileUpperBound(double rank); + /** + * Gets an approximate upper bound of the quantile associated with the given rank. + * @param rank the given normalized rank, a number between 0 and 1.0. + * @param numStdDev the number of standard deviations. Must be 1, 2, or 3. + * @return an approximate upper bound quantile, if it exists. + */ public abstract float getQuantileUpperBound(double rank, int numStdDev); @Override @@ -101,7 +113,7 @@ public static double getRSE(final int k, final double rank, final boolean hra, f /** * Gets an approximate lower bound rank of the given normalized rank. - * @param rank the given rank, a number between 0 and 1.0. + * @param rank the given normalized rank, a number between 0 and 1.0. * @param numStdDev the number of standard deviations. Must be 1, 2, or 3. * @return an approximate lower bound rank. */ @@ -160,6 +172,7 @@ public boolean isReadOnly() { /** * {@inheritDoc} + * *

The parameters k, highRankAccuracy, and reqDebug will not change.

*/ @Override diff --git a/src/main/java/org/apache/datasketches/req/ReqSerDe.java b/src/main/java/org/apache/datasketches/req/ReqSerDe.java index 52b1371a9..952749deb 100644 --- a/src/main/java/org/apache/datasketches/req/ReqSerDe.java +++ b/src/main/java/org/apache/datasketches/req/ReqSerDe.java @@ -110,7 +110,8 @@ * 0 || (empty)| 0 | K | Flags |FamID=17| SerVer | PreInts = 2 | * *
-   * 

Flags:

+ * + * Flags: * Bit 0 : Endianness, reserved * Bit 1 : ReadOnly, reserved * Bit 2 : Empty diff --git a/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java b/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java index c3ef33957..b58317a9a 100644 --- a/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java +++ b/src/main/java/org/apache/datasketches/sampling/ReservoirLongsUnion.java @@ -132,10 +132,9 @@ public static ReservoirLongsUnion heapify(final Memory srcMem) { /** * Union the given sketch. - *

- * This method can be repeatedly called. If the given sketch is null it is interpreted as an empty - * sketch. - *

+ * + *

This method can be repeatedly called. If the given sketch is null it is interpreted as an empty + * sketch.

* * @param sketchIn The incoming sketch. */ diff --git a/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java b/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java index e6f361955..e12d31aa9 100644 --- a/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java +++ b/src/main/java/org/apache/datasketches/sampling/ReservoirSize.java @@ -25,6 +25,7 @@ /** * This class provides a compact representation of reservoir size by encoding it into a * fixed-point 16-bit value. + * *

The value itself is a fractional power of 2, with 5 bits of exponent and 11 bits of * mantissa. The exponent allows a choice of anywhere from 0-30, and there are 2048 possible * reservoir size values within each octave. Because reservoir size must be an integer, this diff --git a/src/main/java/org/apache/datasketches/sampling/package-info.java b/src/main/java/org/apache/datasketches/sampling/package-info.java index edfaa20a8..bbe446914 100644 --- a/src/main/java/org/apache/datasketches/sampling/package-info.java +++ b/src/main/java/org/apache/datasketches/sampling/package-info.java @@ -18,8 +18,8 @@ */ /** - *

This package is dedicated to streaming algorithms that enable fixed size, uniform sampling of - * weighted and unweighted items from a stream.

+ * This package is dedicated to streaming algorithms that enable fixed size, uniform sampling of + * weighted and unweighted items from a stream. * *

These sketches are mergeable and can be serialized and deserialized to/from a compact * form.

diff --git a/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java b/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java index 1e3408511..951bd7244 100644 --- a/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java +++ b/src/main/java/org/apache/datasketches/tdigest/TDigestDouble.java @@ -32,6 +32,7 @@ import org.apache.datasketches.memory.WritableBuffer; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.quantilescommon.QuantilesAPI; +import org.apache.datasketches.quantilescommon.QuantilesUtil; /** * t-Digest for estimating quantiles and ranks. @@ -125,7 +126,7 @@ public void merge(final TDigestDouble other) { /** * Process buffered values and merge centroids if needed */ - public void compress() { + private void compress() { if (numBuffered_ == 0) { return; } final int num = numBuffered_ + numCentroids_; final double[] values = new double[num]; @@ -277,6 +278,51 @@ public double getQuantile(final double rank) { return weightedAverage(centroidWeights_[numCentroids_ - 1], w1, maxValue_, w2); } + /** + * Returns an approximation to the Probability Mass Function (PMF) of the input stream + * given a set of split points. + * + * @param splitPoints an array of m unique, monotonically increasing values + * that divide the input domain into m+1 consecutive disjoint intervals (bins). + * + * @return an array of m+1 doubles each of which is an approximation + * to the fraction of the input stream values (the mass) that fall into one of those intervals. + * @throws SketchesStateException if sketch is empty. + */ + public double[] getPMF(final double[] splitPoints) { + final double[] buckets = getCDF(splitPoints); + for (int i = buckets.length; i-- > 1; ) { + buckets[i] -= buckets[i - 1]; + } + return buckets; + } + + /** + * Returns an approximation to the Cumulative Distribution Function (CDF), which is the + * cumulative analog of the PMF, of the input stream given a set of split points. 
+ * + * @param splitPoints an array of m unique, monotonically increasing values + * that divide the input domain into m+1 consecutive disjoint intervals. + * + * @return an array of m+1 doubles, which are a consecutive approximation to the CDF + * of the input stream given the splitPoints. The value at array position j of the returned + * CDF array is the sum of the returned values in positions 0 through j of the returned PMF + * array. This can be viewed as array of ranks of the given split points plus one more value + * that is always 1. + * @throws SketchesStateException if sketch is empty. + */ + public double[] getCDF(final double[] splitPoints) { + if (isEmpty()) { throw new SketchesStateException(QuantilesAPI.EMPTY_MSG); } + QuantilesUtil.checkDoublesSplitPointsOrder(splitPoints); + final int len = splitPoints.length + 1; + final double[] ranks = new double[len]; + for (int i = 0; i < len - 1; i++) { + ranks[i] = getRank(splitPoints[i]); + } + ranks[len - 1] = 1.0; + return ranks; + } + /** * Computes size needed to serialize the current state. 
* @return size in bytes needed to serialize this tdigest diff --git a/src/main/java/org/apache/datasketches/theta/AnotBimpl.java b/src/main/java/org/apache/datasketches/theta/AnotBimpl.java index d2161c995..e7b2c99eb 100644 --- a/src/main/java/org/apache/datasketches/theta/AnotBimpl.java +++ b/src/main/java/org/apache/datasketches/theta/AnotBimpl.java @@ -26,7 +26,6 @@ import java.util.Arrays; import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.ThetaUtil; @@ -148,11 +147,6 @@ int getRetainedEntries() { return curCount_; } - @Override - public boolean isSameResource(final Memory that) { - return false; - } - //restricted private static long[] getHashArrA(final Sketch skA) { //returns a new array diff --git a/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java b/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java index 741db2f72..e7b3ddaac 100644 --- a/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java +++ b/src/main/java/org/apache/datasketches/theta/ConcurrentHeapThetaBuffer.java @@ -94,7 +94,7 @@ private static int computeLogBufferSize(final int lgNomLongs, final long exactSi * @param hash to be propagated */ private boolean propagateToSharedSketch(final long hash) { - //noinspection StatementWithEmptyBody + //no inspection StatementWithEmptyBody while (localPropagationInProgress.get()) { } //busy wait until previous propagation completed localPropagationInProgress.set(true); @@ -108,7 +108,7 @@ private boolean propagateToSharedSketch(final long hash) { * Propagates the content of the buffer as a sketch to the shared sketch */ private void propagateToSharedSketch() { - //noinspection StatementWithEmptyBody + //no inspection StatementWithEmptyBody while (localPropagationInProgress.get()) { } //busy wait until previous propagation completed 
diff --git a/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java b/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java index 49a8140c3..cdc843f8b 100644 --- a/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java +++ b/src/main/java/org/apache/datasketches/theta/ConcurrentSharedThetaSketch.java @@ -21,6 +21,7 @@ import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.datasketches.common.MemoryStatus; import org.apache.datasketches.memory.WritableMemory; /** @@ -30,7 +31,7 @@ * * @author eshcar */ -interface ConcurrentSharedThetaSketch { +interface ConcurrentSharedThetaSketch extends MemoryStatus { long NOT_SINGLE_HASH = -1L; double MIN_ERROR = 0.0000001; @@ -63,8 +64,8 @@ static long computeExactLimit(long k, double error) { void endPropagation(AtomicBoolean localPropagationInProgress, boolean isEager); /** - * Returns the value of the volatile theta manged by the shared sketch - * @return the value of the volatile theta manged by the shared sketch + * Returns the value of the volatile theta managed by the shared sketch + * @return the value of the volatile theta managed by the shared sketch */ long getVolatileTheta(); @@ -124,10 +125,10 @@ boolean propagate(final AtomicBoolean localPropagationInProgress, final Sketch s // //For the external user all of the below methods can be obtained by casting the shared //sketch to UpdateSketch. However, these methods here also act as an alias so that an - //attempt to access these methods from the local buffer will be divered to the shared + //attempt to access these methods from the local buffer will be diverted to the shared //sketch. 
- //From Sketch + //From Sketch and MemoryStatus int getCompactBytes(); @@ -139,10 +140,6 @@ boolean propagate(final AtomicBoolean localPropagationInProgress, final Sketch s double getUpperBound(int numStdDev); - boolean hasMemory(); - - boolean isDirect(); - boolean isEmpty(); boolean isEstimationMode(); diff --git a/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java b/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java index de0e6e43b..0f69ec3c2 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java @@ -109,12 +109,12 @@ public long getThetaLong() { @Override public boolean hasMemory() { - return true; + return mem_ != null; } @Override public boolean isDirect() { - return mem_.isDirect(); + return hasMemory() ? mem_.isDirect() : false; } @Override @@ -132,7 +132,7 @@ public boolean isOrdered() { @Override public boolean isSameResource(final Memory that) { - return mem_.isSameResource(that); + return hasMemory() ? mem_.isSameResource(that) : false; } @Override diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java index a1ac53c6d..ad9051a08 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java @@ -323,6 +323,10 @@ UpdateReturnState hashUpdate(final long hash) { memReqSvr_ = (memReqSvr_ == null) ? wmem_.getMemoryRequestServer() : memReqSvr_; + if (memReqSvr_ == null) { //in case the MRS is not enabled or null. 
+ throw new SketchesArgumentException("Out of Memory, MemoryRequestServer is null, cannot expand."); + } + final WritableMemory newDstMem = memReqSvr_.request(wmem_,reqBytes); moveAndResize(wmem_, preambleLongs, lgArrLongs, newDstMem, tgtLgArrLongs, thetaLong); diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java index a3ffebc14..fb2aed2a5 100644 --- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java +++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java @@ -144,12 +144,12 @@ public long getThetaLong() { @Override public boolean hasMemory() { - return true; + return wmem_ != null; } @Override public boolean isDirect() { - return wmem_.isDirect(); + return hasMemory() ? wmem_.isDirect() : false; } @Override @@ -159,7 +159,7 @@ public boolean isEmpty() { @Override public boolean isSameResource(final Memory that) { - return wmem_.isSameResource(that); + return hasMemory() ? 
wmem_.isSameResource(that) : false; } @Override diff --git a/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java b/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java index ae481a425..8f6e4972a 100644 --- a/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/EmptyCompactSketch.java @@ -91,16 +91,6 @@ public long getThetaLong() { return Long.MAX_VALUE; } - @Override - public boolean hasMemory() { - return false; - } - - @Override - public boolean isDirect() { - return false; - } - @Override public boolean isEmpty() { return true; diff --git a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java index 479aa3eeb..f394e9303 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java @@ -102,16 +102,6 @@ public long getThetaLong() { return thetaLong_; } - @Override - public boolean hasMemory() { - return false; - } - - @Override - public boolean isDirect() { - return false; - } - @Override public boolean isEmpty() { return empty_; diff --git a/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java b/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java index 1cc6d75cd..49734a9e8 100644 --- a/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java @@ -66,16 +66,6 @@ public int getCurrentBytes() { return (preLongs + dataLongs) << 3; } - @Override - public boolean isDirect() { - return false; - } - - @Override - public boolean hasMemory() { - return false; - } - //UpdateSketch @Override diff --git a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java index 509ec2f93..fc81d1124 100644 --- 
a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java +++ b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java @@ -336,14 +336,24 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds dstMem, compactCache); } + @Override + public boolean hasMemory() { + return wmem_ != null; + } + @Override public boolean hasResult() { - return wmem_ != null ? wmem_.getInt(RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0; + return hasMemory() ? wmem_.getInt(RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0; + } + + @Override + public boolean isDirect() { + return hasMemory() ? wmem_.isDirect() : false; } @Override public boolean isSameResource(final Memory that) { - return wmem_ != null ? wmem_.isSameResource(that) : false; + return hasMemory() ? wmem_.isSameResource(that) : false; } @Override diff --git a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java index 562be982c..e1d9262e6 100644 --- a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java +++ b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java @@ -37,6 +37,7 @@ /** * This class defines the preamble data structure and provides basic utilities for some of the key * fields. + * *

The intent of the design of this class was to isolate the detailed knowledge of the bit and * byte layout of the serialized form of the sketches derived from the Sketch class into one place. * This allows the possibility of the introduction of different serialization @@ -126,7 +127,7 @@ * 3 ||----------------------Start of Hash Table of longs---------------------------------| *

* - *

Union objects require 32 bytes of preamble plus a non-compact array of longs representing a + *

Union objects require 32 bytes of preamble plus a non-compact array of longs representing a * hash table.

* *
diff --git a/src/main/java/org/apache/datasketches/theta/Rebuilder.java b/src/main/java/org/apache/datasketches/theta/Rebuilder.java
index 07093f652..b6e3de342 100644
--- a/src/main/java/org/apache/datasketches/theta/Rebuilder.java
+++ b/src/main/java/org/apache/datasketches/theta/Rebuilder.java
@@ -84,7 +84,7 @@ static final void quickSelectAndRebuild(final WritableMemory mem, final int prea
   }
 
   /**
-   * Moves me (the entire sketch) to a new larger Memory location and rebuilds the hash table.
+   * Moves me (the entire updatable sketch) to a new larger Memory location and rebuilds the hash table.
    * This assumes a Memory preamble of standard form with the correct value of thetaLong.
    * Afterwards, the caller must update the local Memory reference, lgArrLongs
    * and hashTableThreshold from the dstMemory and free the source Memory.
diff --git a/src/main/java/org/apache/datasketches/theta/SetOperation.java b/src/main/java/org/apache/datasketches/theta/SetOperation.java
index 4d8ebf0c4..b89ca9703 100644
--- a/src/main/java/org/apache/datasketches/theta/SetOperation.java
+++ b/src/main/java/org/apache/datasketches/theta/SetOperation.java
@@ -25,6 +25,7 @@
 import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE;
 
 import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.MemoryStatus;
 import org.apache.datasketches.common.SketchesArgumentException;
 import org.apache.datasketches.memory.Memory;
 import org.apache.datasketches.memory.WritableMemory;
@@ -35,7 +36,7 @@
  *
  * @author Lee Rhodes
  */
-public abstract class SetOperation {
+public abstract class SetOperation implements MemoryStatus {
   static final int CONST_PREAMBLE_LONGS = 3;
 
   SetOperation() {}
@@ -237,20 +238,6 @@ public static int getMaxAnotBResultBytes(final int nomEntries) {
    */
   public abstract Family getFamily();
 
-  /**
-   * Returns true if the backing resource of this is identical with the backing resource
-   * of that. The capacities must be the same.  If this is a region,
-   * the region offset must also be the same.
-   *
-   * 

Note: Only certain set operators during stateful operations can be serialized. - * Only when they are stored into Memory will this be relevant.

- * - * @param that A different non-null object - * @return true if the backing resource of this is the same as the backing resource - * of that. - */ - public abstract boolean isSameResource(Memory that); - //restricted /** diff --git a/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java b/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java index d788418dc..4a35cf67d 100644 --- a/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java +++ b/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java @@ -66,7 +66,7 @@ public SetOperationBuilder() { * Sets the Maximum Nominal Entries (max K) for this set operation. The effective value of K of the result of a * Set Operation can be less than max K, but never greater. * The minimum value is 16 and the maximum value is 67,108,864, which is 2^26. - * @param nomEntries Nominal Entres + * @param nomEntries Nominal Entries * This will become the ceiling power of 2 if it is not a power of 2. 
* @return this SetOperationBuilder */ diff --git a/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java b/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java index a4bac21c9..3cfc13b1e 100644 --- a/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java +++ b/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java @@ -343,16 +343,6 @@ public double getUpperBound(final int numStdDev) { return 1.0; } - @Override - public boolean hasMemory() { - return false; - } - - @Override - public boolean isDirect() { - return false; - } - @Override public boolean isEmpty() { return false; diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java index cc1fd4d23..89618bc23 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketch.java +++ b/src/main/java/org/apache/datasketches/theta/Sketch.java @@ -32,6 +32,7 @@ import static org.apache.datasketches.thetacommon.HashOperations.count; import org.apache.datasketches.common.Family; +import org.apache.datasketches.common.MemoryStatus; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableMemory; @@ -44,7 +45,7 @@ * * @author Lee Rhodes */ -public abstract class Sketch { +public abstract class Sketch implements MemoryStatus { static final int DEFAULT_LG_RESIZE_FACTOR = 3; //Unique to Heap Sketch() {} @@ -292,14 +293,11 @@ public double getLowerBound(final int numStdDev) { /** * Returns the maximum number of storage bytes required for a CompactSketch with the given - * number of actual entries. Note that this assumes the worse case of the sketch in - * estimation mode, which requires storing theta and count. - * @param numberOfEntries the actual number of entries stored with the CompactSketch. + * number of actual entries. + * @param numberOfEntries the actual number of retained entries stored in the sketch. 
* @return the maximum number of storage bytes required for a CompactSketch with the given number - * of entries. - * @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int) instead} + * of retained entries. */ - @Deprecated public static int getMaxCompactSketchBytes(final int numberOfEntries) { if (numberOfEntries == 0) { return 8; } if (numberOfEntries == 1) { return 16; } @@ -311,11 +309,11 @@ public static int getMaxCompactSketchBytes(final int numberOfEntries) { * log_base2 of the number of nominal entries, which is a power of 2. * @param lgNomEntries Nominal Entries * @return the maximum number of storage bytes required for a CompactSketch with the given - * nomEntries. + * lgNomEntries. */ public static int getCompactSketchMaxBytes(final int lgNomEntries) { - return (int)((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD) - + Family.QUICKSELECT.getMaxPreLongs() * Long.BYTES; + return (int)((2 << lgNomEntries) * ThetaUtil.REBUILD_THRESHOLD + + Family.QUICKSELECT.getMaxPreLongs()) * Long.BYTES; } /** @@ -386,26 +384,12 @@ public double getUpperBound(final int numStdDev) { : getRetainedEntries(true); } - /** - * Returns true if this sketch's data structure is backed by Memory or WritableMemory. - * @return true if this sketch's data structure is backed by Memory or WritableMemory. - */ - public abstract boolean hasMemory(); - /** * Returns true if this sketch is in compact form. * @return true if this sketch is in compact form. */ public abstract boolean isCompact(); - /** - * Returns true if the this sketch's internal data structure is backed by direct (off-heap) - * Memory. - * @return true if the this sketch's internal data structure is backed by direct (off-heap) - * Memory. - */ - public abstract boolean isDirect(); - /** * See Empty * @return true if empty. 
@@ -427,18 +411,6 @@ public boolean isEstimationMode() { */ public abstract boolean isOrdered(); - /** - * Returns true if the backing resource of this is identical with the backing resource - * of that. The capacities must be the same. If this is a region, - * the region offset must also be the same. - * @param that A different non-null object - * @return true if the backing resource of this is the same as the backing resource - * of that. - */ - public boolean isSameResource(final Memory that) { - return false; - } - /** * Returns a HashIterator that can be used to iterate over the retained hash values of the * Theta sketch. @@ -605,8 +577,8 @@ public static String toString(final Memory mem) { abstract int getCurrentPreambleLongs(); /** - * Returns the Memory object if it exists, otherwise null. - * @return the Memory object if it exists, otherwise null. + * Returns the backing Memory object if it exists, otherwise null. + * @return the backing Memory object if it exists, otherwise null. */ abstract Memory getMemory(); diff --git a/src/main/java/org/apache/datasketches/theta/Sketches.java b/src/main/java/org/apache/datasketches/theta/Sketches.java index 4b1461876..2e7fa0915 100644 --- a/src/main/java/org/apache/datasketches/theta/Sketches.java +++ b/src/main/java/org/apache/datasketches/theta/Sketches.java @@ -80,29 +80,25 @@ public static int getMaxAnotBResultBytes(final int maxNomEntries) { /** * Returns the maximum number of storage bytes required for a CompactSketch with the given - * number of actual entries. Note that this assumes the worse case of the sketch in - * estimation mode, which requires storing theta and count. - * @param numberOfEntries the actual number of entries stored with the CompactSketch. + * number of actual entries. + * @param numberOfEntries the actual number of retained entries stored in the sketch. * @return the maximum number of storage bytes required for a CompactSketch with the given number - * of entries. 
- * @see Sketch#getMaxCompactSketchBytes(int) - * @deprecated as a public method. Use {@link #getCompactSketchMaxBytes(int) instead} + * of retained entries. */ - @Deprecated public static int getMaxCompactSketchBytes(final int numberOfEntries) { return Sketch.getMaxCompactSketchBytes(numberOfEntries); } /** * Returns the maximum number of storage bytes required for a CompactSketch given the configured - * number of nominal entries (power of 2). - * @param nomEntries Nominal Entries + * log_base2 of the number of nominal entries, which is a power of 2. + * @param lgNomEntries Nominal Entries * @return the maximum number of storage bytes required for a CompactSketch with the given - * nomEntries. + * lgNomEntries. * @see Sketch#getCompactSketchMaxBytes(int) */ - public static int getCompactSketchMaxBytes(final int nomEntries) { - return Sketch.getCompactSketchMaxBytes(nomEntries); + public static int getCompactSketchMaxBytes(final int lgNomEntries) { + return Sketch.getCompactSketchMaxBytes(lgNomEntries); } /** diff --git a/src/main/java/org/apache/datasketches/theta/UnionImpl.java b/src/main/java/org/apache/datasketches/theta/UnionImpl.java index 3ea5ca557..bac05de74 100644 --- a/src/main/java/org/apache/datasketches/theta/UnionImpl.java +++ b/src/main/java/org/apache/datasketches/theta/UnionImpl.java @@ -262,10 +262,22 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds minThetaLong, curCountOut, seedHash, empty, true, dstOrdered, dstOrdered, dstMem, compactCacheOut); } + @Override + public boolean hasMemory() { + return gadget_ instanceof DirectQuickSelectSketchR + ? gadget_.hasMemory() : false; + } + + @Override + public boolean isDirect() { + return gadget_ instanceof DirectQuickSelectSketchR + ? gadget_.isDirect() : false; + } + @Override public boolean isSameResource(final Memory that) { return gadget_ instanceof DirectQuickSelectSketchR - ? gadget_.getMemory().isSameResource(that) : false; + ? 
gadget_.isSameResource(that) : false; } @Override @@ -321,7 +333,7 @@ public void union(final Sketch sketchIn) { if (sketchIn.isOrdered() && (sketchIn instanceof CompactSketch)) { //Use early stop //Ordered, thus compact if (sketchIn.hasMemory()) { - final Memory skMem = ((CompactSketch) sketchIn).getMemory(); + final Memory skMem = sketchIn.getMemory(); final int preambleLongs = skMem.getByte(PREAMBLE_LONGS_BYTE) & 0X3F; for (int i = 0; i < curCountIn; i++ ) { final int offsetBytes = preambleLongs + i << 3; diff --git a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java index 882c5e2e9..cb6854b02 100644 --- a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java +++ b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java @@ -343,6 +343,7 @@ public UpdateReturnState update(final long[] data) { /** * All potential updates converge here. + * *

Don't ever call this unless you really know what you are doing!

* * @param hash the given input hash value. A hash of zero or Long.MAX_VALUE is ignored. diff --git a/src/main/java/org/apache/datasketches/thetacommon/SetOperationCornerCases.java b/src/main/java/org/apache/datasketches/thetacommon/SetOperationCornerCases.java index 20dd6ee7d..d9fda48bb 100644 --- a/src/main/java/org/apache/datasketches/thetacommon/SetOperationCornerCases.java +++ b/src/main/java/org/apache/datasketches/thetacommon/SetOperationCornerCases.java @@ -34,8 +34,11 @@ public class SetOperationCornerCases { /** Intersection actions */ public enum IntersectAction { + /** Degenerate{MinTheta, 0, F} */ DEGEN_MIN_0_F("D", "Degenerate{MinTheta, 0, F}"), + /** Empty{1.0, 0, T} */ EMPTY_1_0_T("E", "Empty{1.0, 0, T}"), + /** Full Intersect */ FULL_INTERSECT("I", "Full Intersect"); private String actionId; @@ -46,10 +49,18 @@ private IntersectAction(final String actionId, final String actionDescription) { this.actionDescription = actionDescription; } + /** + * Gets the Action ID + * @return the actionId + */ public String getActionId() { return actionId; } + /** + * Gets the Action Description + * @return the actionDescription + */ public String getActionDescription() { return actionDescription; } @@ -57,11 +68,17 @@ public String getActionDescription() { /** A not B actions */ public enum AnotbAction { + /** Sketch A Exactly */ SKETCH_A("A", "Sketch A Exactly"), + /** Trim Sketch A by MinTheta */ TRIM_A("TA", "Trim Sketch A by MinTheta"), + /** Degenerate{MinTheta, 0, F} */ DEGEN_MIN_0_F("D", "Degenerate{MinTheta, 0, F}"), + /** Degenerate{ThetaA, 0, F} */ DEGEN_THA_0_F("DA", "Degenerate{ThetaA, 0, F}"), + /** Empty{1.0, 0, T} */ EMPTY_1_0_T("E", "Empty{1.0, 0, T}"), + /** Full AnotB */ FULL_ANOTB("N", "Full AnotB"); private String actionId; @@ -72,24 +89,42 @@ private AnotbAction(final String actionId, final String actionDescription) { this.actionDescription = actionDescription; } + /** + * Gets the Action ID + * @return the actionId + */ public String 
getActionId() { return actionId; } + /** + * Gets the action description + * @return the action description + */ public String getActionDescription() { return actionDescription; } } + /** List of union actions */ public enum UnionAction { + /** Sketch A Exactly */ SKETCH_A("A", "Sketch A Exactly"), + /** Trim Sketch A by MinTheta */ TRIM_A("TA", "Trim Sketch A by MinTheta"), + /** Sketch B Exactly */ SKETCH_B("B", "Sketch B Exactly"), + /** Trim Sketch B by MinTheta */ TRIM_B("TB", "Trim Sketch B by MinTheta"), + /** Degenerate{MinTheta, 0, F} */ DEGEN_MIN_0_F("D", "Degenerate{MinTheta, 0, F}"), + /** Degenerate{ThetaA, 0, F} */ DEGEN_THA_0_F("DA", "Degenerate{ThetaA, 0, F}"), + /** Degenerate{ThetaB, 0, F} */ DEGEN_THB_0_F("DB", "Degenerate{ThetaB, 0, F}"), + /** Empty{1.0, 0, T} */ EMPTY_1_0_T("E", "Empty{1.0, 0, T}"), + /** Full Union */ FULL_UNION("N", "Full Union"); private String actionId; @@ -100,49 +135,74 @@ private UnionAction(final String actionId, final String actionDescription) { this.actionDescription = actionDescription; } + /** + * Gets the action ID + * @return the actionId + */ public String getActionId() { return actionId; } + /** + * Gets the action description + * @return the actionDescription + */ public String getActionDescription() { return actionDescription; } } + /** List of corner cases */ public enum CornerCase { + /** Empty Empty */ Empty_Empty(055, "A{ 1.0, 0, T} ; B{ 1.0, 0, T}", IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.EMPTY_1_0_T), + /** Empty Exact */ Empty_Exact(056, "A{ 1.0, 0, T} ; B{ 1.0,>0, F}", IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.SKETCH_B), + /** Empty Estimation */ Empty_Estimation(052, "A{ 1.0, 0, T} ; B{<1.0,>0, F", IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.SKETCH_B), + /** Empty Degen */ Empty_Degen(050, "A{ 1.0, 0, T} ; B{<1.0, 0, F}", IntersectAction.EMPTY_1_0_T, AnotbAction.EMPTY_1_0_T, UnionAction.DEGEN_THB_0_F), + /** Exact Empty */ 
Exact_Empty(065, "A{ 1.0,>0, F} ; B{ 1.0, 0, T}", IntersectAction.EMPTY_1_0_T, AnotbAction.SKETCH_A, UnionAction.SKETCH_A), + /** Exact Exact */ Exact_Exact(066, "A{ 1.0,>0, F} ; B{ 1.0,>0, F}", IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION), + /** Exact Estimation */ Exact_Estimation(062, "A{ 1.0,>0, F} ; B{<1.0,>0, F}", IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION), + /** Exact Degen */ Exact_Degen(060, "A{ 1.0,>0, F} ; B{<1.0, 0, F}", IntersectAction.DEGEN_MIN_0_F, AnotbAction.TRIM_A, UnionAction.TRIM_A), + /** Estimation_Empty */ Estimation_Empty(025, "A{<1.0,>0, F} ; B{ 1.0, 0, T}", IntersectAction.EMPTY_1_0_T, AnotbAction.SKETCH_A, UnionAction.SKETCH_A), + /** Estimation_Exact */ Estimation_Exact(026, "A{<1.0,>0, F} ; B{ 1.0,>0, F}", IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION), + /** Estimation_Estimation */ Estimation_Estimation(022, "A{<1.0,>0, F} ; B{<1.0,>0, F}", IntersectAction.FULL_INTERSECT, AnotbAction.FULL_ANOTB, UnionAction.FULL_UNION), + /** Estimation_Degen */ Estimation_Degen(020, "A{<1.0,>0, F} ; B{<1.0, 0, F}", IntersectAction.DEGEN_MIN_0_F, AnotbAction.TRIM_A, UnionAction.TRIM_A), + /** Degen_Empty */ Degen_Empty(005, "A{<1.0, 0, F} ; B{ 1.0, 0, T}", IntersectAction.EMPTY_1_0_T, AnotbAction.DEGEN_THA_0_F, UnionAction.DEGEN_THA_0_F), + /** Degen_Exact */ Degen_Exact(006, "A{<1.0, 0, F} ; B{ 1.0,>0, F}", IntersectAction.DEGEN_MIN_0_F, AnotbAction.DEGEN_THA_0_F, UnionAction.TRIM_B), + /** Degen_Estimation */ Degen_Estimation(002, "A{<1.0, 0, F} ; B{<1.0,>0, F}", IntersectAction.DEGEN_MIN_0_F, AnotbAction.DEGEN_MIN_0_F, UnionAction.TRIM_B), + /** Degen_Degen */ Degen_Degen(000, "A{<1.0, 0, F} ; B{<1.0, 0, F}", IntersectAction.DEGEN_MIN_0_F, AnotbAction.DEGEN_MIN_0_F, UnionAction.DEGEN_MIN_0_F); @@ -168,27 +228,52 @@ private CornerCase(final int caseId, final String caseDescription, this.unionAction = unionAction; } + /** + * Gets the case ID + * 
@return the caseId + */ public int getId() { return caseId; } + /** + * Gets the case description + * @return the caseDescription + */ public String getCaseDescription() { return caseDescription; } + /** + * Gets the intersect action + * @return the intersectAction + */ public IntersectAction getIntersectAction() { return intersectAction; } + /** + * Gets the AnotB action + * @return the anotbAction + */ public AnotbAction getAnotbAction() { return anotbAction; } + /** + * Gets the union action + * @return the unionAction + */ public UnionAction getUnionAction() { return unionAction; } //See checkById test in /tuple/MiscTest. + /** + * Converts a case ID to its CornerCase + * @param id the case ID + * @return the corresponding CornerCase + */ public static CornerCase caseIdToCornerCase(final int id) { final CornerCase cc = caseIdToCornerCaseMap.get(id); if (cc == null) { @@ -198,12 +283,29 @@ public static CornerCase caseIdToCornerCase(final int id) { } } //end of enum CornerCase + /** + * Creates the CornerCase ID + * @param thetaLongA the theta of A as a long + * @param countA the count of A + * @param emptyA true if A is empty + * @param thetaLongB the theta of B as a long + * @param countB the count of B + * @param emptyB true if B is empty + * @return the Corner Case ID + */ public static int createCornerCaseId( final long thetaLongA, final int countA, final boolean emptyA, final long thetaLongB, final int countB, final boolean emptyB) { return (sketchStateId(emptyA, countA, thetaLongA) << 3) | sketchStateId(emptyB, countB, thetaLongB); } + /** + * Returns the sketch state ID + * @param isEmpty true if empty + * @param numRetained the number of items retained + * @param thetaLong the value of theta as a long + * @return the sketch state ID + */ public static int sketchStateId(final boolean isEmpty, final int numRetained, final long thetaLong) { // assume thetaLong = MAX if empty return (((thetaLong == MAX) || isEmpty) ? 4 : 0) | ((numRetained > 0) ? 2 : 0) | (isEmpty ? 
1 : 0); diff --git a/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java b/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java index 44d1d9cc0..9b0ca33cb 100644 --- a/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java +++ b/src/main/java/org/apache/datasketches/tuple/SerializerDeserializer.java @@ -32,8 +32,17 @@ public final class SerializerDeserializer { * Defines the sketch classes that this SerializerDeserializer can handle. */ @SuppressWarnings("javadoc") - public static enum SketchType { QuickSelectSketch, CompactSketch, ArrayOfDoublesQuickSelectSketch, - ArrayOfDoublesCompactSketch, ArrayOfDoublesUnion } + public static enum SketchType { + /** QuickSelectSketch */ + QuickSelectSketch, + /** CompactSketch */ + CompactSketch, + /** ArrayOfDoublesQuickSelectSketch */ + ArrayOfDoublesQuickSelectSketch, + /** ArrayOfDoublesCompactSketch */ + ArrayOfDoublesCompactSketch, + /** ArrayOfDoublesUnion */ + ArrayOfDoublesUnion } static final int TYPE_BYTE_OFFSET = 3; diff --git a/src/main/java/org/apache/datasketches/tuple/Union.java b/src/main/java/org/apache/datasketches/tuple/Union.java index 653312fa0..acefa2ab5 100644 --- a/src/main/java/org/apache/datasketches/tuple/Union.java +++ b/src/main/java/org/apache/datasketches/tuple/Union.java @@ -100,8 +100,7 @@ public CompactSketch union(final Sketch tupleSketch, /** * Performs a stateful union of the internal set with the given tupleSketch. * @param tupleSketch input tuple sketch to merge with the internal set. - * - *

Nulls and empty sketches are ignored.

+ * Nulls and empty sketches are ignored. */ public void union(final Sketch tupleSketch) { if (tupleSketch == null || tupleSketch.isEmpty()) { return; } diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java index e7abae0d4..a54c11afc 100644 --- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java +++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java @@ -41,24 +41,28 @@ public enum Mode { /** * The aggregation mode is the summation function. + * *

New retained value = previous retained value + incoming value

*/ Sum, /** * The aggregation mode is the minimum function. + * *

New retained value = min(previous retained value, incoming value)

*/ Min, /** * The aggregation mode is the maximum function. + * *

New retained value = max(previous retained value, incoming value)

*/ Max, /** * The aggregation mode is always one. + * *

New retained value = 1.0

*/ AlwaysOne diff --git a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java index 4c04fa2c2..72695355e 100644 --- a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java +++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java @@ -41,24 +41,28 @@ public enum Mode { /** * The aggregation mode is the summation function. + * *

New retained value = previous retained value + incoming value

*/ Sum, /** * The aggregation mode is the minimum function. + * *

New retained value = min(previous retained value, incoming value)

*/ Min, /** * The aggregation mode is the maximum function. + * *

New retained value = max(previous retained value, incoming value)

*/ Max, /** * The aggregation mode is always one. + * *

New retained value = 1

*/ AlwaysOne diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java index 91d4eade4..52f827149 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesCompactSketch.java @@ -30,6 +30,7 @@ /** * Direct Compact Sketch of type ArrayOfDoubles. + * *

This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java index 3dd019d74..7c1b1bf07 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesIntersection.java @@ -23,6 +23,7 @@ /** * Direct Intersection operation for tuple sketches of type ArrayOfDoubles. + * *

This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

@@ -43,7 +44,7 @@ final class DirectArrayOfDoublesIntersection extends ArrayOfDoublesIntersection } @Override - protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, + protected ArrayOfDoublesQuickSelectSketch createSketch(final int nomEntries, final int numValues, final long seed) { return new DirectArrayOfDoublesQuickSelectSketch(nomEntries, 0, 1f, numValues, seed, mem_); } diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java index 1b4e86904..ae1aa3dc0 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java @@ -33,6 +33,7 @@ /** * Direct QuickSelect tuple sketch of type ArrayOfDoubles. + * *

This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java index 15503fc04..dcdab1313 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesSketchIterator.java @@ -23,6 +23,7 @@ /** * Iterator over the off-heap, Direct tuple sketch of type ArrayOfDoubles (compact or hash table). + * *

This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java index 00310f534..734019632 100644 --- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java +++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesUnion.java @@ -27,6 +27,7 @@ /** * Direct Union operation for tuple sketches of type ArrayOfDoubles. + * *

This implementation uses data in a given Memory that is owned and managed by the caller. * This Memory can be off-heap, which if managed properly will greatly reduce the need for * the JVM to perform garbage collection.

diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterCrossLanguageTest.java b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterCrossLanguageTest.java new file mode 100644 index 000000000..12c6f20df --- /dev/null +++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterCrossLanguageTest.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.filters.bloomfilter; + +import static org.apache.datasketches.common.TestUtil.CHECK_CPP_FILES; +import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES; +import static org.apache.datasketches.common.TestUtil.cppPath; +import static org.apache.datasketches.common.TestUtil.javaPath; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import java.io.IOException; +import java.nio.file.Files; + +import org.apache.datasketches.memory.Memory; +import org.testng.annotations.Test; + +/** + * Serialize binary sketches to be tested by C++ code. + * Test deserialization of binary sketches serialized by C++ code. 
+ */ +public class BloomFilterCrossLanguageTest { + + @Test(groups = {GENERATE_JAVA_FILES}) + public void generatBloomFilterBinariesForCompatibilityTesting() throws IOException { + final int[] nArr = {0, 10_000, 2_000_000, 30_000_000}; + final short[] hArr = {3, 5}; + for (int n : nArr) { + for (short numHashes : hArr) { + final long configBits = Math.max(n, 1000L); // so empty still has valid bit size + BloomFilter bf = BloomFilterBuilder.createBySize(configBits, numHashes); + for (int i = 0; i < n / 10; ++i) { + bf.update(i); + } + if (n > 0) { bf.update(Float.NaN); } + assertEquals(bf.isEmpty(), n == 0); + assertTrue(bf.isEmpty() || (bf.getBitsUsed() > (n / 10))); + Files.write(javaPath.resolve("bf_n" + n + "_h" + numHashes + "_java.sk"), bf.toByteArray()); // Files.write opens, writes and closes the file + } + } + } + + @Test(groups = {CHECK_CPP_FILES}) + public void readBloomFilterBinariesForCompatibilityTesting() throws IOException { + final int[] nArr = {0, 10_000, 2_000_000, 30_000_000}; + final short[] hArr = {3, 5}; + for (int n : nArr) { + for (short numHashes : hArr) { + final byte[] bytes = Files.readAllBytes(cppPath.resolve("bf_n" + n + "_h" + numHashes + "_cpp.sk")); + final BloomFilter bf = BloomFilter.heapify(Memory.wrap(bytes)); + assertEquals(bf.isEmpty(), n == 0); + assertTrue(bf.isEmpty() || (bf.getBitsUsed() > (n / 10))); + + for (int i = 0; i < n / 10; ++i) { + assertTrue(bf.query(i)); + } + if (n > 0) { + assertTrue(bf.query(Double.NaN)); // TestNG assertion: does not depend on JVM assertions (-ea) being enabled + } + } + } + } +} diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java index dedaf9db3..25bef3643 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java +++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterTest.java @@ -24,14 +24,18 @@ import static org.testng.Assert.assertThrows; import static org.testng.Assert.assertTrue; +import java.nio.ByteOrder; + import
org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; + public class BloomFilterTest { @Test @@ -51,8 +55,8 @@ public void createNewFilterTest() throws Exception { assertFalse(bf1.isDirect()); assertFalse(bf1.isReadOnly()); - try (WritableHandle wh = WritableMemory.allocateDirect(sizeBytes)) { - final WritableMemory wmem = wh.getWritable(); + WritableMemory wmem; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(sizeBytes)).scope()) { final BloomFilter bf2 = new BloomFilter(numBits, numHashes, seed, wmem); assertTrue(bf2.isEmpty()); assertTrue(bf2.hasMemory()); diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java b/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java index 521019e62..bbedd2fb7 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java +++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java @@ -142,8 +142,8 @@ public void bitAddressOutOfBoundsNonEmptyTest() { final Memory mem = bitArrayToMemory(hba); DirectBitArrayR dba = DirectBitArrayR.wrap(mem, hba.isEmpty()); - assertThrows(AssertionError.class, () -> dba.getBit(-10)); - assertThrows(AssertionError.class, () -> dba.getBit(2048)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getBit(-10)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getBit(2048)); } @Test diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java 
b/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java index a45bcbb82..8327a0d5e 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java +++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java @@ -139,7 +139,7 @@ public void basicWritableWrapTest() { @Test public void countWritableWrappedBitsWhenDirty() { // like basicOperationTest but with setBit which does - // not neecssarily track numBitsSet_ + // not necessarily track numBitsSet_ final HeapBitArray hba = new HeapBitArray(128); assertFalse(hba.getAndSetBit(1)); assertFalse(hba.getAndSetBit(2)); @@ -172,12 +172,12 @@ public void bitAddresOutOfBoundsNonEmptyTest() { dba.getAndSetBit(i); } - assertThrows(AssertionError.class, () -> dba.getBit(-10)); - assertThrows(AssertionError.class, () -> dba.getBit(2048)); - assertThrows(AssertionError.class, () -> dba.setBit(-20)); - assertThrows(AssertionError.class, () -> dba.setBit(4096)); - assertThrows(AssertionError.class, () -> dba.getAndSetBit(-30)); - assertThrows(AssertionError.class, () -> dba.getAndSetBit(8192)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getBit(-10)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getBit(2048)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.setBit(-20)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.setBit(4096)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getAndSetBit(-30)); + assertThrows(IndexOutOfBoundsException.class, () -> dba.getAndSetBit(8192)); } @Test diff --git a/src/test/java/org/apache/datasketches/hash/MurmurHash3v2Test.java b/src/test/java/org/apache/datasketches/hash/MurmurHash3v3Test.java similarity index 82% rename from src/test/java/org/apache/datasketches/hash/MurmurHash3v2Test.java rename to src/test/java/org/apache/datasketches/hash/MurmurHash3v3Test.java index 23f369e63..8699a091a 100644 --- 
a/src/test/java/org/apache/datasketches/hash/MurmurHash3v2Test.java +++ b/src/test/java/org/apache/datasketches/hash/MurmurHash3v3Test.java @@ -28,13 +28,13 @@ import org.testng.annotations.Test; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.MurmurHash3v2; +import org.apache.datasketches.memory.internal.MurmurHash3v3; import org.apache.datasketches.memory.WritableMemory; /** * @author Lee Rhodes */ -public class MurmurHash3v2Test { +public class MurmurHash3v3Test { private Random rand = new Random(); private static final int trials = 1 << 20; @@ -154,33 +154,33 @@ private static final long[] hashV1(byte[] key, long seed) { } private static final long[] hashV2(long[] key, long seed) { - return MurmurHash3v2.hash(key, seed); + return MurmurHash3v3.hash(key, seed); } private static final long[] hashV2(int[] key2, long seed) { - return MurmurHash3v2.hash(key2, seed); + return MurmurHash3v3.hash(key2, seed); } private static final long[] hashV2(char[] key, long seed) { - return MurmurHash3v2.hash(key, seed); + return MurmurHash3v3.hash(key, seed); } private static final long[] hashV2(byte[] key, long seed) { - return MurmurHash3v2.hash(key, seed); + return MurmurHash3v3.hash(key, seed); } //V2 single primitives private static final long[] hashV2(long key, long seed, long[] out) { - return MurmurHash3v2.hash(key, seed, out); + return MurmurHash3v3.hash(key, seed, out); } // private static final long[] hashV2(double key, long seed, long[] out) { -// return MurmurHash3v2.hash(key, seed, out); +// return MurmurHash3v3.hash(key, seed, out); // } // private static final long[] hashV2(String key, long seed, long[] out) { -// return MurmurHash3v2.hash(key, seed, out); +// return MurmurHash3v3.hash(key, seed, out); // } @@ -199,7 +199,7 @@ public void offsetChecks() { for (int offset = 0; offset < 16; offset++) { int arrLen = cap - offset; - hash1 = MurmurHash3v2.hash(wmem, offset, arrLen, seed, hash1); + hash1 = MurmurHash3v3.hash(wmem, 
offset, arrLen, seed, hash1); byte[] byteArr2 = new byte[arrLen]; wmem.getByteArray(offset, byteArr2, 0, arrLen); hash2 = MurmurHash3.hash(byteArr2, seed); @@ -222,8 +222,8 @@ public void byteArrChecks() { for (int i = 0; i < j; i++) { wmem.putByte(i, (byte) (-128 + i)); } long[] hash1 = MurmurHash3.hash(in, 0); - hash2 = MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(in, seed); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(in, seed); assertEquals(hash1, hash2); assertEquals(hash1, hash3); @@ -246,8 +246,8 @@ public void charArrChecks() { for (int i = 0; i < j; i++) { wmem.putInt(i, i); } long[] hash1 = MurmurHash3.hash(in, 0); - hash2 = MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(in, seed); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(in, seed); assertEquals(hash1, hash2); assertEquals(hash1, hash3); @@ -270,8 +270,8 @@ public void intArrChecks() { for (int i = 0; i < j; i++) { wmem.putInt(i, i); } long[] hash1 = MurmurHash3.hash(in, 0); - hash2 = MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(in, seed); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(in, seed); assertEquals(hash1, hash2); assertEquals(hash1, hash3); @@ -294,8 +294,8 @@ public void longArrChecks() { for (int i = 0; i < j; i++) { wmem.putLong(i, i); } long[] hash1 = MurmurHash3.hash(in, 0); - hash2 = MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(in, seed); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(in, seed); assertEquals(hash1, hash2); assertEquals(hash1, hash3); @@ -313,8 +313,8 @@ public void longCheck() { WritableMemory wmem = WritableMemory.writableWrap(in); long[] hash1 = MurmurHash3.hash(in, 0); - hash2 = 
MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(in, seed); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(in, seed); assertEquals(hash1, hash2); assertEquals(hash1, hash3); @@ -325,62 +325,57 @@ public void checkEmptiesNulls() { long seed = 123; long[] hashOut = new long[2]; try { - MurmurHash3v2.hash(Memory.wrap(new long[0]), 0, 0, seed, hashOut); //mem empty - fail(); - } catch (final IllegalArgumentException e) { } //OK - try { - Memory mem = null; - MurmurHash3v2.hash(mem, 0, 0, seed, hashOut); //mem null + MurmurHash3v3.hash(Memory.wrap(new long[0]), 0, 0, seed, hashOut); //mem empty fail(); } catch (final IllegalArgumentException e) { } //OK try { String s = ""; - MurmurHash3v2.hash(s, seed, hashOut); //string empty + MurmurHash3v3.hash(s, seed, hashOut); //string empty fail(); } catch (final IllegalArgumentException e) { } //OK try { String s = null; - MurmurHash3v2.hash(s, seed, hashOut); //string null + MurmurHash3v3.hash(s, seed, hashOut); //string null fail(); } catch (final IllegalArgumentException e) { } //OK try { byte[] barr = new byte[0]; - MurmurHash3v2.hash(barr, seed); //byte[] empty + MurmurHash3v3.hash(barr, seed); //byte[] empty fail(); } catch (final IllegalArgumentException e) { } //OK try { byte[] barr = null; - MurmurHash3v2.hash(barr, seed); //byte[] null + MurmurHash3v3.hash(barr, seed); //byte[] null fail(); } catch (final IllegalArgumentException e) { } //OK try { char[] carr = new char[0]; - MurmurHash3v2.hash(carr, seed); //char[] empty + MurmurHash3v3.hash(carr, seed); //char[] empty fail(); } catch (final IllegalArgumentException e) { } //OK try { char[] carr = null; - MurmurHash3v2.hash(carr, seed); //char[] null + MurmurHash3v3.hash(carr, seed); //char[] null fail(); } catch (final IllegalArgumentException e) { } //OK try { int[] iarr = new int[0]; - MurmurHash3v2.hash(iarr, seed); //int[] empty + MurmurHash3v3.hash(iarr, seed); //int[] 
empty fail(); } catch (final IllegalArgumentException e) { } //OK try { int[] iarr = null; - MurmurHash3v2.hash(iarr, seed); //int[] null + MurmurHash3v3.hash(iarr, seed); //int[] null fail(); } catch (final IllegalArgumentException e) { } //OK try { long[] larr = new long[0]; - MurmurHash3v2.hash(larr, seed); //long[] empty + MurmurHash3v3.hash(larr, seed); //long[] empty fail(); } catch (final IllegalArgumentException e) { } //OK try { long[] larr = null; - MurmurHash3v2.hash(larr, seed); //long[] null + MurmurHash3v3.hash(larr, seed); //long[] null fail(); } catch (final IllegalArgumentException e) { } //OK } @@ -390,9 +385,9 @@ public void checkStringLong() { long seed = 123; long[] hashOut = new long[2]; String s = "123"; - assertTrue(MurmurHash3v2.hash(s, seed, hashOut)[0] != 0); + assertTrue(MurmurHash3v3.hash(s, seed, hashOut)[0] != 0); long v = 123; - assertTrue(MurmurHash3v2.hash(v, seed, hashOut)[0] != 0); + assertTrue(MurmurHash3v3.hash(v, seed, hashOut)[0] != 0); } @Test @@ -420,8 +415,8 @@ private static long[] checkDouble(double dbl) { WritableMemory wmem = WritableMemory.writableWrap(dataArr); long[] hash1 = MurmurHash3.hash(dataArr, 0); - hash2 = MurmurHash3v2.hash(wmem, offset, bytes, seed, hash2); - long[] hash3 = MurmurHash3v2.hash(dbl, seed, hash2); + hash2 = MurmurHash3v3.hash(wmem, offset, bytes, seed, hash2); + long[] hash3 = MurmurHash3v3.hash(dbl, seed, hash2); assertEquals(hash1, hash2); assertEquals(hash1, hash3); diff --git a/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java b/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java index 78b18c1cc..c913af378 100644 --- a/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java +++ b/src/test/java/org/apache/datasketches/hll/DirectAuxHashMapTest.java @@ -31,12 +31,11 @@ import org.apache.datasketches.common.SketchesStateException; import org.apache.datasketches.memory.DefaultMemoryRequestServer; -import org.testng.annotations.Test; - import 
org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; /** * @author Lee Rhodes @@ -48,53 +47,50 @@ public void checkGrow() { int lgConfigK = 4; TgtHllType tgtHllType = TgtHllType.HLL_4; int n = 8; //put lgConfigK == 4 into HLL mode - int bytes = HllSketch.getMaxUpdatableSerializationBytes(lgConfigK, tgtHllType); + long bytes = HllSketch.getMaxUpdatableSerializationBytes(lgConfigK, tgtHllType); HllSketch hllSketch; - try (WritableHandle handle = WritableMemory.allocateDirect(bytes, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - WritableMemory wmem = handle.getWritable(); - hllSketch = new HllSketch(lgConfigK, tgtHllType, wmem); - for (int i = 0; i < n; i++) { - hllSketch.update(i); - } - hllSketch.couponUpdate(HllUtil.pair(7, 15)); //mock extreme values - hllSketch.couponUpdate(HllUtil.pair(8, 15)); - hllSketch.couponUpdate(HllUtil.pair(9, 15)); - //println(hllSketch.toString(true, true, true, true)); - DirectHllArray dha = (DirectHllArray) hllSketch.hllSketchImpl; - assertEquals(dha.getAuxHashMap().getLgAuxArrInts(), 2); - assertTrue(hllSketch.isMemory()); - assertTrue(hllSketch.isOffHeap()); - assertTrue(hllSketch.isSameResource(wmem)); - - //Check heapify - byte[] byteArray = hllSketch.toCompactByteArray(); - HllSketch hllSketch2 = HllSketch.heapify(byteArray); - HllArray ha = (HllArray) hllSketch2.hllSketchImpl; - assertEquals(ha.getAuxHashMap().getLgAuxArrInts(), 2); - assertEquals(ha.getAuxHashMap().getAuxCount(), 3); - - //Check wrap - byteArray = hllSketch.toUpdatableByteArray(); - WritableMemory wmem2 = WritableMemory.writableWrap(byteArray); - hllSketch2 = HllSketch.writableWrap(wmem2); - //println(hllSketch2.toString(true, true, true, true)); - DirectHllArray dha2 = (DirectHllArray) hllSketch2.hllSketchImpl; - assertEquals(dha2.getAuxHashMap().getLgAuxArrInts(), 
2); - assertEquals(dha2.getAuxHashMap().getAuxCount(), 3); - - //Check grow to on-heap - hllSketch.couponUpdate(HllUtil.pair(10, 15)); //puts it over the edge, must grow - //println(hllSketch.toString(true, true, true, true)); - dha = (DirectHllArray) hllSketch.hllSketchImpl; - assertEquals(dha.getAuxHashMap().getLgAuxArrInts(), 3); - assertEquals(dha.getAuxHashMap().getAuxCount(), 4); - assertTrue(hllSketch.isMemory()); - assertFalse(hllSketch.isOffHeap()); - assertFalse(hllSketch.isSameResource(wmem)); - } catch (final Exception e) { - throw new RuntimeException(e); + WritableMemory wmem = WritableMemory.allocateDirect(bytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + + hllSketch = new HllSketch(lgConfigK, tgtHllType, wmem); + for (int i = 0; i < n; i++) { + hllSketch.update(i); } + hllSketch.couponUpdate(HllUtil.pair(7, 15)); //mock extreme values + hllSketch.couponUpdate(HllUtil.pair(8, 15)); + hllSketch.couponUpdate(HllUtil.pair(9, 15)); + //println(hllSketch.toString(true, true, true, true)); + DirectHllArray dha = (DirectHllArray) hllSketch.hllSketchImpl; + assertEquals(dha.getAuxHashMap().getLgAuxArrInts(), 2); + assertTrue(hllSketch.isMemory()); + assertTrue(hllSketch.isOffHeap()); + assertTrue(hllSketch.isSameResource(wmem)); + + //Check heapify + byte[] byteArray = hllSketch.toCompactByteArray(); + HllSketch hllSketch2 = HllSketch.heapify(byteArray); + HllArray ha = (HllArray) hllSketch2.hllSketchImpl; + assertEquals(ha.getAuxHashMap().getLgAuxArrInts(), 2); + assertEquals(ha.getAuxHashMap().getAuxCount(), 3); + + //Check wrap + byteArray = hllSketch.toUpdatableByteArray(); + WritableMemory wmem2 = WritableMemory.writableWrap(byteArray); + hllSketch2 = HllSketch.writableWrap(wmem2); + //println(hllSketch2.toString(true, true, true, true)); + DirectHllArray dha2 = (DirectHllArray) hllSketch2.hllSketchImpl; + assertEquals(dha2.getAuxHashMap().getLgAuxArrInts(), 2); + assertEquals(dha2.getAuxHashMap().getAuxCount(), 3); + + //Check 
grow to on-heap + hllSketch.couponUpdate(HllUtil.pair(10, 15)); //puts it over the edge, must grow + //println(hllSketch.toString(true, true, true, true)); + dha = (DirectHllArray) hllSketch.hllSketchImpl; + assertEquals(dha.getAuxHashMap().getLgAuxArrInts(), 3); + assertEquals(dha.getAuxHashMap().getAuxCount(), 4); + assertTrue(hllSketch.isMemory()); + assertFalse(hllSketch.isOffHeap()); + assertFalse(hllSketch.isSameResource(wmem)); + assertFalse(wmem.isAlive()); } @Test diff --git a/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java b/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java index 38cbc4977..09eebabf7 100644 --- a/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java +++ b/src/test/java/org/apache/datasketches/hll/DirectCouponListTest.java @@ -27,8 +27,10 @@ import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; + +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; /** @@ -69,11 +71,8 @@ private static void promotions(int lgConfigK, int n, TgtHllType tgtHllType, bool //println("DIRECT"); byte[] barr1; - WritableMemory wmem = null; - try (WritableHandle hand = WritableMemory.allocateDirect(bytes)) { - wmem = hand.getWritable(); - //byte[] byteArr = new byte[bytes]; - //WritableMemory wmem = WritableMemory.wrap(byteArr); + WritableMemory wmem; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(bytes)).scope()) { hllSketch = new HllSketch(lgConfigK, tgtHllType, wmem); assertTrue(hllSketch.isEmpty()); diff --git a/src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java b/src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java index cd1e0cbcf..17f3d0d0f 100644 --- a/src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java +++ 
b/src/test/java/org/apache/datasketches/hll/PreambleUtilTest.java @@ -109,7 +109,6 @@ public void checkCorruptMemoryInput() { HllSketch sk = new HllSketch(12); byte[] memObj = sk.toCompactByteArray(); WritableMemory wmem = WritableMemory.writableWrap(memObj); - long memAdd = wmem.getCumulativeOffset(0); HllSketch bad; //checkFamily @@ -148,7 +147,6 @@ public void checkCorruptMemoryInput() { for (int i = 1; i <= 15; i++) { sk.update(i); } memObj = sk.toCompactByteArray(); wmem = WritableMemory.writableWrap(memObj); - memAdd = wmem.getCumulativeOffset(0); //check wrong PreInts and SET try { @@ -162,7 +160,6 @@ public void checkCorruptMemoryInput() { for (int i = 15; i <= 1000; i++) { sk.update(i); } memObj = sk.toCompactByteArray(); wmem = WritableMemory.writableWrap(memObj); - memAdd = wmem.getCumulativeOffset(0); //check wrong PreInts and HLL try { @@ -179,7 +176,6 @@ public void checkExtractFlags() { int bytes = HllSketch.getMaxUpdatableSerializationBytes(4, TgtHllType.HLL_4); WritableMemory wmem = WritableMemory.allocate(bytes); Object memObj = wmem.getArray(); - long memAdd = wmem.getCumulativeOffset(0L); HllSketch sk = new HllSketch(4, TgtHllType.HLL_4, wmem); int flags = extractFlags(wmem); assertEquals(flags, EMPTY_FLAG_MASK); diff --git a/src/test/java/org/apache/datasketches/kll/KllCrossLanguageTest.java b/src/test/java/org/apache/datasketches/kll/KllCrossLanguageTest.java index 078f3503b..53b422b7c 100644 --- a/src/test/java/org/apache/datasketches/kll/KllCrossLanguageTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllCrossLanguageTest.java @@ -67,6 +67,16 @@ public void generateKllFloatsSketchBinaries() throws IOException { } } + @Test(groups = {GENERATE_JAVA_FILES}) + public void generateKllLongsSketchBinaries() throws IOException { + final int[] nArr = {0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000}; + for (int n: nArr) { + final KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + for (int i = 1; i <= n; i++) { sk.update(i); } + 
Files.newOutputStream(javaPath.resolve("kll_long_n" + n + "_java.sk")).write(sk.toByteArray()); + } + } + @Test(groups = {GENERATE_JAVA_FILES}) public void generateKllItemsSketchBinaries() throws IOException { final int[] nArr = {0, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000}; diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketchTest.java index 9831c2f57..7a4d061ad 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectCompactDoublesSketchTest.java @@ -110,13 +110,13 @@ public void checkDirectCompactGetDoubleItemsArray() { KllDoublesSketch sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray())); double[] itemsArr = sk2.getDoubleItemsArray(); - for (int i = 0; i < 20; i++) { assertEquals(itemsArr[i], 0F); } + for (int i = 0; i < 20; i++) { assertEquals(itemsArr[i], 0.0); } sk.update(1); sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray())); itemsArr = sk2.getDoubleItemsArray(); - for (int i = 0; i < 19; i++) { assertEquals(itemsArr[i], 0F); } - assertEquals(itemsArr[19], 1F); + for (int i = 0; i < 19; i++) { assertEquals(itemsArr[i], 0.0); } + assertEquals(itemsArr[19], 1.0); for (int i = 2; i <= 21; i++) { sk.update(i); } sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray())); @@ -169,12 +169,12 @@ public void checkMinAndMax() { try { sk2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} sk.update(1); sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray())); - assertEquals(sk2.getMaxItem(),1.0F); - assertEquals(sk2.getMinItem(),1.0F); + assertEquals(sk2.getMaxItem(),1.0); + assertEquals(sk2.getMinItem(),1.0); for (int i = 2; i <= 21; i++) { sk.update(i); } sk2 = KllDoublesSketch.wrap(Memory.wrap(sk.toByteArray())); - assertEquals(sk2.getMaxItem(),21.0F); - assertEquals(sk2.getMinItem(),1.0F); + 
assertEquals(sk2.getMaxItem(),21.0); + assertEquals(sk2.getMinItem(),1.0); } @Test diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectCompactLongsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectCompactLongsSketchTest.java new file mode 100644 index 000000000..6b57fccac --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllDirectCompactLongsSketchTest.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.kll.KllDirectLongsSketch.KllDirectCompactLongsSketch; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +public class KllDirectCompactLongsSketchTest { + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void checkRODirectUpdatable_ROandWritable() { + int k = 20; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + byte[] byteArr = KllHelper.toByteArray(sk, true); //request updatable + Memory srcMem = Memory.wrap(byteArr); //cast to Memory -> read only + KllLongsSketch sk2 = KllLongsSketch.wrap(srcMem); + assertTrue(sk2 instanceof KllDirectLongsSketch); + + assertTrue(sk2.isMemoryUpdatableFormat()); + assertTrue(sk2.isReadOnly()); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 21L); + + WritableMemory srcWmem = WritableMemory.writableWrap(byteArr); + KllLongsSketch sk3 = KllLongsSketch.writableWrap(srcWmem, memReqSvr); + assertTrue(sk3 instanceof KllDirectLongsSketch); + println(sk3.toString(true, false)); + assertFalse(sk3.isReadOnly()); + sk3.update(22); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 22L); + } + + @Test + public void checkRODirectCompact() { + int k = 20; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + Memory srcMem = Memory.wrap(sk.toByteArray()); //compact RO fmt + KllLongsSketch sk2 = KllLongsSketch.wrap(srcMem); + assertTrue(sk2 instanceof 
KllDirectCompactLongsSketch); + //println(sk2.toString(true, false)); + assertFalse(sk2.isMemoryUpdatableFormat()); + assertTrue(sk2.isReadOnly()); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 21L); + Memory srcMem2 = Memory.wrap(sk2.toByteArray()); + KllLongsSketch sk3 = KllLongsSketch.writableWrap((WritableMemory)srcMem2, memReqSvr); + assertTrue(sk3 instanceof KllDirectCompactLongsSketch); + assertFalse(sk3.isMemoryUpdatableFormat()); // check the re-wrapped sketch (sk3); sk2 was already checked above + //println(sk3.toString(true, false)); + assertTrue(sk3.isReadOnly()); + assertEquals(sk3.getMinItem(), 1L); + assertEquals(sk3.getMaxItem(), 21L); + } + + @Test + public void checkDirectCompactSingleItem() { // single-item accessor returns the item at n == 1 and must throw once n == 2 + int k = 20; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + + sk.update(1); + KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + assertTrue(sk2 instanceof KllDirectCompactLongsSketch); + //println(sk2.toString(true, false)); + assertTrue(sk2.isReadOnly()); + assertEquals(sk2.getLongSingleItem(), 1L); + + sk.update(2); + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + assertEquals(sk2.getN(), 2); + try { + sk2.getLongSingleItem(); + fail(); + } catch (SketchesArgumentException e) { } + } + + @Test + public void checkDirectCompactGetLongItemsArray() { // checks the items array of a wrapped compact sketch after 0, 1 and 21 updates + int k = 20; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + + KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + long[] itemsArr = sk2.getLongItemsArray(); + for (int i = 0; i < 20; i++) { assertEquals(itemsArr[i], 0); } + + sk.update(1); + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + itemsArr = sk2.getLongItemsArray(); + for (int i = 0; i < 19; i++) { assertEquals(itemsArr[i], 0); } + assertEquals(itemsArr[19], 1L); + + for (int i = 2; i <= 21; i++) { sk.update(i); } + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + itemsArr = sk2.getLongItemsArray(); + assertEquals(itemsArr.length, 33); + assertEquals(itemsArr[22], 21); + } + + @Test + public
void checkHeapAndDirectCompactGetRetainedItemsArray() { + int k = 20; + + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + long[] retArr = sk.getLongRetainedItemsArray(); + assertEquals(retArr.length, 0); + + KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + retArr = sk2.getLongRetainedItemsArray(); + assertEquals(retArr.length, sk.getNumRetained()); + assertEquals(retArr.length, 0); + + sk.update(1); + retArr = sk.getLongRetainedItemsArray(); + assertEquals(retArr.length, sk.getNumRetained()); + assertEquals(retArr.length, 1); + assertEquals(retArr[0], 1L); + + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + retArr = sk2.getLongRetainedItemsArray(); + assertEquals(retArr.length, sk.getNumRetained()); + assertEquals(retArr.length, 1); + assertEquals(retArr[0], 1L); + + for (int i = 2; i <= 21; i++) { sk.update(i); } + retArr = sk.getLongRetainedItemsArray(); + assertEquals(retArr.length, sk.getNumRetained()); + assertEquals(retArr.length, 11); + + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + assertEquals(retArr.length, sk2.getNumRetained()); + assertEquals(retArr.length, 11); + } + + @Test + public void checkMinAndMax() { + int k = 20; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + try { sk2.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sk2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + sk.update(1); + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + assertEquals(sk2.getMaxItem(),1L); + assertEquals(sk2.getMinItem(),1L); + for (int i = 2; i <= 21; i++) { sk.update(i); } + sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + assertEquals(sk2.getMaxItem(),21L); + assertEquals(sk2.getMinItem(),1L); + } + + @Test + public void checkQuantile() { + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(); + for (int i = 1; i <= 1000; i++) { sk1.update(i); } + 
KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk1.toByteArray())); + long med2 = sk2.getQuantile(0.5); + long med1 = sk1.getQuantile(0.5); + assertEquals(med1, med2); + println("Med1: " + med1); + println("Med2: " + med2); + } + + @Test + public void checkCompactSingleItemMerge() { + int k = 20; + KllLongsSketch skH1 = KllLongsSketch.newHeapInstance(k); //Heap with 1 (single) + skH1.update(21); + KllLongsSketch skDC1 = KllLongsSketch.wrap(Memory.wrap(skH1.toByteArray())); //Direct Compact with 1 (single) + KllLongsSketch skH20 = KllLongsSketch.newHeapInstance(k); //Heap with 20 + for (int i = 1; i <= 20; i++) { skH20.update(i); } + skH20.merge(skDC1); + assertEquals(skH20.getN(), 21); + + WritableMemory wmem = WritableMemory.allocate(1000); + KllLongsSketch skDU20 = KllLongsSketch.newDirectInstance(k, wmem, memReqSvr);//Direct Updatable with 21 + for (int i = 1; i <= 20; i++) { skDU20.update(i); } + skDU20.merge(skDC1); + assertEquals(skDU20.getN(), 21); + } + + @Test + public void printlnTest() { + println("PRINTING: " + this.getClass().getName()); + } + + private final static boolean enablePrinting = false; + + /** + * @param o the Object to println + */ + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java index 78a3b9cd5..4bfdfa4fc 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchIteratorTest.java @@ -41,7 +41,7 @@ public void oneItemSketch() { sketch.update(0); QuantilesDoublesSketchIterator it = sketch.iterator(); Assert.assertTrue(it.next()); - Assert.assertEquals(it.getQuantile(), 0f); + Assert.assertEquals(it.getQuantile(), 0); Assert.assertEquals(it.getWeight(), 1); 
Assert.assertFalse(it.next()); } diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java index 33219a806..6342ac33d 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDirectDoublesSketchTest.java @@ -189,11 +189,11 @@ public void mergeLowerK() { sketch2.update(2 * n - i - 1); } - assertEquals(sketch1.getMinItem(), 0.0f); - assertEquals(sketch1.getMaxItem(), n - 1f); + assertEquals(sketch1.getMinItem(), 0.0); + assertEquals(sketch1.getMaxItem(), n - 1.0); assertEquals(sketch2.getMinItem(), n); - assertEquals(sketch2.getMaxItem(), 2f * n - 1f); + assertEquals(sketch2.getMaxItem(), 2.0 * n - 1.0); assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false)); assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true)); @@ -613,7 +613,7 @@ public void checkWritableWrapOfCompactForm() { public void checkReadOnlyExceptions() { int k = 20; double[] dblArr = new double[0]; - double dblV = 1.0f; + double dblV = 1.0; int idx = 1; boolean bool = true; KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(k); diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchIteratorTest.java new file mode 100644 index 000000000..8be509f10 --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchIteratorTest.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.quantilescommon.QuantilesLongsSketchIterator; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class KllDirectLongsSketchIteratorTest { + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void emptySketch() { + final KllLongsSketch sketch = getDLSketch(200, 0); + QuantilesLongsSketchIterator it = sketch.iterator(); + Assert.assertFalse(it.next()); + } + + @Test + public void oneItemSketch() { + final KllLongsSketch sketch = getDLSketch(200, 0); + sketch.update(0); + QuantilesLongsSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getQuantile(), 0); + Assert.assertEquals(it.getWeight(), 1); + Assert.assertFalse(it.next()); + } + + @Test + public void bigSketches() { + for (int n = 1000; n < 100000; n += 2000) { + final KllLongsSketch sketch = getDLSketch(200, 0); + for (int i = 0; i < n; i++) { + sketch.update(i); + } + QuantilesLongsSketchIterator it = sketch.iterator(); + int count = 0; + int weight = 0; + while (it.next()) { + count++; + weight += (int)it.getWeight(); + } + Assert.assertEquals(count, sketch.getNumRetained()); + Assert.assertEquals(weight, n); + } + } + + private static KllLongsSketch getDLSketch(final int k, final int n) { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= 
n; i++) { sk.update(i); } + byte[] byteArr = KllHelper.toByteArray(sk, true); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + + KllLongsSketch dlsk = KllLongsSketch.writableWrap(wmem, memReqSvr); + return dlsk; + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchTest.java new file mode 100644 index 000000000..f1784b7ce --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllDirectLongsSketchTest.java @@ -0,0 +1,686 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.kll.KllSketch.SketchStructure; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +public class KllDirectLongsSketchTest { + + private static final double PMF_EPS_FOR_K_8 = 0.35; // PMF rank error (epsilon) for k=8 + private static final double PMF_EPS_FOR_K_128 = 0.025; // PMF rank error (epsilon) for k=128 + private static final double PMF_EPS_FOR_K_256 = 0.013; // PMF rank error (epsilon) for k=256 + private static final double NUMERIC_NOISE_TOLERANCE = 1E-6; + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void empty() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + assertTrue(sketch.isEmpty()); + assertEquals(sketch.getN(), 0); + assertEquals(sketch.getNumRetained(), 0); + try { sketch.getRank(0); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getQuantile(0.5); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getQuantiles(new double[] {0.0, 1.0}); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getPMF(new long[] {0}); fail(); } catch (SketchesArgumentException e) {} + try { 
sketch.getCDF(new long[0]); fail(); } catch (SketchesArgumentException e) {} + assertNotNull(sketch.toString(true, true)); + assertNotNull(sketch.toString()); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantileInvalidArg() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + sketch.update(1); + sketch.getQuantile(-1.0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantilesInvalidArg() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + sketch.update(1); + sketch.getQuantiles(new double[] {2.0}); + } + + @Test + public void oneValue() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + sketch.update(1); + assertFalse(sketch.isEmpty()); + assertEquals(sketch.getN(), 1); + assertEquals(sketch.getNumRetained(), 1); + assertEquals(sketch.getRank(1, EXCLUSIVE), 0.0); + assertEquals(sketch.getRank(2, EXCLUSIVE), 1.0); + assertEquals(sketch.getMinItem(), 1L); + assertEquals(sketch.getMaxItem(), 1L); + assertEquals(sketch.getQuantile(0.5, EXCLUSIVE), 1L); + } + + @Test + public void manyValuesEstimationMode() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + final int n = 1_000_000; + + for (int i = 0; i < n; i++) { + sketch.update(i); + } + assertEquals(sketch.getN(), n); + + // test getRank + for (int i = 0; i < n; i++) { + final double trueRank = (double) i / n; + assertEquals(sketch.getRank(i), trueRank, PMF_EPS_FOR_K_256, "for value " + i); + } + + // test getPMF + final double[] pmf = sketch.getPMF(new long[] {n / 2}); // split at median + assertEquals(pmf.length, 2); + assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256); + assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256); + assertEquals(sketch.getMinItem(), 0); // min value is exact + assertEquals(sketch.getMaxItem(), n - 1L); // max value is exact + + // check at every 0.1 percentage point + final double[] ranks = new double[1001]; + final double[] 
reverseRanks = new double[1001]; // check that ordering doesn't matter + for (int i = 0; i <= 1000; i++) { + ranks[i] = (double) i / 1000; + reverseRanks[1000 - i] = ranks[i]; + } + final long[] quantiles = sketch.getQuantiles(ranks); + final long[] reverseQuantiles = sketch.getQuantiles(reverseRanks); + long previousQuantile = 0; + for (int i = 0; i <= 1000; i++) { + final long quantile = sketch.getQuantile(ranks[i]); + assertEquals(quantile, quantiles[i]); + assertEquals(quantile, reverseQuantiles[1000 - i]); + assertTrue(previousQuantile <= quantile); + previousQuantile = quantile; + } + } + + @Test + public void getRankGetCdfGetPmfConsistency() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + final int n = 1000; + final long[] values = new long[n]; + for (int i = 0; i < n; i++) { + sketch.update(i); + values[i] = i; + } + final double[] ranks = sketch.getCDF(values); + final double[] pmf = sketch.getPMF(values); + double sumPmf = 0; + for (int i = 0; i < n; i++) { + assertEquals(ranks[i], sketch.getRank(values[i]), NUMERIC_NOISE_TOLERANCE, + "rank vs CDF for value " + i); + sumPmf += pmf[i]; + assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF for value " + i); + } + sumPmf += pmf[n]; + assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE); + assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE); + } + + @Test + public void merge() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(200, 0); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + sketch2.update(2 * n - i - 1); + } + + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + + assertEquals(sketch2.getMinItem(), n); + assertEquals(sketch2.getMaxItem(), 2 * n - 1); + + sketch1.merge(sketch2); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2L * n); + assertEquals(sketch1.getMinItem(), 0); + 
assertEquals(sketch1.getMaxItem(), 2 * n - 1L); + assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeLowerK() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(256, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(128, 0); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + sketch2.update(2 * n - i - 1); + } + + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + + assertEquals(sketch2.getMinItem(), n); + assertEquals(sketch2.getMaxItem(), 2 * n - 1); + + assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false)); + assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true)); + sketch1.merge(sketch2); + + // sketch1 must get "contaminated" by the lower K in sketch2 + assertEquals(sketch1.getNormalizedRankError(false), sketch2.getNormalizedRankError(false)); + assertEquals(sketch1.getNormalizedRankError(true), sketch2.getNormalizedRankError(true)); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2 * n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), 2 * n - 1); + assertEquals(sketch1.getQuantile(0.5), n, n * PMF_EPS_FOR_K_128); + } + + @Test + public void mergeEmptyLowerK() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(256, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(128, 0); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + + // rank error should not be affected by a merge with an empty sketch with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + 
assertEquals(sketch1.getQuantile(0.5), n / 2, n / 2 * PMF_EPS_FOR_K_256); + + //merge the other way + sketch2.merge(sketch1); + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + assertEquals(sketch1.getQuantile(0.5), n / 2, n / 2 * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeExactModeLowerK() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(256, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(128, 0); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + sketch2.update(1); + + // rank error should not be affected by a merge with a sketch in exact mode with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + } + + @Test + public void mergeMinMinValueFromOther() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(200, 0); + sketch1.update(1); + sketch2.update(2); + sketch2.merge(sketch1); + assertEquals(sketch2.getMinItem(), 1L); + } + + @Test + public void mergeMinAndMaxFromOther() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final KllLongsSketch sketch2 = getUpdatableDirectLongSketch(200, 0); + int n = 1_000_000; + for (int i = 1; i <= n; i++) { + sketch1.update(i); + } + sketch2.merge(sketch1); + assertEquals(sketch2.getMinItem(), 1L); + assertEquals(sketch2.getMaxItem(), 1_000_000L); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void kTooSmall() { + getUpdatableDirectLongSketch(KllSketch.DEFAULT_M - 1, 0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void kTooLarge() { + getUpdatableDirectLongSketch(KllSketch.MAX_K + 1, 0); + } + + @Test + public void minK() { + final KllLongsSketch 
sketch = getUpdatableDirectLongSketch(KllSketch.DEFAULT_M, 0); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), KllSketch.DEFAULT_M); + assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_8); + } + + @Test + public void maxK() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(KllSketch.MAX_K, 0); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), KllSketch.MAX_K); + assertEquals(sketch.getQuantile(0.5), 500, 500 * PMF_EPS_FOR_K_256); + } + + @Test + public void serializeDeserializeEmptyViaCompactHeapify() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final byte[] bytes = sketch1.toByteArray(); //compact + final KllLongsSketch sketch2 = KllLongsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(false)); + assertTrue(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + try { sketch2.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + assertEquals(sketch2.currentSerializedSizeBytes(false), + sketch1.currentSerializedSizeBytes(false)); + } + + @Test + public void serializeDeserializeEmptyViaUpdatableWritableWrap() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final byte[] bytes = KllHelper.toByteArray(sketch1, true); + final KllLongsSketch sketch2 = + KllLongsSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); + assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(true)); + assertTrue(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), 
sketch1.getNormalizedRankError(false)); + try { sketch2.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + assertEquals(sketch2.currentSerializedSizeBytes(true), + sketch1.currentSerializedSizeBytes(true)); + } + + @Test + public void serializeDeserializeOneValueViaCompactHeapify() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + sketch1.update(1); + final byte[] bytes = sketch1.toByteArray(); + final KllLongsSketch sketch2 = KllLongsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(false)); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), 1); + assertEquals(sketch2.getN(), 1); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertTrue(sketch2.getMinItem() < Long.MAX_VALUE); + assertTrue(sketch2.getMaxItem() > Long.MIN_VALUE); + assertEquals(sketch2.currentSerializedSizeBytes(false), 8 + Long.BYTES); + } + + @Test + public void serializeDeserializeOneValueViaUpdatableWritableWrap() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + sketch1.update(1); + final byte[] bytes = KllHelper.toByteArray(sketch1, true); + final KllLongsSketch sketch2 = + KllLongsSketch.writableWrap(WritableMemory.writableWrap(bytes),memReqSvr); + assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(true)); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), 1); + assertEquals(sketch2.getN(), 1); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertEquals(sketch2.getMinItem(), 1L); + assertEquals(sketch2.getMaxItem(), 1L); + assertEquals(sketch2.currentSerializedSizeBytes(false), 8 + Long.BYTES); + assertEquals(sketch2.currentSerializedSizeBytes(true), bytes.length); + } + + @Test + public void serializeDeserializeFullViaCompactHeapify() { + final 
KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 1000); + final byte[] byteArr1 = sketch1.toByteArray(); //compact + final KllLongsSketch sketch2 = KllLongsSketch.heapify(Memory.wrap(byteArr1)); + assertEquals(byteArr1.length, sketch1.currentSerializedSizeBytes(false)); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertEquals(sketch2.getMinItem(), sketch1.getMinItem()); + assertEquals(sketch2.getMaxItem(), sketch1.getMaxItem()); + assertEquals(sketch2.currentSerializedSizeBytes(false), sketch1.currentSerializedSizeBytes(false)); + } + + @Test + public void serializeDeserializeFullViaUpdatableWritableWrap() { + final KllLongsSketch sketch1 = getUpdatableDirectLongSketch(200, 0); + final int n = 1000; + for (int i = 1; i <= n; i++) { + sketch1.update(i); + } + final byte[] bytes = KllHelper.toByteArray(sketch1, true); //updatable + final KllLongsSketch sketch2 = + KllLongsSketch.writableWrap(WritableMemory.writableWrap(bytes), memReqSvr); + assertEquals(bytes.length, sketch1.currentSerializedSizeBytes(true)); + assertFalse(sketch2.isEmpty()); + assertEquals(sketch2.getNumRetained(), sketch1.getNumRetained()); + assertEquals(sketch2.getN(), sketch1.getN()); + assertEquals(sketch2.getNormalizedRankError(false), sketch1.getNormalizedRankError(false)); + assertEquals(sketch2.getMinItem(), sketch1.getMinItem()); + assertEquals(sketch2.getMaxItem(), sketch1.getMaxItem()); + assertEquals(sketch2.currentSerializedSizeBytes(true), sketch1.currentSerializedSizeBytes(true)); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void outOfOrderSplitPoints() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 0); + sketch.update(0); + sketch.getCDF(new long[] {1, 0}); + } + + @Test + public void checkSimpleMergeDirect() { //used for 
troubleshooting + int k = 20; + int n1 = 21; + int n2 = 43; + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(k); + KllLongsSketch sk2 = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println("SK1:"); + println(sk1.toString(true, true)); + println("SK2:"); + println(sk2.toString(true, true)); + WritableMemory wmem1 = WritableMemory.writableWrap(KllHelper.toByteArray(sk1, true)); + WritableMemory wmem2 = WritableMemory.writableWrap(KllHelper.toByteArray(sk2, true)); + KllLongsSketch dsk1 = KllLongsSketch.writableWrap(wmem1, memReqSvr); + KllLongsSketch dsk2 = KllLongsSketch.writableWrap(wmem2, memReqSvr); + println("BEFORE MERGE"); + println(dsk1.toString(true, true)); + dsk1.merge(dsk2); + println("AFTER MERGE"); + println(dsk1.toString(true, true)); + } + + @Test + public void checkSketchInitializeDirectLongUpdatableMem() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: LONG FULL DIRECT FROM UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(compBytes, LONGS_SKETCH, true)); + sk = KllLongsSketch.writableWrap(wmem, memReqSvr); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21); + assertEquals(sk.getMinItem(), 1); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG EMPTY HEAPIFIED FROM 
UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(compBytes, LONGS_SKETCH, true)); + sk = KllLongsSketch.writableWrap(wmem, memReqSvr); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(compBytes, LONGS_SKETCH, true)); + sk = KllLongsSketch.writableWrap(wmem, memReqSvr); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkGetWritableMemory() { + final KllLongsSketch sketch = getUpdatableDirectLongSketch(200, 200); + assertEquals(sketch.getK(), 200); + assertEquals(sketch.getN(), 200); + assertFalse(sketch.isEmpty()); + assertTrue(sketch.isMemoryUpdatableFormat()); + 
assertFalse(sketch.isEstimationMode()); + assertTrue(sketch.isLongsSketch()); + assertFalse(sketch.isLevelZeroSorted()); + assertFalse(sketch.isDoublesSketch()); + + final WritableMemory wmem = sketch.getWritableMemory(); + final KllLongsSketch sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), 200); + assertEquals(sk.getN(), 200); + assertFalse(sk.isEmpty()); + assertFalse(sk.isMemoryUpdatableFormat()); + assertFalse(sk.isEstimationMode()); + assertTrue(sk.isLongsSketch()); + assertFalse(sk.isLevelZeroSorted()); + assertFalse(sk.isDoublesSketch()); + } + + @Test + public void checkReset() { + WritableMemory dstMem = WritableMemory.allocate(3000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n1 = sk.getN(); + long min1 = sk.getMinItem(); + long max1 = sk.getMaxItem(); + sk.reset(); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n2 = sk.getN(); + long min2 = sk.getMinItem(); + long max2 = sk.getMaxItem(); + assertEquals(n2, n1); + assertEquals(min2, min1); + assertEquals(max2, max1); + } + + @Test + public void checkHeapify() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 100; i++) { sk.update(i); } + KllLongsSketch sk2 = KllHeapLongsSketch.heapifyImpl(dstMem); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 100L); + } + + @Test + public void checkMergeKllLongsSketch() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 21; i++) { sk.update(i); } + KllLongsSketch sk2 = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++ ) { sk2.update(i + 100); } + sk.merge(sk2); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getMaxItem(), 121L); + } + + @Test + public void 
checkReverseMergeKllLongsSketch() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(20, dstMem, memReqSvr); + for (int i = 1; i <= 21; i++) { sk.update(i); } + KllLongsSketch sk2 = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++ ) { sk2.update(i + 100); } + sk2.merge(sk); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 121L); + } + + @Test + public void checkWritableWrapOfCompactForm() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++ ) { sk.update(i); } + WritableMemory srcMem = WritableMemory.writableWrap(sk.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.writableWrap(srcMem, memReqSvr); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 21L); + } + + @Test + public void checkReadOnlyExceptions() { + int k = 20; + long[] fltArr = new long[0]; + long fltV = 1; + int idx = 1; + boolean bool = true; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + KllLongsSketch sk2 = KllLongsSketch.wrap(Memory.wrap(sk.toByteArray())); + try { sk2.incN(1); fail(); } catch (SketchesArgumentException e) { } + try { sk2.incNumLevels(); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setLongItemsArray(fltArr); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setLongItemsArrayAt(idx, fltV); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setLevelZeroSorted(bool); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setMaxItem(fltV); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setMinItem(fltV); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setMinK(idx); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setN(idx); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setNumLevels(idx); fail(); } catch (SketchesArgumentException e) { } + } + + @Test(expectedExceptions = SketchesArgumentException.class) + 
public void checkMergeExceptions() { + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(20); + WritableMemory srcMem1 = WritableMemory.writableWrap(sk1.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.writableWrap(srcMem1, memReqSvr); + sk2.merge(sk1); + } + + @Test + public void checkVectorUpdate() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(20, dstMem, memReqSvr); + long[] v = new long[21]; + for (int i = 0; i < 21; i++) { v[i] = i + 1; } + sk.update(v, 0, 21); + println(sk.toString(true, true)); + int[] levelsArr = sk.getLevelsArray(SketchStructure.UPDATABLE); + assertEquals(levelsArr[0], 22); + long[] longsArr = sk.getLongItemsArray(); + assertEquals(longsArr[22], 21); + } + + @Test + public void checkWeightedUpdate() { + WritableMemory dstMem = WritableMemory.allocate(6000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(8, dstMem, memReqSvr); + for (int i = 0; i < 16; i++) { + sk.update(i + 1, 16); + } + println(sk.toString(true, true)); + assertEquals(sk.getN(), 256); + assertEquals(sk.getMaxItem(), 16L); + assertEquals(sk.getMinItem(), 1L); + } + + private static KllLongsSketch getUpdatableDirectLongSketch(int k, int n) { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = KllHelper.toByteArray(sk, true); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + KllLongsSketch dfsk = KllLongsSketch.writableWrap(wmem, memReqSvr); + return dfsk; + } + + @Test + public void checkMergeExceptionsWrongType() { + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(20); + KllDoublesSketch sk2 = KllDoublesSketch.newHeapInstance(20); + try { sk1.merge(sk2); fail(); } catch (ClassCastException e) { } + try { sk2.merge(sk1); fail(); } catch (ClassCastException e) { } + } + + private final static boolean enablePrinting = false; + + /** + * @param o the Object to println + */ + private static final void 
println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchSerDeTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchSerDeTest.java index e07a395da..007cc8370 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchSerDeTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchSerDeTest.java @@ -64,7 +64,7 @@ public void serializeDeserializeEmpty() { @Test public void serializeDeserializeOneValue() { final KllDoublesSketch sk1 = KllDoublesSketch.newHeapInstance(); - sk1.update(1); + sk1.update(1.0); //from heap -> byte[] -> heap final byte[] bytes = sk1.toByteArray(); diff --git a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java index 0b3818f1f..e143577f4 100644 --- a/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllDoublesSketchTest.java @@ -165,8 +165,8 @@ public void manyValuesEstimationMode() { assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256); assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256); - assertEquals(sketch.getMinItem(), 0f); // min value is exact - assertEquals(sketch.getMaxItem(), n - 1f); // max value is exact + assertEquals(sketch.getMinItem(), 0.0); // min value is exact + assertEquals(sketch.getMaxItem(), n - 1.0); // max value is exact // check at every 0.1 percentage point final double[] fractions = new double[1001]; @@ -261,11 +261,11 @@ public void mergeLowerK() { sketch2.update(2 * n - i - 1); } - assertEquals(sketch1.getMinItem(), 0.0f); - assertEquals(sketch1.getMaxItem(), n - 1f); + assertEquals(sketch1.getMinItem(), 0.0); + assertEquals(sketch1.getMaxItem(), n - 1); assertEquals(sketch2.getMinItem(), n); - assertEquals(sketch2.getMaxItem(), 2f * n - 1.0); + assertEquals(sketch2.getMaxItem(), 2.0 * n - 1.0); 
assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false)); assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true)); @@ -306,7 +306,7 @@ public void mergeEmptyLowerK() { sketch2.merge(sketch1); assertFalse(sketch1.isEmpty()); assertEquals(sketch1.getN(), n); - assertEquals(sketch1.getMinItem(), 0f); + assertEquals(sketch1.getMinItem(), 0.0); assertEquals(sketch1.getMaxItem(), n - 1.0); assertEquals(sketch1.getQuantile(0.5), n / 2.0, n * PMF_EPS_FOR_K_256); } @@ -424,7 +424,7 @@ public void checkNewDirectInstanceAndSize() { KllDoublesSketch.newDirectInstance(wmem, memReqSvr); try { KllDoublesSketch.newDirectInstance(null, memReqSvr); fail(); } catch (NullPointerException e) { } - try { KllFloatsSketch.newDirectInstance(wmem, null); fail(); } + try { KllDoublesSketch.newDirectInstance(wmem, null); fail(); } catch (NullPointerException e) { } int updateSize = KllSketch.getMaxSerializedSizeBytes(200, 0, DOUBLES_SKETCH, true); int compactSize = KllSketch.getMaxSerializedSizeBytes(200, 0, DOUBLES_SKETCH, false); diff --git a/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java index 00028e341..9fc74d97b 100644 --- a/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllItemsSketchTest.java @@ -570,7 +570,7 @@ public void checkCDF_PDF() { } @Test - public void checkWrapCase1Floats() { + public void checkWrapCase1Items() { KllItemsSketch sk = KllItemsSketch.newHeapInstance(20, Comparator.naturalOrder(), serDe); final int n = 21; final int digits = Util.numDigits(n); diff --git a/src/test/java/org/apache/datasketches/kll/KllLongsSketchIteratorTest.java b/src/test/java/org/apache/datasketches/kll/KllLongsSketchIteratorTest.java new file mode 100644 index 000000000..a98c32c9e --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllLongsSketchIteratorTest.java @@ 
-0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import org.apache.datasketches.quantilescommon.LongsSortedViewIterator; +import org.apache.datasketches.quantilescommon.QuantilesLongsSketchIterator; +import org.testng.Assert; +import org.testng.annotations.Test; + +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +public class KllLongsSketchIteratorTest { + + @Test + public void emptySketch() { + KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + QuantilesLongsSketchIterator it = sketch.iterator(); + Assert.assertFalse(it.next()); + } + + @Test + public void oneItemSketch() { + KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + QuantilesLongsSketchIterator it = sketch.iterator(); + Assert.assertTrue(it.next()); + Assert.assertEquals(it.getQuantile(), 1L); + Assert.assertEquals(it.getWeight(), 1); + Assert.assertFalse(it.next()); + } + + @Test + public void twoItemSketchForIterator() { + 
KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + sketch.update(2); + QuantilesLongsSketchIterator itr = sketch.iterator(); + assertTrue(itr.next()); + + assertEquals(itr.getQuantile(), 2L); + assertEquals(itr.getWeight(), 1); + + assertTrue(itr.next()); + + assertEquals(itr.getQuantile(), 1L); + assertEquals(itr.getWeight(), 1); + } + + @Test + public void twoItemSketchForSortedViewIterator() { + KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + sketch.update(2); + LongsSortedViewIterator itr = sketch.getSortedView().iterator(); + + assertTrue(itr.next()); + + assertEquals(itr.getQuantile(), 1L); + assertEquals(itr.getWeight(), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); + assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0); + assertEquals(itr.getNormalizedRank(INCLUSIVE), 0.5); + + assertTrue(itr.next()); + + assertEquals(itr.getQuantile(), 2L); + assertEquals(itr.getWeight(), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); + assertEquals(itr.getNormalizedRank(EXCLUSIVE), 0.5); + assertEquals(itr.getNormalizedRank(INCLUSIVE), 1.0); + } + + @Test + public void bigSketches() { + for (int n = 1000; n < 100000; n += 2000) { + KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + for (int i = 0; i < n; i++) { + sketch.update(i); + } + QuantilesLongsSketchIterator it = sketch.iterator(); + int count = 0; + int weight = 0; + while (it.next()) { + count++; + weight += (int)it.getWeight(); + } + Assert.assertEquals(count, sketch.getNumRetained()); + Assert.assertEquals(weight, n); + } + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllLongsSketchSerDeTest.java b/src/test/java/org/apache/datasketches/kll/KllLongsSketchSerDeTest.java new file mode 100644 index 000000000..b9b0f800d --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllLongsSketchSerDeTest.java @@ -0,0 
+1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.Memory; +import org.testng.annotations.Test; + +public class KllLongsSketchSerDeTest { + + @Test + public void serializeDeserializeEmpty() { + final int N = 20; + + final KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(N); + //Empty: from heap -> byte[] -> heap + final byte[] bytes = sk1.toByteArray(); + final KllLongsSketch sk2 = KllLongsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sk1.getSerializedSizeBytes()); + assertTrue(sk2.isEmpty()); + assertEquals(sk2.getNumRetained(), sk1.getNumRetained()); + assertEquals(sk2.getN(), sk1.getN()); + assertEquals(sk2.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + try { sk2.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sk2.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + 
assertEquals(sk2.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); + + //Empty: from heap -> byte[] -> off heap + final KllLongsSketch sk3 = KllLongsSketch.wrap(Memory.wrap(bytes)); + assertTrue(sk3.isEmpty()); + assertEquals(sk3.getNumRetained(), sk1.getNumRetained()); + assertEquals(sk3.getN(), sk1.getN()); + assertEquals(sk3.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + try { sk3.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sk3.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + assertEquals(sk3.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); + //from heap -> byte[] -> off heap -> byte[] -> compare byte[] + final byte[] bytes2 = sk3.toByteArray(); + assertEquals(bytes, bytes2); + } + + @Test + public void serializeDeserializeOneValue() { + final KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(); + sk1.update(1); + + //from heap -> byte[] -> heap + final byte[] bytes = sk1.toByteArray(); + final KllLongsSketch sk2 = KllLongsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sk1.getSerializedSizeBytes()); + assertFalse(sk2.isEmpty()); + assertEquals(sk2.getNumRetained(), 1); + assertEquals(sk2.getN(), 1); + assertEquals(sk2.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + assertEquals(sk2.getMinItem(), 1L); + assertEquals(sk2.getMaxItem(), 1L); + assertEquals(sk2.getSerializedSizeBytes(), Long.BYTES + Long.BYTES); + + //from heap -> byte[] -> off heap + final KllLongsSketch sk3 = KllLongsSketch.wrap(Memory.wrap(bytes)); + assertFalse(sk3.isEmpty()); + assertEquals(sk3.getNumRetained(), 1); + assertEquals(sk3.getN(), 1); + assertEquals(sk3.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + assertEquals(sk3.getMinItem(), 1L); + assertEquals(sk3.getMaxItem(), 1L); + assertEquals(sk3.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); + //from heap -> byte[] -> off heap -> byte[] -> compare byte[] + final byte[] bytes2 = 
sk3.toByteArray(); + assertEquals(bytes, bytes2); + } + + @Test + public void serializeDeserializeMultipleValues() { + final KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(); + final int n = 1000; + for (int i = 0; i < n; i++) { + sk1.update(i); + } + assertEquals(sk1.getMinItem(), 0); + assertEquals(sk1.getMaxItem(), 999L); + + //from heap -> byte[] -> heap + final byte[] bytes = sk1.toByteArray(); + final KllLongsSketch sk2 = KllLongsSketch.heapify(Memory.wrap(bytes)); + assertEquals(bytes.length, sk1.getSerializedSizeBytes()); + assertFalse(sk2.isEmpty()); + assertEquals(sk2.getNumRetained(), sk1.getNumRetained()); + assertEquals(sk2.getN(), sk1.getN()); + assertEquals(sk2.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + assertEquals(sk2.getMinItem(), sk1.getMinItem()); + assertEquals(sk2.getMaxItem(), sk1.getMaxItem()); + assertEquals(sk2.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); + + //from heap -> byte[] -> off heap + final KllLongsSketch sk3 = KllLongsSketch.wrap(Memory.wrap(bytes)); + assertFalse(sk3.isEmpty()); + assertEquals(sk3.getNumRetained(), sk1.getNumRetained()); + assertEquals(sk3.getN(), sk1.getN()); + assertEquals(sk3.getNormalizedRankError(false), sk1.getNormalizedRankError(false)); + assertEquals(sk3.getMinItem(), sk1.getMinItem()); + assertEquals(sk3.getMaxItem(), sk1.getMaxItem()); + assertEquals(sk3.getSerializedSizeBytes(), sk1.getSerializedSizeBytes()); + //from heap -> byte[] -> off heap -> byte[] -> compare byte[] + final byte[] bytes2 = sk3.toByteArray(); + assertEquals(bytes, bytes2); + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllLongsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllLongsSketchTest.java new file mode 100644 index 000000000..1e4c1004a --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllLongsSketchTest.java @@ -0,0 +1,719 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import static java.lang.Math.min; +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.quantilescommon.LongsSortedView; +import org.apache.datasketches.quantilescommon.LongsSortedViewIterator; +import org.testng.annotations.Test; + +public class KllLongsSketchTest { + private static final String LS = System.getProperty("line.separator"); + private static final double PMF_EPS_FOR_K_8 = KllSketch.getNormalizedRankError(8, true); + private static final double PMF_EPS_FOR_K_128 = KllSketch.getNormalizedRankError(128, true); + private static final 
double PMF_EPS_FOR_K_256 = KllSketch.getNormalizedRankError(256, true); + private static final double NUMERIC_NOISE_TOLERANCE = 1E-6; + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void empty() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + assertTrue(sketch.isEmpty()); + assertEquals(sketch.getN(), 0); + assertEquals(sketch.getNumRetained(), 0); + try { sketch.getRank(0); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getMinItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getQuantile(0.5); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getQuantiles(new double[] {0}); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getPMF(new long[] {0}); fail(); } catch (SketchesArgumentException e) {} + try { sketch.getCDF(new long[] {0}); fail(); } catch (SketchesArgumentException e) {} + assertNotNull(sketch.toString(true, true)); + assertNotNull(sketch.toString()); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantileInvalidArg() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + sketch.getQuantile(-1.0); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void getQuantilesInvalidArg() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + sketch.getQuantiles(new double[] {2.0}); + } + + @Test + public void oneValue() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(1); + assertFalse(sketch.isEmpty()); + assertEquals(sketch.getN(), 1); + assertEquals(sketch.getNumRetained(), 1); + assertEquals(sketch.getRank(0L, EXCLUSIVE), 0.0); + assertEquals(sketch.getRank(1L, EXCLUSIVE), 0.0); + assertEquals(sketch.getRank(2L, EXCLUSIVE), 1.0); + assertEquals(sketch.getRank(0L, 
INCLUSIVE), 0.0); + assertEquals(sketch.getRank(1L, INCLUSIVE), 1.0); + assertEquals(sketch.getRank(2L, INCLUSIVE), 1.0); + assertEquals(sketch.getMinItem(), 1L); + assertEquals(sketch.getMaxItem(), 1L); + assertEquals(sketch.getQuantile(0.5, EXCLUSIVE), 1L); + assertEquals(sketch.getQuantile(0.5, INCLUSIVE), 1L); + } + + @Test + public void tenValues() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 10; i++) { sketch.update(i); } + assertFalse(sketch.isEmpty()); + assertEquals(sketch.getN(), 10); + assertEquals(sketch.getNumRetained(), 10); + for (int i = 1; i <= 10; i++) { + assertEquals(sketch.getRank(i, EXCLUSIVE), (i - 1) / 10.0); + assertEquals(sketch.getRank(i, INCLUSIVE), i / 10.0); + } + final long[] qArr = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + double[] rOut = sketch.getRanks(qArr); //inclusive + for (int i = 0; i < qArr.length; i++) { + assertEquals(rOut[i], (i + 1) / 10.0); + } + rOut = sketch.getRanks(qArr, EXCLUSIVE); //exclusive + for (int i = 0; i < qArr.length; i++) { + assertEquals(rOut[i], i / 10.0); + } + + for (int i = 0; i <= 10; i++) { //ranks 0.0, 0.1, ..., 1.0; was "i >= 10", which never executed + double rank = i/10.0; + double q = rank == 1.0 ? i : i + 1; + assertEquals(sketch.getQuantile(rank, EXCLUSIVE), q); + q = rank == 0 ? 
i + 1 : i; + assertEquals(sketch.getQuantile(rank, INCLUSIVE), q); + } + + { + // getQuantile() and getQuantiles() equivalence EXCLUSIVE + final long[] quantiles = + sketch.getQuantiles(new double[] {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, EXCLUSIVE); + for (int i = 0; i <= 10; i++) { + assertEquals(sketch.getQuantile(i / 10.0, EXCLUSIVE), quantiles[i]); + } + } + { + // getQuantile() and getQuantiles() equivalence INCLUSIVE + final long[] quantiles = + sketch.getQuantiles(new double[] {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0}, INCLUSIVE); + for (int i = 0; i <= 10; i++) { + assertEquals(sketch.getQuantile(i / 10.0, INCLUSIVE), quantiles[i]); + } + } + } + + @Test + public void manyValuesEstimationMode() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + final int n = 1_000_000; + + for (int i = 0; i < n; i++) { + sketch.update(i); + } + assertEquals(sketch.getN(), n); + + // test getRank + for (int i = 0; i < n; i++) { + final double trueRank = (double) i / n; + assertEquals(sketch.getRank(i), trueRank, PMF_EPS_FOR_K_256, "for value " + i); + } + + // test getPMF + final double[] pmf = sketch.getPMF(new long[] {n / 2}); // split at median + assertEquals(pmf.length, 2); + assertEquals(pmf[0], 0.5, PMF_EPS_FOR_K_256); + assertEquals(pmf[1], 0.5, PMF_EPS_FOR_K_256); + + assertEquals(sketch.getMinItem(), 0); // min value is exact + assertEquals(sketch.getMaxItem(), n - 1); // max value is exact + + // check at every 0.1 percentage point + final double[] fractions = new double[1001]; + final double[] reverseFractions = new double[1001]; // check that ordering doesn't matter + for (int i = 0; i <= 1000; i++) { + fractions[i] = (double) i / 1000; + reverseFractions[1000 - i] = fractions[i]; + } + final long[] quantiles = sketch.getQuantiles(fractions); + final long[] reverseQuantiles = sketch.getQuantiles(reverseFractions); + double previousQuantile = 0; + for (int i = 0; i <= 1000; i++) { + final double quantile = 
sketch.getQuantile(fractions[i]); + assertEquals(quantile, quantiles[i]); + assertEquals(quantile, reverseQuantiles[1000 - i]); + assertTrue(previousQuantile <= quantile); + previousQuantile = quantile; + } + } + + @Test + public void getRankGetCdfGetPmfConsistency() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + final int n = 1000; + final long[] values = new long[n]; + for (int i = 0; i < n; i++) { + sketch.update(i); + values[i] = i; + } + { // inclusive = false (default) + final double[] ranks = sketch.getCDF(values); + final double[] pmf = sketch.getPMF(values); + double sumPmf = 0; + for (int i = 0; i < n; i++) { + assertEquals(ranks[i], sketch.getRank(values[i]), NUMERIC_NOISE_TOLERANCE, + "rank vs CDF for value " + i); + sumPmf += pmf[i]; + assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF for value " + i); + } + sumPmf += pmf[n]; + assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE); + assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE); + } + { // inclusive = true + final double[] ranks = sketch.getCDF(values, INCLUSIVE); + final double[] pmf = sketch.getPMF(values, INCLUSIVE); + double sumPmf = 0; + for (int i = 0; i < n; i++) { + assertEquals(ranks[i], sketch.getRank(values[i], INCLUSIVE), NUMERIC_NOISE_TOLERANCE, + "rank vs CDF for value " + i); + sumPmf += pmf[i]; + assertEquals(ranks[i], sumPmf, NUMERIC_NOISE_TOLERANCE, "CDF vs PMF for value " + i); + } + sumPmf += pmf[n]; + assertEquals(sumPmf, 1.0, NUMERIC_NOISE_TOLERANCE); + assertEquals(ranks[n], 1.0, NUMERIC_NOISE_TOLERANCE); + } + } + + @Test + public void merge() { + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(); + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + sketch2.update(2 * n - i - 1); + } + + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), (n - 1)); + + assertEquals(sketch2.getMinItem(), n); + 
assertEquals(sketch2.getMaxItem(), (2 * n - 1)); + + sketch1.merge(sketch2); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2L * n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), (2 * n - 1)); + assertEquals(sketch1.getQuantile(0.5), n, 2 * n * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeLowerK() { + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(256); + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(128); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + sketch2.update(2 * n - i - 1); + } + + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1L); + + assertEquals(sketch2.getMinItem(), n); + assertEquals(sketch2.getMaxItem(), 2L * n - 1L); + + assertTrue(sketch1.getNormalizedRankError(false) < sketch2.getNormalizedRankError(false)); + assertTrue(sketch1.getNormalizedRankError(true) < sketch2.getNormalizedRankError(true)); + sketch1.merge(sketch2); + + // sketch1 must get "contaminated" by the lower K in sketch2 + assertEquals(sketch1.getNormalizedRankError(false), sketch2.getNormalizedRankError(false)); + assertEquals(sketch1.getNormalizedRankError(true), sketch2.getNormalizedRankError(true)); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), 2 * n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), 2L * n - 1L); + assertEquals(sketch1.getQuantile(0.5), n, 2L * n * PMF_EPS_FOR_K_128); + } + + @Test + public void mergeEmptyLowerK() { + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(256); + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(128); + final int n = 10_000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + + // rank error should not be affected by a merge with an empty sketch with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + 
assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + assertEquals(sketch1.getQuantile(0.5), n / 2, n * PMF_EPS_FOR_K_256); + + //merge the other way + sketch2.merge(sketch1); + assertFalse(sketch1.isEmpty()); + assertEquals(sketch1.getN(), n); + assertEquals(sketch1.getMinItem(), 0); + assertEquals(sketch1.getMaxItem(), n - 1); + assertEquals(sketch1.getQuantile(0.5), n / 2, n * PMF_EPS_FOR_K_256); + } + + @Test + public void mergeExactModeLowerK() { + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(256); + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(128); + final int n = 10000; + for (int i = 0; i < n; i++) { + sketch1.update(i); + } + sketch2.update(1); + + // rank error should not be affected by a merge with a sketch in exact mode with lower K + final double rankErrorBeforeMerge = sketch1.getNormalizedRankError(true); + sketch1.merge(sketch2); + assertEquals(sketch1.getNormalizedRankError(true), rankErrorBeforeMerge); + } + + @Test + public void mergeMinMinValueFromOther() { + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(); + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(); + sketch1.update(1); + sketch2.update(2); + sketch2.merge(sketch1); + assertEquals(sketch2.getMinItem(), 1); + } + + @Test + public void mergeMinAndMaxFromOther() { + final KllLongsSketch sketch1 = KllLongsSketch.newHeapInstance(); + for (int i = 1; i <= 1_000_000; i++) { + sketch1.update(i); + } + final KllLongsSketch sketch2 = KllLongsSketch.newHeapInstance(10); + sketch2.merge(sketch1); + assertEquals(sketch2.getMinItem(), 1); + assertEquals(sketch2.getMaxItem(), 1_000_000); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void kTooSmall() { + KllLongsSketch.newHeapInstance(KllSketch.DEFAULT_M - 1); + } + + 
@Test(expectedExceptions = SketchesArgumentException.class) + public void kTooLarge() { + KllLongsSketch.newHeapInstance(KllSketch.MAX_K + 1); + } + + @Test + public void minK() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(KllSketch.DEFAULT_M); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), KllSketch.DEFAULT_M); + assertEquals(sketch.getQuantile(0.5), 500, 1000 * PMF_EPS_FOR_K_8); + } + + @Test + public void maxK() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(KllSketch.MAX_K); + for (int i = 0; i < 1000; i++) { + sketch.update(i); + } + assertEquals(sketch.getK(), KllSketch.MAX_K); + assertEquals(sketch.getQuantile(0.5), 500, 1000 * PMF_EPS_FOR_K_256); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void outOfOrderSplitPoints() { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + sketch.update(0); + sketch.getCDF(new long[] {1L, 0L}); + } + + @Test + public void checkReset() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n1 = sk.getN(); + long min1 = sk.getMinItem(); //items are long; compare them exactly, no widening to double + long max1 = sk.getMaxItem(); + sk.reset(); + for (int i = 1; i <= 100; i++) { sk.update(i); } + long n2 = sk.getN(); + long min2 = sk.getMinItem(); + long max2 = sk.getMaxItem(); + assertEquals(n2, n1); + assertEquals(min2, min1); + assertEquals(max2, max1); + } + + @Test + public void checkReadOnlyUpdate() { + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(20); + Memory mem = Memory.wrap(sk1.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.wrap(mem); + try { sk2.update(1); fail(); } catch (SketchesArgumentException e) { } + } + + @Test + public void checkNewDirectInstanceAndSize() { + WritableMemory wmem = WritableMemory.allocate(3000); + KllLongsSketch.newDirectInstance(wmem, memReqSvr); + try { KllLongsSketch.newDirectInstance(null, memReqSvr); fail(); } + catch (NullPointerException 
e) { } + try { KllLongsSketch.newDirectInstance(wmem, null); fail(); } + catch (NullPointerException e) { } + int updateSize = KllSketch.getMaxSerializedSizeBytes(200, 0, LONGS_SKETCH, true); + int compactSize = KllSketch.getMaxSerializedSizeBytes(200, 0, LONGS_SKETCH, false); + assertTrue(compactSize < updateSize); + } + + @Test + public void sortedView() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + sk.update(3); + sk.update(1); + sk.update(2); + + LongsSortedView view = sk.getSortedView(); + LongsSortedViewIterator itr = view.iterator(); + assertEquals(itr.next(), true); + assertEquals(itr.getQuantile(), 1); + assertEquals(itr.getWeight(), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 0); + assertEquals(itr.getNaturalRank(INCLUSIVE), 1); + assertEquals(itr.next(), true); + assertEquals(itr.getQuantile(), 2); + assertEquals(itr.getWeight(), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 1); + assertEquals(itr.getNaturalRank(INCLUSIVE), 2); + assertEquals(itr.next(), true); + assertEquals(itr.getQuantile(), 3); + assertEquals(itr.getWeight(), 1); + assertEquals(itr.getNaturalRank(EXCLUSIVE), 2); + assertEquals(itr.getNaturalRank(INCLUSIVE), 3); + assertEquals(itr.next(), false); + } + + @Test //also visual + public void checkCDF_PDF() { + final double[] cdfI = {.25, .50, .75, 1.0, 1.0 }; + final double[] cdfE = {0.0, .25, .50, .75, 1.0 }; + final double[] pmfI = {.25, .25, .25, .25, 0.0 }; + final double[] pmfE = {0.0, .25, .25, .25, .25 }; + final double toll = 1E-10; + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(); + final long[] doublesIn = {10, 20, 30, 40}; + for (int i = 0; i < doublesIn.length; i++) { sketch.update(doublesIn[i]); } + long[] sp = new long[] { 10, 20, 30, 40 }; + println("SplitPoints:"); + for (int i = 0; i < sp.length; i++) { + printf("%10d", sp[i]); + } + println(""); + println("INCLUSIVE:"); + double[] cdf = sketch.getCDF(sp, INCLUSIVE); + double[] pmf = sketch.getPMF(sp, INCLUSIVE); + printf("%10s%10s" 
+ LS, "CDF", "PMF"); + for (int i = 0; i < cdf.length; i++) { + printf("%10.2f%10.2f" + LS, cdf[i], pmf[i]); + assertEquals(cdf[i], cdfI[i], toll); + assertEquals(pmf[i], pmfI[i], toll); + } + println("EXCLUSIVE"); + cdf = sketch.getCDF(sp, EXCLUSIVE); + pmf = sketch.getPMF(sp, EXCLUSIVE); + printf("%10s%10s" + LS, "CDF", "PMF"); + for (int i = 0; i < cdf.length; i++) { + printf("%10.2f%10.2f" + LS, cdf[i], pmf[i]); + assertEquals(cdf[i], cdfE[i], toll); + assertEquals(pmf[i], pmfE[i], toll); + } + } + + @Test + public void checkWrapCase1Doubles() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + + Memory mem = Memory.wrap(sk.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.wrap(mem); + + assertTrue(mem.isReadOnly()); + assertTrue(sk2.isReadOnly()); + assertFalse(sk2.isDirect()); + } + + @Test + public void checkWritableWrapCase6And2Doubles() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + + WritableMemory wmem = WritableMemory.writableWrap(KllHelper.toByteArray(sk, true)); + KllLongsSketch sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + + assertFalse(wmem.isReadOnly()); + assertFalse(sk2.isReadOnly()); + assertFalse(sk2.isDirect()); + } + + @Test + public void checkKllSketchCase5Doubles() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + + assertFalse(wmem.isReadOnly()); + assertTrue(sk2.isReadOnly()); + assertFalse(sk2.isDirect()); + } + + @Test + public void checkKllSketchCase3Doubles() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + + Memory mem = Memory.wrap(KllHelper.toByteArray(sk, true)); + WritableMemory wmem = (WritableMemory) mem; + KllLongsSketch sk2 = 
KllLongsSketch.writableWrap(wmem, memReqSvr); + + assertTrue(wmem.isReadOnly()); + assertTrue(sk2.isReadOnly()); + assertFalse(sk2.isDirect()); + } + + @Test + public void checkKllSketchCase7Doubles() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + + Memory mem = Memory.wrap(KllHelper.toByteArray(sk, true)); + WritableMemory wmem = (WritableMemory) mem; + KllLongsSketch sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + + assertTrue(wmem.isReadOnly()); + assertTrue(sk2.isReadOnly()); + assertFalse(sk2.isDirect()); + } + + @Test + public void checkReadOnlyExceptions() { + int[] intArr = new int[0]; + int intV = 2; + int idx = 1; + KllLongsSketch sk1 = KllLongsSketch.newHeapInstance(20); + Memory mem = Memory.wrap(sk1.toByteArray()); + KllLongsSketch sk2 = KllLongsSketch.wrap(mem); + try { sk2.setLevelsArray(intArr); fail(); } catch (SketchesArgumentException e) { } + try { sk2.setLevelsArrayAt(idx,intV); fail(); } catch (SketchesArgumentException e) { } + } + + @Test + public void checkIsSameResource() { + int cap = 128; + WritableMemory wmem = WritableMemory.allocate(cap); + WritableMemory reg1 = wmem.writableRegion(0, 64); + WritableMemory reg2 = wmem.writableRegion(64, 64); + assertFalse(reg1 == reg2); + assertFalse(reg1.isSameResource(reg2)); + + WritableMemory reg3 = wmem.writableRegion(0, 64); + assertFalse(reg1 == reg3); + assertTrue(reg1.isSameResource(reg3)); + + byte[] byteArr1 = KllLongsSketch.newHeapInstance(20).toByteArray(); + reg1.putByteArray(0, byteArr1, 0, byteArr1.length); + KllLongsSketch sk1 = KllLongsSketch.wrap(reg1); + + byte[] byteArr2 = KllLongsSketch.newHeapInstance(20).toByteArray(); + reg2.putByteArray(0, byteArr2, 0, byteArr2.length); + assertFalse(sk1.isSameResource(reg2)); + + byte[] byteArr3 = KllLongsSketch.newHeapInstance(20).toByteArray(); + reg3.putByteArray(0, byteArr3, 0, byteArr3.length); + assertTrue(sk1.isSameResource(reg3)); + } + + @Test + public void 
checkSortedViewAfterReset() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + sk.update(1L); + LongsSortedView sv = sk.getSortedView(); + long dsv = sv.getQuantile(1.0, INCLUSIVE); + assertEquals(dsv, 1L); + sk.reset(); + try { sk.getSortedView(); fail(); } catch (SketchesArgumentException e) { } + } + + @Test + public void checkVectorUpdate() { + boolean withLevels = false; + boolean withLevelsAndItems = true; + int k = 20; + int n = 108; + int maxVsz = 40; //max vector size + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + int j = 1; + int rem; + while ((rem = n - j + 1) > 0) { + int vecSz = min(rem, maxVsz); + long[] v = new long[vecSz]; + for (int i = 0; i < vecSz; i++) { v[i] = j++; } + sk.update(v, 0, vecSz); + } + println(LS + "#<<< END STATE # >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + println(""); + assertEquals(sk.getN(), 108); + assertEquals(sk.getMaxItem(), 108L); + assertEquals(sk.getMinItem(), 1L); + } + + @Test + public void vectorizedUpdates() { + final int trials = 1; + final int M = 1; //number of vectors + final int N = 1000; //vector size + final int K = 256; + final long[] values = new long[N]; + long vIn = 1L; + long totN = 0; + final long startTime = System.nanoTime(); + for (int t = 0; t < trials; t++) { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(K); + for (int m = 0; m < M; m++) { + for (int n = 0; n < N; n++) { + values[n] = vIn++; //fill vector + } + sketch.update(values, 0, N); //vector input + } + totN = sketch.getN(); + assertEquals(totN, M * N); + assertEquals(sketch.getMinItem(), 1L); + assertEquals(sketch.getMaxItem(), totN); + assertEquals(sketch.getQuantile(0.5), totN / 2, totN * PMF_EPS_FOR_K_256 * 2.0); //wider tolerance + } + final long runTime = System.nanoTime() - startTime; + println("Vectorized Updates"); + printf(" Vector size : %,12d" + LS, N); + printf(" Num Vectors : %,12d" + LS, M); + printf(" Total Input : %,12d" + LS, totN); + printf(" Run Time mS : 
%,12.3f" + LS, runTime / 1e6); + final double trialTime = runTime / (1e6 * trials); + printf(" mS / Trial : %,12.3f" + LS, trialTime); + final double updateTime = runTime / (1.0 * totN * trials); + printf(" nS / Update : %,12.3f" + LS, updateTime); + } + + @Test + public void nonVectorizedUpdates() { + final int trials = 1; + final int M = 1; //number of vectors + final int N = 1000; //vector size + final int K = 256; + final long[] values = new long[N]; + long vIn = 1L; + long totN = 0; + final long startTime = System.nanoTime(); + for (int t = 0; t < trials; t++) { + final KllLongsSketch sketch = KllLongsSketch.newHeapInstance(K); + for (int m = 0; m < M; m++) { + for (int n = 0; n < N; n++) { + values[n] = vIn++; //fill vector + } + for (int i = 0; i < N; i++) { + sketch.update(values[i]); //single item input + } + } + totN = sketch.getN(); + assertEquals(totN, M * N); + assertEquals(sketch.getMinItem(), 1L); + assertEquals(sketch.getMaxItem(), totN); + assertEquals(sketch.getQuantile(0.5), totN / 2, totN * PMF_EPS_FOR_K_256 * 2.0); //wider tolerance + } + final long runTime = System.nanoTime() - startTime; + println("Non-Vectorized Updates"); //header names this test, not vectorizedUpdates() + printf(" Vector size : %,12d" + LS, N); + printf(" Num Vectors : %,12d" + LS, M); + printf(" Total Input : %,12d" + LS, totN); + printf(" Run Time mS : %,12.3f" + LS, runTime / 1e6); + final double trialTime = runTime / (1e6 * trials); + printf(" mS / Trial : %,12.3f" + LS, trialTime); + final double updateTime = runTime / (1.0 * totN * trials); + printf(" nS / Update : %,12.3f" + LS, updateTime); + } + + private final static boolean enablePrinting = false; + + /** + * @param format the format + * @param args the args + */ + private static final void printf(final String format, final Object ...args) { + if (enablePrinting) { System.out.printf(format, args); } + } + + /** + * @param o the Object to println + */ + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + 
} +} diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscDirectLongsTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscDirectLongsTest.java new file mode 100644 index 000000000..366f93f7d --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllMiscDirectLongsTest.java @@ -0,0 +1,459 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datasketches.kll; + +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.quantilescommon.LongsSortedView; +import org.apache.datasketches.quantilescommon.LongsSortedViewIterator; +import org.testng.annotations.Test; + +public class KllMiscDirectLongsTest { + static final String LS = System.getProperty("line.separator"); + private static final DefaultMemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void checkBounds() { + final KllLongsSketch kll = getDirectLongsSketch(200, 0); + for (int i = 0; i < 1000; i++) { + kll.update(i); + } + final double eps = kll.getNormalizedRankError(false); + final long est = kll.getQuantile(0.5); + final long ub = kll.getQuantileUpperBound(0.5); + final long lb = kll.getQuantileLowerBound(0.5); + assertEquals(ub, kll.getQuantile(.5 + eps)); + assertEquals(lb, kll.getQuantile(0.5 - eps)); + println("Ext : " + est); + println("UB : " + ub); + println("LB : " + lb); + final double rest = kll.getRank(est); + final double restUB = kll.getRankUpperBound(rest); + final double restLB = kll.getRankLowerBound(rest); + assertTrue(restUB - rest < (2 * eps)); + assertTrue(rest - restLB < (2 * eps)); + } + + //@Test //enable static println(..) 
for visual checking + public void visualCheckToString() { + final int k = 20; + final KllLongsSketch sk = getDirectLongsSketch(k, 0); + for (int i = 0; i < 10; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + + final KllLongsSketch sk2 = getDirectLongsSketch(k, 0); + for (int i = 0; i < 400; i++) { sk2.update(i + 1); } + println("\n" + sk2.toString(true, true)); + + sk2.merge(sk); + final String s2 = sk2.toString(true, true); + println(LS + s2); + } + + @Test + public void viewDirectCompactions() { + int k = 20; + int u = 108; + KllLongsSketch sk = getDirectLongsSketch(k, 0); + for (int i = 1; i <= u; i++) { + sk.update(i); + if (sk.levelsArr[0] == 0) { + println(sk.toString(true, true)); + sk.update(++i); + println(sk.toString(true, true)); + assertEquals(sk.getLongItemsArray()[sk.levelsArr[0]], i); + } + } + } + + @Test + public void viewCompactionAndSortedView() { + int k = 20; + KllLongsSketch sk = getDirectLongsSketch(k, 0); + show(sk, 20); + LongsSortedView sv = sk.getSortedView(); + LongsSortedViewIterator itr = sv.iterator(); + printf("%12s%12s\n", "Value", "CumWeight"); + while (itr.next()) { + long v = itr.getQuantile(); + long wt = itr.getWeight(); + printf("%12d%12d\n", v, wt); + } + } + + private static void show(final KllLongsSketch sk, int limit) { + int i = (int) sk.getN(); + for ( ; i < limit; i++) { sk.update(i + 1); } + println(sk.toString(true, true)); + } + + @Test + public void checkSketchInitializeLongHeap() { + int k = 20; //don't change this + KllLongsSketch sk; + + //println("#### CASE: LONG FULL HEAP"); + sk = getDirectLongsSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + 
assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: LONG HEAP EMPTY"); + sk = getDirectLongsSketch(k, 0); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: LONG HEAP SINGLE"); + sk = getDirectLongsSketch(k, 0); + sk.update(1); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeLongHeapifyCompactMem() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + //println("#### CASE: LONG FULL HEAPIFIED FROM COMPACT"); + sk2 = getDirectLongsSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = 
KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: LONG EMPTY HEAPIFIED FROM COMPACT"); + sk2 = getDirectLongsSketch(k, 0); + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: LONG SINGLE HEAPIFIED FROM COMPACT"); + sk2 = getDirectLongsSketch(k, 0); + sk2.update(1); + //println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + 
assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkSketchInitializeLongHeapifyUpdatableMem() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + //println("#### CASE: LONG FULL HEAPIFIED FROM UPDATABLE"); + sk2 = getDirectLongsSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2,true); + wmem = WritableMemory.writableWrap(compBytes); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + //println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + // println("#### CASE: LONG EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = getDirectLongsSketch(k, 0); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + 
assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + //println("#### CASE: LONG SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = getDirectLongsSketch(k, 0); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2,true); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem)); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test + public void checkMemoryToStringLongUpdatable() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] upBytes; + byte[] upBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: LONG FULL UPDATABLE"); + sk = getDirectLongsSketch(k, 0); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: LONG EMPTY UPDATABLE"); + sk = getDirectLongsSketch(k, 0); + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: LONG SINGLE UPDATABLE"); + sk = getDirectLongsSketch(k, 0); + sk.update(1); + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.writableWrap(wmem, memReqSvr); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + } + + @Test + public void checkSimpleMerge() { + int k = 20; + int n1 = 21; + int n2 = 21; + KllLongsSketch sk1 = getDirectLongsSketch(k, 0); + KllLongsSketch sk2 = getDirectLongsSketch(k, 0); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + println(sk1.toString(true, true)); + println(sk2.toString(true, true)); + sk1.merge(sk2); + println(sk1.toString(true, true)); + assertEquals(sk1.getMaxItem(), 121L); + assertEquals(sk1.getMinItem(), 1L); + } + + @Test + public void checkSizes() { + KllLongsSketch sk = getDirectLongsSketch(20, 0); + for (int i = 1; i <= 21; i++) { sk.update(i); } + //println(sk.toString(true, true)); + byte[] byteArr1 = KllHelper.toByteArray(sk, true); + int size1 = sk.currentSerializedSizeBytes(true); + assertEquals(size1, byteArr1.length); + byte[] byteArr2 = sk.toByteArray(); + int size2 = sk.currentSerializedSizeBytes(false); + assertEquals(size2, byteArr2.length); + } + + @Test + public void checkNewInstance() { + int k = 200; + WritableMemory dstMem = WritableMemory.allocate(3000); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(k, dstMem, memReqSvr); + for (int i = 1; i <= 10_000; i++) {sk.update(i); } + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getMaxItem(), 10000L); + //println(sk.toString(true, true)); + } + + @Test + public void checkDifferentM() { + int k = 20; + int m = 4; + WritableMemory dstMem = WritableMemory.allocate(1000); + KllLongsSketch sk = KllDirectLongsSketch.newDirectUpdatableInstance(k, m, dstMem, memReqSvr); + for (int i = 1; i <= 200; i++) {sk.update(i); } + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getMaxItem(), 200L); + } + + private static KllLongsSketch getDirectLongsSketch(final int k, final int n) { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = 
KllHelper.toByteArray(sk, true); + WritableMemory wmem = WritableMemory.writableWrap(byteArr); + KllLongsSketch dfsk = KllLongsSketch.writableWrap(wmem, memReqSvr); + return dfsk; + } + + @Test + public void printlnTest() { + String s = "PRINTING: printf in " + this.getClass().getName(); + println(s); + printf("%s\n", s); + } + + private final static boolean enablePrinting = false; + + /** + * @param format the format + * @param args the args + */ + private static final void printf(final String format, final Object ...args) { + if (enablePrinting) { System.out.printf(format, args); } + } + + /** + * @param o the Object to println + */ + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + +} diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java index e58c27419..4ce988d22 100644 --- a/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMiscDoublesTest.java @@ -100,8 +100,8 @@ public void checkHeapifyExceptions2() { @Test(expectedExceptions = SketchesArgumentException.class) public void checkHeapifyExceptions3() { KllDoublesSketch sk = KllDoublesSketch.newHeapInstance(); - sk.update(1.0f); - sk.update(2.0f); + sk.update(1.0); + sk.update(2.0); WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); wmem.putByte(0, (byte) 1); //corrupt preamble ints, should be 5 KllDoublesSketch.heapify(wmem); diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscItemsTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscItemsTest.java index acf3343d9..5f51a7f1a 100644 --- a/src/test/java/org/apache/datasketches/kll/KllMiscItemsTest.java +++ b/src/test/java/org/apache/datasketches/kll/KllMiscItemsTest.java @@ -293,7 +293,7 @@ public void checkSketchInitializeItemsHeap() { final int digits = Util.numDigits(n); KllItemsSketch sk; - 
println("#### CASE: FLOAT FULL HEAP"); + println("#### CASE: ITEM FULL HEAP"); sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); for (int i = 1; i <= n; i++) { sk.update(Util.longToFixedLengthString(i, digits)); } println(sk.toString(true, true)); @@ -310,7 +310,7 @@ public void checkSketchInitializeItemsHeap() { assertEquals(sk.getNumLevels(), 2); assertFalse(sk.isLevelZeroSorted()); - println("#### CASE: FLOAT HEAP EMPTY"); + println("#### CASE: ITEM HEAP EMPTY"); sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); println(sk.toString(true, true)); assertEquals(sk.getK(), k); @@ -326,7 +326,7 @@ public void checkSketchInitializeItemsHeap() { assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); - println("#### CASE: FLOAT HEAP SINGLE"); + println("#### CASE: ITEM HEAP SINGLE"); sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); sk.update("1"); println(sk.toString(true, true)); @@ -354,7 +354,7 @@ public void checkSketchInitializeItemsHeapifyCompactMem() { byte[] compBytes; Memory mem; - println("#### CASE: FLOAT FULL HEAPIFIED FROM COMPACT"); + println("#### CASE: ITEM FULL HEAPIFIED FROM COMPACT"); sk2 = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); for (int i = 1; i <= n; i++) { sk2.update(Util.longToFixedLengthString(i, digits)); } println(sk2.toString(true, true)); @@ -375,7 +375,7 @@ public void checkSketchInitializeItemsHeapifyCompactMem() { assertEquals(sk.getNumLevels(), 2); assertFalse(sk.isLevelZeroSorted()); - println("#### CASE: FLOAT EMPTY HEAPIFIED FROM COMPACT"); + println("#### CASE: ITEM EMPTY HEAPIFIED FROM COMPACT"); sk2 = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); //println(sk.toString(true, true)); compBytes = sk2.toByteArray(); @@ -395,7 +395,7 @@ public void checkSketchInitializeItemsHeapifyCompactMem() { assertEquals(sk.getNumLevels(), 1); assertFalse(sk.isLevelZeroSorted()); - println("#### CASE: FLOAT 
SINGLE HEAPIFIED FROM COMPACT"); + println("#### CASE: ITEM SINGLE HEAPIFIED FROM COMPACT"); sk2 = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); sk2.update("1"); //println(sk2.toString(true, true)); @@ -417,7 +417,7 @@ public void checkSketchInitializeItemsHeapifyCompactMem() { assertFalse(sk.isLevelZeroSorted()); } - //public void checkSketchInitializeFloatHeapifyUpdatableMem() Not Supported + //public void checkSketchInitializeItemHeapifyUpdatableMem() Not Supported @Test //set static enablePrinting = true for visual checking public void checkMemoryToStringItemsCompact() { @@ -431,7 +431,7 @@ public void checkMemoryToStringItemsCompact() { Memory mem; String s; - println("#### CASE: FLOAT FULL COMPACT"); + println("#### CASE: ITEM FULL COMPACT"); sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); for (int i = 1; i <= n; i++) { sk.update(Util.longToFixedLengthString(i, digits)); } compBytes = sk.toByteArray(); @@ -447,7 +447,7 @@ public void checkMemoryToStringItemsCompact() { println(s); assertEquals(compBytes, compBytes2); - println("#### CASE: FLOAT EMPTY COMPACT"); + println("#### CASE: ITEM EMPTY COMPACT"); sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); compBytes = sk.toByteArray(); mem = Memory.wrap(compBytes); @@ -462,7 +462,7 @@ public void checkMemoryToStringItemsCompact() { println(s); assertEquals(compBytes, compBytes2); - println("#### CASE: FLOAT SINGLE COMPACT"); + println("#### CASE: ITEM SINGLE COMPACT"); sk = KllItemsSketch.newHeapInstance(k, Comparator.naturalOrder(), serDe); sk.update("1"); compBytes = sk.toByteArray(); diff --git a/src/test/java/org/apache/datasketches/kll/KllMiscLongsTest.java b/src/test/java/org/apache/datasketches/kll/KllMiscLongsTest.java new file mode 100644 index 000000000..018ad91db --- /dev/null +++ b/src/test/java/org/apache/datasketches/kll/KllMiscLongsTest.java @@ -0,0 +1,790 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one 
+ * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.kll; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.kll.KllDirectLongsSketch.KllDirectCompactLongsSketch; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.MemoryRequestServer; +import org.apache.datasketches.memory.WritableMemory; +import org.apache.datasketches.quantilescommon.LongsSortedView; +import org.apache.datasketches.quantilescommon.LongsSortedViewIterator; +import org.testng.annotations.Test; + +import static org.apache.datasketches.common.Util.LS; +import static org.apache.datasketches.common.Util.bitAt; +import static org.apache.datasketches.kll.KllSketch.SketchType.LONGS_SKETCH; +import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +/** + * @author Lee Rhodes + */ +public class KllMiscLongsTest { + private final MemoryRequestServer memReqSvr = new DefaultMemoryRequestServer(); + + @Test + public void 
checkSortedViewConstruction() { + final KllLongsSketch kll = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 20; i++) { kll.update(i); } + LongsSortedView fsv = kll.getSortedView(); + long[] cumWeights = fsv.getCumulativeWeights(); + long[] values = fsv.getQuantiles(); + assertEquals(cumWeights.length, 20); + assertEquals(values.length, 20); + for (int i = 0; i < 20; i++) { + assertEquals(cumWeights[i], i + 1); + assertEquals(values[i], i + 1); + } + } + + @Test //set static enablePrinting = true for visual checking + public void checkBounds() { + final KllLongsSketch kll = KllLongsSketch.newHeapInstance(); //default k = 200 + for (int i = 0; i < 1000; i++) { + kll.update(i); + } + final double eps = kll.getNormalizedRankError(false); + final long est = kll.getQuantile(0.5); + final long ub = kll.getQuantileUpperBound(0.5); + final long lb = kll.getQuantileLowerBound(0.5); + assertEquals(ub, kll.getQuantile(.5 + eps)); + assertEquals(lb, kll.getQuantile(0.5 - eps)); + println("Ext : " + est); + println("UB : " + ub); + println("LB : " + lb); + final double rest = kll.getRank(est); + final double restUB = kll.getRankUpperBound(rest); + final double restLB = kll.getRankLowerBound(rest); + assertTrue(restUB - rest < (2 * eps)); + assertTrue(rest - restLB < (2 * eps)); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyExceptions1() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + wmem.putByte(6, (byte) 3); //corrupt with odd M + KllLongsSketch.heapify(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyExceptions2() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + wmem.putByte(0, (byte) 1); //corrupt preamble ints, should be 2 + KllLongsSketch.heapify(wmem); + } + + @Test(expectedExceptions = 
SketchesArgumentException.class) + public void checkHeapifyExceptions3() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + sk.update(1); + sk.update(2); + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + wmem.putByte(0, (byte) 1); //corrupt preamble ints, should be 5 + KllLongsSketch.heapify(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyExceptions4() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + wmem.putByte(1, (byte) 0); //corrupt SerVer, should be 1 or 2 + KllLongsSketch.heapify(wmem); + } + + @Test(expectedExceptions = SketchesArgumentException.class) + public void checkHeapifyExceptions5() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(); + WritableMemory wmem = WritableMemory.writableWrap(sk.toByteArray()); + wmem.putByte(2, (byte) 0); //corrupt FamilyID, should be 15 + KllLongsSketch.heapify(wmem); + } + + @Test //set static enablePrinting = true for visual checking + public void checkMisc() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(8); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) {} //empty + println(sk.toString(true, true)); + for (int i = 0; i < 20; i++) { sk.update(i); } + println(sk.toString(true, true)); + sk.toByteArray(); + final long[] items = sk.getLongItemsArray(); + assertEquals(items.length, 16); + final int[] levels = sk.getLevelsArray(sk.sketchStructure); + assertEquals(levels.length, 3); + assertEquals(sk.getNumLevels(), 2); + } + + @Test //set static enablePrinting = true for visual checking + public void visualCheckToString() { + final KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + int n = 21; + for (int i = 1; i <= n; i++) { sk.update(i); } + println(sk.toString(true, true)); + assertEquals(sk.getNumLevels(), 2); + assertEquals(sk.getMinItem(), 1); + assertEquals(sk.getMaxItem(), 21); + 
assertEquals(sk.getNumRetained(), 11); + + final KllLongsSketch sk2 = KllLongsSketch.newHeapInstance(20); + n = 400; + for (int i = 101; i <= n + 100; i++) { sk2.update(i); } + println(LS + sk2.toString(true, true)); + assertEquals(sk2.getNumLevels(), 5); + assertEquals(sk2.getMinItem(), 101); + assertEquals(sk2.getMaxItem(), 500); + assertEquals(sk2.getNumRetained(), 52); + + sk2.merge(sk); + println(LS + sk2.toString(true, true)); + assertEquals(sk2.getNumLevels(), 5); + assertEquals(sk2.getMinItem(), 1); + assertEquals(sk2.getMaxItem(), 500); + assertEquals(sk2.getNumRetained(), 56); + } + + @Test //set static enablePrinting = true for visual checking + public void viewHeapCompactions() { + int k = 20; + int n = 108; + boolean withLevels = false; + boolean withLevelsAndItems = true; + int compaction = 0; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= n; i++) { + sk.update(i); + if (sk.levelsArr[0] == 0) { + println(LS + "#<<< BEFORE COMPACTION # " + (++compaction) + " >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + sk.update(++i); + println(LS + "#<<< AFTER COMPACTION # " + (compaction) + " >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + assertEquals(sk.getLongItemsArray()[sk.levelsArr[0]], i); + } + } + println(LS + "#<<< END STATE # >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + println(""); + } + + @Test //set static enablePrinting = true for visual checking + public void viewDirectCompactions() { + int k = 20; + int n = 108; + boolean withLevels = false; + boolean withLevelsAndItems = true; + int compaction = 0; + int sizeBytes = KllSketch.getMaxSerializedSizeBytes(k, n, LONGS_SKETCH, true); + WritableMemory wmem = WritableMemory.allocate(sizeBytes); + KllLongsSketch sk = KllLongsSketch.newDirectInstance(k, wmem, memReqSvr); + for (int i = 1; i <= n; i++) { + sk.update(i); + if (sk.levelsArr[0] == 0) { + println(LS + "#<<< BEFORE COMPACTION # " + (++compaction) + " >>>"); + 
println(sk.toString(withLevels, withLevelsAndItems)); + sk.update(++i); + println(LS + "#<<< AFTER COMPACTION # " + (compaction) + " >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + assertEquals(sk.getLongItemsArray()[sk.levelsArr[0]], i); + } + } + println(LS + "#<<< END STATE # >>>"); + println(sk.toString(withLevels, withLevelsAndItems)); + println(""); + } + + @Test //set static enablePrinting = true for visual checking + public void viewCompactionAndSortedView() { + int n = 43; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= n; i++) { sk.update(i); } + println(sk.toString(true, true)); + LongsSortedView sv = sk.getSortedView(); + LongsSortedViewIterator itr = sv.iterator(); + println("### SORTED VIEW"); + printf("%6s %12s %12s" + LS, "Idx", "Value", "Weight"); + int i = 0; + while (itr.next()) { + long v = itr.getQuantile(); + long wt = itr.getWeight(); + printf("%6d %12d %12d" + LS, i, v, wt); + i++; + } + assertEquals(sv.getMinItem(), 1L); + assertEquals(sv.getMaxItem(), n); + } + + @Test //set static enablePrinting = true for visual checking + public void checkWeightedUpdates1() { + int k = 20; + int weight = 127; + long item = 10; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + println(sk.toString(true, true)); + sk.update(item, weight); + println(sk.toString(true, true)); + assertEquals(sk.getNumRetained(), 7); + assertEquals(sk.getN(), weight); + sk.update(item, weight); + println(sk.toString(true, true)); + assertEquals(sk.getNumRetained(), 14); + assertEquals(sk.getN(), 254); + } + + @Test //set static enablePrinting = true for visual checking + public void checkWeightedUpdates2() { + int k = 20; + int initial = 1000; + int weight = 127; + long item = 10; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= initial; i++) { sk.update(i + 1000); } + println(sk.toString(true, true)); + sk.update(item, weight); + println(sk.toString(true, true)); + 
assertEquals(sk.getNumRetained(), 65); + assertEquals(sk.getN(), 1127); + + LongsSortedViewIterator itr = sk.getSortedView().iterator(); + println("### SORTED VIEW"); + printf("%12s %12s %12s" + LS, "Value", "Weight", "NaturalRank"); + long cumWt = 0; + while (itr.next()) { + long v = itr.getQuantile(); + long wt = itr.getWeight(); + long natRank = itr.getNaturalRank(INCLUSIVE); + cumWt += wt; + assertEquals(cumWt, natRank); + printf("%12d %12d %12d" + LS, v, wt, natRank); + } + assertEquals(cumWt, sk.getN()); + } + + @Test //set static enablePrinting = true for visual checking + public void checkCreateItemsArray() { //used with weighted updates + long item = 10; + int weight = 108; + long[] itemsArr = KllLongsHelper.createItemsArray(item, weight); + assertEquals(itemsArr.length, 4); + for (int i = 0; i < itemsArr.length; i++) { itemsArr[i] = item; } + outputItems(itemsArr); + } + + private static void outputItems(long[] itemsArr) { + String[] hdr2 = {"Index", "Value"}; + String hdr2fmt = "%6s %15s" + LS; + String d2fmt = "%6d %15d" + LS; + println("ItemsArr"); + printf(hdr2fmt, (Object[]) hdr2); + for (int i = 0; i < itemsArr.length; i++) { + printf(d2fmt, i, itemsArr[i]); + } + println(""); + } + + @Test //set static enablePrinting = true for visual checking + public void checkCreateLevelsArray() { //used with weighted updates + int weight = 108; + int[] levelsArr = KllHelper.createLevelsArray(weight); + assertEquals(levelsArr.length, 8); + int[] correct = {0,0,0,1,2,2,3,4}; + for (int i = 0; i < levelsArr.length; i++) { + assertEquals(levelsArr[i], correct[i]); + } + outputLevels(weight, levelsArr); + } + + private static void outputLevels(int weight, int[] levelsArr) { + String[] hdr = {"Lvl", "StartAdr", "BitPattern", "Weight"}; + String hdrfmt = "%3s %9s %10s %s" + LS; + String dfmt = "%3d %9d %10d %d" + LS; + String dfmt_2 = "%3d %9d %s" + LS; + println("Count = " + weight + " => " + (Integer.toBinaryString(weight))); + println("LevelsArr"); + printf(hdrfmt, 
(Object[]) hdr); + for (int i = 0; i < levelsArr.length; i++) { + if (i == levelsArr.length - 1) { printf(dfmt_2, i, levelsArr[i], "ItemsArr.length"); } + else { + int j = bitAt(weight, i); + printf(dfmt, i, levelsArr[i], j, 1 << (i)); + } + } + println(""); + } + + @Test + public void viewMemorySketchData() { + int k = 20; + int n = 109; + boolean withLevels = true; + boolean withLevelsAndItems = true; + KllLongsSketch sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= n; i++) { sk.update(i); } + byte[] byteArr = sk.toByteArray(); + Memory mem = Memory.wrap(byteArr); + KllLongsSketch fltSk = KllLongsSketch.wrap(mem); + println(fltSk.toString(withLevels, withLevelsAndItems)); + assertEquals(fltSk.getN(), n); + } + + @Test //set static enablePrinting = true for visual checking + public void checkIntCapAux() { + String[] hdr = {"level", "depth", "wt", "cap", "(end)", "MaxN"}; + String hdrFmt = "%6s %6s %28s %10s %10s %34s" + LS; + String dataFmt = "%6d %6d %,28d %,10d %,10d %,34d" + LS; + int k = 1000; + int m = 8; + int numLevels = 20; + println("k=" + k + ", m=" + m + ", numLevels=" + numLevels); + printf(hdrFmt, (Object[]) hdr); + long maxN = 0; + long[] correct = {0,1,1,2,2,3,5,8,12,17,26,39,59,88,132,198,296,444,667,1000}; + for (int i = 0; i < numLevels; i++) { + int depth = numLevels - i - 1; + long cap = KllHelper.intCapAux(k, depth); + long end = Math.max(m, cap); + long wt = 1L << i; + maxN += wt * end; + printf(dataFmt, i, depth, wt, cap, end, maxN); + assertEquals(cap, correct[i]); + } + } + + @Test //set static enablePrinting = true for visual checking + public void checkIntCapAuxAux() { + String[] hdr = {"d","twoK","2k*2^d","3^d","tmp=2k*2^d/3^d","(tmp + 1)/2", "(end)"}; + String hdrFmt = "%6s %10s %20s %20s %15s %12s %10s" + LS; + String dataFmt = "%6d %10d %,20d %,20d %15d %12d %10d" + LS; + long k = (1L << 16) - 1L; + long m = 8; + println("k = " + k + ", m = " + m); + printf(hdrFmt, (Object[]) hdr); + long[] correct = + 
{65535,43690,29127,19418,12945,8630,5753,3836,2557,1705,1136,758,505,337,224,150,100,67,44,30,20,13,9,6,4,3,2,1,1,1,0}; + for (int i = 0; i < 31; i++) { + long twoK = k << 1; + long twoKxtwoD = twoK << i; + long threeToD = KllHelper.powersOfThree[i]; + long tmp = twoKxtwoD / threeToD; + long result = (tmp + 1L) >>> 1; + long end = Math.max(m, result); //performed later + printf(dataFmt, i, twoK, twoKxtwoD, threeToD, tmp, result, end); + assertEquals(result,correct[i]); + assertEquals(result, KllHelper.intCapAuxAux(k, i)); + } + } + + @Test + public void checkGrowLevels() { + KllLongsSketch sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= 21; i++) { sk.update(i); } + assertEquals(sk.getNumLevels(), 2); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure)[2], 33); + } + + @Test //set static enablePrinting = true for visual checking + public void checkSketchInitializeLongHeap() { + int k = 20; //don't change this + KllLongsSketch sk; + + println("#### CASE: LONG FULL HEAP"); + sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG HEAP EMPTY"); + sk = KllLongsSketch.newHeapInstance(k); + println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + 
assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG HEAP SINGLE"); + sk = KllLongsSketch.newHeapInstance(k); + sk.update(1); + println(sk.toString(true, true)); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test //set static enablePrinting = true for visual checking + public void checkSketchInitializeLongHeapifyCompactMem() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + WritableMemory wmem; + + println("#### CASE: LONG FULL HEAPIFIED FROM COMPACT"); + sk2 = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + //println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21); + assertEquals(sk.getMinItem(), 1); + 
assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG EMPTY HEAPIFIED FROM COMPACT"); + sk2 = KllLongsSketch.newHeapInstance(k); + //println(sk.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG SINGLE HEAPIFIED FROM COMPACT"); + sk2 = KllLongsSketch.newHeapInstance(k); + sk2.update(1); + //println(sk2.toString(true, true)); + compBytes = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllLongsSketch.heapify(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test //set static enablePrinting = true for visual checking + public void checkSketchInitializeLongHeapifyUpdatableMem() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + 
WritableMemory wmem; + + println("#### CASE: LONG FULL HEAPIFIED FROM UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk2.update(i); } + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), k + 1); + assertEquals(sk.getNumRetained(), 11); + assertFalse(sk.isEmpty()); + assertTrue(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 33); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 3); + assertEquals(sk.getMaxItem(), 21); + assertEquals(sk.getMinItem(), 1); + assertEquals(sk.getNumLevels(), 2); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG EMPTY HEAPIFIED FROM UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 0); + assertEquals(sk.getNumRetained(), 0); + assertTrue(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + try { sk.getMaxItem(); fail(); } catch (SketchesArgumentException e) { } + try { sk.getMinItem(); fail(); } catch (SketchesArgumentException e) { } + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + + println("#### CASE: LONG SINGLE HEAPIFIED FROM UPDATABLE"); + sk2 = KllLongsSketch.newHeapInstance(k); + sk2.update(1); + //println(sk.toString(true, true)); + compBytes = KllHelper.toByteArray(sk2, true); + wmem = 
WritableMemory.writableWrap(compBytes); + //println(KllPreambleUtil.toString(wmem, LONGS_SKETCH, true)); + sk = KllHeapLongsSketch.heapifyImpl(wmem); + assertEquals(sk.getK(), k); + assertEquals(sk.getN(), 1); + assertEquals(sk.getNumRetained(), 1); + assertFalse(sk.isEmpty()); + assertFalse(sk.isEstimationMode()); + assertEquals(sk.getMinK(), k); + assertEquals(sk.getLongItemsArray().length, 20); + assertEquals(sk.getLevelsArray(sk.sketchStructure).length, 2); + assertEquals(sk.getMaxItem(), 1L); + assertEquals(sk.getMinItem(), 1L); + assertEquals(sk.getNumLevels(), 1); + assertFalse(sk.isLevelZeroSorted()); + } + + @Test //set static enablePrinting = true for visual checking + public void checkMemoryToStringLongCompact() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] compBytes; + byte[] compBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: LONG FULL COMPACT"); + sk = KllLongsSketch.newHeapInstance(k); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(compBytes, compBytes2); + + println("#### CASE: LONG EMPTY COMPACT"); + sk = KllLongsSketch.newHeapInstance(k); + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(compBytes, compBytes2); + + println("#### CASE: LONG SINGLE COMPACT"); + sk = KllLongsSketch.newHeapInstance(k); + sk.update(1); + compBytes = sk.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllLongsSketch.heapify(wmem); + compBytes2 = sk2.toByteArray(); + wmem = WritableMemory.writableWrap(compBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(compBytes, compBytes2); + } + + @Test //set static enablePrinting = true for visual checking + public void checkMemoryToStringLongUpdatable() { + int k = 20; //don't change this + KllLongsSketch sk; + KllLongsSketch sk2; + byte[] upBytes; + byte[] upBytes2; + WritableMemory wmem; + String s; + + println("#### CASE: LONG FULL UPDATABLE"); + sk = KllLongsSketch.newHeapInstance(20); + for (int i = 1; i <= k + 1; i++) { sk.update(i); } + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllHeapLongsSketch.heapifyImpl(wmem); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); //note: heapify does not copy free space, while toUpdatableByteArray does + assertEquals(sk.getN(), sk2.getN()); + assertEquals(sk.getMinItem(), sk2.getMinItem()); + assertEquals(sk.getMaxItem(), sk2.getMaxItem()); + assertEquals(sk.getNumRetained(), sk2.getNumRetained()); + + println("#### CASE: LONG EMPTY UPDATABLE"); + sk = KllLongsSketch.newHeapInstance(k); + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllHeapLongsSketch.heapifyImpl(wmem); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. 
Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + + println("#### CASE: LONG SINGLE UPDATABLE"); + sk = KllLongsSketch.newHeapInstance(k); + sk.update(1); + upBytes = KllHelper.toByteArray(sk, true); + wmem = WritableMemory.writableWrap(upBytes); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 1: sketch to byte[]/memory & analyze memory"); + println(s); + sk2 = KllHeapLongsSketch.heapifyImpl(wmem); + upBytes2 = KllHelper.toByteArray(sk2, true); + wmem = WritableMemory.writableWrap(upBytes2); + s = KllPreambleUtil.toString(wmem, LONGS_SKETCH, true); + println("step 2: memory to heap sketch, to byte[]/memory & analyze memory. Should match above"); + println(s); + assertEquals(upBytes, upBytes2); + } + + @Test + public void checkSimpleMerge() { + int k = 20; + int m = 8; + int n1 = 21; + int n2 = 43; + WritableMemory wmem = WritableMemory.allocate(3000); + WritableMemory wmem2 = WritableMemory.allocate(3000); + + KllLongsSketch sk1 = KllDirectLongsSketch.newDirectUpdatableInstance(k, m, wmem, memReqSvr); + KllLongsSketch sk2 = KllDirectLongsSketch.newDirectUpdatableInstance(k, m, wmem2, memReqSvr); + for (int i = 1; i <= n1; i++) { + sk1.update(i); + } + for (int i = 1; i <= n2; i++) { + sk2.update(i + 100); + } + sk1.merge(sk2); + assertEquals(sk1.getMinItem(), 1L); + assertEquals(sk1.getMaxItem(), 143L); + } + + @Test + public void checkGetSingleItem() { + int k = 20; + KllLongsSketch skHeap = KllLongsSketch.newHeapInstance(k); + skHeap.update(1); + assertTrue(skHeap instanceof KllHeapLongsSketch); + assertEquals(skHeap.getLongSingleItem(), 1L); + + WritableMemory srcMem = WritableMemory.writableWrap(KllHelper.toByteArray(skHeap, true)); + KllLongsSketch skDirect = KllLongsSketch.writableWrap(srcMem, memReqSvr); + assertTrue(skDirect instanceof KllDirectLongsSketch); + assertEquals(skDirect.getLongSingleItem(), 1L); + + Memory srcMem2 = Memory.wrap(skHeap.toByteArray()); + KllLongsSketch skCompact = 
KllLongsSketch.wrap(srcMem2); + assertTrue(skCompact instanceof KllDirectCompactLongsSketch); + assertEquals(skCompact.getLongSingleItem(), 1L); + } + + @Test + public void printlnTest() { + String s = "PRINTING: printf in " + this.getClass().getName(); + println(s); + printf("%s" + LS, s); + } + + private final static boolean enablePrinting = false; + + /** + * @param format the format + * @param args the args + */ + private static final void printf(final String format, final Object ... args) { + if (enablePrinting) { System.out.printf(format, args); } + } + + /** + * @param o the Object to println + */ + private static final void println(final Object o) { + if (enablePrinting) { System.out.println(o.toString()); } + } + +} diff --git a/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java b/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java index bbf89f338..636105ef8 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DebugUnionTest.java @@ -23,10 +23,14 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; +import java.nio.ByteOrder; import java.util.HashSet; import org.testng.annotations.Test; -import org.apache.datasketches.memory.WritableHandle; + +import jdk.incubator.foreign.ResourceScope; + +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.quantilescommon.QuantilesDoublesSketchIterator; @@ -61,8 +65,8 @@ public void test() { DoublesSketch.setRandom(1); //make deterministic for test DoublesUnion dUnion; DoublesSketch dSketch; - try ( WritableHandle wdh = WritableMemory.allocateDirect(10_000_000) ) { - WritableMemory wmem = wdh.getWritable(); + WritableMemory wmem; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(10_000_000)).scope()) { dUnion = 
DoublesUnion.builder().setMaxK(8).build(wmem); for (int s = 0; s < numSketches; s++) { dUnion.union(sketchArr[s]); } dSketch = dUnion.getResult(); //result is on heap diff --git a/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java b/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java index 5c2882c5e..d896bbefa 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DirectQuantilesMemoryRequestTest.java @@ -30,9 +30,10 @@ import org.testng.annotations.Test; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; +import jdk.incubator.foreign.ResourceScope; + /** * The concept for these tests is that the "MemoryManager" classes below are proxies for the * implementation that owns the native memory allocations, thus is responsible for @@ -44,38 +45,35 @@ public class DirectQuantilesMemoryRequestTest { public void checkLimitedMemoryScenarios() { //Requesting application final int k = 128; final int u = 40 * k; - final int initBytes = ((2 * k) + 4) << 3; //just the BB + final int initBytes = ((2 * k) + 4) << 3; //just the BaseBuffer //########## Owning Implementation - // This part would actually be part of the Memory owning implemention so it is faked here - try (WritableHandle wdh = WritableMemory.allocateDirect(initBytes, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - final WritableMemory wmem = wdh.getWritable(); - println("Initial mem size: " + wmem.getCapacity()); - - //########## Receiving Application - // The receiving application has been given wmem to use for a sketch, - // but alas, it is not ultimately large enough. 
- final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem); - assertTrue(usk1.isEmpty()); - - //Load the sketch - for (int i = 0; i < u; i++) { - // The sketch uses The MemoryRequest, acquired from wmem, to acquire more memory as - // needed, and requests via the MemoryRequest to free the old allocations. - usk1.update(i); - } - final double result = usk1.getQuantile(0.5); - println("Result: " + result); - assertEquals(result, u / 2.0, 0.05 * u); //Success - - //########## Owning Implementation - //The actual Memory has been re-allocated several times, - // so the above wmem reference is invalid. - println("\nFinal mem size: " + wmem.getCapacity()); - } catch (Exception e) { - throw new RuntimeException(e); + // This part would actually be part of the Memory owning implementation so it is faked here + WritableMemory wmem = WritableMemory.allocateDirect(initBytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + WritableMemory wmem2 = wmem; + println("Initial mem size: " + wmem.getCapacity()); + + //########## Receiving Application + // The receiving application has been given wmem to use for a sketch, + // but alas, it is not ultimately large enough. + final UpdateDoublesSketch usk = DoublesSketch.builder().setK(k).build(wmem); + assertTrue(usk.isEmpty()); + + //Load the sketch + for (int i = 0; i < u; i++) { + // The sketch uses The MemoryRequest, acquired from wmem, to acquire more memory as + // needed, and requests via the MemoryRequest to free the old allocations. + usk.update(i); } + final double result = usk.getQuantile(0.5); + println("Result: " + result); + assertEquals(result, u / 2.0, 0.05 * u); //Success + + //########## Owning Implementation + //The actual Memory has been re-allocated several times, + // so the above wmem reference is invalid. 
+ println("\nFinal mem size: " + wmem.getCapacity()); + assertFalse(wmem2.isAlive()); } @Test @@ -84,22 +82,17 @@ public void checkGrowBaseBuf() { final int u = 32; // don't need the BB to fill here final int initBytes = (4 + (u / 2)) << 3; // not enough to hold everything - try (WritableHandle memHandler = WritableMemory.allocateDirect(initBytes, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - //final MemoryManager memMgr = new MemoryManager(); - //final WritableMemory mem1 = memMgr.request(initBytes); - final WritableMemory mem1 = memHandler.getWritable(); - println("Initial mem size: " + mem1.getCapacity()); - final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(mem1); - for (int i = 1; i <= u; i++) { - usk1.update(i); - } - final int currentSpace = usk1.getCombinedBufferItemCapacity(); - println("curCombBufItemCap: " + currentSpace); - assertEquals(currentSpace, 2 * k); - } catch (final Exception e) { - throw new RuntimeException(e); + WritableMemory wmem = WritableMemory.allocateDirect(initBytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + WritableMemory wmem2 = wmem; + println("Initial mem size: " + wmem.getCapacity()); + final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem); + for (int i = 1; i <= u; i++) { + usk1.update(i); } + final int currentSpace = usk1.getCombinedBufferItemCapacity(); + println("curCombBufItemCap: " + currentSpace); + assertEquals(currentSpace, 2 * k); + assertFalse(wmem2.isAlive()); } @Test @@ -108,26 +101,20 @@ public void checkGrowCombBuf() { final int u = (2 * k) - 1; //just to fill the BB final int initBytes = ((2 * k) + 4) << 3; //just room for BB - try (WritableHandle memHandler = WritableMemory.allocateDirect(initBytes, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - //final MemoryManager memMgr = new MemoryManager(); - //final WritableMemory mem1 = memMgr.request(initBytes); - final WritableMemory mem1 = memHandler.getWritable(); 
- println("Initial mem size: " + mem1.getCapacity()); - final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(mem1); - for (int i = 1; i <= u; i++) { - usk1.update(i); - } - final int currentSpace = usk1.getCombinedBufferItemCapacity(); - println("curCombBufItemCap: " + currentSpace); - final double[] newCB = usk1.growCombinedBuffer(currentSpace, 3 * k); - final int newSpace = usk1.getCombinedBufferItemCapacity(); - println("newCombBurItemCap: " + newSpace); - assertEquals(newCB.length, 3 * k); - //memMgr.free(mem1); - } catch (final Exception e) { - throw new RuntimeException(e); + WritableMemory wmem = WritableMemory.allocateDirect(initBytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + WritableMemory wmem2 = wmem; + println("Initial mem size: " + wmem.getCapacity()); + final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(wmem); + for (int i = 1; i <= u; i++) { + usk1.update(i); } + final int currentSpace = usk1.getCombinedBufferItemCapacity(); + println("curCombBufItemCap: " + currentSpace); + final double[] newCB = usk1.growCombinedBuffer(currentSpace, 3 * k); + final int newSpace = usk1.getCombinedBufferItemCapacity(); + println("newCombBurItemCap: " + newSpace); + assertEquals(newCB.length, 3 * k); + assertFalse(wmem2.isAlive()); } @Test @@ -138,28 +125,25 @@ public void checkGrowFromWrappedEmptySketch() { final UpdateDoublesSketch usk1 = DoublesSketch.builder().setK(k).build(); final Memory origSketchMem = Memory.wrap(usk1.toByteArray()); - try (WritableHandle memHandle = WritableMemory.allocateDirect(initBytes, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - WritableMemory mem = memHandle.getWritable(); - origSketchMem.copyTo(0, mem, 0, initBytes); - UpdateDoublesSketch usk2 = DirectUpdateDoublesSketch.wrapInstance(mem); - assertTrue(mem.isSameResource(usk2.getMemory())); - assertEquals(mem.getCapacity(), initBytes); - assertTrue(mem.isDirect()); - assertTrue(usk2.isEmpty()); - - 
//update the sketch forcing it to grow on-heap - for (int i = 1; i <= 5; i++) { usk2.update(i); } - assertEquals(usk2.getN(), 5); - WritableMemory mem2 = usk2.getMemory(); - assertFalse(mem.isSameResource(mem2)); - assertFalse(mem2.isDirect()); //should now be on-heap - - final int expectedSize = COMBINED_BUFFER + ((2 * k) << 3); - assertEquals(mem2.getCapacity(), expectedSize); - } catch (final Exception e) { - throw new RuntimeException(e); - } + WritableMemory wmem = WritableMemory.allocateDirect(initBytes, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + WritableMemory wmem2 = wmem; + origSketchMem.copyTo(0, wmem, 0, initBytes); + UpdateDoublesSketch usk2 = DirectUpdateDoublesSketch.wrapInstance(wmem); + assertTrue(wmem.isSameResource(usk2.getMemory())); + assertEquals(wmem.getCapacity(), initBytes); + assertTrue(wmem.isDirect()); + assertTrue(usk2.isEmpty()); + + //update the sketch forcing it to grow on-heap + for (int i = 1; i <= 5; i++) { usk2.update(i); } + assertEquals(usk2.getN(), 5); + WritableMemory mem2 = usk2.getMemory(); + assertFalse(wmem.isSameResource(mem2)); + assertFalse(mem2.isDirect()); //should now be on-heap + + final int expectedSize = COMBINED_BUFFER + ((2 * k) << 3); + assertEquals(mem2.getCapacity(), expectedSize); + assertFalse(wmem2.isAlive()); } @Test diff --git a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java index 5bc5b4eaa..8cdc7bf71 100644 --- a/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/DoublesSketchTest.java @@ -28,13 +28,14 @@ import java.nio.ByteOrder; import org.apache.datasketches.memory.DefaultMemoryRequestServer; -import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.quantilescommon.DoublesSortedView; import 
org.apache.datasketches.quantilescommon.DoublesSortedViewIterator; import org.testng.Assert; import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; + public class DoublesSketchTest { @Test @@ -140,46 +141,42 @@ public void checkEmptyExceptions() { @Test public void directSketchShouldMoveOntoHeapEventually() { - try (WritableHandle wdh = WritableMemory.allocateDirect(1000, - ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())) { - WritableMemory mem = wdh.getWritable(); - UpdateDoublesSketch sketch = DoublesSketch.builder().build(mem); - Assert.assertTrue(sketch.isSameResource(mem)); - for (int i = 0; i < 1000; i++) { - sketch.update(i); - } - Assert.assertFalse(sketch.isSameResource(mem)); - } catch (final Exception e) { - throw new RuntimeException(e); + WritableMemory wmem = WritableMemory.allocateDirect(1000, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + WritableMemory wmem2 = wmem; + UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem); + Assert.assertTrue(sketch.isSameResource(wmem)); + for (int i = 0; i < 1000; i++) { + sketch.update(i); } + Assert.assertFalse(sketch.isSameResource(wmem)); + Assert.assertFalse(wmem2.isAlive()); } @Test public void directSketchShouldMoveOntoHeapEventually2() { int i = 0; - try (WritableHandle wdh = - WritableMemory.allocateDirect(50, ByteOrder.LITTLE_ENDIAN, new DefaultMemoryRequestServer())) { - WritableMemory mem = wdh.getWritable(); - UpdateDoublesSketch sketch = DoublesSketch.builder().build(mem); - Assert.assertTrue(sketch.isSameResource(mem)); - for (; i < 1000; i++) { - if (sketch.isSameResource(mem)) { - sketch.update(i); - } else { - //println("MOVED OUT at i = " + i); - break; - } + WritableMemory wmem = WritableMemory.allocateDirect(50, 1, ByteOrder.nativeOrder(), new DefaultMemoryRequestServer()); + WritableMemory wmem2 = wmem; + UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem); + Assert.assertTrue(sketch.isSameResource(wmem)); + for (; 
i < 1000; i++) { + if (sketch.isSameResource(wmem)) { + sketch.update(i); + } else { + //println("MOVED OUT at i = " + i); + break; } - } catch (final Exception e) { - throw new RuntimeException(e); } + Assert.assertFalse(wmem2.isAlive()); } @Test public void checkEmptyDirect() { - try (WritableHandle wdh = WritableMemory.allocateDirect(1000)) { - WritableMemory mem = wdh.getWritable(); - UpdateDoublesSketch sketch = DoublesSketch.builder().build(mem); + WritableMemory wmem ; + try (ResourceScope scope = (wmem = WritableMemory.allocateDirect(1000, 1, + ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { + + UpdateDoublesSketch sketch = DoublesSketch.builder().build(wmem); sketch.toByteArray(); //exercises a specific path } catch (final Exception e) { throw new RuntimeException(e); diff --git a/src/test/java/org/apache/datasketches/quantiles/PreambleUtilTest.java b/src/test/java/org/apache/datasketches/quantiles/PreambleUtilTest.java index be2f328b2..adf916ef3 100644 --- a/src/test/java/org/apache/datasketches/quantiles/PreambleUtilTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/PreambleUtilTest.java @@ -37,19 +37,24 @@ import static org.apache.datasketches.quantiles.PreambleUtil.insertSerVer; import static org.testng.Assert.assertEquals; -import org.testng.annotations.Test; +import java.nio.ByteOrder; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; +import org.testng.annotations.Test; + +import jdk.incubator.foreign.ResourceScope; public class PreambleUtilTest { @Test public void checkInsertsAndExtracts() { final int bytes = 32; - try (WritableHandle offHeapMemHandler = WritableMemory.allocateDirect(bytes)) { - final WritableMemory offHeapMem = offHeapMemHandler.getWritable(); + WritableMemory offHeapMem; + try (ResourceScope scope = (offHeapMem = 
WritableMemory.allocateDirect(bytes, 1, + ByteOrder.nativeOrder(), new DefaultMemoryRequestServer())).scope()) { + final WritableMemory onHeapMem = WritableMemory.writableWrap(new byte[bytes]); onHeapMem.clear(); diff --git a/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java b/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java index 8c80f4399..68347ffb8 100644 --- a/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java +++ b/src/test/java/org/apache/datasketches/quantiles/QuantilesSketchCrossLanguageTest.java @@ -249,7 +249,7 @@ private static void getAndCheck(String ver, int n, double quantile) { Assert.assertEquals(q2, quantile, 0.0); // same thing with compact sketch - qs2 = CompactDoublesSketch.heapify(srcMem); + qs2 = HeapCompactDoublesSketch.heapifyInstance(srcMem); //Test the quantile q2 = qs2.getQuantile(nf, EXCLUSIVE); println("New Median: " + q2); diff --git a/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java b/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java index db043cff6..e1c6914c7 100644 --- a/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java +++ b/src/test/java/org/apache/datasketches/tdigest/TDigestDoubleTest.java @@ -41,6 +41,8 @@ public void empty() { assertThrows(SketchesStateException.class, () -> td.getMaxValue()); assertThrows(SketchesStateException.class, () -> td.getRank(0)); assertThrows(SketchesStateException.class, () -> td.getQuantile(0.5)); + assertThrows(SketchesStateException.class, () -> td.getPMF(new double[]{0})); + assertThrows(SketchesStateException.class, () -> td.getCDF(new double[]{0})); } @Test @@ -82,6 +84,14 @@ public void manyValues() { assertEquals(td.getQuantile(0.9), n * 0.9, n * 0.9 * 0.01); assertEquals(td.getQuantile(0.95), n * 0.95, n * 0.95 * 0.01); assertEquals(td.getQuantile(1), n - 1); + final double[] pmf = td.getPMF(new double[] {n / 2}); + 
assertEquals(pmf.length, 2); + assertEquals(pmf[0], 0.5, 0.0001); + assertEquals(pmf[1], 0.5, 0.0001); + final double[] cdf = td.getCDF(new double[] {n / 2}); + assertEquals(cdf.length, 2); + assertEquals(cdf[0], 0.5, 0.0001); + assertEquals(cdf[1], 1.0); } @Test diff --git a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java index 42b6069da..188dbf427 100644 --- a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java @@ -26,13 +26,17 @@ import static org.testng.Assert.assertNull; import static org.testng.Assert.assertTrue; +import java.nio.ByteOrder; + import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; import org.apache.datasketches.memory.WritableMemory; import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; + /** * @author Lee Rhodes */ @@ -78,8 +82,8 @@ public void checkHeapifyWrap(int k, int u, boolean ordered) { //Prepare Memory for direct int bytes = usk.getCompactBytes(); //for Compact - try (WritableHandle wdh = WritableMemory.allocateDirect(bytes)) { - WritableMemory directMem = wdh.getWritable(); + WritableMemory directMem; + try (ResourceScope scope = (directMem = WritableMemory.allocateDirect(bytes)).scope()) { /**Via CompactSketch.compact**/ refSk = usk.compact(ordered, directMem); diff --git a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java index e4c112281..6d6af7047 100644 --- a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java +++ 
b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java @@ -119,7 +119,6 @@ public void checkHeapifyByteArrayExact() { // That is, this is being run for its side-effect of accessing things. // If something is wonky, it will generate an exception and fail the test. local2.toString(true, true, 8, true); - } @Test diff --git a/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java index 2261edc3b..84ddcb80e 100644 --- a/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java @@ -114,7 +114,7 @@ public void checkIllegalSketchID_UpdateSketch() { WritableMemory mem = WritableMemory.writableWrap(byteArray); mem.putByte(FAMILY_BYTE, (byte) 0); //corrupt the Sketch ID byte - //try to heapify the corruped mem + //try to heapify the corrupted mem Sketch.heapify(mem, sl.seed); } diff --git a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java index 5191c7c68..f36597b7c 100644 --- a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java +++ b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java @@ -38,19 +38,22 @@ import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; +import java.nio.ByteOrder; import java.util.Arrays; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.ResizeFactor; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; +import org.apache.datasketches.memory.DefaultMemoryRequestServer; import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableHandle; import 
org.apache.datasketches.memory.WritableMemory; import org.apache.datasketches.thetacommon.HashOperations; import org.apache.datasketches.thetacommon.ThetaUtil; import org.testng.annotations.Test; +import jdk.incubator.foreign.ResourceScope; + /** * @author Lee Rhodes */ @@ -59,10 +62,10 @@ public class DirectQuickSelectSketchTest { @Test//(expectedExceptions = SketchesArgumentException.class) public void checkBadSerVer() { int k = 512; - try (WritableHandle h = makeNativeMemory(k)) { - WritableMemory mem = h.getWritable(); + WritableMemory wmem; + try (ResourceScope scope = (wmem = makeNativeMemory(k)).scope()) { - UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(mem); + UpdateSketch usk = UpdateSketch.builder().setNominalEntries(k).build(wmem); DirectQuickSelectSketch sk1 = (DirectQuickSelectSketch)usk; //for internal checks assertTrue(usk.isEmpty()); @@ -73,33 +76,33 @@ public void checkBadSerVer() { assertEquals(usk.getEstimate(), k, 0.0); assertEquals(sk1.getRetainedEntries(false), k); - mem.putByte(SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte + wmem.putByte(SER_VER_BYTE, (byte) 0); //corrupt the SerVer byte - Sketch.wrap(mem); + Sketch.wrap(wmem); } catch (final Exception e) { if (e instanceof SketchesArgumentException) {} else { throw new RuntimeException(e); } } } - @Test//(expectedExceptions = SketchesArgumentException.class) + @Test public void checkConstructorKtooSmall() { int k = 8; - try (WritableHandle h = makeNativeMemory(k)) { - WritableMemory mem = h.getWritable(); - UpdateSketch.builder().setNominalEntries(k).build(mem); + WritableMemory wmem; + try (ResourceScope scope = (wmem = makeNativeMemory(k)).scope()) { + UpdateSketch.builder().setNominalEntries(k).build(wmem); } catch (final Exception e) { if (e instanceof SketchesArgumentException) {} else { throw new RuntimeException(e); } } } - @Test//(expectedExceptions = SketchesArgumentException.class) + @Test public void checkConstructorMemTooSmall() { int k = 16; - try 
(WritableHandle h = makeNativeMemory(k/2)) { - WritableMemory mem = h.getWritable(); - UpdateSketch.builder().setNominalEntries(k).build(mem); + WritableMemory wmem; + try (ResourceScope scope = (wmem = makeNativeMemory(k/2)).scope()) { + UpdateSketch.builder().setNominalEntries(k).build(wmem); } catch (final Exception e) { if (e instanceof SketchesArgumentException) {} else { throw new RuntimeException(e); } @@ -124,10 +127,10 @@ public void checkHeapifyMemoryEstimating() { int k = 512; int u = 2*k; //thus estimating - try (WritableHandle h = makeNativeMemory(k)) { - WritableMemory mem = h.getWritable(); + WritableMemory wmem; + try (ResourceScope scope = (wmem = makeNativeMemory(k)).scope()) { - UpdateSketch sk1 = UpdateSketch.builder().setNominalEntries(k).build(mem); + UpdateSketch sk1 = UpdateSketch.builder().setNominalEntries(k).build(wmem); for (int i=0; i + diff --git a/tools/SketchesCheckstyle.xml b/tools/SketchesCheckstyle.xml index 873a878a0..184a05ad3 100644 --- a/tools/SketchesCheckstyle.xml +++ b/tools/SketchesCheckstyle.xml @@ -230,7 +230,7 @@ under the License. - +