From c8fedfff8a37b702b5530c4c7c40b27bb37b430a Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Fri, 15 Mar 2024 12:02:36 -0400 Subject: [PATCH 1/7] DOC v24.06 Updates [skip ci] --- .../cuda11.8-conda/devcontainer.json | 4 +-- .devcontainer/cuda11.8-pip/devcontainer.json | 8 +++--- .../cuda12.2-conda/devcontainer.json | 4 +-- .devcontainer/cuda12.2-pip/devcontainer.json | 8 +++--- .github/workflows/build.yaml | 16 ++++++------ .github/workflows/pr.yaml | 26 +++++++++---------- .github/workflows/test.yaml | 10 +++---- README.md | 2 +- VERSION | 2 +- .../all_cuda-118_arch-aarch64.yaml | 8 +++--- .../all_cuda-118_arch-x86_64.yaml | 8 +++--- .../all_cuda-122_arch-aarch64.yaml | 8 +++--- .../all_cuda-122_arch-x86_64.yaml | 8 +++--- .../bench_ann_cuda-118_arch-aarch64.yaml | 2 +- .../bench_ann_cuda-118_arch-x86_64.yaml | 2 +- .../bench_ann_cuda-120_arch-aarch64.yaml | 2 +- .../bench_ann_cuda-120_arch-x86_64.yaml | 2 +- .../recipes/raft-dask/conda_build_config.yaml | 2 +- .../cmake/thirdparty/fetch_rapids.cmake | 2 +- dependencies.yaml | 24 ++++++++--------- docs/source/build.md | 2 +- docs/source/developer_guide.md | 8 +++--- docs/source/raft_ann_benchmarks.md | 12 ++++----- python/pylibraft/pyproject.toml | 4 +-- python/raft-ann-bench/pyproject.toml | 2 +- python/raft-dask/pyproject.toml | 8 +++--- 26 files changed, 92 insertions(+), 92 deletions(-) diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 2682510ed1..3f84407d41 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,12 +5,12 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.04-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.06-cpp-llvm16-cuda11.8-mambaforge-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {} + 
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index de039eeb11..c24cddd78e 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,22 +5,22 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.04-cpp-cuda11.8-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.06-cpp-cuda11.8-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/ucx:24.4": { + "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": { "version": "1.14.1" }, - "ghcr.io/rapidsai/devcontainers/features/cuda:24.4": { + "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": { "version": "11.8", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/ucx", diff --git a/.devcontainer/cuda12.2-conda/devcontainer.json b/.devcontainer/cuda12.2-conda/devcontainer.json index 4b24d94dd1..1846d0eac3 100644 --- a/.devcontainer/cuda12.2-conda/devcontainer.json +++ b/.devcontainer/cuda12.2-conda/devcontainer.json @@ -5,12 +5,12 @@ "args": { "CUDA": "12.2", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.04-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.06-cpp-mambaforge-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {} }, "overrideFeatureInstallOrder": [ 
"ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.2-pip/devcontainer.json b/.devcontainer/cuda12.2-pip/devcontainer.json index 489546cb21..291ee56e7f 100644 --- a/.devcontainer/cuda12.2-pip/devcontainer.json +++ b/.devcontainer/cuda12.2-pip/devcontainer.json @@ -5,22 +5,22 @@ "args": { "CUDA": "12.2", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.04-cpp-cuda12.2-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.06-cpp-cuda12.2-ubuntu22.04" } }, "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/ucx:24.4": { + "ghcr.io/rapidsai/devcontainers/features/ucx:24.6": { "version": "1.14.1" }, - "ghcr.io/rapidsai/devcontainers/features/cuda:24.4": { + "ghcr.io/rapidsai/devcontainers/features/cuda:24.6": { "version": "12.2", "installcuBLAS": true, "installcuSOLVER": true, "installcuRAND": true, "installcuSPARSE": true }, - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.4": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.6": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/ucx", diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index bd8b13d21e..c8837afba7 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch 
}} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,7 +57,7 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 with: arch: "amd64" branch: ${{ inputs.branch }} @@ -69,7 +69,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,7 +79,7 @@ jobs: wheel-publish-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -89,7 +89,7 @@ jobs: wheel-build-raft-dask: needs: wheel-publish-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -99,7 +99,7 @@ jobs: wheel-publish-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.06 with: build_type: ${{ inputs.build_type 
|| 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index ada46141a7..c2d9556859 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -25,29 +25,29 @@ jobs: - wheel-tests-raft-dask - devcontainer secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.06 checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.06 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.06 with: build_type: pull-request node_type: cpu16 conda-cpp-tests: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.06 with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.06 with: build_type: pull-request enable_check_symbols: true @@ -55,19 +55,19 @@ jobs: conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.06 with: build_type: pull-request conda-python-tests: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 + uses: 
rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.06 with: build_type: pull-request docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.06 with: build_type: pull-request node_type: "gpu-v100-latest-1" @@ -77,34 +77,34 @@ jobs: wheel-build-pylibraft: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06 with: build_type: pull-request script: ci/build_wheel_pylibraft.sh wheel-tests-pylibraft: needs: wheel-build-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06 with: build_type: pull-request script: ci/test_wheel_pylibraft.sh wheel-build-raft-dask: needs: wheel-tests-pylibraft secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.06 with: build_type: pull-request script: "ci/build_wheel_raft_dask.sh" wheel-tests-raft-dask: needs: wheel-build-raft-dask secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06 with: build_type: pull-request script: ci/test_wheel_raft_dask.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@fix/devcontainer-json-location + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.06 with: arch: '["amd64"]' cuda: '["12.2"]' diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2a557a8b84..18094cc05a 100644 --- 
a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -26,7 +26,7 @@ jobs: symbol_exclusions: _ZN\d+raft_cutlass conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -34,7 +34,7 @@ jobs: sha: ${{ inputs.sha }} conda-python-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -42,7 +42,7 @@ jobs: sha: ${{ inputs.sha }} wheel-tests-pylibraft: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06 with: build_type: nightly branch: ${{ inputs.branch }} @@ -51,7 +51,7 @@ jobs: script: ci/test_wheel_pylibraft.sh wheel-tests-raft-dask: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.06 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/README.md b/README.md index 7833a5cfa3..c501c37b2f 100755 --- a/README.md +++ b/README.md @@ -293,7 +293,7 @@ You can also install the conda packages individually using the `mamba` command a mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.0 ``` -If installing the C++ APIs please see [using 
libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.04/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. +If installing the C++ APIs please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.06/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. ### Installing Python through Pip diff --git a/VERSION b/VERSION index 4a2fe8aa57..0bff6981a3 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.04.00 +24.06.00 diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index e27532a489..cc09d56057 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -20,7 +20,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.4.* +- dask-cuda==24.6.* - doxygen>=1.8.20 - gcc_linux-aarch64=11.* - gmock>=1.13.0 @@ -46,9 +46,9 @@ dependencies: - pydata-sphinx-theme - pytest-cov - pytest==7.* -- rapids-dask-dependency==24.4.* +- rapids-dask-dependency==24.6.* - recommonmark -- rmm==24.4.* +- rmm==24.6.* - scikit-build-core>=0.7.0 - scikit-learn - scipy @@ -56,6 +56,6 @@ dependencies: - sphinx-markdown-tables - sysroot_linux-aarch64==2.17 - ucx-proc=*=gpu -- ucx-py==0.37.* +- ucx-py==0.38.* - ucx>=1.15.0,<1.16.0 name: all_cuda-118_arch-aarch64 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 
bf535c5c04..9fbdcdaad4 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -20,7 +20,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.4.* +- dask-cuda==24.6.* - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 @@ -46,9 +46,9 @@ dependencies: - pydata-sphinx-theme - pytest-cov - pytest==7.* -- rapids-dask-dependency==24.4.* +- rapids-dask-dependency==24.6.* - recommonmark -- rmm==24.4.* +- rmm==24.6.* - scikit-build-core>=0.7.0 - scikit-learn - scipy @@ -56,6 +56,6 @@ dependencies: - sphinx-markdown-tables - sysroot_linux-64==2.17 - ucx-proc=*=gpu -- ucx-py==0.37.* +- ucx-py==0.38.* - ucx>=1.15.0,<1.16.0 name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-122_arch-aarch64.yaml b/conda/environments/all_cuda-122_arch-aarch64.yaml index 8ea3843841..1e78e7deca 100644 --- a/conda/environments/all_cuda-122_arch-aarch64.yaml +++ b/conda/environments/all_cuda-122_arch-aarch64.yaml @@ -21,7 +21,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.4.* +- dask-cuda==24.6.* - doxygen>=1.8.20 - gcc_linux-aarch64=11.* - gmock>=1.13.0 @@ -42,9 +42,9 @@ dependencies: - pydata-sphinx-theme - pytest-cov - pytest==7.* -- rapids-dask-dependency==24.4.* +- rapids-dask-dependency==24.6.* - recommonmark -- rmm==24.4.* +- rmm==24.6.* - scikit-build-core>=0.7.0 - scikit-learn - scipy @@ -52,6 +52,6 @@ dependencies: - sphinx-markdown-tables - sysroot_linux-aarch64==2.17 - ucx-proc=*=gpu -- ucx-py==0.37.* +- ucx-py==0.38.* - ucx>=1.15.0,<1.16.0 name: all_cuda-122_arch-aarch64 diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml index a3f6f7e99f..6d88855f30 100644 --- a/conda/environments/all_cuda-122_arch-x86_64.yaml +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -21,7 +21,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-cuda==24.4.* +- 
dask-cuda==24.6.* - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 @@ -42,9 +42,9 @@ dependencies: - pydata-sphinx-theme - pytest-cov - pytest==7.* -- rapids-dask-dependency==24.4.* +- rapids-dask-dependency==24.6.* - recommonmark -- rmm==24.4.* +- rmm==24.6.* - scikit-build-core>=0.7.0 - scikit-learn - scipy @@ -52,6 +52,6 @@ dependencies: - sphinx-markdown-tables - sysroot_linux-64==2.17 - ucx-proc=*=gpu -- ucx-py==0.37.* +- ucx-py==0.38.* - ucx>=1.15.0,<1.16.0 name: all_cuda-122_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml index 0e0385ceeb..b5f662ebc1 100644 --- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml @@ -38,7 +38,7 @@ dependencies: - openblas - pandas - pyyaml -- rmm==24.4.* +- rmm==24.6.* - scikit-build-core>=0.7.0 - sysroot_linux-aarch64==2.17 name: bench_ann_cuda-118_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index dfe76a2948..6c56cb688c 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -38,7 +38,7 @@ dependencies: - openblas - pandas - pyyaml -- rmm==24.4.* +- rmm==24.6.* - scikit-build-core>=0.7.0 - sysroot_linux-64==2.17 name: bench_ann_cuda-118_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml index 0a6567c646..7f3107e5d6 100644 --- a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml @@ -34,7 +34,7 @@ dependencies: - openblas - pandas - pyyaml -- rmm==24.4.* +- rmm==24.6.* - scikit-build-core>=0.7.0 - sysroot_linux-aarch64==2.17 name: bench_ann_cuda-120_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml 
b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml index a89d5317b6..62739354a5 100644 --- a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml @@ -34,7 +34,7 @@ dependencies: - openblas - pandas - pyyaml -- rmm==24.4.* +- rmm==24.6.* - scikit-build-core>=0.7.0 - sysroot_linux-64==2.17 name: bench_ann_cuda-120_arch-x86_64 diff --git a/conda/recipes/raft-dask/conda_build_config.yaml b/conda/recipes/raft-dask/conda_build_config.yaml index d2bdcbb351..7db48fb684 100644 --- a/conda/recipes/raft-dask/conda_build_config.yaml +++ b/conda/recipes/raft-dask/conda_build_config.yaml @@ -17,7 +17,7 @@ ucx_version: - ">=1.15.0,<1.16.0" ucx_py_version: - - "0.37.*" + - "0.38.*" cmake_version: - ">=3.26.4" diff --git a/cpp/template/cmake/thirdparty/fetch_rapids.cmake b/cpp/template/cmake/thirdparty/fetch_rapids.cmake index aadfdb0028..11d2403963 100644 --- a/cpp/template/cmake/thirdparty/fetch_rapids.cmake +++ b/cpp/template/cmake/thirdparty/fetch_rapids.cmake @@ -12,7 +12,7 @@ # the License. 
# Use this variable to update RAPIDS and RAFT versions -set(RAPIDS_VERSION "24.04") +set(RAPIDS_VERSION "24.06") if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake diff --git a/dependencies.yaml b/dependencies.yaml index 836775a5a3..658b08421d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -180,7 +180,7 @@ dependencies: common: - output_types: [conda] packages: - - &rmm_conda rmm==24.4.* + - &rmm_conda rmm==24.6.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -201,10 +201,10 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - &rmm_cu12 rmm-cu12==24.4.* + - &rmm_cu12 rmm-cu12==24.6.* - matrix: {cuda: "11.*"} packages: - - &rmm_cu11 rmm-cu11==24.4.* + - &rmm_cu11 rmm-cu11==24.6.* - {matrix: null, packages: [*rmm_conda] } checks: common: @@ -435,20 +435,20 @@ dependencies: common: - output_types: [conda, pyproject] packages: - - dask-cuda==24.4.* + - dask-cuda==24.6.* - joblib>=0.11 - numba>=0.57 - *numpy - - rapids-dask-dependency==24.4.* - - ucx-py==0.37.* + - rapids-dask-dependency==24.6.* + - ucx-py==0.38.* - output_types: conda packages: - ucx>=1.15.0,<1.16.0 - ucx-proc=*=gpu - - &ucx_py_conda ucx-py==0.37.* + - &ucx_py_conda ucx-py==0.38.* - output_types: pyproject packages: - - &pylibraft_conda pylibraft==24.4.* + - &pylibraft_conda pylibraft==24.6.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -460,12 +460,12 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - &pylibraft_cu12 pylibraft-cu12==24.4.* - - &ucx_py_cu12 ucx-py-cu12==0.37.* + - &pylibraft_cu12 pylibraft-cu12==24.6.* + - &ucx_py_cu12 ucx-py-cu12==0.38.* - matrix: {cuda: "11.*"} packages: - - &pylibraft_cu11 pylibraft-cu11==24.4.* - - &ucx_py_cu11 ucx-py-cu11==0.37.* + - &pylibraft_cu11 
pylibraft-cu11==24.6.* + - &ucx_py_cu11 ucx-py-cu11==0.38.* - {matrix: null, packages: [*pylibraft_conda, *ucx_py_conda]} test_python_common: common: diff --git a/docs/source/build.md b/docs/source/build.md index 7bb6cf515a..c0abf3f995 100644 --- a/docs/source/build.md +++ b/docs/source/build.md @@ -56,7 +56,7 @@ You can also install the conda packages individually using the `mamba` command a mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.0 ``` -If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.04/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. +If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.06/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above. ## Installing Python through Pip diff --git a/docs/source/developer_guide.md b/docs/source/developer_guide.md index d29130add0..e10e8987af 100644 --- a/docs/source/developer_guide.md +++ b/docs/source/developer_guide.md @@ -187,7 +187,7 @@ RAFT relies on `clang-format` to enforce code style across all C++ and CUDA sour 1. Do not split empty functions/records/namespaces. 2. Two-space indentation everywhere, including the line continuations. 3. Disable reflowing of comments. 
- The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/.clang-format). + The reasons behind these deviations from the Google style guide are given in comments [here](https://github.com/rapidsai/raft/blob/branch-24.06/cpp/.clang-format). [`doxygen`](https://doxygen.nl/) is used as documentation generator and also as a documentation linter. In order to run doxygen as a linter on C++/CUDA code, run @@ -205,7 +205,7 @@ you can run `codespell -i 3 -w .` from the repository root directory. This will bring up an interactive prompt to select which spelling fixes to apply. ### #include style -[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/scripts/include_checker.py) is used to enforce the include style as follows: +[include_checker.py](https://github.com/rapidsai/raft/blob/branch-24.06/cpp/scripts/include_checker.py) is used to enforce the include style as follows: 1. `#include "..."` should be used for referencing local files only. It is acceptable to be used for referencing files in a sub-folder/parent-folder of the same algorithm, but should never be used to include files in other algorithms or between algorithms and the primitives or other dependencies. 2. `#include <...>` should be used for referencing everything else @@ -215,7 +215,7 @@ python ./cpp/scripts/include_checker.py --inplace [cpp/include cpp/test ... 
list ``` ### Copyright header -[copyright.py](https://github.com/rapidsai/raft/blob/branch-24.04/ci/checks/copyright.py) checks the Copyright header for all git-modified files +[copyright.py](https://github.com/rapidsai/raft/blob/branch-24.06/ci/checks/copyright.py) checks the Copyright header for all git-modified files Manually, you can run the following to bulk-fix the header if only the years need to be updated: ```bash @@ -229,7 +229,7 @@ Call CUDA APIs via the provided helper macros `RAFT_CUDA_TRY`, `RAFT_CUBLAS_TRY` ## Logging ### Introduction -Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.04/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. +Anything and everything about logging is defined inside [logger.hpp](https://github.com/rapidsai/raft/blob/branch-24.06/cpp/include/raft/core/logger.hpp). It uses [spdlog](https://github.com/gabime/spdlog) underneath, but this information is transparent to all. ### Usage ```cpp diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index 68fe80f9ce..3eaa72beae 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -62,7 +62,7 @@ Nightly images are located in [dockerhub](https://hub.docker.com/r/rapidsai/raft - The following command pulls the nightly container for python version 10, cuda version 12, and RAFT version 23.10: ```bash -docker pull rapidsai/raft-ann-bench:24.04a-cuda12.0-py3.10 #substitute raft-ann-bench for the exact desired container. +docker pull rapidsai/raft-ann-bench:24.06a-cuda12.0-py3.10 #substitute raft-ann-bench for the exact desired container. 
``` The CUDA and python versions can be changed for the supported values: @@ -83,7 +83,7 @@ You can see the exact versions as well in the dockerhub site: [//]: # () [//]: # (```bash) -[//]: # (docker pull nvcr.io/nvidia/rapidsai/raft-ann-bench:24.04-cuda11.8-py3.10 #substitute raft-ann-bench for the exact desired container.) +[//]: # (docker pull nvcr.io/nvidia/rapidsai/raft-ann-bench:24.06-cuda11.8-py3.10 #substitute raft-ann-bench for the exact desired container.) [//]: # (```) @@ -344,7 +344,7 @@ For GPU-enabled systems, the `DATA_FOLDER` variable should be a local folder whe export DATA_FOLDER=path/to/store/datasets/and/results docker run --gpus all --rm -it -u $(id -u) \ -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench:24.04a-cuda11.8-py3.10 \ + rapidsai/raft-ann-bench:24.06a-cuda11.8-py3.10 \ "--dataset deep-image-96-angular" \ "--normalize" \ "--algorithms raft_cagra,raft_ivf_pq --batch-size 10 -k 10" \ @@ -355,7 +355,7 @@ Usage of the above command is as follows: | Argument | Description | |-----------------------------------------------------------|----------------------------------------------------------------------------------------------------| -| `rapidsai/raft-ann-bench:24.04a-cuda11.8-py3.10` | Image to use. Can be either `raft-ann-bench` or `raft-ann-bench-datasets` | +| `rapidsai/raft-ann-bench:24.06a-cuda11.8-py3.10` | Image to use. 
Can be either `raft-ann-bench` or `raft-ann-bench-datasets` | | `"--dataset deep-image-96-angular"` | Dataset name | | `"--normalize"` | Whether to normalize the dataset | | `"--algorithms raft_cagra,hnswlib --batch-size 10 -k 10"` | Arguments passed to the `run` script, such as the algorithms to benchmark, the batch size, and `k` | @@ -372,7 +372,7 @@ The container arguments in the above section also be used for the CPU-only conta export DATA_FOLDER=path/to/store/datasets/and/results docker run --rm -it -u $(id -u) \ -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench-cpu:24.04a-py3.10 \ + rapidsai/raft-ann-bench-cpu:24.06a-py3.10 \ "--dataset deep-image-96-angular" \ "--normalize" \ "--algorithms hnswlib --batch-size 10 -k 10" \ @@ -389,7 +389,7 @@ docker run --gpus all --rm -it -u $(id -u) \ --entrypoint /bin/bash \ --workdir /data/benchmarks \ -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench:24.04a-cuda11.8-py3.10 + rapidsai/raft-ann-bench:24.06a-cuda11.8-py3.10 ``` This will drop you into a command line in the container, with the `raft-ann-bench` python package ready to use, as described in the [Running the benchmarks](#running-the-benchmarks) section above: diff --git a/python/pylibraft/pyproject.toml b/python/pylibraft/pyproject.toml index d687f70cf5..3e8ca0b6d3 100644 --- a/python/pylibraft/pyproject.toml +++ b/python/pylibraft/pyproject.toml @@ -19,7 +19,7 @@ requires = [ "cuda-python>=11.7.1,<12.0a0", "cython>=3.0.0", "ninja", - "rmm==24.4.*", + "rmm==24.6.*", "scikit-build-core[pyproject]>=0.7.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. build-backend = "scikit_build_core.build" @@ -37,7 +37,7 @@ requires-python = ">=3.9" dependencies = [ "cuda-python>=11.7.1,<12.0a0", "numpy>=1.23,<2.0a0", - "rmm==24.4.*", + "rmm==24.6.*", ] # This list was generated by `rapids-dependency-file-generator`. 
To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", diff --git a/python/raft-ann-bench/pyproject.toml b/python/raft-ann-bench/pyproject.toml index 4a185b22ca..ba336d841c 100644 --- a/python/raft-ann-bench/pyproject.toml +++ b/python/raft-ann-bench/pyproject.toml @@ -9,7 +9,7 @@ requires = [ [project] name = "raft-ann-bench" -version = "24.04.00" +version = "24.06.00" description = "RAFT ANN benchmarks" authors = [ { name = "NVIDIA Corporation" }, diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index 07e2463c5c..815f6b277c 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -33,13 +33,13 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ - "dask-cuda==24.4.*", + "dask-cuda==24.6.*", "joblib>=0.11", "numba>=0.57", "numpy>=1.23,<2.0a0", - "pylibraft==24.4.*", - "rapids-dask-dependency==24.4.*", - "ucx-py==0.37.*", + "pylibraft==24.6.*", + "rapids-dask-dependency==24.6.*", + "ucx-py==0.38.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", From 335236c705c0c53da8a4bf6a22835fdbe669f1df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Malte=20F=C3=B6rster?= <97973773+mfoerste4@users.noreply.github.com> Date: Wed, 20 Mar 2024 15:08:19 +0100 Subject: [PATCH 2/7] Performance optimization of IVF-flat / select_k (#2221) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR is a followup to #2169. To enable IVF-flat with k>256 we need an additional select_k invocation which was unexpectedly slow. 
There are two reasons for that: The first problem is the data handed to select_k: The valid data length per row is much smaller than the conservative maximum that could be achieved by probing the N largest probes. Therefore each query row contains roughly 50% dummy values. This is also the case for IVF-PQ, but it was not as prominent there due to the second reason. The second problem, which is also a difference from the IVF-PQ algorithm, is that a 64bit payload data type is used for selectK. selectK with a 64bit index type is significantly slower than with 32bit, especially when many elements are in the same range: ``` Benchmark Time CPU Iterations ----------------------------------------------------------------------------------------------------- SelectK/float/uint32_t/kRadix11bitsExtraPass/1/manual_time 1.68 ms 1.74 ms 413 1357#200000#512 SelectK/float/uint32_t/kRadix11bitsExtraPass/3/manual_time 2.31 ms 2.37 ms 302 1357#200000#512#same-leading-bits SelectK/float/int64_t/kRadix11bitsExtraPass/1/manual_time 5.92 ms 5.98 ms 116 1357#200000#512 SelectK/float/int64_t/kRadix11bitsExtraPass/3/manual_time 83.7 ms 83.8 ms 8 1357#200000#512#same-leading-bits ----------------------------------------------------------------------------------------------------- ``` The data distribution within an IVF-flat benchmark resulted in a select_k time of ~24ms. ### scope: * additional parameter added to select_k to optionally pass individual row lengths for every batch entry. This parameter is utilized by both IVF-Flat and IVF-PQ and results in a ~2x speedup (50 nodes out of 5000) of the final `select_k`. * refactor ivf-flat search to work with 32bit indices by storing positions instead of actual indices. This allows using the 32bit index type select_k for a ~10x speedup in the final `select_k`. FYI @tfeher @achirkin ### not in scope: * General optimization of select_k: In the current implementation there is no difference in the type of the payload and the actual index type.
Especially the type of the histogram has a large effect on performance (due to the atomics). Authors: - Malte Förster (https://github.com/mfoerste4) Approvers: - Tamas Bela Feher (https://github.com/tfeher) URL: https://github.com/rapidsai/raft/pull/2221 --- .../raft/matrix/detail/select_k-ext.cuh | 8 +- .../raft/matrix/detail/select_k-inl.cuh | 16 ++-- .../raft/matrix/detail/select_radix.cuh | 35 ++++++- .../raft/neighbors/detail/ivf_common.cuh | 20 ++-- .../detail/ivf_flat_interleaved_scan-ext.cuh | 4 +- .../detail/ivf_flat_interleaved_scan-inl.cuh | 25 ++--- .../neighbors/detail/ivf_flat_search-inl.cuh | 91 +++++++++++-------- .../raft/neighbors/detail/ivf_pq_search.cuh | 5 +- .../raft/neighbors/detail/refine_device.cuh | 36 +++++++- .../matrix/detail/select_k_double_int64_t.cu | 3 +- .../matrix/detail/select_k_double_uint32_t.cu | 3 +- cpp/src/matrix/detail/select_k_float_int32.cu | 3 +- .../matrix/detail/select_k_float_int64_t.cu | 3 +- .../matrix/detail/select_k_float_uint32_t.cu | 3 +- .../matrix/detail/select_k_half_int64_t.cu | 3 +- .../matrix/detail/select_k_half_uint32_t.cu | 3 +- ...at_interleaved_scan_float_float_int64_t.cu | 2 +- ...flat_interleaved_scan_half_half_int64_t.cu | 2 +- ...interleaved_scan_int8_t_int32_t_int64_t.cu | 2 +- ...terleaved_scan_uint8_t_uint32_t_int64_t.cu | 2 +- cpp/test/neighbors/ann_cagra.cuh | 8 +- cpp/test/neighbors/ann_utils.cuh | 43 ++++++++- 22 files changed, 221 insertions(+), 99 deletions(-) diff --git a/cpp/include/raft/matrix/detail/select_k-ext.cuh b/cpp/include/raft/matrix/detail/select_k-ext.cuh index 6a7847d8a0..506cbffcb9 100644 --- a/cpp/include/raft/matrix/detail/select_k-ext.cuh +++ b/cpp/include/raft/matrix/detail/select_k-ext.cuh @@ -41,8 +41,9 @@ void select_k(raft::resources const& handle, T* out_val, IdxT* out_idx, bool select_min, - bool sorted = false, - SelectAlgo algo = SelectAlgo::kAuto) RAFT_EXPLICIT; + bool sorted = false, + SelectAlgo algo = SelectAlgo::kAuto, + const IdxT* len_i = nullptr) 
RAFT_EXPLICIT; } // namespace raft::matrix::detail #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY @@ -58,7 +59,8 @@ void select_k(raft::resources const& handle, IdxT* out_idx, \ bool select_min, \ bool sorted, \ - raft::matrix::SelectAlgo algo) + raft::matrix::SelectAlgo algo, \ + const IdxT* len_i) instantiate_raft_matrix_detail_select_k(__half, uint32_t); instantiate_raft_matrix_detail_select_k(__half, int64_t); instantiate_raft_matrix_detail_select_k(float, int64_t); diff --git a/cpp/include/raft/matrix/detail/select_k-inl.cuh b/cpp/include/raft/matrix/detail/select_k-inl.cuh index 8f40e6ae00..93d233152b 100644 --- a/cpp/include/raft/matrix/detail/select_k-inl.cuh +++ b/cpp/include/raft/matrix/detail/select_k-inl.cuh @@ -229,6 +229,9 @@ void segmented_sort_by_key(raft::resources const& handle, * whether to make sure selected pairs are sorted by value * @param[in] algo * the selection algorithm to use + * @param[in] len_i + * array of size (batch_size) providing lengths for each individual row + * only radix select-k supported */ template void select_k(raft::resources const& handle, @@ -240,8 +243,9 @@ void select_k(raft::resources const& handle, T* out_val, IdxT* out_idx, bool select_min, - bool sorted = false, - SelectAlgo algo = SelectAlgo::kAuto) + bool sorted = false, + SelectAlgo algo = SelectAlgo::kAuto, + const IdxT* len_i = nullptr) { common::nvtx::range fun_scope( "matrix::select_k(batch_size = %zu, len = %zu, k = %d)", batch_size, len, k); @@ -262,9 +266,8 @@ void select_k(raft::resources const& handle, out_val, out_idx, select_min, - true // fused_last_filter - ); - + true, // fused_last_filter + len_i); } else { bool fused_last_filter = algo == SelectAlgo::kRadix11bits; detail::select::radix::select_k(handle, @@ -276,7 +279,8 @@ void select_k(raft::resources const& handle, out_val, out_idx, select_min, - fused_last_filter); + fused_last_filter, + len_i); } if (sorted) { auto offsets = make_device_mdarray( diff --git 
a/cpp/include/raft/matrix/detail/select_radix.cuh b/cpp/include/raft/matrix/detail/select_radix.cuh index 82983b7cd2..36a346fda3 100644 --- a/cpp/include/raft/matrix/detail/select_radix.cuh +++ b/cpp/include/raft/matrix/detail/select_radix.cuh @@ -557,6 +557,7 @@ RAFT_KERNEL radix_kernel(const T* in, Counter* counters, IdxT* histograms, const IdxT len, + const IdxT* len_i, const IdxT k, const bool select_min, const int pass) @@ -598,6 +599,14 @@ RAFT_KERNEL radix_kernel(const T* in, in_buf += batch_id * buf_len; in_idx_buf += batch_id * buf_len; } + + // in case we have individual len for each query defined we want to make sure + // that we only iterate valid elements. + if (len_i != nullptr) { + const IdxT max_len = max(len_i[batch_id], k); + if (max_len < previous_len) previous_len = max_len; + } + // "current_len > buf_len" means current pass will skip writing buffer if (pass == 0 || current_len > buf_len) { out_buf = nullptr; @@ -829,6 +838,7 @@ void radix_topk(const T* in, IdxT* out_idx, bool select_min, bool fused_last_filter, + const IdxT* len_i, unsigned grid_dim, int sm_cnt, rmm::cuda_stream_view stream, @@ -868,6 +878,7 @@ void radix_topk(const T* in, const IdxT* chunk_in_idx = in_idx ? (in_idx + offset * len) : nullptr; T* chunk_out = out + offset * k; IdxT* chunk_out_idx = out_idx + offset * k; + const IdxT* chunk_len_i = len_i ? (len_i + offset) : nullptr; const T* in_buf = nullptr; const IdxT* in_idx_buf = nullptr; @@ -905,6 +916,7 @@ void radix_topk(const T* in, counters.data(), histograms.data(), len, + chunk_len_i, k, select_min, pass); @@ -1007,6 +1019,7 @@ template RAFT_KERNEL radix_topk_one_block_kernel(const T* in, const IdxT* in_idx, const IdxT len, + const IdxT* len_i, const IdxT k, T* out, IdxT* out_idx, @@ -1057,6 +1070,13 @@ RAFT_KERNEL radix_topk_one_block_kernel(const T* in, out_idx_buf = nullptr; } + // in case we have individual len for each query defined we want to make sure + // that we only iterate valid elements. 
+ if (len_i != nullptr) { + const IdxT max_len = max(len_i[batch_id], k); + if (max_len < previous_len) previous_len = max_len; + } + filter_and_histogram_for_one_block(in_buf, in_idx_buf, out_buf, @@ -1106,6 +1126,7 @@ void radix_topk_one_block(const T* in, T* out, IdxT* out_idx, bool select_min, + const IdxT* len_i, int sm_cnt, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -1121,10 +1142,12 @@ void radix_topk_one_block(const T* in, max_chunk_size * buf_len * 2 * (sizeof(T) + sizeof(IdxT)), stream, mr); for (size_t offset = 0; offset < static_cast(batch_size); offset += max_chunk_size) { - int chunk_size = std::min(max_chunk_size, batch_size - offset); + int chunk_size = std::min(max_chunk_size, batch_size - offset); + const IdxT* chunk_len_i = len_i ? (len_i + offset) : nullptr; kernel<<>>(in + offset * len, in_idx ? (in_idx + offset * len) : nullptr, len, + chunk_len_i, k, out + offset * k, out_idx + offset * k, @@ -1188,6 +1211,8 @@ void radix_topk_one_block(const T* in, * blocks is called. The later case is preferable when leading bits of input data are almost the * same. That is, when the value range of input data is narrow. In such case, there could be a * large number of inputs for the last filter, hence using multiple thread blocks is beneficial. 
+ * @param len_i + * optional array of size (batch_size) providing lengths for each individual row */ template void select_k(raft::resources const& res, @@ -1199,7 +1224,8 @@ void select_k(raft::resources const& res, T* out, IdxT* out_idx, bool select_min, - bool fused_last_filter) + bool fused_last_filter, + const IdxT* len_i) { auto stream = resource::get_cuda_stream(res); auto mr = resource::get_workspace_resource(res); @@ -1223,13 +1249,13 @@ void select_k(raft::resources const& res, if (len <= BlockSize * items_per_thread) { impl::radix_topk_one_block( - in, in_idx, batch_size, len, k, out, out_idx, select_min, sm_cnt, stream, mr); + in, in_idx, batch_size, len, k, out, out_idx, select_min, len_i, sm_cnt, stream, mr); } else { unsigned grid_dim = impl::calc_grid_dim(batch_size, len, sm_cnt); if (grid_dim == 1) { impl::radix_topk_one_block( - in, in_idx, batch_size, len, k, out, out_idx, select_min, sm_cnt, stream, mr); + in, in_idx, batch_size, len, k, out, out_idx, select_min, len_i, sm_cnt, stream, mr); } else { impl::radix_topk(in, in_idx, @@ -1240,6 +1266,7 @@ void select_k(raft::resources const& res, out_idx, select_min, fused_last_filter, + len_i, grid_dim, sm_cnt, stream, diff --git a/cpp/include/raft/neighbors/detail/ivf_common.cuh b/cpp/include/raft/neighbors/detail/ivf_common.cuh index ef7ae7c804..df0319e181 100644 --- a/cpp/include/raft/neighbors/detail/ivf_common.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_common.cuh @@ -147,11 +147,11 @@ __device__ inline auto find_chunk_ix(uint32_t& sample_ix, // NOLINT return ix_min; } -template +template __launch_bounds__(BlockDim) RAFT_KERNEL - postprocess_neighbors_kernel(IdxT1* neighbors_out, // [n_queries, topk] - const IdxT2* neighbors_in, // [n_queries, topk] - const IdxT1* const* db_indices, // [n_clusters][..] + postprocess_neighbors_kernel(IdxT* neighbors_out, // [n_queries, topk] + const uint32_t* neighbors_in, // [n_queries, topk] + const IdxT* const* db_indices, // [n_clusters][..] 
const uint32_t* clusters_to_probe, // [n_queries, n_probes] const uint32_t* chunk_indices, // [n_queries, n_probes] uint32_t n_queries, @@ -170,7 +170,7 @@ __launch_bounds__(BlockDim) RAFT_KERNEL const uint32_t chunk_ix = find_chunk_ix(data_ix, n_probes, chunk_indices); const bool valid = chunk_ix < n_probes; neighbors_out[k] = - valid ? db_indices[clusters_to_probe[chunk_ix]][data_ix] : kOutOfBoundsRecord; + valid ? db_indices[clusters_to_probe[chunk_ix]][data_ix] : kOutOfBoundsRecord; } /** @@ -180,10 +180,10 @@ __launch_bounds__(BlockDim) RAFT_KERNEL * probed clusters / defined by the `chunk_indices`. * We assume the searched sample sizes (for a single query) fit into `uint32_t`. */ -template -void postprocess_neighbors(IdxT1* neighbors_out, // [n_queries, topk] - const IdxT2* neighbors_in, // [n_queries, topk] - const IdxT1* const* db_indices, // [n_clusters][..] +template +void postprocess_neighbors(IdxT* neighbors_out, // [n_queries, topk] + const uint32_t* neighbors_in, // [n_queries, topk] + const IdxT* const* db_indices, // [n_clusters][..] 
const uint32_t* clusters_to_probe, // [n_queries, n_probes] const uint32_t* chunk_indices, // [n_queries, n_probes] uint32_t n_queries, @@ -193,7 +193,7 @@ void postprocess_neighbors(IdxT1* neighbors_out, // [n_queries, to { constexpr int kPNThreads = 256; const int pn_blocks = raft::div_rounding_up_unsafe(n_queries * topk, kPNThreads); - postprocess_neighbors_kernel + postprocess_neighbors_kernel <<>>(neighbors_out, neighbors_in, db_indices, diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-ext.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-ext.cuh index 7c2d1d2157..140a9f17c8 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-ext.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-ext.cuh @@ -45,7 +45,7 @@ void ivfflat_interleaved_scan(const raft::neighbors::ivf_flat::index& i const uint32_t* chunk_indices, const bool select_min, IvfSampleFilterT sample_filter, - IdxT* neighbors, + uint32_t* neighbors, float* distances, uint32_t& grid_dim_x, rmm::cuda_stream_view stream) RAFT_EXPLICIT; @@ -70,7 +70,7 @@ void ivfflat_interleaved_scan(const raft::neighbors::ivf_flat::index& i const uint32_t* chunk_indices, \ const bool select_min, \ IvfSampleFilterT sample_filter, \ - IdxT* neighbors, \ + uint32_t* neighbors, \ float* distances, \ uint32_t& grid_dim_x, \ rmm::cuda_stream_view stream) diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh index 6fc528e26b..9cd8b70148 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_interleaved_scan-inl.cuh @@ -690,7 +690,6 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) const uint32_t query_smem_elems, const T* query, const uint32_t* coarse_index, - const IdxT* const* list_indices_ptrs, const T* const* list_data_ptrs, const uint32_t* list_sizes, const uint32_t queries_offset, 
@@ -700,7 +699,7 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) const uint32_t* chunk_indices, const uint32_t dim, IvfSampleFilterT sample_filter, - IdxT* neighbors, + uint32_t* neighbors, float* distances) { extern __shared__ __align__(256) uint8_t interleaved_scan_kernel_smem[]; @@ -719,8 +718,8 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) distances += query_id * k * gridDim.x + blockIdx.x * k; } else { distances += query_id * uint64_t(max_samples); - chunk_indices += (n_probes * query_id); } + chunk_indices += (n_probes * query_id); coarse_index += query_id * n_probes; } @@ -728,7 +727,7 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) copy_vectorized(query_shared, query, std::min(dim, query_smem_elems)); __syncthreads(); - using local_topk_t = block_sort_t; + using local_topk_t = block_sort_t; local_topk_t queue(k); { using align_warp = Pow2; @@ -752,11 +751,9 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) align_warp::div(list_length + align_warp::Mask); // ceildiv by power of 2 uint32_t sample_offset = 0; - if constexpr (!kManageLocalTopK) { - if (probe_id > 0) { sample_offset = chunk_indices[probe_id - 1]; } - assert(list_length == chunk_indices[probe_id] - sample_offset); - assert(sample_offset + list_length <= max_samples); - } + if (probe_id > 0) { sample_offset = chunk_indices[probe_id - 1]; } + assert(list_length == chunk_indices[probe_id] - sample_offset); + assert(sample_offset + list_length <= max_samples); constexpr int kUnroll = WarpSize / Veclen; constexpr uint32_t kNumWarps = kThreadsPerBlock / WarpSize; @@ -806,8 +803,7 @@ RAFT_KERNEL __launch_bounds__(kThreadsPerBlock) // Enqueue one element per thread const float val = valid ? static_cast(dist) : local_topk_t::queue_t::kDummy; if constexpr (kManageLocalTopK) { - const size_t idx = valid ? 
static_cast(list_indices_ptrs[list_id][vec_id]) : 0; - queue.add(val, idx); + queue.add(val, sample_offset + vec_id); } else { if (vec_id < list_length) distances[sample_offset + vec_id] = val; } @@ -873,7 +869,7 @@ void launch_kernel(Lambda lambda, const uint32_t max_samples, const uint32_t* chunk_indices, IvfSampleFilterT sample_filter, - IdxT* neighbors, + uint32_t* neighbors, float* distances, uint32_t& grid_dim_x, rmm::cuda_stream_view stream) @@ -927,7 +923,6 @@ void launch_kernel(Lambda lambda, query_smem_elems, queries, coarse_index, - index.inds_ptrs().data_handle(), index.data_ptrs().data_handle(), index.list_sizes().data_handle(), queries_offset + query_offset, @@ -945,8 +940,8 @@ void launch_kernel(Lambda lambda, distances += grid_dim_y * grid_dim_x * k; } else { distances += grid_dim_y * max_samples; - chunk_indices += grid_dim_y * n_probes; } + chunk_indices += grid_dim_y * n_probes; coarse_index += grid_dim_y * n_probes; } } @@ -1161,7 +1156,7 @@ void ivfflat_interleaved_scan(const index& index, const uint32_t* chunk_indices, const bool select_min, IvfSampleFilterT sample_filter, - IdxT* neighbors, + uint32_t* neighbors, float* distances, uint32_t& grid_dim_x, rmm::cuda_stream_view stream) diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh index 98bdeda42f..441fb76b2f 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh @@ -67,13 +67,16 @@ void search_impl(raft::resources const& handle, // Optional structures if postprocessing is required // The topk distance value of candidate vectors from each cluster(list) rmm::device_uvector distances_tmp_dev(0, stream, search_mr); - // The topk index of candidate vectors from each cluster(list) - rmm::device_uvector indices_tmp_dev(0, stream, search_mr); // Number of samples for each query rmm::device_uvector num_samples(0, stream, search_mr); // 
Offsets per probe for each query rmm::device_uvector chunk_index(0, stream, search_mr); + // The topk index of candidate vectors from each cluster(list), local index offset + // also we might need additional storage for select_k + rmm::device_uvector indices_tmp_dev(0, stream, search_mr); + rmm::device_uvector neighbors_uint32_buf(0, stream, search_mr); + size_t float_query_size; if constexpr (std::is_integral_v) { float_query_size = n_queries * index.dim(); @@ -175,23 +178,29 @@ void search_impl(raft::resources const& handle, grid_dim_x = 1; } + num_samples.resize(n_queries, stream); + chunk_index.resize(n_queries_probes, stream); + + ivf::detail::calc_chunk_indices::configure(n_probes, n_queries)(index.list_sizes().data_handle(), + coarse_indices_dev.data(), + chunk_index.data(), + num_samples.data(), + stream); + auto distances_dev_ptr = distances; - auto indices_dev_ptr = neighbors; + + uint32_t* neighbors_uint32 = nullptr; + if constexpr (sizeof(IdxT) == sizeof(uint32_t)) { + neighbors_uint32 = reinterpret_cast(neighbors); + } else { + neighbors_uint32_buf.resize(std::size_t(n_queries) * std::size_t(k), stream); + neighbors_uint32 = neighbors_uint32_buf.data(); + } + + uint32_t* indices_dev_ptr = nullptr; bool manage_local_topk = is_local_topk_feasible(k); if (!manage_local_topk || grid_dim_x > 1) { - if (!manage_local_topk) { - num_samples.resize(n_queries, stream); - chunk_index.resize(n_queries_probes, stream); - - ivf::detail::calc_chunk_indices::configure(n_probes, n_queries)( - index.list_sizes().data_handle(), - coarse_indices_dev.data(), - chunk_index.data(), - num_samples.data(), - stream); - } - auto target_size = std::size_t(n_queries) * (manage_local_topk ? 
grid_dim_x * k : max_samples); distances_tmp_dev.resize(target_size, stream); @@ -199,6 +208,8 @@ void search_impl(raft::resources const& handle, distances_dev_ptr = distances_tmp_dev.data(); indices_dev_ptr = indices_tmp_dev.data(); + } else { + indices_dev_ptr = neighbors_uint32; } ivfflat_interleaved_scan::value_t, IdxT, IvfSampleFilterT>( @@ -224,31 +235,33 @@ void search_impl(raft::resources const& handle, // Merge topk values from different blocks if (!manage_local_topk || grid_dim_x > 1) { - matrix::detail::select_k(handle, - distances_tmp_dev.data(), - indices_tmp_dev.data(), - n_queries, - manage_local_topk ? (k * grid_dim_x) : max_samples, - k, - distances, - neighbors, - select_min); - - if (!manage_local_topk) { - // post process distances && neighbor IDs - ivf::detail::postprocess_distances( - distances, distances, index.metric(), n_queries, k, 1.0, false, stream); - ivf::detail::postprocess_neighbors(neighbors, - neighbors, - index.inds_ptrs().data_handle(), - coarse_indices_dev.data(), - chunk_index.data(), - n_queries, - n_probes, - k, - stream); - } + matrix::detail::select_k(handle, + distances_tmp_dev.data(), + indices_tmp_dev.data(), + n_queries, + manage_local_topk ? (k * grid_dim_x) : max_samples, + k, + distances, + neighbors_uint32, + select_min, + false, + matrix::SelectAlgo::kAuto, + manage_local_topk ? 
nullptr : num_samples.data()); + } + if (!manage_local_topk) { + // post process distances && neighbor IDs + ivf::detail::postprocess_distances( + distances, distances, index.metric(), n_queries, k, 1.0, false, stream); } + ivf::detail::postprocess_neighbors(neighbors, + neighbors_uint32, + index.inds_ptrs().data_handle(), + coarse_indices_dev.data(), + chunk_index.data(), + n_queries, + n_probes, + k, + stream); } /** See raft::neighbors::ivf_flat::search docs */ diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh index d445f909e5..4c5da38092 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_search.cuh @@ -447,7 +447,10 @@ void ivfpq_search_worker(raft::resources const& handle, topK, topk_dists.data(), neighbors_uint32, - true); + true, + false, + matrix::SelectAlgo::kAuto, + manage_local_topk ? nullptr : num_samples.data()); // Postprocessing ivf::detail::postprocess_distances( diff --git a/cpp/include/raft/neighbors/detail/refine_device.cuh b/cpp/include/raft/neighbors/detail/refine_device.cuh index e76e52657b..bdc29ca121 100644 --- a/cpp/include/raft/neighbors/detail/refine_device.cuh +++ b/cpp/include/raft/neighbors/detail/refine_device.cuh @@ -88,6 +88,27 @@ void refine_device(raft::resources const& handle, n_queries, n_candidates); uint32_t grid_dim_x = 1; + + // the neighbor ids will be computed in uint32_t as offset + rmm::device_uvector neighbors_uint32_buf(0, resource::get_cuda_stream(handle)); + // Offsets per probe for each query [n_queries] as n_probes = 1 + rmm::device_uvector chunk_index(n_queries, resource::get_cuda_stream(handle)); + + // we know that each cluster has exactly n_candidates entries + thrust::fill(resource::get_thrust_policy(handle), + chunk_index.data(), + chunk_index.data() + n_queries, + uint32_t(n_candidates)); + + uint32_t* neighbors_uint32 = nullptr; + if constexpr (sizeof(idx_t) == sizeof(uint32_t)) 
{ + neighbors_uint32 = reinterpret_cast(indices.data_handle()); + } else { + neighbors_uint32_buf.resize(std::size_t(n_queries) * std::size_t(k), + resource::get_cuda_stream(handle)); + neighbors_uint32 = neighbors_uint32_buf.data(); + } + raft::neighbors::ivf_flat::detail::ivfflat_interleaved_scan< data_t, typename raft::spatial::knn::detail::utils::config::value_t, @@ -100,13 +121,24 @@ void refine_device(raft::resources const& handle, 1, k, 0, - nullptr, + chunk_index.data(), raft::distance::is_min_close(metric), raft::neighbors::filtering::none_ivf_sample_filter(), - indices.data_handle(), + neighbors_uint32, distances.data_handle(), grid_dim_x, resource::get_cuda_stream(handle)); + + // postprocessing -- neighbors from position to actual id + ivf::detail::postprocess_neighbors(indices.data_handle(), + neighbors_uint32, + refinement_index.inds_ptrs().data_handle(), + fake_coarse_idx.data(), + chunk_index.data(), + n_queries, + 1, + k, + resource::get_cuda_stream(handle)); } } // namespace raft::neighbors::detail diff --git a/cpp/src/matrix/detail/select_k_double_int64_t.cu b/cpp/src/matrix/detail/select_k_double_int64_t.cu index e32b4ef6f0..bf234aacbf 100644 --- a/cpp/src/matrix/detail/select_k_double_int64_t.cu +++ b/cpp/src/matrix/detail/select_k_double_int64_t.cu @@ -27,7 +27,8 @@ IdxT* out_idx, \ bool select_min, \ bool sorted, \ - raft::matrix::SelectAlgo algo) + raft::matrix::SelectAlgo algo, \ + const IdxT* len_i) instantiate_raft_matrix_detail_select_k(double, int64_t); diff --git a/cpp/src/matrix/detail/select_k_double_uint32_t.cu b/cpp/src/matrix/detail/select_k_double_uint32_t.cu index 21c954ca46..7f0511a76a 100644 --- a/cpp/src/matrix/detail/select_k_double_uint32_t.cu +++ b/cpp/src/matrix/detail/select_k_double_uint32_t.cu @@ -29,7 +29,8 @@ IdxT* out_idx, \ bool select_min, \ bool sorted, \ - raft::matrix::SelectAlgo algo) + raft::matrix::SelectAlgo algo, \ + const IdxT* len_i) instantiate_raft_matrix_detail_select_k(double, uint32_t); diff --git 
a/cpp/src/matrix/detail/select_k_float_int32.cu b/cpp/src/matrix/detail/select_k_float_int32.cu index 7f163a0b0d..e68b1e32df 100644 --- a/cpp/src/matrix/detail/select_k_float_int32.cu +++ b/cpp/src/matrix/detail/select_k_float_int32.cu @@ -27,7 +27,8 @@ IdxT* out_idx, \ bool select_min, \ bool sorted, \ - raft::matrix::SelectAlgo algo) + raft::matrix::SelectAlgo algo, \ + const IdxT* len_i) instantiate_raft_matrix_detail_select_k(float, int); diff --git a/cpp/src/matrix/detail/select_k_float_int64_t.cu b/cpp/src/matrix/detail/select_k_float_int64_t.cu index 87b6525356..5aa40d8c9d 100644 --- a/cpp/src/matrix/detail/select_k_float_int64_t.cu +++ b/cpp/src/matrix/detail/select_k_float_int64_t.cu @@ -27,7 +27,8 @@ IdxT* out_idx, \ bool select_min, \ bool sorted, \ - raft::matrix::SelectAlgo algo) + raft::matrix::SelectAlgo algo, \ + const IdxT* len_i) instantiate_raft_matrix_detail_select_k(float, int64_t); diff --git a/cpp/src/matrix/detail/select_k_float_uint32_t.cu b/cpp/src/matrix/detail/select_k_float_uint32_t.cu index e698f811d8..9aba147edf 100644 --- a/cpp/src/matrix/detail/select_k_float_uint32_t.cu +++ b/cpp/src/matrix/detail/select_k_float_uint32_t.cu @@ -27,7 +27,8 @@ IdxT* out_idx, \ bool select_min, \ bool sorted, \ - raft::matrix::SelectAlgo algo) + raft::matrix::SelectAlgo algo, \ + const IdxT* len_i) instantiate_raft_matrix_detail_select_k(float, uint32_t); diff --git a/cpp/src/matrix/detail/select_k_half_int64_t.cu b/cpp/src/matrix/detail/select_k_half_int64_t.cu index 0eee20b1fa..bc513e4aeb 100644 --- a/cpp/src/matrix/detail/select_k_half_int64_t.cu +++ b/cpp/src/matrix/detail/select_k_half_int64_t.cu @@ -27,7 +27,8 @@ IdxT* out_idx, \ bool select_min, \ bool sorted, \ - raft::matrix::SelectAlgo algo) + raft::matrix::SelectAlgo algo, \ + const IdxT* len_i) instantiate_raft_matrix_detail_select_k(__half, int64_t); diff --git a/cpp/src/matrix/detail/select_k_half_uint32_t.cu b/cpp/src/matrix/detail/select_k_half_uint32_t.cu index f4e6bae21f..e46c7d46bb 
100644 --- a/cpp/src/matrix/detail/select_k_half_uint32_t.cu +++ b/cpp/src/matrix/detail/select_k_half_uint32_t.cu @@ -27,7 +27,8 @@ IdxT* out_idx, \ bool select_min, \ bool sorted, \ - raft::matrix::SelectAlgo algo) + raft::matrix::SelectAlgo algo, \ + const IdxT* len_i) instantiate_raft_matrix_detail_select_k(__half, uint32_t); diff --git a/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu b/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu index def33e493e..5ac820e0dd 100644 --- a/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu +++ b/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu @@ -33,7 +33,7 @@ const uint32_t* chunk_indices, \ const bool select_min, \ IvfSampleFilterT sample_filter, \ - IdxT* neighbors, \ + uint32_t* neighbors, \ float* distances, \ uint32_t& grid_dim_x, \ rmm::cuda_stream_view stream) diff --git a/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_half_half_int64_t.cu b/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_half_half_int64_t.cu index e96600ee02..4d847cdeb1 100644 --- a/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_half_half_int64_t.cu +++ b/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_half_half_int64_t.cu @@ -35,7 +35,7 @@ const uint32_t* chunk_indices, \ const bool select_min, \ IvfSampleFilterT sample_filter, \ - IdxT* neighbors, \ + uint32_t* neighbors, \ float* distances, \ uint32_t& grid_dim_x, \ rmm::cuda_stream_view stream) diff --git a/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu b/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu index 13c9d2e283..8a0e8f0118 100644 --- a/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu +++ b/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu @@ -33,7 +33,7 @@ const uint32_t* chunk_indices, \ const bool select_min, \ IvfSampleFilterT sample_filter, \ - IdxT* 
neighbors, \ + uint32_t* neighbors, \ float* distances, \ uint32_t& grid_dim_x, \ rmm::cuda_stream_view stream) diff --git a/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_uint8_t_uint32_t_int64_t.cu b/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_uint8_t_uint32_t_int64_t.cu index 51f02343fc..7cad992e2b 100644 --- a/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_uint8_t_uint32_t_int64_t.cu +++ b/cpp/src/neighbors/detail/ivf_flat_interleaved_scan_uint8_t_uint32_t_int64_t.cu @@ -33,7 +33,7 @@ const uint32_t* chunk_indices, \ const bool select_min, \ IvfSampleFilterT sample_filter, \ - IdxT* neighbors, \ + uint32_t* neighbors, \ float* distances, \ uint32_t& grid_dim_x, \ rmm::cuda_stream_view stream) diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index a111de0762..7278f71a24 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -549,6 +549,7 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { EXPECT_FALSE(unacceptable_node); double min_recall = ps.min_recall; + // TODO(mfoerster): re-enable uniquenes test EXPECT_TRUE(eval_neighbours(indices_naive, indices_Cagra, distances_naive, @@ -556,7 +557,8 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { ps.n_queries, ps.k, 0.003, - min_recall)); + min_recall, + false)); EXPECT_TRUE(eval_distances(handle_, database.data(), search_queries.data(), @@ -668,6 +670,7 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { } double min_recall = ps.min_recall; + // TODO(mfoerster): re-enable uniquenes test EXPECT_TRUE(eval_neighbours(indices_naive, indices_Cagra, distances_naive, @@ -675,7 +678,8 @@ class AnnCagraFilterTest : public ::testing::TestWithParam { ps.n_queries, ps.k, 0.003, - min_recall)); + min_recall, + false)); EXPECT_TRUE(eval_distances(handle_, database.data(), search_queries.data(), diff --git a/cpp/test/neighbors/ann_utils.cuh b/cpp/test/neighbors/ann_utils.cuh index afd083d512..6be2ac7fc7 100644 --- 
a/cpp/test/neighbors/ann_utils.cuh +++ b/cpp/test/neighbors/ann_utils.cuh @@ -35,6 +35,7 @@ #include #include +#include namespace raft::neighbors { @@ -153,13 +154,40 @@ auto calc_recall(const std::vector& expected_idx, static_cast(match_count) / static_cast(total_count), match_count, total_count); } +/** check uniqueness of indices + */ +template +auto check_unique_indices(const std::vector& actual_idx, size_t rows, size_t cols) +{ + size_t max_count; + std::set unique_indices; + for (size_t i = 0; i < rows; ++i) { + unique_indices.clear(); + max_count = 0; + for (size_t k = 0; k < cols; ++k) { + size_t idx_k = i * cols + k; // row major assumption! + auto act_idx = actual_idx[idx_k]; + if (act_idx == std::numeric_limits::max()) { + max_count++; + } else if (unique_indices.find(act_idx) == unique_indices.end()) { + unique_indices.insert(act_idx); + } else { + return testing::AssertionFailure() + << "Duplicated index " << act_idx << " at k " << k << " for query " << i << "! "; + } + } + } + return testing::AssertionSuccess(); +} + template auto eval_recall(const std::vector& expected_idx, const std::vector& actual_idx, size_t rows, size_t cols, double eps, - double min_recall) -> testing::AssertionResult + double min_recall, + bool test_unique = true) -> testing::AssertionResult { auto [actual_recall, match_count, total_count] = calc_recall(expected_idx, actual_idx, rows, cols); @@ -176,7 +204,10 @@ auto eval_recall(const std::vector& expected_idx, << "actual recall (" << actual_recall << ") is lower than the minimum expected recall (" << min_recall << "); eps = " << eps << ". 
"; } - return testing::AssertionSuccess(); + if (test_unique) + return check_unique_indices(actual_idx, rows, cols); + else + return testing::AssertionSuccess(); } /** Overload of calc_recall to account for distances @@ -224,7 +255,8 @@ auto eval_neighbours(const std::vector& expected_idx, size_t rows, size_t cols, double eps, - double min_recall) -> testing::AssertionResult + double min_recall, + bool test_unique = true) -> testing::AssertionResult { auto [actual_recall, match_count, total_count] = calc_recall(expected_idx, actual_idx, expected_dist, actual_dist, rows, cols, eps); @@ -241,7 +273,10 @@ auto eval_neighbours(const std::vector& expected_idx, << "actual recall (" << actual_recall << ") is lower than the minimum expected recall (" << min_recall << "); eps = " << eps << ". "; } - return testing::AssertionSuccess(); + if (test_unique) + return check_unique_indices(actual_idx, rows, cols); + else + return testing::AssertionSuccess(); } template From de7341e795fc880bb4fa34e4c833adfc9a54afc1 Mon Sep 17 00:00:00 2001 From: tsuki <12711693+enp1s0@users.noreply.github.com> Date: Thu, 21 Mar 2024 14:52:39 +0900 Subject: [PATCH 3/7] CAGRA-Q search (#2206) Rel: #1889 ## Limitations - Only 8-bit PQ is supported - Sub-space size is only 2 supported Authors: - tsuki (https://github.com/enp1s0) - Artem M. 
Chirkin (https://github.com/achirkin) - Tamas Bela Feher (https://github.com/tfeher) Approvers: - Tamas Bela Feher (https://github.com/tfeher) URL: https://github.com/rapidsai/raft/pull/2206 --- cpp/CMakeLists.txt | 96 +++ cpp/include/raft/neighbors/dataset.hpp | 2 +- .../neighbors/detail/cagra/cagra_search.cuh | 228 ++++++-- .../detail/cagra/compute_distance.hpp | 298 ++++------ .../detail/cagra/compute_distance_vpq.cuh | 227 ++++++++ .../neighbors/detail/cagra/device_common.hpp | 8 +- .../raft/neighbors/detail/cagra/factory.cuh | 28 +- .../detail/cagra/search_multi_cta.cuh | 100 ++-- .../cagra/search_multi_cta_kernel-ext.cuh | 399 +++++++++++-- .../cagra/search_multi_cta_kernel-inl.cuh | 185 +++--- .../detail/cagra/search_multi_kernel.cuh | 386 ++++++------ .../neighbors/detail/cagra/search_plan.cuh | 8 +- .../detail/cagra/search_single_cta.cuh | 86 +-- .../cagra/search_single_cta_kernel-ext.cuh | 548 ++++++++++++++++-- .../cagra/search_single_cta_kernel-inl.cuh | 230 ++++---- .../raft/neighbors/detail/cagra/utils.hpp | 5 + .../raft/neighbors/detail/refine_host-ext.hpp | 1 + .../raft/neighbors/detail/vpq_dataset.cuh | 2 +- cpp/include/raft/neighbors/refine-ext.cuh | 21 +- .../cagra/q_search_multi_cta_00_generate.py | 84 +++ ...float_uint32_dim1024_t32_8pq_2subd_half.cu | 37 ++ ...float_uint32_dim1024_t32_8pq_4subd_half.cu | 37 ++ ...a_float_uint32_dim128_t8_8pq_2subd_half.cu | 37 ++ ...a_float_uint32_dim128_t8_8pq_4subd_half.cu | 37 ++ ..._float_uint32_dim256_t16_8pq_2subd_half.cu | 37 ++ ..._float_uint32_dim256_t16_8pq_4subd_half.cu | 37 ++ ..._float_uint32_dim512_t32_8pq_2subd_half.cu | 37 ++ ..._float_uint32_dim512_t32_8pq_4subd_half.cu | 37 ++ ...float_uint64_dim1024_t32_8pq_2subd_half.cu | 37 ++ ...float_uint64_dim1024_t32_8pq_4subd_half.cu | 37 ++ ...a_float_uint64_dim128_t8_8pq_2subd_half.cu | 37 ++ ...a_float_uint64_dim128_t8_8pq_4subd_half.cu | 37 ++ ..._float_uint64_dim256_t16_8pq_2subd_half.cu | 37 ++ ..._float_uint64_dim256_t16_8pq_4subd_half.cu | 37 
++ ..._float_uint64_dim512_t32_8pq_2subd_half.cu | 37 ++ ..._float_uint64_dim512_t32_8pq_4subd_half.cu | 37 ++ ..._half_uint32_dim1024_t32_8pq_2subd_half.cu | 37 ++ ..._half_uint32_dim1024_t32_8pq_4subd_half.cu | 37 ++ ...ta_half_uint32_dim128_t8_8pq_2subd_half.cu | 37 ++ ...ta_half_uint32_dim128_t8_8pq_4subd_half.cu | 37 ++ ...a_half_uint32_dim256_t16_8pq_2subd_half.cu | 37 ++ ...a_half_uint32_dim256_t16_8pq_4subd_half.cu | 37 ++ ...a_half_uint32_dim512_t32_8pq_2subd_half.cu | 37 ++ ...a_half_uint32_dim512_t32_8pq_4subd_half.cu | 37 ++ ..._half_uint64_dim1024_t32_8pq_2subd_half.cu | 37 ++ ..._half_uint64_dim1024_t32_8pq_4subd_half.cu | 37 ++ ...ta_half_uint64_dim128_t8_8pq_2subd_half.cu | 37 ++ ...ta_half_uint64_dim128_t8_8pq_4subd_half.cu | 37 ++ ...a_half_uint64_dim256_t16_8pq_2subd_half.cu | 37 ++ ...a_half_uint64_dim256_t16_8pq_4subd_half.cu | 37 ++ ...a_half_uint64_dim512_t32_8pq_2subd_half.cu | 37 ++ ...a_half_uint64_dim512_t32_8pq_4subd_half.cu | 37 ++ ..._int8_uint32_dim1024_t32_8pq_2subd_half.cu | 37 ++ ..._int8_uint32_dim1024_t32_8pq_4subd_half.cu | 37 ++ ...ta_int8_uint32_dim128_t8_8pq_2subd_half.cu | 37 ++ ...ta_int8_uint32_dim128_t8_8pq_4subd_half.cu | 37 ++ ...a_int8_uint32_dim256_t16_8pq_2subd_half.cu | 37 ++ ...a_int8_uint32_dim256_t16_8pq_4subd_half.cu | 37 ++ ...a_int8_uint32_dim512_t32_8pq_2subd_half.cu | 37 ++ ...a_int8_uint32_dim512_t32_8pq_4subd_half.cu | 37 ++ ...uint8_uint32_dim1024_t32_8pq_2subd_half.cu | 37 ++ ...uint8_uint32_dim1024_t32_8pq_4subd_half.cu | 37 ++ ...a_uint8_uint32_dim128_t8_8pq_2subd_half.cu | 37 ++ ...a_uint8_uint32_dim128_t8_8pq_4subd_half.cu | 37 ++ ..._uint8_uint32_dim256_t16_8pq_2subd_half.cu | 37 ++ ..._uint8_uint32_dim256_t16_8pq_4subd_half.cu | 37 ++ ..._uint8_uint32_dim512_t32_8pq_2subd_half.cu | 37 ++ ..._uint8_uint32_dim512_t32_8pq_4subd_half.cu | 37 ++ .../cagra/q_search_single_cta_00_generate.py | 89 +++ ...float_uint32_dim1024_t32_8pq_2subd_half.cu | 37 ++ ...float_uint32_dim1024_t32_8pq_4subd_half.cu | 37 
++ ...a_float_uint32_dim128_t8_8pq_2subd_half.cu | 37 ++ ...a_float_uint32_dim128_t8_8pq_4subd_half.cu | 37 ++ ..._float_uint32_dim256_t16_8pq_2subd_half.cu | 37 ++ ..._float_uint32_dim256_t16_8pq_4subd_half.cu | 37 ++ ..._float_uint32_dim512_t32_8pq_2subd_half.cu | 37 ++ ..._float_uint32_dim512_t32_8pq_4subd_half.cu | 37 ++ ...float_uint64_dim1024_t32_8pq_2subd_half.cu | 37 ++ ...float_uint64_dim1024_t32_8pq_4subd_half.cu | 37 ++ ...a_float_uint64_dim128_t8_8pq_2subd_half.cu | 37 ++ ...a_float_uint64_dim128_t8_8pq_4subd_half.cu | 37 ++ ..._float_uint64_dim256_t16_8pq_2subd_half.cu | 37 ++ ..._float_uint64_dim256_t16_8pq_4subd_half.cu | 37 ++ ..._float_uint64_dim512_t32_8pq_2subd_half.cu | 37 ++ ..._float_uint64_dim512_t32_8pq_4subd_half.cu | 37 ++ ..._half_uint32_dim1024_t32_8pq_2subd_half.cu | 37 ++ ..._half_uint32_dim1024_t32_8pq_4subd_half.cu | 37 ++ ...ta_half_uint32_dim128_t8_8pq_2subd_half.cu | 37 ++ ...ta_half_uint32_dim128_t8_8pq_4subd_half.cu | 37 ++ ...a_half_uint32_dim256_t16_8pq_2subd_half.cu | 37 ++ ...a_half_uint32_dim256_t16_8pq_4subd_half.cu | 37 ++ ...a_half_uint32_dim512_t32_8pq_2subd_half.cu | 37 ++ ...a_half_uint32_dim512_t32_8pq_4subd_half.cu | 37 ++ ..._half_uint64_dim1024_t32_8pq_2subd_half.cu | 37 ++ ..._half_uint64_dim1024_t32_8pq_4subd_half.cu | 37 ++ ...ta_half_uint64_dim128_t8_8pq_2subd_half.cu | 37 ++ ...ta_half_uint64_dim128_t8_8pq_4subd_half.cu | 37 ++ ...a_half_uint64_dim256_t16_8pq_2subd_half.cu | 37 ++ ...a_half_uint64_dim256_t16_8pq_4subd_half.cu | 37 ++ ...a_half_uint64_dim512_t32_8pq_2subd_half.cu | 37 ++ ...a_half_uint64_dim512_t32_8pq_4subd_half.cu | 37 ++ ..._int8_uint32_dim1024_t32_8pq_2subd_half.cu | 37 ++ ..._int8_uint32_dim1024_t32_8pq_4subd_half.cu | 37 ++ ...ta_int8_uint32_dim128_t8_8pq_2subd_half.cu | 37 ++ ...ta_int8_uint32_dim128_t8_8pq_4subd_half.cu | 37 ++ ...a_int8_uint32_dim256_t16_8pq_2subd_half.cu | 37 ++ ...a_int8_uint32_dim256_t16_8pq_4subd_half.cu | 37 ++ ...a_int8_uint32_dim512_t32_8pq_2subd_half.cu | 37 
++ ...a_int8_uint32_dim512_t32_8pq_4subd_half.cu | 37 ++ ...uint8_uint32_dim1024_t32_8pq_2subd_half.cu | 37 ++ ...uint8_uint32_dim1024_t32_8pq_4subd_half.cu | 37 ++ ...a_uint8_uint32_dim128_t8_8pq_2subd_half.cu | 37 ++ ...a_uint8_uint32_dim128_t8_8pq_4subd_half.cu | 37 ++ ..._uint8_uint32_dim256_t16_8pq_2subd_half.cu | 37 ++ ..._uint8_uint32_dim256_t16_8pq_4subd_half.cu | 37 ++ ..._uint8_uint32_dim512_t32_8pq_2subd_half.cu | 37 ++ ..._uint8_uint32_dim512_t32_8pq_4subd_half.cu | 37 ++ .../detail/cagra/search_multi_cta.cuh | 51 ++ .../cagra/search_multi_cta_00_generate.py | 42 +- ...arch_multi_cta_float_uint32_dim1024_t32.cu | 45 +- ...search_multi_cta_float_uint32_dim128_t8.cu | 45 +- ...earch_multi_cta_float_uint32_dim256_t16.cu | 45 +- ...earch_multi_cta_float_uint32_dim512_t32.cu | 45 +- ...arch_multi_cta_float_uint64_dim1024_t32.cu | 45 +- ...search_multi_cta_float_uint64_dim128_t8.cu | 45 +- ...earch_multi_cta_float_uint64_dim256_t16.cu | 45 +- ...earch_multi_cta_float_uint64_dim512_t32.cu | 45 +- ...earch_multi_cta_half_uint32_dim1024_t32.cu | 43 +- .../search_multi_cta_half_uint32_dim128_t8.cu | 43 +- ...search_multi_cta_half_uint32_dim256_t16.cu | 43 +- ...search_multi_cta_half_uint32_dim512_t32.cu | 43 +- ...earch_multi_cta_half_uint64_dim1024_t32.cu | 43 +- .../search_multi_cta_half_uint64_dim128_t8.cu | 43 +- ...search_multi_cta_half_uint64_dim256_t16.cu | 43 +- ...search_multi_cta_half_uint64_dim512_t32.cu | 43 +- ...earch_multi_cta_int8_uint32_dim1024_t32.cu | 45 +- .../search_multi_cta_int8_uint32_dim128_t8.cu | 45 +- ...search_multi_cta_int8_uint32_dim256_t16.cu | 45 +- ...search_multi_cta_int8_uint32_dim512_t32.cu | 45 +- ...arch_multi_cta_uint8_uint32_dim1024_t32.cu | 45 +- ...search_multi_cta_uint8_uint32_dim128_t8.cu | 45 +- ...earch_multi_cta_uint8_uint32_dim256_t16.cu | 45 +- ...earch_multi_cta_uint8_uint32_dim512_t32.cu | 45 +- .../detail/cagra/search_single_cta.cuh | 52 ++ .../cagra/search_single_cta_00_generate.py | 43 +- 
...rch_single_cta_float_uint32_dim1024_t32.cu | 48 +- ...earch_single_cta_float_uint32_dim128_t8.cu | 48 +- ...arch_single_cta_float_uint32_dim256_t16.cu | 48 +- ...arch_single_cta_float_uint32_dim512_t32.cu | 48 +- ...rch_single_cta_float_uint64_dim1024_t32.cu | 48 +- ...earch_single_cta_float_uint64_dim128_t8.cu | 48 +- ...arch_single_cta_float_uint64_dim256_t16.cu | 48 +- ...arch_single_cta_float_uint64_dim512_t32.cu | 48 +- ...arch_single_cta_half_uint32_dim1024_t32.cu | 46 +- ...search_single_cta_half_uint32_dim128_t8.cu | 46 +- ...earch_single_cta_half_uint32_dim256_t16.cu | 46 +- ...earch_single_cta_half_uint32_dim512_t32.cu | 46 +- ...arch_single_cta_half_uint64_dim1024_t32.cu | 46 +- ...search_single_cta_half_uint64_dim128_t8.cu | 46 +- ...earch_single_cta_half_uint64_dim256_t16.cu | 46 +- ...earch_single_cta_half_uint64_dim512_t32.cu | 46 +- ...arch_single_cta_int8_uint32_dim1024_t32.cu | 48 +- ...search_single_cta_int8_uint32_dim128_t8.cu | 48 +- ...earch_single_cta_int8_uint32_dim256_t16.cu | 48 +- ...earch_single_cta_int8_uint32_dim512_t32.cu | 48 +- ...rch_single_cta_uint8_uint32_dim1024_t32.cu | 48 +- ...earch_single_cta_uint8_uint32_dim128_t8.cu | 48 +- ...arch_single_cta_uint8_uint32_dim256_t16.cu | 48 +- ...arch_single_cta_uint8_uint32_dim512_t32.cu | 48 +- .../detail/refine_host_float_float.cpp | 3 +- cpp/src/neighbors/refine_float_float.cu | 30 +- cpp/test/CMakeLists.txt | 2 + .../ann_cagra/search_kernel_uint64_t.cuh | 192 +++--- cpp/test/neighbors/ann_cagra_vpq.cuh | 336 +++++++++++ .../ann_cagra_vpq/test_float_int64_t.cu | 29 + .../ann_cagra_vpq/test_float_uint32_t.cu | 28 + cpp/test/neighbors/ann_utils.cuh | 2 +- 177 files changed, 6795 insertions(+), 2798 deletions(-) create mode 100644 cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py create mode 100644 
cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu create mode 100644 
cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu create mode 100644 
cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu create mode 100644 
cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu create 
mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu create 
mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu create mode 100644 cpp/src/neighbors/detail/cagra/search_multi_cta.cuh create mode 100644 cpp/src/neighbors/detail/cagra/search_single_cta.cuh create mode 100755 cpp/test/neighbors/ann_cagra_vpq.cuh create mode 100644 cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu create mode 100644 cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6107b9325a..cbae4bfb3f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -372,6 +372,102 @@ if(RAFT_COMPILE_LIBRARY) src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu + 
src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu + 
src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu + 
src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu + 
src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu + 
src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu + src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu src/neighbors/detail/ivf_flat_interleaved_scan_half_half_int64_t.cu src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu diff --git a/cpp/include/raft/neighbors/dataset.hpp b/cpp/include/raft/neighbors/dataset.hpp index e7a3ba97a4..a6444775f4 100644 --- a/cpp/include/raft/neighbors/dataset.hpp +++ b/cpp/include/raft/neighbors/dataset.hpp @@ -72,7 +72,7 @@ struct strided_dataset : public dataset { return static_cast(v.stride(0) > 0 ? v.stride(0) : v.extent(1)); } /** Get the view of the data. */ - [[nodiscard]] virtual auto view() const noexcept -> view_type; + [[nodiscard]] virtual auto view() const noexcept -> view_type = 0; }; template diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 0832e75633..d30f69ddcd 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -16,6 +16,7 @@ #pragma once +#include "compute_distance_vpq.cuh" #include "factory.cuh" #include "search_plan.cuh" #include "search_single_cta.cuh" @@ -77,46 +78,24 @@ inline return filter; } -/** - * @brief Search ANN using the constructed index. - * - * See the [build](#build) documentation for a usage example. - * - * @tparam T data element type - * @tparam IdxT type of database vector indices - * @tparam internal_IdxT during search we map IdxT to internal_IdxT, this way we do not need - * separate kernels for int/uint. 
- * - * @param[in] handle - * @param[in] params configure the search - * @param[in] idx ivf-pq constructed index - * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] - * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset - * [n_queries, k] - * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, - * k] - */ - -template -void search_main(raft::resources const& res, - search_params params, - const index& index, - raft::device_matrix_view queries, - raft::device_matrix_view neighbors, - raft::device_matrix_view distances, - CagraSampleFilterT sample_filter = CagraSampleFilterT()) +template +void search_main_core( + raft::resources const& res, + search_params params, + DatasetDescriptorT dataset_desc, + raft::device_matrix_view graph, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + CagraSampleFilterT sample_filter = CagraSampleFilterT()) { RAFT_LOG_DEBUG("# dataset size = %lu, dim = %lu\n", - static_cast(index.dataset().extent(0)), - static_cast(index.dataset().extent(1))); + static_cast(index.data().n_rows()), + static_cast(index.data().dim())); RAFT_LOG_DEBUG("# query size = %lu, dim = %lu\n", static_cast(queries.extent(0)), static_cast(queries.extent(1))); - RAFT_EXPECTS(queries.extent(1) == index.dim(), "Queries and index dim must match"); + RAFT_EXPECTS(queries.extent(1) == dataset_desc.dim, "Queries and index dim must match"); const uint32_t topk = neighbors.extent(1); cudaDeviceProp deviceProp = resource::get_device_properties(res); @@ -125,12 +104,15 @@ void search_main(raft::resources const& res, } common::nvtx::range fun_scope( - "cagra::search(max_queries = %u, k = %u, dim = %zu)", params.max_queries, topk, index.dim()); + "cagra::search(max_queries = %u, k = %u, dim = %zu)", + params.max_queries, + topk, + dataset_desc.dim); using CagraSampleFilterT_s = typename 
CagraSampleFilterT_Selector::type; - std::unique_ptr> plan = - factory::create( - res, params, index.dim(), index.graph_degree(), topk); + std::unique_ptr> plan = + factory::create( + res, params, dataset_desc.dim, graph.extent(1), topk); plan->check(topk); @@ -140,30 +122,22 @@ void search_main(raft::resources const& res, for (unsigned qid = 0; qid < queries.extent(0); qid += max_queries) { const uint32_t n_queries = std::min(max_queries, queries.extent(0) - qid); - internal_IdxT* _topk_indices_ptr = - reinterpret_cast(neighbors.data_handle()) + (topk * qid); - DistanceT* _topk_distances_ptr = distances.data_handle() + (topk * qid); + auto _topk_indices_ptr = + reinterpret_cast(neighbors.data_handle()) + + (topk * qid); + auto _topk_distances_ptr = distances.data_handle() + (topk * qid); // todo(tfeher): one could keep distances optional and pass nullptr - const T* _query_ptr = queries.data_handle() + (query_dim * qid); - const internal_IdxT* _seed_ptr = + const auto* _query_ptr = queries.data_handle() + (query_dim * qid); + const auto* _seed_ptr = plan->num_seeds > 0 - ? reinterpret_cast(plan->dev_seed.data()) + (plan->num_seeds * qid) + ? 
reinterpret_cast(plan->dev_seed.data()) + + (plan->num_seeds * qid) : nullptr; uint32_t* _num_executed_iterations = nullptr; - auto dataset_internal = - make_device_strided_matrix_view(index.dataset().data_handle(), - index.dataset().extent(0), - index.dataset().extent(1), - index.dataset().stride(0)); - auto graph_internal = raft::make_device_matrix_view( - reinterpret_cast(index.graph().data_handle()), - index.graph().extent(0), - index.graph().extent(1)); - (*plan)(res, - dataset_internal, - graph_internal, + dataset_desc, + graph, _topk_indices_ptr, _topk_distances_ptr, _query_ptr, @@ -173,6 +147,146 @@ void search_main(raft::resources const& res, topk, set_offset(sample_filter, qid)); } +} + +template +void launch_vpq_search_main_core( + raft::resources const& res, + const vpq_dataset* vpq_dset, + search_params params, + raft::device_matrix_view graph, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + CagraSampleFilterT sample_filter) +{ + RAFT_EXPECTS(vpq_dset->pq_bits() == 8, "Only pq_bits = 8 is supported for now"); + RAFT_EXPECTS(vpq_dset->pq_len() == 2, "Only pq_len 2 is supported for now"); + RAFT_EXPECTS(vpq_dset->dim() % vpq_dset->pq_dim() == 0, + "dim must be a multiple of pq_dim at the moment"); + + const float vq_scale = 1.0f; + const float pq_scale = 1.0f; + + if (vpq_dset->pq_bits() == 8) { + if (vpq_dset->pq_len() == 2) { + using dataset_desc_t = cagra_q_dataset_descriptor_t; + dataset_desc_t dataset_desc(vpq_dset->data.data_handle(), + vpq_dset->encoded_row_length(), + vpq_dset->pq_dim(), + vpq_dset->vq_code_book.data_handle(), + vq_scale, + vpq_dset->pq_code_book.data_handle(), + pq_scale, + size_t(vpq_dset->n_rows()), + vpq_dset->dim()); + search_main_core( + res, params, dataset_desc, graph, queries, neighbors, distances, sample_filter); + } else if (vpq_dset->pq_len() == 4) { + using dataset_desc_t = cagra_q_dataset_descriptor_t; + dataset_desc_t 
dataset_desc(vpq_dset->data.data_handle(), + vpq_dset->encoded_row_length(), + vpq_dset->pq_dim(), + vpq_dset->vq_code_book.data_handle(), + vq_scale, + vpq_dset->pq_code_book.data_handle(), + pq_scale, + size_t(vpq_dset->n_rows()), + vpq_dset->dim()); + search_main_core( + res, params, dataset_desc, graph, queries, neighbors, distances, sample_filter); + } else { + RAFT_FAIL("Subspace dimension must be 2 or 4"); + } + } else { + RAFT_FAIL("Only 8-bit PQ is supported now"); + } +} + +/** + * @brief Search ANN using the constructed index. + * + * See the [build](#build) documentation for a usage example. + * + * @tparam T data element type + * @tparam IdxT type of database vector indices + * @tparam internal_IdxT during search we map IdxT to internal_IdxT, this way we do not need + * separate kernels for int/uint. + * + * @param[in] handle + * @param[in] params configure the search + * @param[in] idx ivf-pq constructed index + * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] + * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset + * [n_queries, k] + * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, + * k] + */ +template +void search_main(raft::resources const& res, + search_params params, + const index& index, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + CagraSampleFilterT sample_filter = CagraSampleFilterT()) +{ + const auto& graph = index.graph(); + auto graph_internal = raft::make_device_matrix_view( + reinterpret_cast(graph.data_handle()), graph.extent(0), graph.extent(1)); + + // n_rows has the same type as the dataset index (the array extents type) + using ds_idx_type = decltype(index.data().n_rows()); + // Dispatch search parameters based on the dataset kind. 
+ if (auto* strided_dset = dynamic_cast*>(&index.data()); + strided_dset != nullptr) { + // Set TEAM_SIZE and DATASET_BLOCK_SIZE to zero tentatively since these parameters cannot be + // determined here. They are set just before kernel launch. + using dataset_desc_t = standard_dataset_descriptor_t; + // Search using a plain (strided) row-major dataset + const dataset_desc_t dataset_desc(strided_dset->view().data_handle(), + strided_dset->n_rows(), + strided_dset->dim(), + strided_dset->stride()); + + search_main_core( + res, params, dataset_desc, graph_internal, queries, neighbors, distances, sample_filter); + } else if (auto* vpq_dset = dynamic_cast*>(&index.data()); + vpq_dset != nullptr) { + // Search using a compressed dataset + RAFT_FAIL("FP32 VPQ dataset support is coming soon"); + } else if (auto* vpq_dset = dynamic_cast*>(&index.data()); + vpq_dset != nullptr) { + launch_vpq_search_main_core( + res, vpq_dset, params, graph_internal, queries, neighbors, distances, sample_filter); + } else if (auto* empty_dset = dynamic_cast*>(&index.data()); + empty_dset != nullptr) { + // Forgot to add a dataset. + RAFT_FAIL( + "Attempted to search without a dataset. Please call index.update_dataset(...) first."); + } else { + // This is a logic error. 
+ RAFT_FAIL("Unrecognized dataset format"); + } static_assert(std::is_same_v, "only float distances are supported at the moment"); diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp index 3732dcf3fe..49e14be73d 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp @@ -20,6 +20,7 @@ #include "utils.hpp" #include +#include #include @@ -36,152 +37,16 @@ _RAFT_DEVICE constexpr unsigned get_vlen() return utils::size_of() / utils::size_of(); } -template -struct data_load_t { - union { - LOAD_T load; - DATA_T data[VLEN]; - }; -}; - -template -struct distance_op; -template -struct distance_op { - const float* const query_buffer; - __device__ distance_op(const float* const query_buffer) : query_buffer(query_buffer) {} - - __device__ DISTANCE_T operator()(const DATA_T* const dataset_ptr, - const std::uint32_t dataset_dim, - const bool valid) - { - const unsigned lane_id = threadIdx.x % TEAM_SIZE; - constexpr unsigned vlen = get_vlen(); - constexpr unsigned reg_nelem = - (DATASET_BLOCK_DIM + (TEAM_SIZE * vlen) - 1) / (TEAM_SIZE * vlen); - data_load_t dl_buff[reg_nelem]; - - DISTANCE_T norm2 = 0; - if (valid) { - for (uint32_t elem_offset = 0; elem_offset < dataset_dim; elem_offset += DATASET_BLOCK_DIM) { -#pragma unroll - for (uint32_t e = 0; e < reg_nelem; e++) { - const uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset; - if (k >= dataset_dim) break; - dl_buff[e].load = *reinterpret_cast(dataset_ptr + k); - } -#pragma unroll - for (uint32_t e = 0; e < reg_nelem; e++) { - const uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset; - if (k >= dataset_dim) break; -#pragma unroll - for (uint32_t v = 0; v < vlen; v++) { - const uint32_t kv = k + v; - // if (kv >= dataset_dim) break; - DISTANCE_T diff = query_buffer[device::swizzling(kv)]; - diff -= 
spatial::knn::detail::utils::mapping{}(dl_buff[e].data[v]); - norm2 += diff * diff; - } - } - } - } - for (uint32_t offset = TEAM_SIZE / 2; offset > 0; offset >>= 1) { - norm2 += __shfl_xor_sync(0xffffffff, norm2, offset); - } - return norm2; - } -}; -template -struct distance_op { - static constexpr unsigned N_FRAGS = (DATASET_BLOCK_DIM + TEAM_SIZE - 1) / TEAM_SIZE; - float query_frags[N_FRAGS]; - - __device__ distance_op(const float* const query_buffer) - { - constexpr unsigned vlen = get_vlen(); - constexpr unsigned reg_nelem = - (DATASET_BLOCK_DIM + (TEAM_SIZE * vlen) - 1) / (TEAM_SIZE * vlen); - const std::uint32_t lane_id = threadIdx.x % TEAM_SIZE; - // Pre-load query vectors into registers when register usage is not too large. -#pragma unroll - for (unsigned e = 0; e < reg_nelem; e++) { - const unsigned k = (lane_id + (TEAM_SIZE * e)) * vlen; - // if (k >= dataset_dim) break; -#pragma unroll - for (unsigned v = 0; v < vlen; v++) { - const unsigned kv = k + v; - const unsigned ev = (vlen * e) + v; - query_frags[ev] = query_buffer[device::swizzling(kv)]; - } - } - } - - __device__ DISTANCE_T operator()(const DATA_T* const dataset_ptr, - const std::uint32_t dataset_dim, - const bool valid) - { - const unsigned lane_id = threadIdx.x % TEAM_SIZE; - constexpr unsigned vlen = get_vlen(); - constexpr unsigned reg_nelem = - (DATASET_BLOCK_DIM + (TEAM_SIZE * vlen) - 1) / (TEAM_SIZE * vlen); - data_load_t dl_buff[reg_nelem]; - - DISTANCE_T norm2 = 0; - if (valid) { -#pragma unroll - for (unsigned e = 0; e < reg_nelem; e++) { - const unsigned k = (lane_id + (TEAM_SIZE * e)) * vlen; - if (k >= dataset_dim) break; - dl_buff[e].load = *reinterpret_cast(dataset_ptr + k); - } -#pragma unroll - for (unsigned e = 0; e < reg_nelem; e++) { - const unsigned k = (lane_id + (TEAM_SIZE * e)) * vlen; - if (k >= dataset_dim) break; -#pragma unroll - for (unsigned v = 0; v < vlen; v++) { - DISTANCE_T diff; - const unsigned ev = (vlen * e) + v; - diff = query_frags[ev]; - diff -= 
spatial::knn::detail::utils::mapping{}(dl_buff[e].data[v]); - norm2 += diff * diff; - } - } - } - for (uint32_t offset = TEAM_SIZE / 2; offset > 0; offset >>= 1) { - norm2 += __shfl_xor_sync(0xffffffff, norm2, offset); - } - return norm2; - } -}; - template _RAFT_DEVICE void compute_distance_to_random_nodes( INDEX_T* const result_indices_ptr, // [num_pickup] DISTANCE_T* const result_distances_ptr, // [num_pickup] - const float* const query_buffer, - const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] - const std::size_t dataset_dim, - const std::size_t dataset_size, - const std::size_t dataset_ld, + const typename DATASET_DESCRIPTOR_T::QUERY_T* const query_buffer, + const DATASET_DESCRIPTOR_T& dataset_desc, const std::size_t num_pickup, const unsigned num_distilation, const uint64_t rand_xor_mask, @@ -195,9 +60,6 @@ _RAFT_DEVICE void compute_distance_to_random_nodes( uint32_t max_i = num_pickup; if (max_i % (32 / TEAM_SIZE)) { max_i += (32 / TEAM_SIZE) - (max_i % (32 / TEAM_SIZE)); } - distance_op dist_op( - query_buffer); - for (uint32_t i = threadIdx.x / TEAM_SIZE; i < max_i; i += blockDim.x / TEAM_SIZE) { const bool valid_i = (i < num_pickup); @@ -212,11 +74,12 @@ _RAFT_DEVICE void compute_distance_to_random_nodes( if (seed_ptr && (gid < num_seeds)) { seed_index = seed_ptr[gid]; } else { - seed_index = device::xorshift64(gid ^ rand_xor_mask) % dataset_size; + seed_index = device::xorshift64(gid ^ rand_xor_mask) % dataset_desc.size; } } - const auto norm2 = dist_op(dataset_ptr + dataset_ld * seed_index, dataset_dim, valid_i); + const auto norm2 = dataset_desc.template compute_similarity( + query_buffer, seed_index, valid_i); if (valid_i && (norm2 < best_norm2_team_local)) { best_norm2_team_local = norm2; @@ -240,27 +103,25 @@ _RAFT_DEVICE void compute_distance_to_random_nodes( template -_RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_indices_ptr, - DISTANCE_T* const result_child_distances_ptr, - // query - const float* 
const query_buffer, - // [dataset_dim, dataset_size] - const DATA_T* const dataset_ptr, - const std::size_t dataset_dim, - const std::size_t dataset_ld, - // [knn_k, dataset_size] - const INDEX_T* const knn_graph, - const std::uint32_t knn_k, - // hashmap - INDEX_T* const visited_hashmap_ptr, - const std::uint32_t hash_bitlen, - const INDEX_T* const parent_indices, - const INDEX_T* const internal_topk_list, - const std::uint32_t search_width) +_RAFT_DEVICE void compute_distance_to_child_nodes( + INDEX_T* const result_child_indices_ptr, + DISTANCE_T* const result_child_distances_ptr, + // query + const typename DATASET_DESCRIPTOR_T::QUERY_T* const query_buffer, + // [dataset_dim, dataset_size] + const DATASET_DESCRIPTOR_T& dataset_desc, + // [knn_k, dataset_size] + const INDEX_T* const knn_graph, + const std::uint32_t knn_k, + // hashmap + INDEX_T* const visited_hashmap_ptr, + const std::uint32_t hash_bitlen, + const INDEX_T* const parent_indices, + const INDEX_T* const internal_topk_list, + const std::uint32_t search_width) { constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; const INDEX_T invalid_index = utils::get_max_value(); @@ -281,16 +142,6 @@ _RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_in } result_child_indices_ptr[i] = child_id; } - - // [Notice] - // Loading the query vector here from shared memory into registers reduces - // shared memory trafiic. However, register usage increase. The - // MAX_N_FRAGS below is used as the threshold to enable or disable this, - // but the appropriate value should be discussed. 
- constexpr unsigned N_FRAGS = (DATASET_BLOCK_DIM + TEAM_SIZE - 1) / TEAM_SIZE; - constexpr bool use_fragment = N_FRAGS <= MAX_N_FRAGS; - distance_op dist_op( - query_buffer); __syncthreads(); // Compute the distance to child nodes @@ -302,8 +153,8 @@ _RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_in INDEX_T child_id = invalid_index; if (valid_i) { child_id = result_child_indices_ptr[i]; } - DISTANCE_T norm2 = - dist_op(dataset_ptr + child_id * dataset_ld, dataset_dim, child_id != invalid_index); + const auto norm2 = dataset_desc.template compute_similarity( + query_buffer, child_id, child_id != invalid_index); // Store the distance const unsigned lane_id = threadIdx.x % TEAM_SIZE; @@ -318,4 +169,101 @@ _RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_in } } // namespace device + +template +struct dataset_descriptor_base_t { + using INDEX_T = INDEX_T_; + using QUERY_T = QUERY_T_; + using DISTANCE_T = DISTANCE_T_; + + const INDEX_T size; + const std::uint32_t dim; + + dataset_descriptor_base_t(const INDEX_T size, const std::uint32_t dim) : size(size), dim(dim) {} +}; + +template +struct standard_dataset_descriptor_t + : public dataset_descriptor_base_t { + using LOAD_T = device::LOAD_128BIT_T; + using DATA_T = DATA_T_; + using QUERY_T = typename dataset_descriptor_base_t::QUERY_T; + + const DATA_T* const ptr; + const std::size_t ld; + using dataset_descriptor_base_t::size; + using dataset_descriptor_base_t::dim; + + standard_dataset_descriptor_t(const DATA_T* const ptr, + const std::size_t size, + const std::uint32_t dim, + const std::size_t ld) + : dataset_descriptor_base_t(size, dim), ptr(ptr), ld(ld) + { + } + + static const std::uint32_t smem_buffer_size_in_byte = 0; + __device__ void set_smem_ptr(void* const){}; + + template + __device__ void copy_query(const DATA_T* const dmem_query_ptr, + QUERY_T* const smem_query_ptr, + const std::uint32_t query_smem_buffer_length) + { + for (unsigned i = 
threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { + unsigned j = device::swizzling(i); + if (i < dim) { + smem_query_ptr[j] = spatial::knn::detail::utils::mapping{}(dmem_query_ptr[i]); + } else { + smem_query_ptr[j] = 0.0; + } + } + } + + template + __device__ DISTANCE_T compute_similarity(const QUERY_T* const query_ptr, + const INDEX_T dataset_i, + const bool valid) const + { + const auto dataset_ptr = ptr + dataset_i * ld; + const unsigned lane_id = threadIdx.x % TEAM_SIZE; + constexpr unsigned vlen = device::get_vlen(); + // #include (DATASET_BLOCK_DIM, TEAM_SIZE * vlen); + raft::TxN_t dl_buff[reg_nelem]; + + DISTANCE_T norm2 = 0; + if (valid) { + for (uint32_t elem_offset = 0; elem_offset < dim; elem_offset += DATASET_BLOCK_DIM) { +#pragma unroll + for (uint32_t e = 0; e < reg_nelem; e++) { + const uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset; + if (k >= dim) break; + dl_buff[e].load(dataset_ptr, k); + } +#pragma unroll + for (uint32_t e = 0; e < reg_nelem; e++) { + const uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset; + if (k >= dim) break; +#pragma unroll + for (uint32_t v = 0; v < vlen; v++) { + const uint32_t kv = k + v; + // Note this loop can go above the dataset_dim for padded arrays. This is not a problem + // because: + // - Above the last element (dataset_dim-1), the query array is filled with zeros. + // - The data buffer has to be also padded with zeros. 
+ DISTANCE_T diff = query_ptr[device::swizzling(kv)]; + diff -= spatial::knn::detail::utils::mapping{}(dl_buff[e].val.data[v]); + norm2 += diff * diff; + } + } + } + } + for (uint32_t offset = TEAM_SIZE / 2; offset > 0; offset >>= 1) { + norm2 += __shfl_xor_sync(0xffffffff, norm2, offset); + } + return norm2; + } +}; + } // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh new file mode 100644 index 0000000000..0204addba7 --- /dev/null +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance_vpq.cuh @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "compute_distance.hpp" + +#include + +namespace raft::neighbors::cagra::detail { +template +struct cagra_q_dataset_descriptor_t : public dataset_descriptor_base_t { + using LOAD_T = device::LOAD_128BIT_T; + using DATA_T = DATA_T_; + using CODE_BOOK_T = CODE_BOOK_T_; + using QUERY_T = typename dataset_descriptor_base_t::QUERY_T; + + const std::uint8_t* encoded_dataset_ptr; + const std::uint32_t encoded_dataset_dim; + const std::uint32_t n_subspace; + const CODE_BOOK_T* vq_code_book_ptr; + const float vq_scale; + const CODE_BOOK_T* pq_code_book_ptr; + const float pq_scale; + using dataset_descriptor_base_t::size; + using dataset_descriptor_base_t::dim; + + // Set on device + CODE_BOOK_T* smem_pq_code_book_ptr; + static const std::uint32_t smem_buffer_size_in_byte = + (1 << PQ_BITS) * PQ_LEN * utils::size_of(); + + __device__ void set_smem_ptr(void* const smem_ptr) + { + smem_pq_code_book_ptr = reinterpret_cast(smem_ptr); + + // Copy PQ table + if constexpr (std::is_same::value) { + for (unsigned i = threadIdx.x * 2; i < (1 << PQ_BITS) * PQ_LEN; i += blockDim.x * 2) { + half2 buf2; + buf2.x = pq_code_book_ptr[i]; + buf2.y = pq_code_book_ptr[i + 1]; + (reinterpret_cast(smem_pq_code_book_ptr + i))[0] = buf2; + } + } else { + for (unsigned i = threadIdx.x; i < (1 << PQ_BITS) * PQ_LEN; i += blockDim.x) { + // TODO: vectorize + smem_pq_code_book_ptr[i] = pq_code_book_ptr[i]; + } + } + } + + cagra_q_dataset_descriptor_t(const std::uint8_t* encoded_dataset_ptr, + const std::uint32_t encoded_dataset_dim, + const std::uint32_t n_subspace, + const CODE_BOOK_T* const vq_code_book_ptr, + const float vq_scale, + const CODE_BOOK_T* const pq_code_book_ptr, + const float pq_scale, + const std::size_t size, + const std::uint32_t dim) + : dataset_descriptor_base_t(size, dim), + encoded_dataset_ptr(encoded_dataset_ptr), + encoded_dataset_dim(encoded_dataset_dim), + n_subspace(n_subspace), + vq_code_book_ptr(vq_code_book_ptr), + vq_scale(vq_scale), + 
pq_code_book_ptr(pq_code_book_ptr), + pq_scale(pq_scale) + { + } + + template + __device__ void copy_query(const DATA_T* const dmem_query_ptr, + QUERY_T* const smem_query_ptr, + const std::uint32_t query_smem_buffer_length) + { + constexpr spatial::knn::detail::utils::mapping mapping{}; + for (unsigned i = threadIdx.x * 2; i < dim; i += blockDim.x * 2) { + half2 buf2{0, 0}; + if (i < dim) { buf2.x = mapping(dmem_query_ptr[i]); } + if (i + 1 < dim) { buf2.y = mapping(dmem_query_ptr[i + 1]); } + if ((PQ_BITS == 8) && (PQ_LEN % 2 == 0)) { + // Use swizzling in the condition to reduce bank conflicts in shared + // memory, which are likely to occur when pq_code_book_dim is large. + ((half2*)smem_query_ptr)[device::swizzling(i / 2)] = + buf2; + } else { + (reinterpret_cast(smem_query_ptr + i))[0] = buf2; + } + } + } + + template + __device__ DISTANCE_T compute_similarity(const QUERY_T* const query_ptr, + const INDEX_T node_id, + const bool valid) const + { + float norm = 0; + if (valid) { + const unsigned lane_id = threadIdx.x % TEAM_SIZE; + const uint32_t vq_code = *(reinterpret_cast( + encoded_dataset_ptr + (static_cast(encoded_dataset_dim) * node_id))); + if (PQ_BITS == 8) { + for (uint32_t elem_offset = 0; elem_offset < dim; elem_offset += DATASET_BLOCK_DIM) { + constexpr unsigned vlen = 4; // **** DO NOT CHANGE **** + constexpr unsigned nelem = + raft::div_rounding_up_unsafe(DATASET_BLOCK_DIM / PQ_LEN, TEAM_SIZE * vlen); + // Loading PQ codes + uint32_t pq_codes[nelem]; +#pragma unroll + for (std::uint32_t e = 0; e < nelem; e++) { + const std::uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset / PQ_LEN; + if (k >= n_subspace) break; + // Loading 4 x 8-bit PQ-codes using 32-bit load ops (from device memory) + pq_codes[e] = *(reinterpret_cast( + encoded_dataset_ptr + (static_cast(encoded_dataset_dim) * node_id) + + 4 + k)); + } + // + if constexpr ((std::is_same::value) && (PQ_LEN % 2 == 0)) { + // **** Use half2 for distance computation **** + half2 
norm2{0, 0}; +#pragma unroll + for (std::uint32_t e = 0; e < nelem; e++) { + const std::uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset / PQ_LEN; + if (k >= n_subspace) break; + // Loading VQ code-book + raft::TxN_t vq_vals[PQ_LEN]; +#pragma unroll + for (std::uint32_t m = 0; m < PQ_LEN; m += 1) { + const uint32_t d = (vlen * m) + (PQ_LEN * k); + if (d >= dim) break; + vq_vals[m].load( + reinterpret_cast(vq_code_book_ptr + d + (dim * vq_code)), 0); + } + // Compute distance + std::uint32_t pq_code = pq_codes[e]; +#pragma unroll + for (std::uint32_t v = 0; v < vlen; v++) { + if (PQ_LEN * (v + k) >= dim) break; +#pragma unroll + for (std::uint32_t m = 0; m < PQ_LEN; m += 2) { + const std::uint32_t d1 = m + (PQ_LEN * v); + const std::uint32_t d = d1 + (PQ_LEN * k); + // Loading query vector in smem + half2 diff2 = (reinterpret_cast( + query_ptr))[device::swizzling(d / 2)]; + // Loading PQ code book in smem + diff2 -= *(reinterpret_cast( + smem_pq_code_book_ptr + (1 << PQ_BITS) * 2 * (m / 2) + (2 * (pq_code & 0xff)))); + diff2 -= vq_vals[d1 / vlen].val.data[(d1 % vlen) / 2]; + norm2 += diff2 * diff2; + } + pq_code >>= 8; + } + } + norm += static_cast(norm2.x + norm2.y); + } else { + // **** Use float for distance computation **** +#pragma unroll + for (std::uint32_t e = 0; e < nelem; e++) { + const std::uint32_t k = (lane_id + (TEAM_SIZE * e)) * vlen + elem_offset / PQ_LEN; + if (k >= n_subspace) break; + // Loading VQ code-book + raft::TxN_t vq_vals[PQ_LEN]; +#pragma unroll + for (std::uint32_t m = 0; m < PQ_LEN; m++) { + const std::uint32_t d = (vlen * m) + (PQ_LEN * k); + if (d >= dim) break; + // Loading 4 x 8/16-bit VQ-values using 32/64-bit load ops (from L2$ or device + // memory) + vq_vals[m].load( + reinterpret_cast(vq_code_book_ptr + d + (dim * vq_code)), 0); + } + // Compute distance + std::uint32_t pq_code = pq_codes[e]; +#pragma unroll + for (std::uint32_t v = 0; v < vlen; v++) { + if (PQ_LEN * (v + k) >= dim) break; + raft::TxN_t pq_vals; + 
pq_vals.load( + reinterpret_cast(smem_pq_code_book_ptr + PQ_LEN * (pq_code & 0xff)), + 0); // (from L1$ or smem) +#pragma unroll + for (std::uint32_t m = 0; m < PQ_LEN; m++) { + const std::uint32_t d1 = m + (PQ_LEN * v); + const std::uint32_t d = d1 + (PQ_LEN * k); + // if (d >= dataset_dim) break; + DISTANCE_T diff = query_ptr[d]; // (from smem) + diff -= pq_scale * static_cast(pq_vals.data[m]); + diff -= vq_scale * static_cast(vq_vals[d1 / vlen].val.data[d1 % vlen]); + norm += diff * diff; + } + pq_code >>= 8; + } + } + } + } + } + } + for (uint32_t offset = TEAM_SIZE / 2; offset > 0; offset >>= 1) { + norm += __shfl_xor_sync(0xffffffff, norm, offset); + } + return norm; + } +}; + +} // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/device_common.hpp b/cpp/include/raft/neighbors/detail/cagra/device_common.hpp index cd7469b55e..d4d69e6a67 100644 --- a/cpp/include/raft/neighbors/detail/cagra/device_common.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/device_common.hpp @@ -42,13 +42,17 @@ _RAFT_HOST_DEVICE inline uint64_t xorshift64(uint64_t u) return u * 0x2545F4914F6CDD1DULL; } -template +template _RAFT_DEVICE inline T swizzling(T x) { // Address swizzling reduces bank conflicts in shared memory, but increases // the amount of operation instead. 
// return x; - return x ^ (x >> 5); // "x" must be less than 1024 + if constexpr (X_MAX <= 1024) { + return (x) ^ ((x) >> 5); + } else { + return (x) ^ (((x) >> 5) & 0x1f); + } } } // namespace device diff --git a/cpp/include/raft/neighbors/detail/cagra/factory.cuh b/cpp/include/raft/neighbors/detail/cagra/factory.cuh index 0aee912e25..4944b57c46 100644 --- a/cpp/include/raft/neighbors/detail/cagra/factory.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/factory.cuh @@ -25,16 +25,18 @@ namespace raft::neighbors::cagra::detail { -template class factory { + using T = typename DATASET_DESCRIPTOR_T::DATA_T; + using IdxT = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DistanceT = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + public: /** * Create a search structure for dataset with dim features. */ - static std::unique_ptr> create( + static std::unique_ptr> create( raft::resources const& res, search_params const& params, int64_t dim, @@ -63,28 +65,28 @@ class factory { break; default: THROW("Incorrect dataset_block_dim (%lu)\n", plan.dataset_block_dim); } - return std::unique_ptr>(); + return std::unique_ptr>(); } private: template - static std::unique_ptr> dispatch_kernel( - raft::resources const& res, search_plan_impl_base& plan) + static std::unique_ptr> + dispatch_kernel(raft::resources const& res, search_plan_impl_base& plan) { if (plan.algo == search_algo::SINGLE_CTA) { - return std::unique_ptr>( + return std::unique_ptr>( new single_cta_search:: - search( + search( res, plan, plan.dim, plan.graph_degree, plan.topk)); } else if (plan.algo == search_algo::MULTI_CTA) { - return std::unique_ptr>( + return std::unique_ptr>( new multi_cta_search:: - search( + search( res, plan, plan.dim, plan.graph_degree, plan.topk)); } else { - return std::unique_ptr>( + return std::unique_ptr>( new multi_kernel_search:: - search( + search( res, plan, plan.dim, plan.graph_degree, plan.topk)); } } diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh 
b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh index 1fcd159959..8192b1ae51 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh @@ -45,44 +45,46 @@ namespace multi_cta_search { template -struct search : public search_plan_impl { - using search_plan_impl::max_queries; - using search_plan_impl::itopk_size; - using search_plan_impl::algo; - using search_plan_impl::team_size; - using search_plan_impl::search_width; - using search_plan_impl::min_iterations; - using search_plan_impl::max_iterations; - using search_plan_impl::thread_block_size; - using search_plan_impl::hashmap_mode; - using search_plan_impl::hashmap_min_bitlen; - using search_plan_impl::hashmap_max_fill_rate; - using search_plan_impl::num_random_samplings; - using search_plan_impl::rand_xor_mask; +struct search : public search_plan_impl { + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; - using search_plan_impl::dim; - using search_plan_impl::graph_degree; - using search_plan_impl::topk; + using search_plan_impl::max_queries; + using search_plan_impl::itopk_size; + using search_plan_impl::algo; + using search_plan_impl::team_size; + using search_plan_impl::search_width; + using search_plan_impl::min_iterations; + using search_plan_impl::max_iterations; + using search_plan_impl::thread_block_size; + using search_plan_impl::hashmap_mode; + using search_plan_impl::hashmap_min_bitlen; + using search_plan_impl::hashmap_max_fill_rate; + using search_plan_impl::num_random_samplings; + using search_plan_impl::rand_xor_mask; - using search_plan_impl::hash_bitlen; + using search_plan_impl::dim; + using search_plan_impl::graph_degree; + using search_plan_impl::topk; - using search_plan_impl::small_hash_bitlen; - using search_plan_impl::small_hash_reset_interval; - using 
search_plan_impl::hashmap_size; - using search_plan_impl::dataset_size; - using search_plan_impl::result_buffer_size; + using search_plan_impl::hash_bitlen; - using search_plan_impl::smem_size; + using search_plan_impl::small_hash_bitlen; + using search_plan_impl::small_hash_reset_interval; + using search_plan_impl::hashmap_size; + using search_plan_impl::dataset_size; + using search_plan_impl::result_buffer_size; - using search_plan_impl::hashmap; - using search_plan_impl::num_executed_iterations; - using search_plan_impl::dev_seed; - using search_plan_impl::num_seeds; + using search_plan_impl::smem_size; + + using search_plan_impl::hashmap; + using search_plan_impl::num_executed_iterations; + using search_plan_impl::dev_seed; + using search_plan_impl::num_seeds; uint32_t num_cta_per_query; rmm::device_uvector intermediate_indices; @@ -95,8 +97,7 @@ struct search : public search_plan_impl( - res, params, dim, graph_degree, topk), + : search_plan_impl(res, params, dim, graph_degree, topk), intermediate_indices(0, resource::get_cuda_stream(res)), intermediate_distances(0, resource::get_cuda_stream(res)), topk_workspace(0, resource::get_cuda_stream(res)) @@ -120,9 +121,11 @@ struct search : public search_plan_impl(dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + smem_size = sizeof(float) * query_smem_buffer_length + (sizeof(INDEX_T) + sizeof(DISTANCE_T)) * result_buffer_size_32 + - sizeof(uint32_t) * search_width + sizeof(uint32_t); + sizeof(uint32_t) * search_width + sizeof(uint32_t) + + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte; RAFT_LOG_DEBUG("# smem_size: %u", smem_size); // @@ -191,22 +194,25 @@ struct search : public search_plan_impl dataset, - raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] - const uint32_t num_queries, - const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] - uint32_t* 
const num_executed_iterations, // [num_queries,] - uint32_t topk, - SAMPLE_FILTER_T sample_filter) + void operator()( + raft::resources const& res, + // raft::device_matrix_view dataset, + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view + graph, + typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, // [num_queries, topk] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const uint32_t num_queries, + const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* const num_executed_iterations, // [num_queries,] + uint32_t topk, + SAMPLE_FILTER_T sample_filter) { cudaStream_t stream = resource::get_cuda_stream(res); - select_and_run( - dataset, + select_and_run( + dataset_desc, graph, intermediate_indices.data(), intermediate_distances.data(), diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh index 7a5ad17460..50f9e69593 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include #include // none_cagra_sample_filter #include // RAFT_EXPLICIT @@ -27,63 +28,66 @@ namespace multi_cta_search { template -void select_and_run(raft::device_matrix_view dataset, - raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, - DISTANCE_T* const topk_distances_ptr, - const DATA_T* const queries_ptr, - const uint32_t num_queries, - const INDEX_T* dev_seed_ptr, - uint32_t* const num_executed_iterations, - uint32_t topk, - uint32_t block_size, - uint32_t result_buffer_size, - uint32_t smem_size, - int64_t hash_bitlen, - INDEX_T* hashmap_ptr, - uint32_t num_cta_per_query, - uint32_t num_random_samplings, - uint64_t 
rand_xor_mask, - uint32_t num_seeds, - size_t itopk_size, - size_t search_width, - size_t min_iterations, - size_t max_iterations, - SAMPLE_FILTER_T sample_filter, - cudaStream_t stream) RAFT_EXPLICIT; +void select_and_run( + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view graph, + typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, + const uint32_t num_queries, + const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, + uint32_t* const num_executed_iterations, + uint32_t topk, + uint32_t block_size, + uint32_t result_buffer_size, + uint32_t smem_size, + int64_t hash_bitlen, + typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, + uint32_t num_cta_per_query, + uint32_t num_random_samplings, + uint64_t rand_xor_mask, + uint32_t num_seeds, + size_t itopk_size, + size_t search_width, + size_t min_iterations, + size_t max_iterations, + SAMPLE_FILTER_T sample_filter, + cudaStream_t stream) RAFT_EXPLICIT; #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ 
+#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void select_and_run< \ + TEAM_SIZE, \ + MAX_DATASET_DIM, \ + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::standard_dataset_descriptor_t \ + dataset_desc, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); instantiate_kernel_selection( @@ -120,5 +124,292 @@ instantiate_kernel_selection( 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection + +#define instantiate_q_kernel_selection(TEAM_SIZE, \ + MAX_DATASET_DIM, \ + CODE_BOOK_T, \ + PQ_BITS, \ + PQ_CODE_BOOK_DIM, \ + DATA_T, \ + INDEX_T, \ + DISTANCE_T, \ + SAMPLE_FILTER_T) \ + extern template void \ + select_and_run, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t dataset_desc, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + 
INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_q_kernel_selection( + 8, 128, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection( + 16, 256, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection( + 32, 512, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 2, + half, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection( + 8, 128, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection( + 16, 256, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection( + 32, 512, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 4, + half, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_kernel_selection( + 8, 128, half, 8, 2, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); 
+instantiate_q_kernel_selection( + 8, 128, half, 8, 4, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_kernel_selection(8, + 128, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(8, + 128, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_kernel_selection(8, + 128, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 2, + 
int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(8, + 128, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(16, + 256, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 512, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_kernel_selection(32, + 1024, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_q_kernel_selection } // namespace multi_cta_search } // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index 30f56780d6..48c22d9d14 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -123,29 +123,26 @@ __device__ inline void topk_by_bitonic_sort(float* distances, // [num_elements] // // multiple CTAs per single query // -template __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( - INDEX_T* const result_indices_ptr, // [num_queries, num_cta_per_query, itopk_size] - DISTANCE_T* const result_distances_ptr, // [num_queries, num_cta_per_query, itopk_size] - const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] - const size_t dataset_dim, - const size_t dataset_size, - const size_t dataset_ld, - const DATA_T* 
const queries_ptr, // [num_queries, dataset_dim] - const INDEX_T* const knn_graph, // [dataset_size, graph_degree] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + result_indices_ptr, // [num_queries, num_cta_per_query, itopk_size] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const + result_distances_ptr, // [num_queries, num_cta_per_query, itopk_size] + DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const typename DATASET_DESCRIPTOR_T::INDEX_T* const knn_graph, // [dataset_size, graph_degree] const uint32_t graph_degree, const unsigned num_distilation, const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] + const typename DATASET_DESCRIPTOR_T::INDEX_T* seed_ptr, // [num_queries, num_seeds] const uint32_t num_seeds, - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const uint32_t hash_bitlen, const uint32_t itopk_size, const uint32_t search_width, @@ -154,6 +151,11 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( uint32_t* const num_executed_iterations, /* stats */ SAMPLE_FILTER_T sample_filter) { + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + using QUERY_T = typename DATASET_DESCRIPTOR_T::QUERY_T; + const auto num_queries = gridDim.y; const auto query_id = blockIdx.y; const auto num_cta_per_query = gridDim.x; @@ -188,14 +190,20 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( assert(result_buffer_size_32 <= MAX_ELEMENTS); const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; - auto query_buffer = reinterpret_cast(smem); + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + auto query_buffer = 
reinterpret_cast(smem); auto result_indices_buffer = reinterpret_cast(query_buffer + query_smem_buffer_length); auto result_distances_buffer = reinterpret_cast(result_indices_buffer + result_buffer_size_32); auto parent_indices_buffer = reinterpret_cast(result_distances_buffer + result_buffer_size_32); - auto terminate_flag = reinterpret_cast(parent_indices_buffer + search_width); + auto distance_work_buffer_ptr = + reinterpret_cast(parent_indices_buffer + search_width); + auto terminate_flag = reinterpret_cast(distance_work_buffer_ptr + + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte); + + // Set smem working buffer for the distance calculation + dataset_desc.set_smem_ptr(distance_work_buffer_ptr); #if 0 /* debug */ @@ -204,15 +212,10 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( result_distances_buffer[i] = utils::get_max_value(); } #endif - const DATA_T* const query_ptr = queries_ptr + (dataset_dim * query_id); - for (unsigned i = threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { - unsigned j = device::swizzling(i); - if (i < dataset_dim) { - query_buffer[j] = spatial::knn::detail::utils::mapping{}(query_ptr[i]); - } else { - query_buffer[j] = 0.0; - } - } + const DATA_T* const query_ptr = queries_ptr + (dataset_desc.dim * query_id); + dataset_desc.template copy_query( + query_ptr, query_buffer, query_smem_buffer_length); + if (threadIdx.x == 0) { terminate_flag[0] = 0; } INDEX_T* const local_visited_hashmap_ptr = visited_hashmap_ptr + (hashmap::get_size(hash_bitlen) * query_id); @@ -224,23 +227,19 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( const INDEX_T* const local_seed_ptr = seed_ptr ? 
seed_ptr + (num_seeds * query_id) : nullptr; uint32_t block_id = cta_id + (num_cta_per_query * query_id); uint32_t num_blocks = num_cta_per_query * num_queries; - device::compute_distance_to_random_nodes( - result_indices_buffer, - result_distances_buffer, - query_buffer, - dataset_ptr, - dataset_dim, - dataset_size, - dataset_ld, - result_buffer_size, - num_distilation, - rand_xor_mask, - local_seed_ptr, - num_seeds, - local_visited_hashmap_ptr, - hash_bitlen, - block_id, - num_blocks); + device::compute_distance_to_random_nodes(result_indices_buffer, + result_distances_buffer, + query_buffer, + dataset_desc, + result_buffer_size, + num_distilation, + rand_xor_mask, + local_seed_ptr, + num_seeds, + local_visited_hashmap_ptr, + hash_bitlen, + block_id, + num_blocks); __syncthreads(); _CLK_REC(clk_compute_1st_distance); @@ -272,13 +271,11 @@ __launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( _CLK_START(); // constexpr unsigned max_n_frags = 16; constexpr unsigned max_n_frags = 0; - device::compute_distance_to_child_nodes( + device::compute_distance_to_child_nodes( result_indices_buffer + itopk_size, result_distances_buffer + itopk_size, query_buffer, - dataset_ptr, - dataset_dim, - dataset_ld, + dataset_desc, knn_graph, graph_degree, local_visited_hashmap_ptr, @@ -398,53 +395,35 @@ void set_value_batch(T* const dev_ptr, <<>>(dev_ptr, ld, val, count, batch_size); } -template struct search_kernel_config { // Search kernel function type. Note that the actual values for the template value // parameters do not matter, because they are not part of the function signature. The // second to fourth value parameters will be selected by the choose_* functions below. 
using kernel_t = decltype(&search_kernel); static auto choose_buffer_size(unsigned result_buffer_size, unsigned block_size) -> kernel_t { if (result_buffer_size <= 64) { - return search_kernel; + return search_kernel; } else if (result_buffer_size <= 128) { return search_kernel; } else if (result_buffer_size <= 256) { return search_kernel; } THROW("Result buffer size %u larger than max buffer size %u", result_buffer_size, 256); @@ -453,26 +432,24 @@ struct search_kernel_config { template -void select_and_run( // raft::resources const& res, - raft::device_matrix_view dataset, - raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] +void select_and_run( + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view graph, + typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, // [num_queries, topk] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, - const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] - uint32_t* const num_executed_iterations, // [num_queries,] + const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* const num_executed_iterations, // [num_queries,] uint32_t topk, // multi_cta_search (params struct) uint32_t block_size, // uint32_t result_buffer_size, uint32_t smem_size, int64_t hash_bitlen, - INDEX_T* hashmap_ptr, + typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, uint32_t num_cta_per_query, uint32_t num_random_samplings, uint64_t rand_xor_mask, @@ -485,19 +462,20 @@ void select_and_run( // raft::resources const& res, cudaStream_t stream) { auto kernel = - search_kernel_config::choose_buffer_size(result_buffer_size, block_size); - - RAFT_CUDA_TRY( - 
cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); + search_kernel_config:: + choose_buffer_size(result_buffer_size, block_size); + + RAFT_CUDA_TRY(cudaFuncSetAttribute(kernel, + cudaFuncAttributeMaxDynamicSharedMemorySize, + smem_size + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte)); // Initialize hash table const uint32_t hash_size = hashmap::get_size(hash_bitlen); - set_value_batch( - hashmap_ptr, hash_size, utils::get_max_value(), hash_size, num_queries, stream); + set_value_batch(hashmap_ptr, + hash_size, + utils::get_max_value(), + hash_size, + num_queries, + stream); dim3 block_dims(block_size, 1, 1); dim3 grid_dims(num_cta_per_query, num_queries, 1); @@ -508,10 +486,7 @@ void select_and_run( // raft::resources const& res, smem_size); kernel<<>>(topk_indices_ptr, topk_distances_ptr, - dataset.data_handle(), - dataset.extent(1), - dataset.extent(0), - dataset.stride(0), + dataset_desc, queries_ptr, graph.data_handle(), graph.extent(1), diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index e4a30675bb..10788da432 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -16,6 +16,7 @@ #pragma once #include "compute_distance.hpp" +#include "compute_distance_vpq.cuh" #include "device_common.hpp" #include "hashmap.hpp" #include "search_plan.cuh" @@ -86,27 +87,25 @@ void get_value(T* const host_ptr, const T* const dev_ptr, cudaStream_t cuda_stre } // MAX_DATASET_DIM : must equal to or greater than dataset_dim -template -RAFT_KERNEL random_pickup_kernel(const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] - const std::size_t dataset_dim, - const std::size_t dataset_size, - const std::size_t dataset_ld, - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] - const std::size_t num_pickup, - const unsigned num_distilation, - 
const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] - const uint32_t num_seeds, - INDEX_T* const result_indices_ptr, // [num_queries, ldr] - DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] - const std::uint32_t ldr, // (*) ldr >= num_pickup - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << bitlen] - const std::uint32_t hash_bitlen) +template +RAFT_KERNEL random_pickup_kernel( + const DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const std::size_t num_pickup, + const unsigned num_distilation, + const uint64_t rand_xor_mask, + const typename DATASET_DESCRIPTOR_T::INDEX_T* seed_ptr, // [num_queries, num_seeds] + const uint32_t num_seeds, + typename DATASET_DESCRIPTOR_T::INDEX_T* const result_indices_ptr, // [num_queries, ldr] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] + const std::uint32_t ldr, // (*) ldr >= num_pickup + typename DATASET_DESCRIPTOR_T::INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << bitlen] + const std::uint32_t hash_bitlen) { + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + const auto ldb = hashmap::get_size(hash_bitlen); const auto global_team_index = (blockIdx.x * blockDim.x + threadIdx.x) / TEAM_SIZE; const uint32_t query_id = blockIdx.y; @@ -114,19 +113,17 @@ RAFT_KERNEL random_pickup_kernel(const DATA_T* const dataset_ptr, // [dataset_s // Load a query extern __shared__ float query_buffer[]; const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; for (uint32_t i = threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { unsigned j = device::swizzling(i); - if (i < dataset_dim) { - 
query_buffer[j] = - spatial::knn::detail::utils::mapping{}((queries_ptr + query_id * dataset_dim)[i]); + if (i < dataset_desc.dim) { + query_buffer[j] = spatial::knn::detail::utils::mapping{}( + (queries_ptr + query_id * dataset_desc.dim)[i]); } else { query_buffer[j] = 0.0; } } __syncthreads(); - device::distance_op dist_op( - query_buffer); INDEX_T best_index_team_local; DISTANCE_T best_norm2_team_local = utils::get_max_value(); @@ -136,10 +133,12 @@ RAFT_KERNEL random_pickup_kernel(const DATA_T* const dataset_ptr, // [dataset_s seed_index = seed_ptr[global_team_index + (num_seeds * query_id)]; } else { // Chose a seed node randomly - seed_index = device::xorshift64((global_team_index ^ rand_xor_mask) * (i + 1)) % dataset_size; + seed_index = + device::xorshift64((global_team_index ^ rand_xor_mask) * (i + 1)) % dataset_desc.size; } - const auto norm2 = dist_op(dataset_ptr + (dataset_ld * seed_index), dataset_dim, true); + const auto norm2 = dataset_desc.template compute_similarity( + query_buffer, seed_index, true); if (norm2 < best_norm2_team_local) { best_norm2_team_local = norm2; @@ -161,28 +160,22 @@ RAFT_KERNEL random_pickup_kernel(const DATA_T* const dataset_ptr, // [dataset_s } // MAX_DATASET_DIM : must be equal to or greater than dataset_dim -template -void random_pickup(const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] - const std::size_t dataset_dim, - const std::size_t dataset_size, - const std::size_t dataset_ld, - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] - const std::size_t num_queries, - const std::size_t num_pickup, - const unsigned num_distilation, - const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] - const uint32_t num_seeds, - INDEX_T* const result_indices_ptr, // [num_queries, ldr] - DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] - const std::size_t ldr, // (*) ldr >= num_pickup - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << bitlen] - const 
std::uint32_t hash_bitlen, - cudaStream_t const cuda_stream = 0) +template +void random_pickup( + const DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const std::size_t num_queries, + const std::size_t num_pickup, + const unsigned num_distilation, + const uint64_t rand_xor_mask, + const typename DATASET_DESCRIPTOR_T::INDEX_T* seed_ptr, // [num_queries, num_seeds] + const uint32_t num_seeds, + typename DATASET_DESCRIPTOR_T::INDEX_T* const result_indices_ptr, // [num_queries, ldr] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] + const std::size_t ldr, // (*) ldr >= num_pickup + typename DATASET_DESCRIPTOR_T::INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << bitlen] + const std::uint32_t hash_bitlen, + cudaStream_t const cuda_stream = 0) { const auto block_size = 256u; const auto num_teams_per_threadblock = block_size / TEAM_SIZE; @@ -190,14 +183,11 @@ void random_pickup(const DATA_T* const dataset_ptr, // [dataset_size, dataset_d num_queries); const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; const auto smem_size = query_smem_buffer_length * sizeof(float); - random_pickup_kernel - <<>>(dataset_ptr, - dataset_dim, - dataset_size, - dataset_ld, + random_pickup_kernel + <<>>(dataset_desc, queries_ptr, num_pickup, num_distilation, @@ -313,30 +303,33 @@ void pickup_next_parents(INDEX_T* const parent_candidates_ptr, // [num_queries, template RAFT_KERNEL compute_distance_to_child_nodes_kernel( - const INDEX_T* const parent_node_list, // [num_queries, search_width] - INDEX_T* const parent_candidates_ptr, // [num_queries, search_width] - DISTANCE_T* const parent_distance_ptr, // [num_queries, search_width] + const typename DATASET_DESCRIPTOR_T::INDEX_T* const + parent_node_list, // [num_queries, 
search_width] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + parent_candidates_ptr, // [num_queries, search_width] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const + parent_distance_ptr, // [num_queries, search_width] const std::size_t lds, const std::uint32_t search_width, - const DATA_T* const dataset_ptr, // [dataset_size, data_dim] - const std::uint32_t dataset_dim, - const std::uint32_t dataset_size, - const std::uint32_t dataset_ld, - const INDEX_T* const neighbor_graph_ptr, // [dataset_size, graph_degree] + const DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::INDEX_T* const + neighbor_graph_ptr, // [dataset_size, graph_degree] const std::uint32_t graph_degree, - const DATA_T* query_ptr, // [num_queries, data_dim] - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] + const typename DATASET_DESCRIPTOR_T::DATA_T* query_ptr, // [num_queries, data_dim] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const std::uint32_t hash_bitlen, - INDEX_T* const result_indices_ptr, // [num_queries, ldd] - DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] - const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree + typename DATASET_DESCRIPTOR_T::INDEX_T* const result_indices_ptr, // [num_queries, ldd] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] + const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree SAMPLE_FILTER_T sample_filter) { + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + const uint32_t ldb = hashmap::get_size(hash_bitlen); const auto tid = threadIdx.x + blockDim.x * blockIdx.x; const auto global_team_id = tid / TEAM_SIZE; @@ -344,12 +337,12 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel( extern __shared__ float query_buffer[]; const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, 
DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; for (uint32_t i = threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { unsigned j = device::swizzling(i); - if (i < dataset_dim) { + if (i < dataset_desc.dim) { query_buffer[j] = - spatial::knn::detail::utils::mapping{}((query_ptr + query_id * dataset_dim)[i]); + spatial::knn::detail::utils::mapping{}((query_ptr + query_id * dataset_desc.dim)[i]); } else { query_buffer[j] = 0.0; } @@ -357,9 +350,6 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel( __syncthreads(); if (global_team_id >= search_width * graph_degree) { return; } - device::distance_op dist_op( - query_buffer); - const std::size_t parent_list_index = parent_node_list[global_team_id / graph_degree + (search_width * blockIdx.y)]; @@ -381,8 +371,8 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel( const auto compute_distance_flag = hashmap::insert( visited_hashmap_ptr + (ldb * blockIdx.y), hash_bitlen, child_id); - const auto norm2 = - dist_op(dataset_ptr + (dataset_ld * child_id), dataset_dim, compute_distance_flag); + const auto norm2 = dataset_desc.template compute_similarity( + query_buffer, child_id, compute_distance_flag); if (compute_distance_flag) { if (threadIdx.x % TEAM_SIZE == 0) { @@ -407,29 +397,29 @@ RAFT_KERNEL compute_distance_to_child_nodes_kernel( template + class SAMPLE_FILTER_T, + class DATASET_DESCRIPTOR_T> void compute_distance_to_child_nodes( - const INDEX_T* const parent_node_list, // [num_queries, search_width] - INDEX_T* const parent_candidates_ptr, // [num_queries, search_width] - DISTANCE_T* const parent_distance_ptr, // [num_queries, search_width] + const typename DATASET_DESCRIPTOR_T::INDEX_T* const + parent_node_list, // [num_queries, search_width] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + parent_candidates_ptr, // [num_queries, search_width] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const + parent_distance_ptr, // [num_queries, 
search_width] const std::size_t lds, const uint32_t search_width, - const DATA_T* const dataset_ptr, // [dataset_size, data_dim] - const std::uint32_t dataset_dim, - const std::uint32_t dataset_size, - const std::uint32_t dataset_ld, - const INDEX_T* const neighbor_graph_ptr, // [dataset_size, graph_degree] + const DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::INDEX_T* const + neighbor_graph_ptr, // [dataset_size, graph_degree] const std::uint32_t graph_degree, - const DATA_T* query_ptr, // [num_queries, data_dim] + const typename DATASET_DESCRIPTOR_T::DATA_T* query_ptr, // [num_queries, data_dim] const std::uint32_t num_queries, - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] + typename DATASET_DESCRIPTOR_T::INDEX_T* const + visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const std::uint32_t hash_bitlen, - INDEX_T* const result_indices_ptr, // [num_queries, ldd] - DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] - const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree + typename DATASET_DESCRIPTOR_T::INDEX_T* const result_indices_ptr, // [num_queries, ldd] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] + const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree SAMPLE_FILTER_T sample_filter, cudaStream_t cuda_stream = 0) { @@ -439,20 +429,21 @@ void compute_distance_to_child_nodes( num_queries); const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; - const auto smem_size = query_smem_buffer_length * sizeof(float); + const auto smem_size = + query_smem_buffer_length * sizeof(float) + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte; - compute_distance_to_child_nodes_kernel + compute_distance_to_child_nodes_kernel <<>>(parent_node_list, parent_candidates_ptr, parent_distance_ptr, lds, search_width, - 
dataset_ptr, - dataset_dim, - dataset_size, - dataset_ld, + dataset_desc, neighbor_graph_ptr, graph_degree, query_ptr, @@ -609,47 +600,51 @@ void set_value_batch(T* const dev_ptr, // |<--- result_buffer_size --->| // Double buffer (B) template -struct search : search_plan_impl { - using search_plan_impl::max_queries; - using search_plan_impl::itopk_size; - using search_plan_impl::algo; - using search_plan_impl::team_size; - using search_plan_impl::search_width; - using search_plan_impl::min_iterations; - using search_plan_impl::max_iterations; - using search_plan_impl::thread_block_size; - using search_plan_impl::hashmap_mode; - using search_plan_impl::hashmap_min_bitlen; - using search_plan_impl::hashmap_max_fill_rate; - using search_plan_impl::num_random_samplings; - using search_plan_impl::rand_xor_mask; - - using search_plan_impl::dim; - using search_plan_impl::graph_degree; - using search_plan_impl::topk; - - using search_plan_impl::hash_bitlen; - - using search_plan_impl::small_hash_bitlen; - using search_plan_impl::small_hash_reset_interval; - using search_plan_impl::hashmap_size; - using search_plan_impl::dataset_size; - using search_plan_impl::result_buffer_size; - - using search_plan_impl::smem_size; - - using search_plan_impl::hashmap; - using search_plan_impl::num_executed_iterations; - using search_plan_impl::dev_seed; - using search_plan_impl::num_seeds; +struct search : search_plan_impl { + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + + static_assert(std::is_same_v, "Only float is supported as resulting distance"); + + using search_plan_impl::max_queries; + using search_plan_impl::itopk_size; + using search_plan_impl::algo; + using search_plan_impl::team_size; + using search_plan_impl::search_width; + using search_plan_impl::min_iterations; + using search_plan_impl::max_iterations; + using 
search_plan_impl::thread_block_size; + using search_plan_impl::hashmap_mode; + using search_plan_impl::hashmap_min_bitlen; + using search_plan_impl::hashmap_max_fill_rate; + using search_plan_impl::num_random_samplings; + using search_plan_impl::rand_xor_mask; + + using search_plan_impl::dim; + using search_plan_impl::graph_degree; + using search_plan_impl::topk; + + using search_plan_impl::hash_bitlen; + + using search_plan_impl::small_hash_bitlen; + using search_plan_impl::small_hash_reset_interval; + using search_plan_impl::hashmap_size; + using search_plan_impl::dataset_size; + using search_plan_impl::result_buffer_size; + + using search_plan_impl::smem_size; + + using search_plan_impl::hashmap; + using search_plan_impl::num_executed_iterations; + using search_plan_impl::dev_seed; + using search_plan_impl::num_seeds; size_t result_buffer_allocation_size; - rmm::device_uvector result_indices; // results_indices_buffer - rmm::device_uvector result_distances; // result_distances_buffer + rmm::device_uvector result_indices; // results_indices_buffer + rmm::device_uvector result_distances; // result_distances_buffer rmm::device_uvector parent_node_list; rmm::device_uvector topk_hint; rmm::device_scalar terminate_flag; // dev_terminate_flag, host_terminate_flag.; @@ -666,8 +661,7 @@ struct search : search_plan_impl { int64_t dim, int64_t graph_degree, uint32_t topk) - : search_plan_impl( - res, params, dim, graph_degree, topk), + : search_plan_impl(res, params, dim, graph_degree, topk), result_indices(0, resource::get_cuda_stream(res)), result_distances(0, resource::get_cuda_stream(res)), parent_node_list(0, resource::get_cuda_stream(res)), @@ -800,7 +794,7 @@ struct search : search_plan_impl { } void operator()(raft::resources const& res, - raft::device_matrix_view dataset, + DATASET_DESCRIPTOR_T dataset_desc, raft::device_matrix_view graph, INDEX_T* const topk_indices_ptr, // [num_queries, topk] DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] @@ 
-828,24 +822,20 @@ struct search : search_plan_impl { } // Choose initial entry point candidates at random - random_pickup( - dataset.data_handle(), - dataset.extent(1), - dataset.extent(0), - dataset.stride(0), - queries_ptr, - num_queries, - result_buffer_size, - num_random_samplings, - rand_xor_mask, - dev_seed_ptr, - num_seeds, - result_indices.data(), - result_distances.data(), - result_buffer_allocation_size, - hashmap.data(), - hash_bitlen, - stream); + random_pickup(dataset_desc, + queries_ptr, + num_queries, + result_buffer_size, + num_random_samplings, + rand_xor_mask, + dev_seed_ptr, + num_seeds, + result_indices.data(), + result_distances.data(), + result_buffer_allocation_size, + hashmap.data(), + hash_bitlen, + stream); unsigned iter = 0; while (1) { @@ -897,16 +887,13 @@ struct search : search_plan_impl { } // Compute distance to child nodes that are adjacent to the parent node - compute_distance_to_child_nodes( + compute_distance_to_child_nodes( parent_node_list.data(), result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size, result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size, result_buffer_allocation_size, search_width, - dataset.data_handle(), - dataset.extent(1), - dataset.extent(0), - dataset.stride(0), + dataset_desc, graph.data_handle(), graph.extent(1), queries_ptr, @@ -993,5 +980,68 @@ struct search : search_plan_impl { } }; +template +struct search, + SAMPLE_FILTER_T> + : public search_plan_impl, + SAMPLE_FILTER_T> { + using DATASET_DESCRIPTOR_T = cagra_q_dataset_descriptor_t; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + + search(raft::resources const& res, + search_params params, + int64_t dim, + int64_t graph_degree, + uint32_t topk) + : search_plan_impl(res, params, dim, graph_degree, topk) + { + THROW("The multi-kernel mode does not support VPQ"); + } + + void set_params(raft::resources const& res) {} + + void operator()(raft::resources 
const& res, + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view graph, + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const uint32_t num_queries, + const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* const num_executed_iterations, // [num_queries,] + uint32_t topk, + SAMPLE_FILTER_T sample_filter) + { + } +}; + } // namespace multi_kernel_search } // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh index 11ef7e5211..be5ac0554f 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh @@ -71,8 +71,12 @@ struct search_plan_impl_base : public search_params { } }; -template +template struct search_plan_impl : public search_plan_impl_base { + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + int64_t hash_bitlen; size_t small_hash_bitlen; @@ -111,7 +115,7 @@ struct search_plan_impl : public search_plan_impl_base { virtual ~search_plan_impl() {} virtual void operator()(raft::resources const& res, - raft::device_matrix_view dataset, + DATASET_DESCRIPTOR_T dataset_desc, raft::device_matrix_view graph, INDEX_T* const result_indices_ptr, // [num_queries, topk] DISTANCE_T* const result_distances_ptr, // [num_queries, topk] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh index f1e74ee7a5..4430b929fb 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh @@ -48,43 +48,45 @@ namespace single_cta_search { template -struct 
search : search_plan_impl { - using search_plan_impl::max_queries; - using search_plan_impl::itopk_size; - using search_plan_impl::algo; - using search_plan_impl::team_size; - using search_plan_impl::search_width; - using search_plan_impl::min_iterations; - using search_plan_impl::max_iterations; - using search_plan_impl::thread_block_size; - using search_plan_impl::hashmap_mode; - using search_plan_impl::hashmap_min_bitlen; - using search_plan_impl::hashmap_max_fill_rate; - using search_plan_impl::num_random_samplings; - using search_plan_impl::rand_xor_mask; - - using search_plan_impl::dim; - using search_plan_impl::graph_degree; - using search_plan_impl::topk; - - using search_plan_impl::hash_bitlen; - - using search_plan_impl::small_hash_bitlen; - using search_plan_impl::small_hash_reset_interval; - using search_plan_impl::hashmap_size; - using search_plan_impl::dataset_size; - using search_plan_impl::result_buffer_size; - - using search_plan_impl::smem_size; - - using search_plan_impl::hashmap; - using search_plan_impl::num_executed_iterations; - using search_plan_impl::dev_seed; - using search_plan_impl::num_seeds; +struct search : search_plan_impl { + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + + using search_plan_impl::max_queries; + using search_plan_impl::itopk_size; + using search_plan_impl::algo; + using search_plan_impl::team_size; + using search_plan_impl::search_width; + using search_plan_impl::min_iterations; + using search_plan_impl::max_iterations; + using search_plan_impl::thread_block_size; + using search_plan_impl::hashmap_mode; + using search_plan_impl::hashmap_min_bitlen; + using search_plan_impl::hashmap_max_fill_rate; + using search_plan_impl::num_random_samplings; + using search_plan_impl::rand_xor_mask; + + using search_plan_impl::dim; + using search_plan_impl::graph_degree; + using search_plan_impl::topk; 
+ + using search_plan_impl::hash_bitlen; + + using search_plan_impl::small_hash_bitlen; + using search_plan_impl::small_hash_reset_interval; + using search_plan_impl::hashmap_size; + using search_plan_impl::dataset_size; + using search_plan_impl::result_buffer_size; + + using search_plan_impl::smem_size; + + using search_plan_impl::hashmap; + using search_plan_impl::num_executed_iterations; + using search_plan_impl::dev_seed; + using search_plan_impl::num_seeds; uint32_t num_itopk_candidates; @@ -93,8 +95,7 @@ struct search : search_plan_impl { int64_t dim, int64_t graph_degree, uint32_t topk) - : search_plan_impl( - res, params, dim, graph_degree, topk) + : search_plan_impl(res, params, dim, graph_degree, topk) { set_params(res); } @@ -128,7 +129,8 @@ struct search : search_plan_impl { sizeof(float) * query_smem_buffer_length + (sizeof(INDEX_T) + sizeof(DISTANCE_T)) * result_buffer_size_32 + sizeof(INDEX_T) * hashmap::get_size(small_hash_bitlen) + sizeof(INDEX_T) * search_width + - sizeof(std::uint32_t) * topk_ws_size + sizeof(std::uint32_t); + sizeof(std::uint32_t) * topk_ws_size + sizeof(std::uint32_t) + + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte; smem_size = base_smem_size; if (num_itopk_candidates > 256) { // Tentatively calculate the required share memory size when radix @@ -205,7 +207,7 @@ struct search : search_plan_impl { } void operator()(raft::resources const& res, - raft::device_matrix_view dataset, + DATASET_DESCRIPTOR_T dataset_desc, raft::device_matrix_view graph, INDEX_T* const result_indices_ptr, // [num_queries, topk] DISTANCE_T* const result_distances_ptr, // [num_queries, topk] @@ -217,8 +219,8 @@ struct search : search_plan_impl { SAMPLE_FILTER_T sample_filter) { cudaStream_t stream = resource::get_cuda_stream(res); - select_and_run( - dataset, + select_and_run( + dataset_desc, graph, result_indices_ptr, result_distances_ptr, diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh 
b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh index fef060ffee..a836334667 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh @@ -27,25 +27,23 @@ namespace single_cta_search { template void select_and_run( // raft::resources const& res, - raft::device_matrix_view dataset, - raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view graph, + typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, // [num_queries, topk] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, - const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] - uint32_t* const num_executed_iterations, // [num_queries,] + const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* const num_executed_iterations, // [num_queries,] uint32_t topk, uint32_t num_itopk_candidates, uint32_t block_size, uint32_t smem_size, int64_t hash_bitlen, - INDEX_T* hashmap_ptr, + typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, size_t small_hash_bitlen, size_t small_hash_reset_interval, uint32_t num_random_samplings, @@ -60,34 +58,38 @@ void select_and_run( // raft::resources const& res, #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, 
\ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void select_and_run< \ + TEAM_SIZE, \ + MAX_DATASET_DIM, \ + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::standard_dataset_descriptor_t \ + dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); instantiate_single_cta_select_and_run( @@ -125,5 +127,473 @@ instantiate_single_cta_select_and_run( #undef instantiate_single_cta_select_and_run +#define instantiate_q_single_cta_select_and_run(TEAM_SIZE, \ + MAX_DATASET_DIM, \ + CODE_BOOK_T, \ + PQ_BITS, \ + PQ_CODE_BOOK_DIM, \ + DATA_T, \ + 
INDEX_T, \ + DISTANCE_T, \ + SAMPLE_FILTER_T) \ + extern template void \ + select_and_run, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 2, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + half, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 4, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 
8, + 4, + half, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 2, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 4, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + float, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 2, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 2, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 2, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 1024, half, 8, 2, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 4, half, int64_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 4, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 4, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 1024, half, 8, 4, half, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 2, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 2, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 2, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + float, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 4, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 16, 256, half, 8, 4, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 32, 512, half, 8, 4, float, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + float, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); 
+instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + uint8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 4, + int8_t, + uint32_t, + 
float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + int8_t, + uint32_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 2, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 2, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 2, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 2, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(8, + 128, + half, + 8, + 4, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 4, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 4, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + uint8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 2, int8_t, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 2, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 2, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 
1024, + half, + 8, + 2, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run( + 8, 128, half, 8, 4, int8_t, int64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(16, + 256, + half, + 8, + 4, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 512, + half, + 8, + 4, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_q_single_cta_select_and_run(32, + 1024, + half, + 8, + 4, + int8_t, + int64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_q_single_cta_select_and_run + } // namespace single_cta_search } // namespace raft::neighbors::cagra::detail diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index 652115928b..a697f9512c 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -456,42 +456,44 @@ __device__ inline void set_value_device(T* const ptr, const T fill, const std::u } // One query one thread block -template -__launch_bounds__(1024, 1) RAFT_KERNEL - search_kernel(INDEX_T* const result_indices_ptr, // [num_queries, top_k] - DISTANCE_T* const result_distances_ptr, // [num_queries, top_k] - const std::uint32_t top_k, - const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] - const std::size_t dataset_dim, - const std::size_t dataset_size, - const std::size_t dataset_ld, // stride of dataset - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] - const INDEX_T* const knn_graph, // [dataset_size, graph_degree] - const std::uint32_t graph_degree, - const unsigned num_distilation, - const uint64_t rand_xor_mask, - const 
INDEX_T* seed_ptr, // [num_queries, num_seeds] - const uint32_t num_seeds, - INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] - const std::uint32_t internal_topk, - const std::uint32_t search_width, - const std::uint32_t min_iteration, - const std::uint32_t max_iteration, - std::uint32_t* const num_executed_iterations, // [num_queries] - const std::uint32_t hash_bitlen, - const std::uint32_t small_hash_bitlen, - const std::uint32_t small_hash_reset_interval, - SAMPLE_FILTER_T sample_filter) +__launch_bounds__(1024, 1) RAFT_KERNEL search_kernel( + typename DATASET_DESCRIPTOR_T::INDEX_T* const result_indices_ptr, // [num_queries, top_k] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const result_distances_ptr, // [num_queries, top_k] + const std::uint32_t top_k, + DATASET_DESCRIPTOR_T dataset_desc, + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] + const typename DATASET_DESCRIPTOR_T::INDEX_T* const knn_graph, // [dataset_size, graph_degree] + const std::uint32_t graph_degree, + const unsigned num_distilation, + const uint64_t rand_xor_mask, + const typename DATASET_DESCRIPTOR_T::INDEX_T* seed_ptr, // [num_queries, num_seeds] + const uint32_t num_seeds, + typename DATASET_DESCRIPTOR_T::INDEX_T* const + visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] + const std::uint32_t internal_topk, + const std::uint32_t search_width, + const std::uint32_t min_iteration, + const std::uint32_t max_iteration, + std::uint32_t* const num_executed_iterations, // [num_queries] + const std::uint32_t hash_bitlen, + const std::uint32_t small_hash_bitlen, + const std::uint32_t small_hash_reset_interval, + SAMPLE_FILTER_T sample_filter) { - using LOAD_T = device::LOAD_128BIT_T; + using LOAD_T = device::LOAD_128BIT_T; + + using DATA_T = typename DATASET_DESCRIPTOR_T::DATA_T; + using INDEX_T = typename DATASET_DESCRIPTOR_T::INDEX_T; + using DISTANCE_T = typename DATASET_DESCRIPTOR_T::DISTANCE_T; + using QUERY_T = 
typename DATASET_DESCRIPTOR_T::QUERY_T; + const auto query_id = blockIdx.y; #ifdef _CLK_BREAKDOWN @@ -525,30 +527,31 @@ __launch_bounds__(1024, 1) RAFT_KERNEL const auto small_hash_size = hashmap::get_size(small_hash_bitlen); const auto query_smem_buffer_length = - raft::ceildiv(dataset_dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; - auto query_buffer = reinterpret_cast(smem); + raft::ceildiv(dataset_desc.dim, DATASET_BLOCK_DIM) * DATASET_BLOCK_DIM; + auto query_buffer = reinterpret_cast(smem); auto result_indices_buffer = reinterpret_cast(query_buffer + query_smem_buffer_length); auto result_distances_buffer = reinterpret_cast(result_indices_buffer + result_buffer_size_32); auto visited_hash_buffer = reinterpret_cast(result_distances_buffer + result_buffer_size_32); auto parent_list_buffer = reinterpret_cast(visited_hash_buffer + small_hash_size); - auto topk_ws = reinterpret_cast(parent_list_buffer + search_width); - auto terminate_flag = reinterpret_cast(topk_ws + 3); - auto smem_working_ptr = reinterpret_cast(terminate_flag + 1); + auto distance_work_buffer_ptr = + reinterpret_cast(parent_list_buffer + search_width); + auto topk_ws = reinterpret_cast(distance_work_buffer_ptr + + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte); + auto terminate_flag = reinterpret_cast(topk_ws + 3); + auto smem_work_ptr = reinterpret_cast(terminate_flag + 1); + + // Set smem working buffer for the distance calculation + dataset_desc.set_smem_ptr(distance_work_buffer_ptr); // A flag for filtering. 
auto filter_flag = terminate_flag; - const DATA_T* const query_ptr = queries_ptr + query_id * dataset_dim; - for (unsigned i = threadIdx.x; i < query_smem_buffer_length; i += blockDim.x) { - unsigned j = device::swizzling(i); - if (i < dataset_dim) { - query_buffer[j] = spatial::knn::detail::utils::mapping{}(query_ptr[i]); - } else { - query_buffer[j] = 0.0; - } - } + const DATA_T* const query_ptr = queries_ptr + query_id * dataset_desc.dim; + dataset_desc.template copy_query( + query_ptr, query_buffer, query_smem_buffer_length); + if (threadIdx.x == 0) { terminate_flag[0] = 0; topk_ws[0] = ~0u; @@ -568,21 +571,17 @@ __launch_bounds__(1024, 1) RAFT_KERNEL // compute distance to randomly selecting nodes _CLK_START(); const INDEX_T* const local_seed_ptr = seed_ptr ? seed_ptr + (num_seeds * query_id) : nullptr; - device::compute_distance_to_random_nodes( - result_indices_buffer, - result_distances_buffer, - query_buffer, - dataset_ptr, - dataset_dim, - dataset_size, - dataset_ld, - result_buffer_size, - num_distilation, - rand_xor_mask, - local_seed_ptr, - num_seeds, - local_visited_hashmap_ptr, - hash_bitlen); + device::compute_distance_to_random_nodes(result_indices_buffer, + result_distances_buffer, + query_buffer, + dataset_desc, + result_buffer_size, + num_distilation, + rand_xor_mask, + local_seed_ptr, + num_seeds, + local_visited_hashmap_ptr, + hash_bitlen); __syncthreads(); _CLK_REC(clk_compute_1st_distance); @@ -667,7 +666,7 @@ __launch_bounds__(1024, 1) RAFT_KERNEL nullptr, topk_ws, true, - reinterpret_cast(smem_working_ptr)); + reinterpret_cast(smem_work_ptr)); _CLK_REC(clk_topk); // reset small-hash table @@ -688,7 +687,7 @@ __launch_bounds__(1024, 1) RAFT_KERNEL parent_list_buffer, result_indices_buffer, internal_topk, - dataset_size, + dataset_desc.size, search_width); _CLK_REC(clk_pickup_parents); } @@ -708,13 +707,11 @@ __launch_bounds__(1024, 1) RAFT_KERNEL // compute the norms between child nodes and query node _CLK_START(); constexpr unsigned 
max_n_frags = 8; - device::compute_distance_to_child_nodes( + device::compute_distance_to_child_nodes( result_indices_buffer + internal_topk, result_distances_buffer + internal_topk, query_buffer, - dataset_ptr, - dataset_dim, - dataset_ld, + dataset_desc, knn_graph, graph_degree, local_visited_hashmap_ptr, @@ -814,50 +811,53 @@ __launch_bounds__(1024, 1) RAFT_KERNEL #endif } -template struct search_kernel_config { - using kernel_t = - decltype(&search_kernel); + using kernel_t = decltype(&search_kernel); template static auto choose_search_kernel(unsigned itopk_size) -> kernel_t { if (itopk_size <= 64) { - return search_kernel; + return search_kernel; } else if (itopk_size <= 128) { return search_kernel; } else if (itopk_size <= 256) { return search_kernel; } else if (itopk_size <= 512) { return search_kernel; } THROW("No kernel for parametels itopk_size %u, max_candidates %u", itopk_size, MAX_CANDIDATES); @@ -878,9 +878,21 @@ struct search_kernel_config { // Radix-based topk is used constexpr unsigned max_candidates = 32; // to avoid build failure if (itopk_size <= 256) { - return search_kernel; + return search_kernel; } else if (itopk_size <= 512) { - return search_kernel; + return search_kernel; } } THROW("No kernel for parametels itopk_size %u, num_itopk_candidates %u", @@ -891,25 +903,23 @@ struct search_kernel_config { template -void select_and_run( // raft::resources const& res, - raft::device_matrix_view dataset, - raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] +void select_and_run( + DATASET_DESCRIPTOR_T dataset_desc, + raft::device_matrix_view graph, + typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, // [num_queries, topk] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const typename DATASET_DESCRIPTOR_T::DATA_T* const 
queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, - const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] - uint32_t* const num_executed_iterations, // [num_queries,] + const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* const num_executed_iterations, // [num_queries,] uint32_t topk, uint32_t num_itopk_candidates, uint32_t block_size, // uint32_t smem_size, int64_t hash_bitlen, - INDEX_T* hashmap_ptr, + typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, size_t small_hash_bitlen, size_t small_hash_reset_interval, uint32_t num_random_samplings, @@ -923,16 +933,11 @@ void select_and_run( // raft::resources const& res, cudaStream_t stream) { auto kernel = - search_kernel_config::choose_itopk_and_mx_candidates(itopk_size, - num_itopk_candidates, - block_size); - RAFT_CUDA_TRY( - cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); + search_kernel_config:: + choose_itopk_and_mx_candidates(itopk_size, num_itopk_candidates, block_size); + RAFT_CUDA_TRY(cudaFuncSetAttribute(kernel, + cudaFuncAttributeMaxDynamicSharedMemorySize, + smem_size + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte)); dim3 thread_dims(block_size, 1, 1); dim3 block_dims(1, num_queries, 1); RAFT_LOG_DEBUG( @@ -940,10 +945,7 @@ void select_and_run( // raft::resources const& res, kernel<<>>(topk_indices_ptr, topk_distances_ptr, topk, - dataset.data_handle(), - dataset.extent(1), - dataset.extent(0), - dataset.stride(0), + dataset_desc, queries_ptr, graph.data_handle(), graph.extent(1), diff --git a/cpp/include/raft/neighbors/detail/cagra/utils.hpp b/cpp/include/raft/neighbors/detail/cagra/utils.hpp index 7e403abe91..265cbfdceb 100644 --- a/cpp/include/raft/neighbors/detail/cagra/utils.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/utils.hpp @@ -111,6 +111,11 @@ _RAFT_HOST_DEVICE constexpr unsigned size_of() { return 2; } +template <> +_RAFT_HOST_DEVICE constexpr unsigned size_of() +{ + return 4; 
+} // max values for data types template diff --git a/cpp/include/raft/neighbors/detail/refine_host-ext.hpp b/cpp/include/raft/neighbors/detail/refine_host-ext.hpp index 69d2bd29b2..f5c8c73bb9 100644 --- a/cpp/include/raft/neighbors/detail/refine_host-ext.hpp +++ b/cpp/include/raft/neighbors/detail/refine_host-ext.hpp @@ -54,6 +54,7 @@ template distance::DistanceType metric); instantiate_raft_neighbors_refine(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine(uint32_t, float, float, int64_t); instantiate_raft_neighbors_refine(int64_t, int8_t, float, int64_t); instantiate_raft_neighbors_refine(int64_t, uint8_t, float, int64_t); diff --git a/cpp/include/raft/neighbors/detail/vpq_dataset.cuh b/cpp/include/raft/neighbors/detail/vpq_dataset.cuh index f6cd2a1ceb..f1321ba343 100644 --- a/cpp/include/raft/neighbors/detail/vpq_dataset.cuh +++ b/cpp/include/raft/neighbors/detail/vpq_dataset.cuh @@ -81,7 +81,7 @@ auto fill_missing_params_heuristics(const vpq_params& params, const DatasetT& da vpq_params r = params; double n_rows = dataset.extent(0); size_t dim = dataset.extent(1); - if (r.pq_dim == 0) { r.pq_dim = raft::div_rounding_up_safe(dim, size_t{4}); } + if (r.pq_dim == 0) { r.pq_dim = raft::div_rounding_up_safe(dim, size_t{2}); } if (r.pq_bits == 0) { r.pq_bits = 8; } if (r.vq_n_centers == 0) { r.vq_n_centers = raft::round_up_safe(std::sqrt(n_rows), 8); } if (r.vq_kmeans_trainset_fraction == 0) { diff --git a/cpp/include/raft/neighbors/refine-ext.cuh b/cpp/include/raft/neighbors/refine-ext.cuh index fc57494b22..7948a0e4f2 100644 --- a/cpp/include/raft/neighbors/refine-ext.cuh +++ b/cpp/include/raft/neighbors/refine-ext.cuh @@ -52,7 +52,7 @@ void refine(raft::resources const& handle, #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY -#define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx) \ +#define instantiate_raft_neighbors_refine_d(idx_t, data_t, distance_t, matrix_idx) \ extern template void raft::neighbors::refine( \ 
raft::resources const& handle, \ raft::device_matrix_view dataset, \ @@ -60,8 +60,9 @@ void refine(raft::resources const& handle, raft::device_matrix_view neighbor_candidates, \ raft::device_matrix_view indices, \ raft::device_matrix_view distances, \ - raft::distance::DistanceType metric); \ - \ + raft::distance::DistanceType metric); + +#define instantiate_raft_neighbors_refine_h(idx_t, data_t, distance_t, matrix_idx) \ extern template void raft::neighbors::refine( \ raft::resources const& handle, \ raft::host_matrix_view dataset, \ @@ -71,8 +72,14 @@ void refine(raft::resources const& handle, raft::host_matrix_view distances, \ raft::distance::DistanceType metric); -instantiate_raft_neighbors_refine(int64_t, float, float, int64_t); -instantiate_raft_neighbors_refine(int64_t, int8_t, float, int64_t); -instantiate_raft_neighbors_refine(int64_t, uint8_t, float, int64_t); +instantiate_raft_neighbors_refine_d(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine_d(int64_t, int8_t, float, int64_t); +instantiate_raft_neighbors_refine_d(int64_t, uint8_t, float, int64_t); + +instantiate_raft_neighbors_refine_h(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine_h(uint32_t, float, float, int64_t); +instantiate_raft_neighbors_refine_h(int64_t, int8_t, float, int64_t); +instantiate_raft_neighbors_refine_h(int64_t, uint8_t, float, int64_t); -#undef instantiate_raft_neighbors_refine +#undef instantiate_raft_neighbors_refine_d +#undef instantiate_raft_neighbors_refine_h diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py new file mode 100644 index 0000000000..e827c06be5 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_00_generate.py @@ -0,0 +1,84 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +header = """/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +""" + +trailer = """ +} // namespace raft::neighbors::cagra::detail::multi_cta_search +""" + +mxdim_team = [(128, 8), (256, 16), (512, 32), (1024, 32)] +pq_bits = [8] +subspace_dims = [2, 4] +# block = [(64, 16), (128, 8), (256, 4), (512, 2), (1024, 1)] +# mxelem = [64, 128, 256] +load_types = ["uint4"] +code_book_types = ["half"] +search_types = dict( + float_uint32=( + "float", + "uint32_t", + "float", + ), # data_t, vec_idx_t, distance_t + half_uint32=("half", "uint32_t", "float"), + int8_uint32=("int8_t", "uint32_t", "float"), + uint8_uint32=("uint8_t", "uint32_t", "float"), + float_uint64=("float", "uint64_t", "float"), + half_uint64=("half", "uint64_t", "float"), +) +# knn +for type_path, (data_t, idx_t, distance_t) in search_types.items(): + for (mxdim, team) in mxdim_team: + for code_book_t in code_book_types: + for subspace_dim in subspace_dims: + for pq_bit in pq_bits: + path = f"q_search_multi_cta_{type_path}_dim{mxdim}_t{team}_{pq_bit}pq_{subspace_dim}subd_{code_book_t}.cu" + with open(path, "w") as f: + f.write(header) + f.write( + f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<{data_t} COMMA {code_book_t} COMMA {pq_bit} COMMA {subspace_dim} COMMA {distance_t} COMMA {idx_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" + ) + f.write(trailer) + # For pasting into CMakeLists.txt + print(f"src/neighbors/detail/cagra/{path}") diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..0bd386144c --- 
/dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..cd891b8e97 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..66e8357498 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..eb84983f9e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..c66f8a0ae3 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..2a1783944c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..9fa74f1134 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..8fc91b5a10 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..4e68c00525 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..5fe526ae47 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..64c89a880a --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..c3e2427f57 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..0a8826df1c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..8019bec3e3 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..1a2a364037 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..2f661538e6 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..aec486769f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..03f27085d8 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..119d1f2921 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..666c676e87 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..e53b456a54 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..2aee739141 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..daa442b514 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..a19346d19b --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..1c1d5381c9 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..b7402a3c38 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..f493b83bee --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..8efcbe0650 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..cb770f44ba --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..0fd8ab809c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..50cf198883 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..1548ed831e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..c60ea7c87d --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..4a68e1e43c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..df9fabd6a5 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..77075b0a44 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..374af8b56b --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..ddb80458fd --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..14e5c5d3dc --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..3c1776760a --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..e5a0a8882c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..cee80390e8 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..88678bf4ff --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..baa7ee358a --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..5c44f052f2 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..127a065fb5 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..fcf6985f97 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..f361e771b5 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_multi_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_multi_cta_00_generate.py + * + */ + +#include "search_multi_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::multi_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py new file mode 100644 index 0000000000..418d528a82 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_00_generate.py @@ -0,0 +1,89 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +header = """/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +""" + +trailer = """ +} // namespace raft::neighbors::cagra::detail::single_cta_search +""" + +mxdim_team = [(128, 8), (256, 16), (512, 32), (1024, 32)] +# block = [(64, 16), (128, 8), (256, 4), (512, 2), (1024, 1)] +# itopk_candidates = [64, 128, 256] +# itopk_size = [64, 128, 256, 512] +# mxelem = [64, 128, 256] + +pq_bits = [8] +subspace_dims = [2, 4] + +# rblock = [(256, 4), (512, 2), (1024, 1)] +# rcandidates = [32] +# rsize = [256, 512] +code_book_types = ["half"] + +search_types = dict( + float_uint32=("float", "uint32_t", "float"), # data_t, idx_t, distance_t + half_uint32=("half", "uint32_t", "float"), + int8_uint32=("int8_t", "uint32_t", "float"), + uint8_uint32=("uint8_t", "uint32_t", "float"), + float_uint64=("float", "uint64_t", "float"), + half_uint64=("half", "uint64_t", "float"), +) + +# knn +for type_path, (data_t, idx_t, distance_t) in search_types.items(): + for (mxdim, team) in mxdim_team: + for code_book_t in code_book_types: + for subspace_dim in subspace_dims: + for pq_bit in pq_bits: + path = f"q_search_single_cta_{type_path}_dim{mxdim}_t{team}_{pq_bit}pq_{subspace_dim}subd_{code_book_t}.cu" + with open(path, "w") as f: + f.write(header) + f.write( + f"instantiate_kernel_selection(\n {team}, {mxdim},
raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t<{data_t} COMMA {code_book_t} COMMA {pq_bit} COMMA {subspace_dim} COMMA {distance_t} COMMA {idx_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" + ) + + f.write(trailer) + # For pasting into CMakeLists.txt + print(f"src/neighbors/detail/cagra/{path}") diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..d61ad0ce15 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..410d2377ec --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..60cd58bab9 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include <raft/neighbors/sample_filter_types.hpp> + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..dfe5e6f14e --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..9a5d862276 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..d92ab50a58 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..aac197d590 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..f38a10e6d0 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..5523e63038 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..b06ef3d4fd --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..1fddee0e06 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..2aee442186 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..7a15e85280 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..efba46c248 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..990582f18b --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..a55907c66f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + float COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..55fd749720 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..4b4063652a --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..bae83dc0fa --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..99492db344 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..797142e317 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..9a36c35ae0 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..e0a01e84cc --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..14de1b8941 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..b1d50fb445 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..c189a91764 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..8693ee3716 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..216ffd1ec5 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..36985d218b --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..8d55fe2b09 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..2fdb1cbc20 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..6dc3dc2ca8 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + half COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint64_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..21f8633033 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..1a3867e06f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..9cbb16188a --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..305a1754bc --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..900e1b69d9 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..a0bb2259f0 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..09d36a39a0 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..dc9cbb2b56 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + int8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..c5508a38e2 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..7024425155 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 1024, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu new file mode 100644 index 0000000000..68687bc9cf --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu new file mode 100644 index 0000000000..60efc55a30 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(8, + 128, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu new file mode 100644 index 0000000000..b2dfaac5fe --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu new file mode 100644 index 0000000000..891e9ef7cc --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(16, + 256, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu new file mode 100644 index 0000000000..91e617204c --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 2 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu new file mode 100644 index 0000000000..a01d497676 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * NOTE: this file is generated by q_search_single_cta_00_generate.py + * + * Make changes there and run in this directory: + * + * > python q_search_single_cta_00_generate.py + * + */ + +#include "search_single_cta.cuh" + +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection(32, + 512, + raft::neighbors::cagra::detail::cagra_q_dataset_descriptor_t< + uint8_t COMMA half COMMA 8 COMMA 4 COMMA float COMMA uint32_t>, + raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh new file mode 100644 index 0000000000..179bf8f20f --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +namespace raft::neighbors::cagra::detail::multi_cta_search { + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +#define COMMA , + +} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py index 6f8766c86b..6f023c39f1 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py @@ -12,8 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -header = """ -/* +header = """/* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); @@ -38,45 +37,14 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \\ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \\ - template void \\ - select_and_run( \\ - raft::device_matrix_view dataset, \\ - raft::device_matrix_view graph, \\ - INDEX_T* const topk_indices_ptr, \\ - DISTANCE_T* const topk_distances_ptr, \\ - const DATA_T* const queries_ptr, \\ - const uint32_t num_queries, \\ - const INDEX_T* dev_seed_ptr, \\ - uint32_t* const num_executed_iterations, \\ - uint32_t topk, \\ - uint32_t block_size, \\ - uint32_t result_buffer_size, \\ - uint32_t smem_size, \\ - int64_t hash_bitlen, \\ - INDEX_T* hashmap_ptr, \\ - uint32_t num_cta_per_query, \\ - uint32_t num_random_samplings, \\ - uint64_t rand_xor_mask, \\ - uint32_t num_seeds, \\ - size_t itopk_size, \\ - size_t search_width, \\ - size_t min_iterations, \\ - size_t max_iterations, \\ - SAMPLE_FILTER_T sample_filter, \\ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { """ trailer = """ -#undef instantiate_kernel_selection - } // namespace raft::neighbors::cagra::detail::multi_cta_search """ @@ -103,7 +71,7 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_kernel_selection(\n {team}, {mxdim}, {data_t}, {idx_t}, {distance_t}, raft::neighbors::filtering::none_cagra_sample_filter);\n" + f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::standard_dataset_descriptor_t<{data_t} COMMA {idx_t} COMMA {distance_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" ) f.write(trailer) # For pasting into CMakeLists.txt diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu index 
1a3b2284bd..0e28d7a876 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu 
b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu index 36e86d9ed6..5e5e80a5de 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git 
a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu index 6f1af2d93f..9039496968 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } 
// namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu index 1279f8e415..fe1c7e77e5 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 512, + 
raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu index 0dabff0df5..7ef36baf7d 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 1024, float, uint64_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu index 72bb74cdb8..da51c16314 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace 
raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu index dceea10b5d..99a4f7feb7 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu index acb8bd6a12..50cdc97dd7 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu index fa89bca45f..b2d9cdb600 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 
2023-2024, NVIDIA CORPORATION. * @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 1024, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu index 645ca61ff5..d756b295b7 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 8, 128, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu index 41b6f9b420..b1e998762c 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 16, 256, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu index 38f0ac3b04..e712de6390 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 512, half, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu index c462a9d359..282de4a851 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 1024, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu index f5b2874e20..71ef968575 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 8, 128, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu index 0b01428b86..7c88406d71 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 16, 256, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu index 70228a129d..360635dddb 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_half_uint64_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 512, half, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu index 0254f09ff0..3f129bd7cf 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu index 2b67e7e968..053b73275e 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, 
NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu index 17d6722e58..a1bb20369a 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu +++ 
b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu index 38f02812e2..dbbc8bdd21 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu 
b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu index fa111196c6..125499e319 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 1024, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff 
--git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu index 1ef3c28aa3..f2117c4f80 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); 
} // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu index d26cb44843..8e5ba0f98f 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 16, 256, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 16, + 256, + 
raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu index 4d4322f261..bea7d25392 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,43 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::multi_cta_search { +#include "search_multi_cta.cuh" -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::multi_cta_search { instantiate_kernel_selection( - 32, 512, uint8_t, uint32_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_kernel_selection + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::multi_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta.cuh new file mode 100644 index 0000000000..7fb705a2d2 --- /dev/null +++ b/cpp/src/neighbors/detail/cagra/search_single_cta.cuh @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +namespace raft::neighbors::cagra::detail::single_cta_search { + +#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATASET_DESC_T, SAMPLE_FILTER_T) \ + template void select_and_run( \ + DATASET_DESC_T dataset_desc, \ + raft::device_matrix_view graph, \ + typename DATASET_DESC_T::INDEX_T* const topk_indices_ptr, \ + typename DATASET_DESC_T::DISTANCE_T* const topk_distances_ptr, \ + const typename DATASET_DESC_T::DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +#define COMMA , + +} // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py index 1515f43134..0e809e4dc3 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py @@ -12,8 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -header = """ -/* +header = """/* * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); @@ -38,46 +37,14 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \\ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \\ - template void \\ - select_and_run( \\ - raft::device_matrix_view dataset, \\ - raft::device_matrix_view graph, \\ - INDEX_T* const topk_indices_ptr, \\ - DISTANCE_T* const topk_distances_ptr, \\ - const DATA_T* const queries_ptr, \\ - const uint32_t num_queries, \\ - const INDEX_T* dev_seed_ptr, \\ - uint32_t* const num_executed_iterations, \\ - uint32_t topk, \\ - uint32_t num_itopk_candidates, \\ - uint32_t block_size, \\ - uint32_t smem_size, \\ - int64_t hash_bitlen, \\ - INDEX_T* hashmap_ptr, \\ - size_t small_hash_bitlen, \\ - size_t small_hash_reset_interval, \\ - uint32_t num_random_samplings, \\ - uint64_t rand_xor_mask, \\ - uint32_t num_seeds, \\ - size_t itopk_size, \\ - size_t search_width, \\ - size_t min_iterations, \\ - size_t max_iterations, \\ - SAMPLE_FILTER_T sample_filter, \\ - cudaStream_t stream); +#include +namespace raft::neighbors::cagra::detail::single_cta_search { """ trailer = """ -#undef instantiate_single_cta_search_kernel - } // namespace raft::neighbors::cagra::detail::single_cta_search """ @@ -107,7 +74,7 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_single_cta_select_and_run(\n {team}, {mxdim}, {data_t}, {idx_t}, {distance_t}, raft::neighbors::filtering::none_cagra_sample_filter);\n" + f"instantiate_kernel_selection(\n {team}, {mxdim}, raft::neighbors::cagra::detail::standard_dataset_descriptor_t<{data_t} COMMA {idx_t} COMMA {distance_t}>, raft::neighbors::filtering::none_cagra_sample_filter);\n" ) f.write(trailer) diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu 
b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu index b8c23103ba..8a9fc408ee 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + 
raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu index 8ab1897119..c6f7c90c69 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel 
+namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu index 9fd36b4cb9..2766286673 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include 
-instantiate_single_cta_select_and_run( - 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu index a9ee2c864b..98ee189766 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu index dadc574b65..c3ea39a729 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu index 30e043f47e..a53457656c 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git 
a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu index 089e4c930f..52318efb85 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 16, + 256, + 
raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu index 3e8ffb8bf8..6451fdc7f3 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 512, float, uint64_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu index 29e7bfa250..e927fd0878 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 1024, half, uint32_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu index a004f900d0..3f3d22ee08 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 8, 128, half, uint32_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu index 549849b21d..a84e5b8bd7 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 16, 256, half, uint32_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu index 3825f572f7..af4248865b 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 512, half, uint32_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu index 31d83f443b..16bd0cb647 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim1024_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 1024, half, uint64_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu index 3493ab294c..afc59c8a59 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim128_t8.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 8, 128, half, uint64_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu index 6e09709994..147d31cf85 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim256_t16.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 16, 256, half, uint64_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu index 4bc0158f7e..5624a71c3c 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_half_uint64_dim512_t32.cu @@ -1,4 +1,3 @@ - /* * Copyright (c) 2023-2024, NVIDIA CORPORATION. * @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 512, half, uint64_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu index 279587738e..761fb705ba 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu index ef127d3f7d..84b76cba53 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu index 7fcfdcc28e..598fff9cdf 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git 
a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu index a6c606d99b..e7a1a9d9c6 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 512, + 
raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu index 0b8be56614..d40b9285fc 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 1024, uint8_t, uint32_t, float, 
raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 1024, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu index 4c193b9408..073bb350da 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 8, + 128, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu index bdf16d2f03..29b0224b4d 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. 
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 16, 256, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 16, + 256, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu index 93624df4aa..d9601de2ad 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu @@ -1,6 +1,5 @@ - /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,44 +23,15 @@ * */ -#include -#include - -namespace raft::neighbors::cagra::detail::single_cta_search { +#include "search_single_cta.cuh" -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ - cudaStream_t stream); +#include -instantiate_single_cta_select_and_run( - 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); - -#undef instantiate_single_cta_search_kernel +namespace raft::neighbors::cagra::detail::single_cta_search { +instantiate_kernel_selection( + 32, + 512, + raft::neighbors::cagra::detail::standard_dataset_descriptor_t, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace raft::neighbors::cagra::detail::single_cta_search diff --git 
a/cpp/src/neighbors/detail/refine_host_float_float.cpp b/cpp/src/neighbors/detail/refine_host_float_float.cpp index c596200c0a..09dcae9c3a 100644 --- a/cpp/src/neighbors/detail/refine_host_float_float.cpp +++ b/cpp/src/neighbors/detail/refine_host_float_float.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,5 +25,6 @@ distance::DistanceType metric); instantiate_raft_neighbors_refine(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine(uint32_t, float, float, int64_t); #undef instantiate_raft_neighbors_refine diff --git a/cpp/src/neighbors/refine_float_float.cu b/cpp/src/neighbors/refine_float_float.cu index ea6892d2c5..75851eeedb 100644 --- a/cpp/src/neighbors/refine_float_float.cu +++ b/cpp/src/neighbors/refine_float_float.cu @@ -1,6 +1,6 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,7 +26,7 @@ #include -#define instantiate_raft_neighbors_refine(idx_t, data_t, distance_t, matrix_idx) \ +#define instantiate_raft_neighbors_refine_d(idx_t, data_t, distance_t, matrix_idx) \ template void raft::neighbors::refine( \ raft::resources const& handle, \ raft::device_matrix_view dataset, \ @@ -34,17 +34,21 @@ raft::device_matrix_view neighbor_candidates, \ raft::device_matrix_view indices, \ raft::device_matrix_view distances, \ - raft::distance::DistanceType metric); \ - \ - template void raft::neighbors::refine( \ - raft::resources const& handle, \ - raft::host_matrix_view dataset, \ - raft::host_matrix_view queries, \ - raft::host_matrix_view neighbor_candidates, \ - raft::host_matrix_view indices, \ - raft::host_matrix_view distances, \ raft::distance::DistanceType metric); -instantiate_raft_neighbors_refine(int64_t, float, float, int64_t); +#define instantiate_raft_neighbors_refine_h(idx_t, data_t, distance_t, matrix_idx) \ + template void raft::neighbors::refine( \ + raft::resources const& handle, \ + raft::host_matrix_view dataset, \ + raft::host_matrix_view queries, \ + raft::host_matrix_view neighbor_candidates, \ + raft::host_matrix_view indices, \ + raft::host_matrix_view distances, \ + raft::distance::DistanceType metric); + +instantiate_raft_neighbors_refine_d(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine_h(int64_t, float, float, int64_t); +instantiate_raft_neighbors_refine_h(uint32_t, float, float, int64_t); -#undef instantiate_raft_neighbors_refine +#undef instantiate_raft_neighbors_refine_d +#undef instantiate_raft_neighbors_refine_h diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index ecb871fccc..20ed3bacc7 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -372,6 +372,8 @@ if(BUILD_TESTS) test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu test/neighbors/ann_cagra/test_float_int64_t.cu test/neighbors/ann_cagra/test_half_int64_t.cu + 
test/neighbors/ann_cagra_vpq/test_float_int64_t.cu + test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu diff --git a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh index 175e4ef483..5cca6d561a 100644 --- a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh +++ b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,87 +21,133 @@ namespace raft::neighbors::cagra::detail { namespace multi_cta_search { -#define instantiate_kernel_selection( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_kernel_selection( \ + DATASET_DESCRIPTOR, TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void \ + select_and_run, \ 
+ SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::DATASET_DESCRIPTOR dataset_desc, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection( - 32, 1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_kernel_selection( - 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(standard_dataset_descriptor_t, + 32, + 1024, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(standard_dataset_descriptor_t, + 8, + 128, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(standard_dataset_descriptor_t, + 16, + 256, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection(standard_dataset_descriptor_t, + 32, + 512, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection } // namespace multi_cta_search namespace 
single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ - extern template void \ - select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - SAMPLE_FILTER_T sample_filter, \ +#define instantiate_single_cta_select_and_run( \ + DATASET_DESCRIPTOR, TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void \ + select_and_run, \ + SAMPLE_FILTER_T>( \ + raft::neighbors::cagra::detail::DATASET_DESCRIPTOR dataset_desc, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run( - 32, 
1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); -instantiate_single_cta_select_and_run( - 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, + 32, + 1024, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, + 8, + 128, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, + 16, + 256, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run(standard_dataset_descriptor_t, + 32, + 512, + float, + uint64_t, + float, + raft::neighbors::filtering::none_cagra_sample_filter); } // namespace single_cta_search -} // namespace raft::neighbors::cagra::detail \ No newline at end of file +} // namespace raft::neighbors::cagra::detail diff --git a/cpp/test/neighbors/ann_cagra_vpq.cuh b/cpp/test/neighbors/ann_cagra_vpq.cuh new file mode 100755 index 0000000000..503b1a413a --- /dev/null +++ b/cpp/test/neighbors/ann_cagra_vpq.cuh @@ -0,0 +1,336 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "../test_utils.cuh" +#include "ann_utils.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace { +template +void GenerateDataset(T* const dataset_ptr, + T* const query_ptr, + const std::size_t dataset_size, + const std::size_t query_size, + const std::size_t dim, + const std::size_t num_centers, + cudaStream_t cuda_stream) +{ + auto center_list = raft::make_host_matrix(num_centers, dim); + auto host_dataset = raft::make_host_matrix(std::max(dataset_size, query_size), dim); + + std::normal_distribution dist(0, 1); + std::mt19937 mt(0); + for (std::size_t i = 0; i < center_list.size(); i++) { + center_list.data_handle()[i] = dist(mt); + } + + std::uniform_int_distribution i_dist(0, num_centers - 1); + for (std::size_t i = 0; i < dataset_size; i++) { + const auto center_index = i_dist(mt); + for (std::size_t j = 0; j < dim; j++) { + host_dataset.data_handle()[i * dim + j] = + center_list.data_handle()[center_index + j] + dist(mt) * 1e-1; + } + } + raft::copy(dataset_ptr, host_dataset.data_handle(), dataset_size * dim, cuda_stream); + + for (std::size_t i = 0; i < query_size; i++) { + const auto center_index = i_dist(mt); + for (std::size_t j = 0; j < dim; j++) { + host_dataset.data_handle()[i * dim + j] = + center_list.data_handle()[center_index + j] + dist(mt) * 1e-1; + } + } + raft::copy(query_ptr, host_dataset.data_handle(), 
query_size * dim, cuda_stream); +} +} // namespace + +namespace raft::neighbors::cagra { +struct AnnCagraVpqInputs { + int n_queries; + int n_rows; + int dim; + int k; + int pq_len; + int pq_bits; + graph_build_algo build_algo; + search_algo algo; + int max_queries; + int team_size; + int itopk_size; + int search_width; + raft::distance::DistanceType metric; + bool host_dataset; + bool include_serialized_dataset; + // std::optional + double min_recall; // = std::nullopt; +}; + +inline ::std::ostream& operator<<(::std::ostream& os, const AnnCagraVpqInputs& p) +{ + std::vector algo = {"single-cta", "multi_cta", "multi_kernel", "auto"}; + std::vector build_algo = {"IVF_PQ", "NN_DESCENT"}; + os << "{n_queries=" << p.n_queries << ", dataset shape=" << p.n_rows << "x" << p.dim + << ", k=" << p.k << ", pq_bits=" << p.pq_bits << ", pq_len=" << p.pq_len << ", " + << algo.at((int)p.algo) << ", max_queries=" << p.max_queries << ", itopk_size=" << p.itopk_size + << ", search_width=" << p.search_width << ", metric=" << static_cast(p.metric) + << (p.host_dataset ? 
", host" : ", device") + << ", build_algo=" << build_algo.at((int)p.build_algo) << '}' << std::endl; + return os; +} + +template +class AnnCagraVpqTest : public ::testing::TestWithParam { + public: + AnnCagraVpqTest() + : stream_(resource::get_cuda_stream(handle_)), + ps(::testing::TestWithParam::GetParam()), + database(0, stream_), + search_queries(0, stream_) + { + } + + protected: + void testCagra() + { + size_t queries_size = ps.n_queries * ps.k; + std::vector indices_Cagra(queries_size); + std::vector indices_naive(queries_size); + std::vector distances_Cagra(queries_size); + std::vector distances_naive(queries_size); + + { + rmm::device_uvector distances_naive_dev(queries_size, stream_); + rmm::device_uvector indices_naive_dev(queries_size, stream_); + naive_knn(handle_, + distances_naive_dev.data(), + indices_naive_dev.data(), + search_queries.data(), + database.data(), + ps.n_queries, + ps.n_rows, + ps.dim, + ps.k, + ps.metric); + update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); + update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); + resource::sync_stream(handle_); + } + + const auto vpq_k = ps.k * 16; + { + rmm::device_uvector distances_dev(vpq_k * ps.n_queries, stream_); + rmm::device_uvector indices_dev(vpq_k * ps.n_queries, stream_); + + { + if ((ps.dim % ps.pq_len) != 0) { + // TODO: remove this requirement in the algorithm. + GTEST_SKIP() << "(TODO) At the moment dim, (" << ps.dim + << ") must be a multiple of pq_len (" << ps.pq_len << ")"; + } + cagra::index_params index_params; + index_params.compression = vpq_params{.pq_bits = static_cast(ps.pq_bits), + .pq_dim = static_cast(ps.dim / ps.pq_len)}; + index_params.metric = ps.metric; // Note: currently ony the cagra::index_params metric is + // not used for knn_graph building. 
+ index_params.build_algo = ps.build_algo; + cagra::search_params search_params; + search_params.algo = ps.algo; + search_params.max_queries = ps.max_queries; + search_params.team_size = ps.team_size; + search_params.itopk_size = ps.itopk_size; + + auto database_view = + raft::make_device_matrix_view(database.data(), ps.n_rows, ps.dim); + + { + cagra::index index(handle_); + if (ps.host_dataset) { + auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); + auto database_host_view = raft::make_host_matrix_view( + database_host.data_handle(), ps.n_rows, ps.dim); + index = cagra::build(handle_, index_params, database_host_view); + } else { + index = cagra::build(handle_, index_params, database_view); + }; + cagra::serialize(handle_, "cagra_index", index, ps.include_serialized_dataset); + } + + auto index = cagra::deserialize(handle_, "cagra_index"); + if (!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); } + + // CAGRA-Q sanity check: we've built the right index type + auto* vpq_dataset = + dynamic_cast*>(&index.data()); + EXPECT_NE(vpq_dataset, nullptr) + << "Expected VPQ dataset, because we're testing CAGRA-Q here."; + + auto search_queries_view = raft::make_device_matrix_view( + search_queries.data(), ps.n_queries, ps.dim); + auto indices_out_view = + raft::make_device_matrix_view(indices_dev.data(), ps.n_queries, vpq_k); + auto dists_out_view = raft::make_device_matrix_view( + distances_dev.data(), ps.n_queries, vpq_k); + + cagra::search( + handle_, search_params, index, search_queries_view, indices_out_view, dists_out_view); + + { + auto host_dataset = raft::make_host_matrix(ps.n_rows, ps.dim); + raft::copy( + host_dataset.data_handle(), (const DataT*)database.data(), ps.n_rows * ps.dim, stream_); + + auto host_queries = raft::make_host_matrix(ps.n_queries, ps.dim); + raft::copy(host_queries.data_handle(), + (const 
DataT*)search_queries_view.data_handle(), + ps.n_queries * ps.dim, + stream_); + + auto host_index_candidate = raft::make_host_matrix(ps.n_queries, vpq_k); + raft::copy(host_index_candidate.data_handle(), + indices_out_view.data_handle(), + ps.n_queries * vpq_k, + stream_); + + auto host_indices_Cagra_view = + raft::make_host_matrix_view(indices_Cagra.data(), ps.n_queries, ps.k); + + auto host_dists_Cagra_view = + raft::make_host_matrix_view(distances_Cagra.data(), ps.n_queries, ps.k); + + resource::sync_stream(handle_); + + raft::neighbors::refine(handle_, + raft::make_const_mdspan(host_dataset.view()), + raft::make_const_mdspan(host_queries.view()), + raft::make_const_mdspan(host_index_candidate.view()), + host_indices_Cagra_view, + host_dists_Cagra_view, + ps.metric); + + raft::copy(indices_dev.data(), + host_indices_Cagra_view.data_handle(), + ps.k * ps.n_queries, + stream_); + raft::copy(distances_dev.data(), + host_dists_Cagra_view.data_handle(), + ps.k * ps.n_queries, + stream_); + resource::sync_stream(handle_); + } + } + + double min_recall = ps.min_recall; + EXPECT_TRUE(eval_neighbours(indices_naive, + indices_Cagra, + distances_naive, + distances_Cagra, + ps.n_queries, + ps.k, + 0.003, + min_recall)); + EXPECT_TRUE(eval_distances(handle_, + database.data(), + search_queries.data(), + indices_dev.data(), + distances_dev.data(), + ps.n_rows, + ps.dim, + ps.n_queries, + ps.k, + ps.metric, + 1.0e-4)); + } + } + + void SetUp() override + { + database.resize(((size_t)ps.n_rows) * ps.dim, stream_); + search_queries.resize(ps.n_queries * ps.dim, stream_); + GenerateDataset(database.data(), + search_queries.data(), + ps.n_rows, + ps.n_queries, + ps.dim, + static_cast(std::sqrt(ps.n_rows)), + stream_); + resource::sync_stream(handle_); + } + + void TearDown() override + { + resource::sync_stream(handle_); + database.resize(0, stream_); + search_queries.resize(0, stream_); + } + + private: + raft::resources handle_; + rmm::cuda_stream_view stream_; + 
AnnCagraVpqInputs ps; + rmm::device_uvector database; + rmm::device_uvector search_queries; +}; + +const std::vector vpq_inputs = raft::util::itertools::product( + {100}, // n_queries + {1000, 10000}, // n_rows + {128, 132, 192, 256, 512, 768}, // dim + {8, 12}, // k + {2}, // pq_len + {8}, // pq_bits + {graph_build_algo::NN_DESCENT}, // build_algo + {search_algo::SINGLE_CTA, search_algo::MULTI_CTA}, // algo + {0}, // max_queries + {0}, // team_size + {512}, // itopk_size + {1}, // search_width + {raft::distance::DistanceType::L2Expanded}, // metric + {false}, // host_dataset + {true}, // include_serialized_dataset + {0.8} // min_recall +); + +} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu b/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu new file mode 100644 index 0000000000..f60edb5ed6 --- /dev/null +++ b/cpp/test/neighbors/ann_cagra_vpq/test_float_int64_t.cu @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#undef RAFT_EXPLICIT_INSTANTIATE_ONLY +#include "../ann_cagra_vpq.cuh" + +#include + +namespace raft::neighbors::cagra { + +typedef AnnCagraVpqTest AnnCagraVpqTestF_I64; +TEST_P(AnnCagraVpqTestF_I64, AnnCagraVpq) { this->testCagra(); } + +INSTANTIATE_TEST_CASE_P(AnnCagraVpqTest, AnnCagraVpqTestF_I64, ::testing::ValuesIn(vpq_inputs)); + +} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu b/cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu new file mode 100644 index 0000000000..19d3f32250 --- /dev/null +++ b/cpp/test/neighbors/ann_cagra_vpq/test_float_uint32_t.cu @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../ann_cagra_vpq.cuh" + +#include + +namespace raft::neighbors::cagra { + +typedef AnnCagraVpqTest AnnCagraVpqTestF_U32; +TEST_P(AnnCagraVpqTestF_U32, AnnCagraVpq) { this->testCagra(); } + +INSTANTIATE_TEST_CASE_P(AnnCagraVpqTest, AnnCagraVpqTestF_U32, ::testing::ValuesIn(vpq_inputs)); + +} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_utils.cuh b/cpp/test/neighbors/ann_utils.cuh index 6be2ac7fc7..3e0bead665 100644 --- a/cpp/test/neighbors/ann_utils.cuh +++ b/cpp/test/neighbors/ann_utils.cuh @@ -300,7 +300,7 @@ auto eval_distances(raft::resources const& handle, raft::matrix::copy_rows( handle, - make_device_matrix_view(x, k, n_cols), + make_device_matrix_view(x, n_rows, n_cols), y.view(), make_device_vector_view(neighbors + i * k, k)); From b7734949c8b6ae00a6ccc726289fa2641b1d30c3 Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Thu, 21 Mar 2024 07:53:50 +0100 Subject: [PATCH 4/7] Add CAGRA-Q to ANN benchmarks (#2233) Add the relevant options to the CAGRA parameter parser and refinement to the CAGRA ANN benchmark. No changes to the library code. NB: the new option won't work correctly until https://github.com/rapidsai/raft/pull/2206 is merged. Authors: - Artem M. 
Chirkin (https://github.com/achirkin) Approvers: - Tamas Bela Feher (https://github.com/tfeher) URL: https://github.com/rapidsai/raft/pull/2233 --- .../src/raft/raft_ann_bench_param_parser.h | 23 +++++ cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 83 ++++++++++++++++++- 2 files changed, 103 insertions(+), 3 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h index 2339677340..48bf1d70d8 100644 --- a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h @@ -43,6 +43,7 @@ extern template class raft::bench::ann::RaftIvfPQ; #endif #ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA extern template class raft::bench::ann::RaftCagra; +extern template class raft::bench::ann::RaftCagra; extern template class raft::bench::ann::RaftCagra; extern template class raft::bench::ann::RaftCagra; #endif @@ -149,6 +150,20 @@ void parse_build_param(const nlohmann::json& conf, } } +inline void parse_build_param(const nlohmann::json& conf, raft::neighbors::vpq_params& param) +{ + if (conf.contains("pq_bits")) { param.pq_bits = conf.at("pq_bits"); } + if (conf.contains("pq_dim")) { param.pq_dim = conf.at("pq_dim"); } + if (conf.contains("vq_n_centers")) { param.vq_n_centers = conf.at("vq_n_centers"); } + if (conf.contains("kmeans_n_iters")) { param.kmeans_n_iters = conf.at("kmeans_n_iters"); } + if (conf.contains("vq_kmeans_trainset_fraction")) { + param.vq_kmeans_trainset_fraction = conf.at("vq_kmeans_trainset_fraction"); + } + if (conf.contains("pq_kmeans_trainset_fraction")) { + param.pq_kmeans_trainset_fraction = conf.at("pq_kmeans_trainset_fraction"); + } +} + nlohmann::json collect_conf_with_prefix(const nlohmann::json& conf, const std::string& prefix, bool remove_prefix = true) @@ -204,6 +219,12 @@ void parse_build_param(const nlohmann::json& conf, } param.nn_descent_params = nn_param; } + nlohmann::json comp_search_conf = collect_conf_with_prefix(conf, 
"compression_"); + if (!comp_search_conf.empty()) { + raft::neighbors::vpq_params vpq_pams; + parse_build_param(comp_search_conf, vpq_pams); + param.cagra_params.compression.emplace(vpq_pams); + } } raft::bench::ann::AllocatorType parse_allocator(std::string mem_type) @@ -248,5 +269,7 @@ void parse_search_param(const nlohmann::json& conf, if (conf.contains("internal_dataset_memory_type")) { param.dataset_mem = parse_allocator(conf.at("internal_dataset_memory_type")); } + // Same ratio as in IVF-PQ + param.refine_ratio = conf.value("refine_ratio", 1.0f); } #endif diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 25f7f93777..70fd22001e 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -56,6 +57,7 @@ class RaftCagra : public ANN, public AnnGPU { struct SearchParam : public AnnSearchParam { raft::neighbors::experimental::cagra::search_params p; + float refine_ratio; AllocatorType graph_mem = AllocatorType::Device; AllocatorType dataset_mem = AllocatorType::Device; auto needs_dataset() const -> bool override { return true; } @@ -98,6 +100,8 @@ class RaftCagra : public ANN, public AnnGPU { // will be filled with (size_t)-1 void search( const T* queries, int batch_size, int k, size_t* neighbors, float* distances) const override; + void search_base( + const T* queries, int batch_size, int k, size_t* neighbors, float* distances) const; [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override { @@ -124,6 +128,7 @@ class RaftCagra : public ANN, public AnnGPU { raft::mr::cuda_huge_page_resource mr_huge_page_; AllocatorType graph_mem_; AllocatorType dataset_mem_; + float refine_ratio_; BuildParam index_params_; bool need_dataset_update_; raft::neighbors::cagra::search_params search_params_; @@ -151,6 +156,9 @@ void RaftCagra::build(const T* dataset, size_t 
nrow) auto& params = index_params_.cagra_params; + // Do include the compressed dataset for the CAGRA-Q + bool shall_include_dataset = params.compression.has_value(); + index_ = std::make_shared>( std::move(raft::neighbors::cagra::detail::build(handle_, params, @@ -159,7 +167,7 @@ void RaftCagra::build(const T* dataset, size_t nrow) index_params_.ivf_pq_refine_rate, index_params_.ivf_pq_build_params, index_params_.ivf_pq_search_params, - false))); + shall_include_dataset))); } inline std::string allocator_to_string(AllocatorType mem_type) @@ -179,6 +187,7 @@ void RaftCagra::set_search_param(const AnnSearchParam& param) { auto search_param = dynamic_cast(param); search_params_ = search_param.p; + refine_ratio_ = search_param.refine_ratio; if (search_param.graph_mem != graph_mem_) { // Move graph to correct memory space graph_mem_ = search_param.graph_mem; @@ -223,12 +232,16 @@ void RaftCagra::set_search_param(const AnnSearchParam& param) template void RaftCagra::set_search_dataset(const T* dataset, size_t nrow) { + using ds_idx_type = decltype(index_->data().n_rows()); + bool is_vpq = + dynamic_cast*>(&index_->data()) || + dynamic_cast*>(&index_->data()); // It can happen that we are re-using a previous algo object which already has // the dataset set. Check if we need update. if (static_cast(input_dataset_v_->extent(0)) != nrow || input_dataset_v_->data_handle() != dataset) { *input_dataset_v_ = make_device_matrix_view(dataset, nrow, this->dim_); - need_dataset_update_ = true; + need_dataset_update_ = !is_vpq; // ignore update if this is a VPQ dataset. 
} } @@ -258,7 +271,7 @@ std::unique_ptr> RaftCagra::copy() } template -void RaftCagra::search( +void RaftCagra::search_base( const T* queries, int batch_size, int k, size_t* neighbors, float* distances) const { IdxT* neighbors_IdxT; @@ -286,4 +299,68 @@ void RaftCagra::search( raft::resource::get_cuda_stream(handle_)); } } + +template +void RaftCagra::search( + const T* queries, int batch_size, int k, size_t* neighbors, float* distances) const +{ + auto k0 = static_cast(refine_ratio_ * k); + const bool disable_refinement = k0 <= static_cast(k); + const raft::resources& res = handle_; + auto stream = resource::get_cuda_stream(res); + + if (disable_refinement) { + search_base(queries, batch_size, k, neighbors, distances); + } else { + auto candidate_ixs = raft::make_device_matrix(res, batch_size, k0); + auto candidate_dists = raft::make_device_matrix(res, batch_size, k0); + search_base(queries, + batch_size, + k0, + reinterpret_cast(candidate_ixs.data_handle()), + candidate_dists.data_handle()); + + if (raft::get_device_for_address(input_dataset_v_->data_handle()) >= 0) { + auto queries_v = + raft::make_device_matrix_view(queries, batch_size, dimension_); + auto neighours_v = raft::make_device_matrix_view( + reinterpret_cast(neighbors), batch_size, k); + auto distances_v = raft::make_device_matrix_view(distances, batch_size, k); + raft::neighbors::refine( + res, + *input_dataset_v_, + queries_v, + raft::make_const_mdspan(candidate_ixs.view()), + neighours_v, + distances_v, + index_->metric()); + } else { + auto dataset_host = raft::make_host_matrix_view( + input_dataset_v_->data_handle(), input_dataset_v_->extent(0), input_dataset_v_->extent(1)); + auto queries_host = raft::make_host_matrix(batch_size, dimension_); + auto candidates_host = raft::make_host_matrix(batch_size, k0); + auto neighbors_host = raft::make_host_matrix(batch_size, k); + auto distances_host = raft::make_host_matrix(batch_size, k); + + raft::copy(queries_host.data_handle(), queries, 
queries_host.size(), stream); + raft::copy( + candidates_host.data_handle(), candidate_ixs.data_handle(), candidates_host.size(), stream); + + raft::resource::sync_stream(res); // wait for the queries and candidates + raft::neighbors::refine(res, + dataset_host, + queries_host.view(), + candidates_host.view(), + neighbors_host.view(), + distances_host.view(), + index_->metric()); + + raft::copy(neighbors, + reinterpret_cast(neighbors_host.data_handle()), + neighbors_host.size(), + stream); + raft::copy(distances, distances_host.data_handle(), distances_host.size(), stream); + } + } +} } // namespace raft::bench::ann From fd91efb22a64fa1f75bd83d024e4c1ffed3702f0 Mon Sep 17 00:00:00 2001 From: rhdong Date: Thu, 21 Mar 2024 05:48:55 -0700 Subject: [PATCH 5/7] [FEA] Add support for bitmap_view & the API of `bitmap_to_csr` (#2109) - This PR is one part of the feature of #1969 Authors: - James Rong (https://github.com/rhdong) Approvers: - Ben Frederickson (https://github.com/benfred) - Micka (https://github.com/lowener) - Corey J. Nolet (https://github.com/cjnolet) Authors: - rhdong (https://github.com/rhdong) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) - Micka (https://github.com/lowener) URL: https://github.com/rapidsai/raft/pull/2109 --- cpp/bench/prims/CMakeLists.txt | 9 +- cpp/bench/prims/sparse/bitmap_to_csr.cu | 156 +++++++++ cpp/include/raft/core/bitmap.cuh | 127 ++++++++ cpp/include/raft/sparse/convert/csr.cuh | 29 +- .../sparse/convert/detail/bitmap_to_csr.cuh | 300 ++++++++++++++++++ cpp/test/sparse/convert_csr.cu | 243 ++++++++++++++ docs/source/cpp_api/core.rst | 3 +- docs/source/cpp_api/core_bitmap.rst | 15 + 8 files changed, 879 insertions(+), 3 deletions(-) create mode 100644 cpp/bench/prims/sparse/bitmap_to_csr.cu create mode 100644 cpp/include/raft/core/bitmap.cuh create mode 100644 cpp/include/raft/sparse/convert/detail/bitmap_to_csr.cuh create mode 100644 docs/source/cpp_api/core_bitmap.rst diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt index 95361e19ca..9f23c44a5c 100644 --- a/cpp/bench/prims/CMakeLists.txt +++ b/cpp/bench/prims/CMakeLists.txt @@ -131,7 +131,14 @@ if(BUILD_PRIMS_BENCH) bench/prims/random/rng.cu bench/prims/random/subsample.cu bench/prims/main.cpp ) - ConfigureBench(NAME SPARSE_BENCH PATH bench/prims/sparse/convert_csr.cu bench/prims/main.cpp) + ConfigureBench( + NAME + SPARSE_BENCH + PATH + bench/prims/sparse/bitmap_to_csr.cu + bench/prims/sparse/convert_csr.cu + bench/prims/main.cpp + ) ConfigureBench( NAME diff --git a/cpp/bench/prims/sparse/bitmap_to_csr.cu b/cpp/bench/prims/sparse/bitmap_to_csr.cu new file mode 100644 index 0000000000..ed53df3265 --- /dev/null +++ b/cpp/bench/prims/sparse/bitmap_to_csr.cu @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace raft::bench::sparse { + +template +struct bench_param { + index_t n_rows; + index_t n_cols; + float sparsity; +}; + +template +inline auto operator<<(std::ostream& os, const bench_param& params) -> std::ostream& +{ + os << " rows*cols=" << params.n_rows << "*" << params.n_cols << "\tsparsity=" << params.sparsity; + return os; +} + +template +struct BitmapToCsrBench : public fixture { + BitmapToCsrBench(const bench_param& p) + : fixture(true), + params(p), + handle(stream), + bitmap_d(0, stream), + nnz(0), + indptr_d(0, stream), + indices_d(0, stream), + values_d(0, stream) + { + index_t element = raft::ceildiv(params.n_rows * params.n_cols, index_t(sizeof(bitmap_t) * 8)); + std::vector bitmap_h(element); + nnz = create_sparse_matrix(params.n_rows, params.n_cols, params.sparsity, bitmap_h); + + bitmap_d.resize(bitmap_h.size(), stream); + indptr_d.resize(params.n_rows + 1, stream); + indices_d.resize(nnz, stream); + values_d.resize(nnz, stream); + + update_device(bitmap_d.data(), bitmap_h.data(), bitmap_h.size(), stream); + + resource::sync_stream(handle); + } + + index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bitmap) + { + index_t total = static_cast(m * n); + index_t num_ones = static_cast((total * 1.0f) * sparsity); + index_t res = num_ones; + + for (auto& item : bitmap) { + item = static_cast(0); + } + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, total - 1); 
+ + while (num_ones > 0) { + index_t index = dis(gen); + + bitmap_t& element = bitmap[index / (8 * sizeof(bitmap_t))]; + index_t bit_position = index % (8 * sizeof(bitmap_t)); + + if (((element >> bit_position) & 1) == 0) { + element |= (static_cast(1) << bit_position); + num_ones--; + } + } + return res; + } + + void run_benchmark(::benchmark::State& state) override + { + std::ostringstream label_stream; + label_stream << params; + state.SetLabel(label_stream.str()); + + auto bitmap = + raft::core::bitmap_view(bitmap_d.data(), params.n_rows, params.n_cols); + + auto csr_view = raft::make_device_compressed_structure_view( + indptr_d.data(), indices_d.data(), params.n_rows, params.n_cols, nnz); + auto csr = raft::make_device_csr_matrix(handle, csr_view); + + raft::sparse::convert::bitmap_to_csr(handle, bitmap, csr); + + resource::sync_stream(handle); + loop_on_state(state, [this, &bitmap, &csr]() { + raft::sparse::convert::bitmap_to_csr(handle, bitmap, csr); + }); + } + + protected: + const raft::device_resources handle; + + bench_param params; + + rmm::device_uvector bitmap_d; + rmm::device_uvector indptr_d; + rmm::device_uvector indices_d; + rmm::device_uvector values_d; + + index_t nnz; +}; // struct BitmapToCsrBench + +template +const std::vector> getInputs() +{ + std::vector> param_vec; + struct TestParams { + index_t m; + index_t n; + float sparsity; + }; + + const std::vector params_group = raft::util::itertools::product( + {index_t(10), index_t(1024)}, {index_t(1024 * 1024)}, {0.01f, 0.1f, 0.2f, 0.5f}); + + param_vec.reserve(params_group.size()); + for (TestParams params : params_group) { + param_vec.push_back(bench_param({params.m, params.n, params.sparsity})); + } + return param_vec; +} + +RAFT_BENCH_REGISTER((BitmapToCsrBench), "", getInputs()); +RAFT_BENCH_REGISTER((BitmapToCsrBench), "", getInputs()); + +} // namespace raft::bench::sparse diff --git a/cpp/include/raft/core/bitmap.cuh b/cpp/include/raft/core/bitmap.cuh new file mode 100644 index 
0000000000..829c84ed25 --- /dev/null +++ b/cpp/include/raft/core/bitmap.cuh @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace raft::core { +/** + * @defgroup bitmap Bitmap + * @{ + */ +/** + * @brief View of a RAFT Bitmap. + * + * This lightweight structure which represents and manipulates a two-dimensional bitmap matrix view + * with row major order. This class provides functionality for handling a matrix where each element + * is represented as a bit in a bitmap. + * + * @tparam bitmap_t Underlying type of the bitmap array. Default is uint32_t. + * @tparam index_t Indexing type used. Default is uint32_t. + */ +template +struct bitmap_view : public bitset_view { + static_assert((std::is_same::value || + std::is_same::value), + "The bitmap_t must be uint32_t or uint64_t."); + /** + * @brief Create a bitmap view from a device raw pointer. + * + * @param bitmap_ptr Device raw pointer + * @param rows Number of row in the matrix. + * @param cols Number of col in the matrix. + */ + _RAFT_HOST_DEVICE bitmap_view(bitmap_t* bitmap_ptr, index_t rows, index_t cols) + : bitset_view(bitmap_ptr, rows * cols), rows_(rows), cols_(cols) + { + } + + /** + * @brief Create a bitmap view from a device vector view of the bitset. 
+ * + * @param bitmap_span Device vector view of the bitmap + * @param rows Number of row in the matrix. + * @param cols Number of col in the matrix. + */ + _RAFT_HOST_DEVICE bitmap_view(raft::device_vector_view bitmap_span, + index_t rows, + index_t cols) + : bitset_view(bitmap_span, rows * cols), rows_(rows), cols_(cols) + { + } + + private: + // Hide the constructors of bitset_view. + _RAFT_HOST_DEVICE bitmap_view(bitmap_t* bitmap_ptr, index_t bitmap_len) + : bitset_view(bitmap_ptr, bitmap_len) + { + } + + _RAFT_HOST_DEVICE bitmap_view(raft::device_vector_view bitmap_span, + index_t bitmap_len) + : bitset_view(bitmap_span, bitmap_len) + { + } + + public: + /** + * @brief Device function to test if a given row and col are set in the bitmap. + * + * @param row Row index of the bit to test + * @param col Col index of the bit to test + * @return bool True if index has not been unset in the bitset + */ + inline _RAFT_DEVICE auto test(const index_t row, const index_t col) const -> bool + { + return test(row * cols_ + col); + } + + /** + * @brief Device function to set a given row and col to set_value in the bitset. 
+ * + * @param row Row index of the bit to set + * @param col Col index of the bit to set + * @param new_value Value to set the bit to (true or false) + */ + inline _RAFT_DEVICE void set(const index_t row, const index_t col, bool new_value) const + { + set(row * cols_ + col, &new_value); + } + + /** + * @brief Get the total number of rows + * @return index_t The total number of rows + */ + inline _RAFT_HOST_DEVICE index_t get_n_rows() const { return rows_; } + + /** + * @brief Get the total number of columns + * @return index_t The total number of columns + */ + inline _RAFT_HOST_DEVICE index_t get_n_cols() const { return cols_; } + + private: + index_t rows_; + index_t cols_; +}; + +/** @} */ +} // end namespace raft::core diff --git a/cpp/include/raft/sparse/convert/csr.cuh b/cpp/include/raft/sparse/convert/csr.cuh index 999e64cb0b..081192ed44 100644 --- a/cpp/include/raft/sparse/convert/csr.cuh +++ b/cpp/include/raft/sparse/convert/csr.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,10 @@ #pragma once +#include +#include #include +#include #include #include @@ -102,6 +105,30 @@ void adj_to_csr(raft::resources const& handle, detail::adj_to_csr(handle, adj, row_ind, num_rows, num_cols, tmp, out_col_ind); } +/** + * @brief Converts a bitmap matrix to a Compressed Sparse Row (CSR) format matrix. + * + * @tparam bitmap_t The data type of the elements in the bitmap matrix. + * @tparam index_t The data type used for indexing the elements in the matrices. + * @tparam csr_matrix_t Specifies the CSR matrix type, constrained to + * raft::device_csr_matrix. + * + * @param[in] handle The RAFT handle containing the CUDA stream for operations. + * @param[in] bitmap The bitmap matrix view, to be converted to CSR format. 
+ * @param[out] csr Output parameter where the resulting CSR matrix is stored. In the + * bitmap, each '1' bit corresponds to a non-zero element in the CSR matrix. + */ +template >> +void bitmap_to_csr(raft::resources const& handle, + raft::core::bitmap_view bitmap, + csr_matrix_t& csr) +{ + detail::bitmap_to_csr(handle, bitmap, csr); +} + }; // end NAMESPACE convert }; // end NAMESPACE sparse }; // end NAMESPACE raft diff --git a/cpp/include/raft/sparse/convert/detail/bitmap_to_csr.cuh b/cpp/include/raft/sparse/convert/detail/bitmap_to_csr.cuh new file mode 100644 index 0000000000..b0315486ff --- /dev/null +++ b/cpp/include/raft/sparse/convert/detail/bitmap_to_csr.cuh @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include // detail::popc +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace cg = cooperative_groups; + +namespace raft { +namespace sparse { +namespace convert { +namespace detail { + +// Threads per block in calc_nnz_by_rows_kernel. 
+static const constexpr int calc_nnz_by_rows_tpb = 32; + +template +RAFT_KERNEL __launch_bounds__(calc_nnz_by_rows_tpb) calc_nnz_by_rows_kernel(const bitmap_t* bitmap, + index_t num_rows, + index_t num_cols, + index_t bitmap_num, + nnz_t* nnz_per_row) +{ + constexpr bitmap_t FULL_MASK = ~bitmap_t(0u); + constexpr bitmap_t ONE = bitmap_t(1u); + constexpr index_t BITS_PER_BITMAP = sizeof(bitmap_t) * 8; + + auto block = cg::this_thread_block(); + auto tile = cg::tiled_partition<32>(block); + + int lane_id = threadIdx.x & 0x1f; + + for (index_t row = blockIdx.x; row < num_rows; row += gridDim.x) { + index_t offset = 0; + index_t s_bit = row * num_cols; + index_t e_bit = s_bit + num_cols; + index_t l_sum = 0; + + while (offset < num_cols) { + index_t bitmap_idx = lane_id + (s_bit + offset) / BITS_PER_BITMAP; + bitmap_t l_bitmap = bitmap_t(0); + + if (bitmap_idx * BITS_PER_BITMAP < e_bit) { l_bitmap = bitmap[bitmap_idx]; } + + if (s_bit > bitmap_idx * BITS_PER_BITMAP) { + l_bitmap >>= (s_bit - bitmap_idx * BITS_PER_BITMAP); + l_bitmap <<= (s_bit - bitmap_idx * BITS_PER_BITMAP); + } + + if ((bitmap_idx + 1) * BITS_PER_BITMAP > e_bit) { + l_bitmap <<= ((bitmap_idx + 1) * BITS_PER_BITMAP - e_bit); + l_bitmap >>= ((bitmap_idx + 1) * BITS_PER_BITMAP - e_bit); + } + + l_sum += static_cast(raft::detail::popc(l_bitmap)); + offset += BITS_PER_BITMAP * warpSize; + } + + l_sum = cg::reduce(tile, l_sum, cg::plus()); + + if (lane_id == 0) { *(nnz_per_row + row) += static_cast(l_sum); } + } +} + +template +void calc_nnz_by_rows(raft::resources const& handle, + const bitmap_t* bitmap, + index_t num_rows, + index_t num_cols, + nnz_t* nnz_per_row) +{ + auto stream = resource::get_cuda_stream(handle); + const index_t total = num_rows * num_cols; + const index_t bitmap_num = raft::ceildiv(total, index_t(sizeof(bitmap_t) * 8)); + + int dev_id, sm_count, blocks_per_sm; + + cudaGetDevice(&dev_id); + cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, dev_id); + 
cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &blocks_per_sm, calc_nnz_by_rows_kernel, calc_nnz_by_rows_tpb, 0); + + index_t max_active_blocks = sm_count * blocks_per_sm; + auto grid = std::min(max_active_blocks, raft::ceildiv(bitmap_num, index_t(calc_nnz_by_rows_tpb))); + auto block = calc_nnz_by_rows_tpb; + + calc_nnz_by_rows_kernel + <<>>(bitmap, num_rows, num_cols, bitmap_num, nnz_per_row); + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +/* + Execute the exclusive_scan within one warp with no inter-warp communication. + This function calculates the exclusive prefix sum of `value` across threads within the same warp. + Each thread in the warp will end up with the sum of all the values of the threads with lower IDs + in the same warp, with the first thread always getting a sum of 0. +*/ +template +RAFT_DEVICE_INLINE_FUNCTION value_t warp_exclusive_scan(value_t value) +{ + int lane_id = threadIdx.x & 0x1f; + value_t shifted_value = __shfl_up_sync(0xffffffff, value, 1, warpSize); + if (lane_id == 0) shifted_value = 0; + + value_t sum = shifted_value; + + for (int i = 1; i < warpSize; i *= 2) { + value_t n = __shfl_up_sync(0xffffffff, sum, i, warpSize); + if (lane_id >= i) { sum += n; } + } + return sum; +} + +// Threads per block in fill_indices_by_rows_kernel. +static const constexpr int fill_indices_by_rows_tpb = 32; + +template +RAFT_KERNEL __launch_bounds__(fill_indices_by_rows_tpb) + fill_indices_by_rows_kernel(const bitmap_t* bitmap, + const index_t* indptr, + index_t num_rows, + index_t num_cols, + nnz_t nnz, + index_t bitmap_num, + index_t* indices) +{ + constexpr bitmap_t FULL_MASK = ~bitmap_t(0u); + constexpr bitmap_t ONE = bitmap_t(1u); + constexpr index_t BITS_PER_BITMAP = sizeof(bitmap_t) * 8; + + int lane_id = threadIdx.x & 0x1f; + + // Ensure the HBM allocated for CSR values is sufficient to handle all non-zero bitmap bits. + // An assert will trigger if the allocated HBM is insufficient when `NDEBUG` isn't defined. 
+ // Note: Assertion is active only if `NDEBUG` is undefined. + if constexpr (check_nnz) { + if (lane_id == 0) { assert(nnz < indptr[num_rows]); } + } + +#pragma unroll + for (index_t row = blockIdx.x; row < num_rows; row += gridDim.x) { + index_t g_sum = 0; + index_t s_bit = row * num_cols; + index_t e_bit = s_bit + num_cols; + index_t indptr_row = indptr[row]; + +#pragma unroll + for (index_t offset = 0; offset < num_cols; offset += BITS_PER_BITMAP * warpSize) { + index_t bitmap_idx = lane_id + (s_bit + offset) / BITS_PER_BITMAP; + bitmap_t l_bitmap = bitmap_t(0); + index_t l_offset = offset + lane_id * BITS_PER_BITMAP - (s_bit % BITS_PER_BITMAP); + + if (bitmap_idx * BITS_PER_BITMAP < e_bit) { l_bitmap = bitmap[bitmap_idx]; } + + if (s_bit > bitmap_idx * BITS_PER_BITMAP) { + l_bitmap >>= (s_bit - bitmap_idx * BITS_PER_BITMAP); + l_bitmap <<= (s_bit - bitmap_idx * BITS_PER_BITMAP); + } + + if ((bitmap_idx + 1) * BITS_PER_BITMAP > e_bit) { + l_bitmap <<= ((bitmap_idx + 1) * BITS_PER_BITMAP - e_bit); + l_bitmap >>= ((bitmap_idx + 1) * BITS_PER_BITMAP - e_bit); + } + + index_t l_sum = + g_sum + warp_exclusive_scan(static_cast(raft::detail::popc(l_bitmap))); + + for (int i = 0; i < BITS_PER_BITMAP; i++) { + if (l_bitmap & (ONE << i)) { + indices[indptr_row + l_sum] = l_offset + i; + l_sum++; + } + } + g_sum = __shfl_sync(0xffffffff, l_sum, warpSize - 1); + } + } +} + +template +void fill_indices_by_rows(raft::resources const& handle, + const bitmap_t* bitmap, + const index_t* indptr, + index_t num_rows, + index_t num_cols, + nnz_t nnz, + index_t* indices) +{ + auto stream = resource::get_cuda_stream(handle); + const index_t total = num_rows * num_cols; + const index_t bitmap_num = raft::ceildiv(total, index_t(sizeof(bitmap_t) * 8)); + + int dev_id, sm_count, blocks_per_sm; + + cudaGetDevice(&dev_id); + cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, dev_id); + cudaOccupancyMaxActiveBlocksPerMultiprocessor( + &blocks_per_sm, + 
fill_indices_by_rows_kernel, + fill_indices_by_rows_tpb, + 0); + + index_t max_active_blocks = sm_count * blocks_per_sm; + auto grid = std::min(max_active_blocks, num_rows); + auto block = fill_indices_by_rows_tpb; + + fill_indices_by_rows_kernel + <<>>(bitmap, indptr, num_rows, num_cols, nnz, bitmap_num, indices); + RAFT_CUDA_TRY(cudaPeekAtLastError()); +} + +template >> +void bitmap_to_csr(raft::resources const& handle, + raft::core::bitmap_view bitmap, + csr_matrix_t& csr) +{ + auto csr_view = csr.structure_view(); + + if (csr_view.get_n_rows() == 0 || csr_view.get_n_cols() == 0 || csr_view.get_nnz() == 0) { + return; + } + + RAFT_EXPECTS(bitmap.get_n_rows() == csr_view.get_n_rows(), + "Number of rows in bitmap must be equal to " + "number of rows in csr"); + + RAFT_EXPECTS(bitmap.get_n_cols() == csr_view.get_n_cols(), + "Number of columns in bitmap must be equal to " + "number of columns in csr"); + + auto thrust_policy = resource::get_thrust_policy(handle); + auto stream = resource::get_cuda_stream(handle); + + index_t* indptr = csr_view.get_indptr().data(); + index_t* indices = csr_view.get_indices().data(); + + RAFT_CUDA_TRY(cudaMemsetAsync(indptr, 0, (csr_view.get_n_rows() + 1) * sizeof(index_t), stream)); + + calc_nnz_by_rows(handle, bitmap.data(), csr_view.get_n_rows(), csr_view.get_n_cols(), indptr); + thrust::exclusive_scan(thrust_policy, indptr, indptr + csr_view.get_n_rows() + 1, indptr); + + if constexpr (is_device_csr_sparsity_owning_v) { + index_t nnz = 0; + RAFT_CUDA_TRY(cudaMemcpyAsync( + &nnz, indptr + csr_view.get_n_rows(), sizeof(index_t), cudaMemcpyDeviceToHost, stream)); + resource::sync_stream(handle); + csr.initialize_sparsity(nnz); + } + constexpr bool check_nnz = is_device_csr_sparsity_preserving_v; + fill_indices_by_rows( + handle, + bitmap.data(), + indptr, + csr_view.get_n_rows(), + csr_view.get_n_cols(), + csr_view.get_nnz(), + indices); + + thrust::fill_n(thrust_policy, + csr.get_elements().data(), + csr_view.get_nnz(), + typename 
csr_matrix_t::element_type(1)); +} + +}; // end NAMESPACE detail +}; // end NAMESPACE convert +}; // end NAMESPACE sparse +}; // end NAMESPACE raft diff --git a/cpp/test/sparse/convert_csr.cu b/cpp/test/sparse/convert_csr.cu index 4af792a9ea..1cd49b0bbd 100644 --- a/cpp/test/sparse/convert_csr.cu +++ b/cpp/test/sparse/convert_csr.cu @@ -16,6 +16,7 @@ #include "../test_utils.cuh" +#include #include #include #include @@ -218,5 +219,247 @@ INSTANTIATE_TEST_CASE_P(SparseConvertCSRTest, CSRAdjGraphTestL, ::testing::ValuesIn(csradjgraph_inputs_l)); +/******************************** bitmap to csr ********************************/ + +template +struct BitmapToCSRInputs { + index_t n_rows; + index_t n_cols; + float sparsity; + bool owning; +}; + +template +class BitmapToCSRTest : public ::testing::TestWithParam> { + public: + BitmapToCSRTest() + : stream(resource::get_cuda_stream(handle)), + params(::testing::TestWithParam>::GetParam()), + bitmap_d(0, stream), + indices_d(0, stream), + indptr_d(0, stream), + values_d(0, stream), + indptr_expected_d(0, stream), + indices_expected_d(0, stream), + values_expected_d(0, stream) + { + } + + protected: + index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bitmap) + { + index_t total = static_cast(m * n); + index_t num_ones = static_cast((total * 1.0f) * sparsity); + index_t res = num_ones; + + for (auto& item : bitmap) { + item = static_cast(0); + } + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, total - 1); + + while (num_ones > 0) { + index_t index = dis(gen); + + bitmap_t& element = bitmap[index / (8 * sizeof(bitmap_t))]; + index_t bit_position = index % (8 * sizeof(bitmap_t)); + + if (((element >> bit_position) & 1) == 0) { + element |= (static_cast(1) << bit_position); + num_ones--; + } + } + return res; + } + + void cpu_convert_to_csr(std::vector& bitmap, + index_t rows, + index_t cols, + std::vector& indices, + std::vector& indptr) + { + index_t 
offset_indptr = 0; + index_t offset_values = 0; + indptr[offset_indptr++] = 0; + + index_t index = 0; + bitmap_t element = 0; + index_t bit_position = 0; + + for (index_t i = 0; i < rows; ++i) { + for (index_t j = 0; j < cols; ++j) { + index = i * cols + j; + element = bitmap[index / (8 * sizeof(bitmap_t))]; + bit_position = index % (8 * sizeof(bitmap_t)); + + if (((element >> bit_position) & 1)) { + indices[offset_values] = static_cast(j); + offset_values++; + } + } + indptr[offset_indptr++] = static_cast(offset_values); + } + } + + bool csr_compare(const std::vector& row_ptrs1, + const std::vector& col_indices1, + const std::vector& row_ptrs2, + const std::vector& col_indices2) + { + if (row_ptrs1.size() != row_ptrs2.size()) { return false; } + + if (col_indices1.size() != col_indices2.size()) { return false; } + + if (!std::equal(row_ptrs1.begin(), row_ptrs1.end(), row_ptrs2.begin())) { return false; } + + for (size_t i = 0; i < row_ptrs1.size() - 1; ++i) { + size_t start_idx = row_ptrs1[i]; + size_t end_idx = row_ptrs1[i + 1]; + + std::vector cols1(col_indices1.begin() + start_idx, col_indices1.begin() + end_idx); + std::vector cols2(col_indices2.begin() + start_idx, col_indices2.begin() + end_idx); + + std::sort(cols1.begin(), cols1.end()); + std::sort(cols2.begin(), cols2.end()); + + if (cols1 != cols2) { return false; } + } + + return true; + } + + void SetUp() override + { + index_t element = raft::ceildiv(params.n_rows * params.n_cols, index_t(sizeof(bitmap_t) * 8)); + std::vector bitmap_h(element); + nnz = create_sparse_matrix(params.n_rows, params.n_cols, params.sparsity, bitmap_h); + + std::vector indices_h(nnz); + std::vector indptr_h(params.n_rows + 1); + + cpu_convert_to_csr(bitmap_h, params.n_rows, params.n_cols, indices_h, indptr_h); + + bitmap_d.resize(bitmap_h.size(), stream); + indptr_d.resize(params.n_rows + 1, stream); + indices_d.resize(nnz, stream); + + indptr_expected_d.resize(params.n_rows + 1, stream); + indices_expected_d.resize(nnz, 
stream); + values_expected_d.resize(nnz, stream); + + thrust::fill_n(resource::get_thrust_policy(handle), values_expected_d.data(), nnz, value_t{1}); + + values_d.resize(nnz, stream); + + update_device(indices_expected_d.data(), indices_h.data(), indices_h.size(), stream); + update_device(indptr_expected_d.data(), indptr_h.data(), indptr_h.size(), stream); + update_device(bitmap_d.data(), bitmap_h.data(), bitmap_h.size(), stream); + + resource::sync_stream(handle); + } + + void Run() + { + auto bitmap = + raft::core::bitmap_view(bitmap_d.data(), params.n_rows, params.n_cols); + + if (params.owning) { + auto csr = + raft::make_device_csr_matrix(handle, params.n_rows, params.n_cols, nnz); + auto csr_view = csr.structure_view(); + + convert::bitmap_to_csr(handle, bitmap, csr); + raft::copy(indptr_d.data(), csr_view.get_indptr().data(), indptr_d.size(), stream); + raft::copy(indices_d.data(), csr_view.get_indices().data(), indices_d.size(), stream); + raft::copy(values_d.data(), csr.get_elements().data(), nnz, stream); + } else { + auto csr_view = raft::make_device_compressed_structure_view( + indptr_d.data(), indices_d.data(), params.n_rows, params.n_cols, nnz); + auto csr = raft::make_device_csr_matrix(handle, csr_view); + + convert::bitmap_to_csr(handle, bitmap, csr); + raft::copy(values_d.data(), csr.get_elements().data(), nnz, stream); + } + resource::sync_stream(handle); + + std::vector indices_h(indices_expected_d.size(), 0); + std::vector indices_expected_h(indices_expected_d.size(), 0); + update_host(indices_h.data(), indices_d.data(), indices_h.size(), stream); + update_host(indices_expected_h.data(), indices_expected_d.data(), indices_h.size(), stream); + + std::vector indptr_h(indptr_expected_d.size(), 0); + std::vector indptr_expected_h(indptr_expected_d.size(), 0); + update_host(indptr_h.data(), indptr_d.data(), indptr_h.size(), stream); + update_host(indptr_expected_h.data(), indptr_expected_d.data(), indptr_h.size(), stream); + + 
resource::sync_stream(handle); + + ASSERT_TRUE(csr_compare(indptr_h, indices_h, indptr_expected_h, indices_expected_h)); + ASSERT_TRUE(raft::devArrMatch( + values_expected_d.data(), values_d.data(), nnz, raft::Compare(), stream)); + } + + protected: + raft::resources handle; + cudaStream_t stream; + + BitmapToCSRInputs params; + + rmm::device_uvector bitmap_d; + + index_t nnz; + + rmm::device_uvector indptr_d; + rmm::device_uvector indices_d; + rmm::device_uvector values_d; + + rmm::device_uvector indptr_expected_d; + rmm::device_uvector indices_expected_d; + rmm::device_uvector values_expected_d; +}; + +using BitmapToCSRTestI = BitmapToCSRTest; +TEST_P(BitmapToCSRTestI, Result) { Run(); } + +using BitmapToCSRTestL = BitmapToCSRTest; +TEST_P(BitmapToCSRTestL, Result) { Run(); } + +template +const std::vector> bitmaptocsr_inputs = { + {0, 0, 0.2, false}, + {10, 32, 0.4, false}, + {10, 3, 0.2, false}, + {32, 1024, 0.4, false}, + {1024, 1048576, 0.01, false}, + {1024, 1024, 0.4, false}, + {64 * 1024 + 10, 2, 0.3, false}, // 64K + 10 is slightly over maximum of blockDim.y + {16, 16, 0.3, false}, // No peeling-remainder + {17, 16, 0.3, false}, // Check peeling-remainder + {18, 16, 0.3, false}, // Check peeling-remainder + {32 + 9, 33, 0.2, false}, // Check peeling-remainder + {2, 33, 0.2, false}, // Check peeling-remainder + {0, 0, 0.2, true}, + {10, 32, 0.4, true}, + {10, 3, 0.2, true}, + {32, 1024, 0.4, true}, + {1024, 1048576, 0.01, true}, + {1024, 1024, 0.4, true}, + {64 * 1024 + 10, 2, 0.3, true}, // 64K + 10 is slightly over maximum of blockDim.y + {16, 16, 0.3, true}, // No peeling-remainder + {17, 16, 0.3, true}, // Check peeling-remainder + {18, 16, 0.3, true}, // Check peeling-remainder + {32 + 9, 33, 0.2, true}, // Check peeling-remainder + {2, 33, 0.2, true}, // Check peeling-remainder +}; + +INSTANTIATE_TEST_CASE_P(SparseConvertCSRTest, + BitmapToCSRTestI, + ::testing::ValuesIn(bitmaptocsr_inputs)); +INSTANTIATE_TEST_CASE_P(SparseConvertCSRTest, + 
BitmapToCSRTestL, + ::testing::ValuesIn(bitmaptocsr_inputs)); + } // namespace sparse } // namespace raft diff --git a/docs/source/cpp_api/core.rst b/docs/source/cpp_api/core.rst index 39e57fd69a..4122a18506 100644 --- a/docs/source/cpp_api/core.rst +++ b/docs/source/cpp_api/core.rst @@ -21,4 +21,5 @@ expose in public APIs. core_interruptible.rst core_operators.rst core_math.rst - core_bitset.rst \ No newline at end of file + core_bitset.rst + core_bitmap.rst \ No newline at end of file diff --git a/docs/source/cpp_api/core_bitmap.rst b/docs/source/cpp_api/core_bitmap.rst new file mode 100644 index 0000000000..6c1dc607bf --- /dev/null +++ b/docs/source/cpp_api/core_bitmap.rst @@ -0,0 +1,15 @@ +Bitmap +====== + +.. role:: py(code) + :language: c++ + :class: highlight + +``#include `` + +namespace *raft::core* + +.. doxygengroup:: bitmap + :project: RAFT + :members: + :content-only: \ No newline at end of file From 9637b3c22a3e67d20200886cffb5e804e33473dc Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 21 Mar 2024 13:32:41 -0400 Subject: [PATCH 6/7] Update pre-commit-hooks to v0.0.3 (#2239) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6bca1d228e..2b89948ec1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -104,7 +104,7 @@ repos: hooks: - id: check-json - repo: https://github.com/rapidsai/pre-commit-hooks - rev: v0.0.1 + rev: v0.0.3 hooks: - id: verify-copyright files: | From 52e0d7331cb533955f479d82e4656253eaa9ef6f Mon Sep 17 00:00:00 2001 From: Michael Schellenberger Costa Date: Thu, 21 Mar 2024 16:15:41 -0700 Subject: [PATCH 7/7] Replace usages of raw `get_upstream` with `get_upstream_resource()` (#2207) We want to get rid of raw memory resources so move to the new interface instead Authors: - Michael Schellenberger Costa (https://github.com/miscco) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2207 --- cpp/test/core/device_resources_manager.cpp | 16 ++++++++-------- cpp/test/core/handle.cpp | 8 +++++--- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/cpp/test/core/device_resources_manager.cpp b/cpp/test/core/device_resources_manager.cpp index c7c9e175ea..b9b8996a09 100644 --- a/cpp/test/core/device_resources_manager.cpp +++ b/cpp/test/core/device_resources_manager.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -114,17 +115,16 @@ TEST(DeviceResourcesManager, ObeysSetters) auto* mr = dynamic_cast*>( rmm::mr::get_current_device_resource()); - auto* workspace_mr = - dynamic_cast*>( - dynamic_cast*>( - res.get_workspace_resource()) - ->get_upstream()); + rmm::device_async_resource_ref workspace_mr = + dynamic_cast*>( + res.get_workspace_resource()) + ->get_upstream_resource(); if (upstream_mrs[i % devices.size()] != nullptr) { // Expect that the current memory resource is a pool memory resource as requested EXPECT_NE(mr, nullptr); - // Expect that the upstream workspace memory resource is a pool memory - // resource as requested - EXPECT_NE(workspace_mr, nullptr); + + // We cannot easily check the type of a resource_ref + (void)workspace_mr; } { diff --git a/cpp/test/core/handle.cpp b/cpp/test/core/handle.cpp index 0b0b4b54ab..be18b0d5b4 100644 --- a/cpp/test/core/handle.cpp +++ b/cpp/test/core/handle.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -281,7 +282,8 @@ TEST(Raft, WorkspaceResource) raft::handle_t handle; // The returned resource is always a limiting adaptor - auto* orig_mr = resource::get_workspace_resource(handle)->get_upstream(); + rmm::device_async_resource_ref orig_mr{ + resource::get_workspace_resource(handle)->get_upstream_resource()}; // Let's create a pooled resource auto pool_mr = std::shared_ptr{new rmm::mr::pool_memory_resource( @@ -295,8 +297,8 @@ TEST(Raft, WorkspaceResource) auto new_mr = 
resource::get_workspace_resource(handle); // By this point, the orig_mr likely points to a non-existent resource; don't dereference! - ASSERT_NE(orig_mr, new_mr); - ASSERT_EQ(pool_mr.get(), new_mr->get_upstream()); + ASSERT_NE(orig_mr, rmm::device_async_resource_ref{new_mr}); + ASSERT_EQ(rmm::device_async_resource_ref{pool_mr.get()}, new_mr->get_upstream_resource()); // We can safely reset pool_mr, because the shared_ptr to the pool memory stays in the resource pool_mr.reset();