diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index db379c9d47..945589dc12 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -52,7 +52,6 @@ jobs:
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
- skip_upload_pkgs: libraft-template
docs-build:
if: github.ref_type == 'branch'
needs: python-build
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 82e56cd95d..47951783ba 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -43,16 +43,8 @@ jobs:
- '!README.md'
- '!docs/**'
- '!img/**'
- - '!notebooks/**'
- '!python/**'
- '!thirdparty/LICENSES/**'
- test_notebooks:
- - '**'
- - '!.devcontainer/**'
- - '!.pre-commit-config.yaml'
- - '!CONTRIBUTING.md'
- - '!README.md'
- - '!thirdparty/LICENSES/**'
test_python:
- '**'
- '!.devcontainer/**'
@@ -61,7 +53,6 @@ jobs:
- '!README.md'
- '!docs/**'
- '!img/**'
- - '!notebooks/**'
- '!thirdparty/LICENSES/**'
checks:
secrets: inherit
diff --git a/README.md b/README.md
index 7f43eb89dc..898c5c22c3 100755
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
#
RAFT: Reusable Accelerated Functions and Tools for Vector Search and More
> [!IMPORTANT]
-> The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called [cuVS](https://github.com/rapidsai/cuvs). We will continue to support the vector search algorithms in RAFT during this move, but will no longer update them after the RAPIDS 24.06 (June) release. We plan to complete the migration by RAPIDS 24.10 (October) release and will be removing them altogether in the 24.12 (December) release.
+> The vector search and clustering algorithms in RAFT have been formally migrated to a new library dedicated to vector search called [cuVS](https://github.com/rapidsai/cuvs). The headers for the vector search and clustering algorithms in RAFT will remain for a brief period, but will no longer be tested, benchmarked, included in the pre-compiled libraft binary, or otherwise updated after the 24.12 (December 2024) release. We will be removing these headers altogether in a future release. We strongly recommend using cuVS for these routines, which include any headers in the `distance`, `neighbors`, `cluster` and `spatial` directories; use the RAFT versions at your own risk.
![RAFT tech stack](img/raft-tech-stack-vss.png)
@@ -27,7 +27,6 @@
- [RAFT Reference Documentation](https://docs.rapids.ai/api/raft/stable/): API Documentation.
- [RAFT Getting Started](./docs/source/quick_start.md): Getting started with RAFT.
- [Build and Install RAFT](./docs/source/build.md): Instructions for installing and building RAFT.
-- [Example Notebooks](./notebooks): Example jupyter notebooks
- [RAPIDS Community](https://rapids.ai/community.html): Get help, contribute, and collaborate.
- [GitHub repository](https://github.com/rapidsai/raft): Download the RAFT source code.
- [Issue tracker](https://github.com/rapidsai/raft/issues): Report issues or request features.
@@ -120,13 +119,13 @@ auto metric = raft::distance::DistanceType::L2SqrtExpanded;
raft::distance::pairwise_distance(handle, input.view(), input.view(), output.view(), metric);
```
-It's also possible to create `raft::device_mdspan` views to invoke the same API with raw pointers and shape information:
+It's also possible to create `raft::device_mdspan` views to invoke the same API with raw pointers and shape information. Take this example from the [NVIDIA cuVS](https://github.com/rapidsai/cuvs) library:
```c++
#include
#include
#include
-#include
+#include
raft::device_resources handle;
@@ -147,8 +146,8 @@ auto output_view = raft::make_device_matrix_view(output, n_samples, n_samples);
raft::random::make_blobs(handle, input_view, labels_view);
-auto metric = raft::distance::DistanceType::L2SqrtExpanded;
-raft::distance::pairwise_distance(handle, input_view, input_view, output_view, metric);
+auto metric = cuvs::distance::DistanceType::L2SqrtExpanded;
+cuvs::distance::pairwise_distance(handle, input_view, input_view, output_view, metric);
```
@@ -156,12 +155,12 @@ raft::distance::pairwise_distance(handle, input_view, input_view, output_view, m
The `pylibraft` package contains a Python API for RAFT algorithms and primitives. `pylibraft` integrates nicely into other libraries by being very lightweight with minimal dependencies and accepting any object that supports the `__cuda_array_interface__`, such as [CuPy's ndarray](https://docs.cupy.dev/en/stable/user_guide/interoperability.html#rmm). The number of RAFT algorithms exposed in this package is continuing to grow from release to release.
-The example below demonstrates computing the pairwise Euclidean distances between CuPy arrays. Note that CuPy is not a required dependency for `pylibraft`.
+The example below demonstrates computing the pairwise Euclidean distances between CuPy arrays using the [NVIDIA cuVS](https://github.com/rapidsai/cuvs) library. Note that CuPy is not a required dependency for `pylibraft`.
```python
import cupy as cp
-from pylibraft.distance import pairwise_distance
+from cuvs.distance import pairwise_distance
n_samples = 5000
n_features = 50
@@ -208,7 +207,7 @@ pylibraft.config.set_output_as(lambda device_ndarray: return device_ndarray.copy
```python
import cupy as cp
-from pylibraft.distance import pairwise_distance
+from cuvs.distance import pairwise_distance
n_samples = 5000
n_features = 50
@@ -230,7 +229,6 @@ RAFT's C++ and Python libraries can both be installed through Conda and the Pyth
The easiest way to install RAFT is through conda and several packages are provided.
- `libraft-headers` C++ headers
-- `libraft` (optional) C++ shared library containing pre-compiled template instantiations and runtime API.
- `pylibraft` (optional) Python library
- `raft-dask` (optional) Python library for deployment of multi-node multi-GPU algorithms that use the RAFT `raft::comms` abstraction layer in Dask clusters.
@@ -253,8 +251,6 @@ You can also install the conda packages individually using the `mamba` command a
mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.5
```
-If installing the C++ APIs please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-24.12/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above.
-
### Installing Python through Pip
`pylibraft` and `raft-dask` both have experimental packages that can be [installed through pip](https://rapids.ai/pip.html#install):
@@ -263,12 +259,10 @@ pip install pylibraft-cu11 --extra-index-url=https://pypi.nvidia.com
pip install raft-dask-cu11 --extra-index-url=https://pypi.nvidia.com
```
-These packages statically build RAFT's pre-compiled instantiations and so the C++ headers and pre-compiled shared library won't be readily available to use in your code.
+These packages statically build RAFT's pre-compiled instantiations and so the C++ headers won't be readily available to use in your code.
The [build instructions](https://docs.rapids.ai/api/raft/nightly/build/) contain more details on building RAFT from source and including it in downstream projects. You can also find a more comprehensive version of the above CPM code snippet the [Building RAFT C++ and Python from source](https://docs.rapids.ai/api/raft/nightly/build/#building-c-and-python-from-source) section of the build instructions.
-You can find an example [RAFT project template](cpp/template/README.md) in the `cpp/template` directory, which demonstrates how to build a new application with RAFT or incorporate RAFT into an existing CMake project.
-
## Contributing
@@ -282,7 +276,7 @@ When citing RAFT generally, please consider referencing this Github project.
title={Rapidsai/raft: RAFT contains fundamental widely-used algorithms and primitives for data science, Graph and machine learning.},
url={https://github.com/rapidsai/raft},
journal={GitHub},
- publisher={Nvidia RAPIDS},
+ publisher={NVIDIA RAPIDS},
author={Rapidsai},
year={2022}
}
diff --git a/build.sh b/build.sh
index d54a8895a3..a95cb8ee23 100755
--- a/build.sh
+++ b/build.sh
@@ -18,7 +18,7 @@ ARGS=$*
# scripts, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)
-VALIDARGS="clean libraft pylibraft raft-dask docs tests template bench-prims clean --uninstall -v -g -n --compile-lib --compile-static-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h"
+VALIDARGS="clean libraft pylibraft raft-dask docs tests bench-prims clean --uninstall -v -g -n --compile-lib --compile-static-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h"
HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-prims=] [--build-metrics=]
where is:
clean - remove all existing build artifacts and configuration (start over)
@@ -29,7 +29,6 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool= is:
-v - verbose build mode
@@ -73,8 +72,8 @@ INSTALL_TARGET=install
BUILD_REPORT_METRICS=""
BUILD_REPORT_INCL_CACHE_STATS=OFF
-TEST_TARGETS="CLUSTER_TEST;CORE_TEST;DISTANCE_TEST;LABEL_TEST;LINALG_TEST;MATRIX_TEST;NEIGHBORS_TEST;NEIGHBORS_ANN_BRUTE_FORCE_TEST;NEIGHBORS_ANN_CAGRA_TEST;NEIGHBORS_ANN_NN_DESCENT_TEST;NEIGHBORS_ANN_IVF_TEST;RANDOM_TEST;SOLVERS_TEST;SPARSE_TEST;SPARSE_DIST_TEST;SPARSE_NEIGHBORS_TEST;STATS_TEST;UTILS_TEST"
-BENCH_TARGETS="CLUSTER_BENCH;CORE_BENCH;NEIGHBORS_BENCH;DISTANCE_BENCH;LINALG_BENCH;MATRIX_BENCH;SPARSE_BENCH;RANDOM_BENCH"
+TEST_TARGETS="CORE_TEST;LABEL_TEST;LINALG_TEST;MATRIX_TEST;RANDOM_TEST;SOLVERS_TEST;SPARSE_TEST;STATS_TEST;UTILS_TEST"
+BENCH_TARGETS="CORE_BENCH;LINALG_BENCH;MATRIX_BENCH;SPARSE_BENCH;RANDOM_BENCH"
CACHE_ARGS=""
NVTX=ON
@@ -480,11 +479,3 @@ if hasArg docs; then
sphinx-build -b html source _html
fi
-################################################################################
-# Initiate build for example RAFT application template (if needed)
-
-if hasArg template; then
- pushd ${REPODIR}/cpp/template
- ./build.sh
- popd
-fi
diff --git a/conda/recipes/libraft/build_libraft_template.sh b/conda/recipes/libraft/build_libraft_template.sh
deleted file mode 100644
index 86c0fa11b6..0000000000
--- a/conda/recipes/libraft/build_libraft_template.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/usr/bin/env bash
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
-
-# Just building template so we verify it uses libraft.so and fail if it doesn't build
-./build.sh template --no-nvtx
diff --git a/conda/recipes/libraft/meta.yaml b/conda/recipes/libraft/meta.yaml
index a075308500..503c4cb6fb 100644
--- a/conda/recipes/libraft/meta.yaml
+++ b/conda/recipes/libraft/meta.yaml
@@ -322,57 +322,3 @@ outputs:
home: https://rapids.ai/
license: Apache-2.0
summary: libraft tests
- - name: libraft-template
- version: {{ version }}
- script: build_libraft_template.sh
- build:
- script_env: *script_env
- number: {{ GIT_DESCRIBE_NUMBER }}
- string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
- ignore_run_exports_from:
- {% if cuda_major == "11" %}
- - {{ compiler('cuda11') }}
- {% else %}
- - {{ compiler('cuda') }}
- - cuda-cudart-dev
- - libcublas-dev
- {% endif %}
- requirements:
- build:
- - {{ compiler('c') }}
- - {{ compiler('cxx') }}
- {% if cuda_major == "11" %}
- - {{ compiler('cuda11') }} ={{ cuda_version }}
- {% else %}
- - {{ compiler('cuda') }}
- {% endif %}
- - cuda-version ={{ cuda_version }}
- - cmake {{ cmake_version }}
- - ninja
- - {{ stdlib("c") }}
- host:
- - {{ pin_subpackage('libraft', exact=True) }}
- - {{ pin_subpackage('libraft-headers', exact=True) }}
- - cuda-version ={{ cuda_version }}
- {% if cuda_major == "11" %}
- - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }}
- - libcublas {{ cuda11_libcublas_host_version }}
- - libcublas-dev {{ cuda11_libcublas_host_version }}
- {% else %}
- - cuda-cudart-dev
- - cuda-profiler-api
- - libcublas-dev
- {% endif %}
- run:
- - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
- {% if cuda_major == "11" %}
- - cudatoolkit
- {% else %}
- - cuda-cudart
- - libcublas
- {% endif %}
- - {{ pin_subpackage('libraft', exact=True) }}
- about:
- home: https://rapids.ai/
- license: Apache-2.0
- summary: libraft template
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index f4c18d53a8..4ed9529a36 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -49,7 +49,6 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
option(BUILD_SHARED_LIBS "Build raft shared libraries" ON)
option(BUILD_TESTS "Build raft unit-tests" ON)
option(BUILD_PRIMS_BENCH "Build raft C++ benchmark tests" OFF)
-option(BUILD_CAGRA_HNSWLIB "Build CAGRA+hnswlib interface" ON)
option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF)
option(CUDA_ENABLE_LINEINFO
"Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF
@@ -171,10 +170,6 @@ if(BUILD_PRIMS_BENCH)
rapids_cpm_gbench(BUILD_STATIC)
endif()
-if(BUILD_CAGRA_HNSWLIB)
- include(cmake/thirdparty/get_hnswlib.cmake)
-endif()
-
# ##################################################################################################
# * raft ---------------------------------------------------------------------
add_library(raft INTERFACE)
@@ -183,9 +178,6 @@ add_library(raft::raft ALIAS raft)
target_include_directories(
raft INTERFACE "$" "$"
)
-if(BUILD_CAGRA_HNSWLIB)
- target_link_libraries(raft INTERFACE hnswlib::hnswlib)
-endif()
# Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target.
target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass CCCL::CCCL)
@@ -271,277 +263,11 @@ if(RAFT_COMPILE_LIBRARY)
add_library(
raft_objs OBJECT
src/core/logger.cpp
- src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_rbf.cu
- src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu
- src/distance/distance.cu
- src/distance/fused_l2_nn.cu
- src/distance/fused_distance_nn.cu
src/linalg/detail/coalesced_reduction.cu
- src/matrix/detail/select_k_double_int64_t.cu
- src/matrix/detail/select_k_double_uint32_t.cu
- src/matrix/detail/select_k_float_int64_t.cu
- src/matrix/detail/select_k_float_uint32_t.cu
- src/matrix/detail/select_k_float_int32.cu
- src/matrix/detail/select_k_half_int64_t.cu
- src/matrix/detail/select_k_half_uint32_t.cu
- src/neighbors/ball_cover.cu
- src/neighbors/brute_force_fused_l2_knn_float_int64_t.cu
- src/neighbors/brute_force_knn_int64_t_float_int64_t.cu
- src/neighbors/brute_force_knn_int64_t_float_uint32_t.cu
- src/neighbors/brute_force_knn_int_float_int.cu
- src/neighbors/brute_force_knn_uint32_t_float_uint32_t.cu
- src/neighbors/brute_force_knn_index_float.cu
- src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu
- src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu
- src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu
- src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu
- src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu
- src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu
- src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu
- src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu
- src/neighbors/detail/ivf_flat_interleaved_scan_half_half_int64_t.cu
- src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu
- src/neighbors/detail/ivf_flat_interleaved_scan_uint8_t_uint32_t_int64_t.cu
- src/neighbors/detail/ivf_flat_search.cu
- src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu
- src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu
- src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu
- src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu
- src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu
- src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu
- src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu
- src/neighbors/detail/refine_host_float_float.cpp
- src/neighbors/detail/refine_host_half_float.cpp
- src/neighbors/detail/refine_host_int8_t_float.cpp
- src/neighbors/detail/refine_host_uint8_t_float.cpp
- src/neighbors/ivf_flat_build_float_int64_t.cu
- src/neighbors/ivf_flat_build_int8_t_int64_t.cu
- src/neighbors/ivf_flat_build_uint8_t_int64_t.cu
- src/neighbors/ivf_flat_extend_float_int64_t.cu
- src/neighbors/ivf_flat_extend_int8_t_int64_t.cu
- src/neighbors/ivf_flat_extend_uint8_t_int64_t.cu
- src/neighbors/ivf_flat_search_float_int64_t.cu
- src/neighbors/ivf_flat_search_int8_t_int64_t.cu
- src/neighbors/ivf_flat_search_uint8_t_int64_t.cu
- src/neighbors/ivfpq_build_float_int64_t.cu
- src/neighbors/ivfpq_build_half_int64_t.cu
- src/neighbors/ivfpq_build_int8_t_int64_t.cu
- src/neighbors/ivfpq_build_uint8_t_int64_t.cu
- src/neighbors/ivfpq_extend_float_int64_t.cu
- src/neighbors/ivfpq_extend_half_int64_t.cu
- src/neighbors/ivfpq_extend_int8_t_int64_t.cu
- src/neighbors/ivfpq_extend_uint8_t_int64_t.cu
- src/neighbors/ivfpq_search_float_int64_t.cu
- src/neighbors/ivfpq_search_half_int64_t.cu
- src/neighbors/ivfpq_search_int8_t_int64_t.cu
- src/neighbors/ivfpq_search_uint8_t_int64_t.cu
- src/neighbors/refine_float_float.cu
- src/neighbors/refine_half_float.cu
- src/neighbors/refine_int8_t_float.cu
- src/neighbors/refine_uint8_t_float.cu
- src/raft_runtime/cluster/cluster_cost.cuh
- src/raft_runtime/cluster/cluster_cost_double.cu
- src/raft_runtime/cluster/cluster_cost_float.cu
- src/raft_runtime/cluster/kmeans_fit_double.cu
- src/raft_runtime/cluster/kmeans_fit_float.cu
- src/raft_runtime/cluster/kmeans_init_plus_plus_double.cu
- src/raft_runtime/cluster/kmeans_init_plus_plus_float.cu
- src/raft_runtime/cluster/update_centroids.cuh
- src/raft_runtime/cluster/update_centroids_double.cu
- src/raft_runtime/cluster/update_centroids_float.cu
- src/raft_runtime/distance/fused_distance_min_arg.cu
- src/raft_runtime/distance/fused_l2_min_arg.cu
- src/raft_runtime/distance/pairwise_distance.cu
- src/raft_runtime/matrix/select_k_float_int64_t.cu
- src/raft_runtime/neighbors/brute_force_knn_int64_t_float.cu
- src/raft_runtime/neighbors/cagra_build.cu
- src/raft_runtime/neighbors/cagra_search.cu
- src/raft_runtime/neighbors/cagra_serialize.cu
- src/raft_runtime/neighbors/eps_neighborhood.cu
- $<$:src/raft_runtime/neighbors/hnsw.cpp>
- src/raft_runtime/neighbors/ivf_flat_build.cu
- src/raft_runtime/neighbors/ivf_flat_search.cu
- src/raft_runtime/neighbors/ivf_flat_serialize.cu
- src/raft_runtime/neighbors/ivfpq_build.cu
- src/raft_runtime/neighbors/ivfpq_deserialize.cu
- src/raft_runtime/neighbors/ivfpq_search_float_int64_t.cu
- src/raft_runtime/neighbors/ivfpq_search_int8_t_int64_t.cu
- src/raft_runtime/neighbors/ivfpq_search_uint8_t_int64_t.cu
- src/raft_runtime/neighbors/ivfpq_serialize.cu
- src/raft_runtime/neighbors/refine_d_int64_t_float.cu
- src/raft_runtime/neighbors/refine_d_int64_t_int8_t.cu
- src/raft_runtime/neighbors/refine_d_int64_t_uint8_t.cu
- src/raft_runtime/neighbors/refine_h_int64_t_float.cu
- src/raft_runtime/neighbors/refine_h_int64_t_int8_t.cu
- src/raft_runtime/neighbors/refine_h_int64_t_uint8_t.cu
src/raft_runtime/random/rmat_rectangular_generator_int64_double.cu
src/raft_runtime/random/rmat_rectangular_generator_int64_float.cu
src/raft_runtime/random/rmat_rectangular_generator_int_double.cu
src/raft_runtime/random/rmat_rectangular_generator_int_float.cu
- src/spatial/knn/detail/ball_cover/registers_eps_pass_euclidean.cu
- src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu
- src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu
- src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu
- src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu
- src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu
- src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu
- src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu
- src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu
- src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu
- src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu
- src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu
- src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu
- src/spatial/knn/detail/fused_l2_knn_int32_t_float.cu
- src/spatial/knn/detail/fused_l2_knn_int64_t_float.cu
- src/spatial/knn/detail/fused_l2_knn_uint32_t_float.cu
)
set_target_properties(
raft_objs
diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt
index 52c63ad73b..cf03a36612 100644
--- a/cpp/bench/prims/CMakeLists.txt
+++ b/cpp/bench/prims/CMakeLists.txt
@@ -74,49 +74,9 @@ function(ConfigureBench)
endfunction()
if(BUILD_PRIMS_BENCH)
- ConfigureBench(
- NAME
- CORE_BENCH
- PATH
- core/bitset.cu
- core/copy.cu
- main.cpp
- )
+ ConfigureBench(NAME CORE_BENCH PATH core/bitset.cu core/copy.cu main.cpp)
- ConfigureBench(
- NAME
- UTIL_BENCH
- PATH
- util/popc.cu
- main.cpp
- )
-
- ConfigureBench(
- NAME CLUSTER_BENCH PATH cluster/kmeans_balanced.cu cluster/kmeans.cu
- main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY
- )
-
- ConfigureBench(
- NAME TUNE_DISTANCE PATH distance/tune_pairwise/kernel.cu
- distance/tune_pairwise/bench.cu main.cpp
- )
-
- ConfigureBench(
- NAME
- DISTANCE_BENCH
- PATH
- distance/distance_cosine.cu
- distance/distance_exp_l2.cu
- distance/distance_l1.cu
- distance/distance_unexp_l2.cu
- distance/fused_l2_nn.cu
- distance/masked_nn.cu
- distance/kernels.cu
- main.cpp
- OPTIONAL
- LIB
- EXPLICIT_INSTANTIATE_ONLY
- )
+ ConfigureBench(NAME UTIL_BENCH PATH util/popc.cu main.cpp)
ConfigureBench(
NAME
@@ -137,54 +97,18 @@ if(BUILD_PRIMS_BENCH)
)
ConfigureBench(
- NAME MATRIX_BENCH PATH matrix/argmin.cu matrix/gather.cu
- matrix/select_k.cu main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY
+ NAME MATRIX_BENCH PATH matrix/argmin.cu matrix/gather.cu matrix/select_k.cu main.cpp OPTIONAL
+ LIB EXPLICIT_INSTANTIATE_ONLY
)
ConfigureBench(
- NAME RANDOM_BENCH PATH random/make_blobs.cu random/permute.cu
- random/rng.cu random/subsample.cu main.cpp
- )
-
- ConfigureBench(
- NAME
- SPARSE_BENCH
- PATH
- sparse/bitmap_to_csr.cu
- sparse/convert_csr.cu
- sparse/select_k_csr.cu
+ NAME RANDOM_BENCH PATH random/make_blobs.cu random/permute.cu random/rng.cu random/subsample.cu
main.cpp
)
ConfigureBench(
- NAME
- NEIGHBORS_BENCH
- PATH
- neighbors/knn/brute_force_float_int64_t.cu
- neighbors/knn/brute_force_float_uint32_t.cu
- neighbors/knn/cagra_float_uint32_t.cu
- neighbors/knn/ivf_flat_filter_float_int64_t.cu
- neighbors/knn/ivf_flat_float_int64_t.cu
- neighbors/knn/ivf_flat_int8_t_int64_t.cu
- neighbors/knn/ivf_flat_uint8_t_int64_t.cu
- neighbors/knn/ivf_pq_float_int64_t.cu
- neighbors/knn/ivf_pq_filter_float_int64_t.cu
- neighbors/knn/ivf_pq_int8_t_int64_t.cu
- neighbors/knn/ivf_pq_uint8_t_int64_t.cu
- ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_search_filtering_float_int64_t.cu
- ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_float_bitset64.cu
- ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu
- ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu
- ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_float_half_bitset64.cu
- ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu
- ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu
- ${RAFT_SOURCE_DIR}/src/neighbors/detail/ivf_pq_compute_similarity_half_half_bitset64.cu
- neighbors/refine_float_int64_t.cu
- neighbors/refine_uint8_t_int64_t.cu
+ NAME SPARSE_BENCH PATH sparse/bitmap_to_csr.cu sparse/convert_csr.cu sparse/select_k_csr.cu
main.cpp
- OPTIONAL
- LIB
- EXPLICIT_INSTANTIATE_ONLY
)
endif()
diff --git a/cpp/bench/prims/cluster/kmeans.cu b/cpp/bench/prims/cluster/kmeans.cu
deleted file mode 100644
index 6387211135..0000000000
--- a/cpp/bench/prims/cluster/kmeans.cu
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include
-
-#include
-#include
-
-namespace raft::bench::cluster {
-
-struct KMeansBenchParams {
- DatasetParams data;
- BlobsParams blobs;
- raft::cluster::KMeansParams kmeans;
-};
-
-inline auto operator<<(std::ostream& os, const KMeansBenchParams& p) -> std::ostream&
-{
- os << p.data.rows << "#" << p.data.cols << "#" << p.kmeans.n_clusters;
- return os;
-}
-
-template
-struct KMeans : public BlobsFixture {
- KMeans(const KMeansBenchParams& p)
- : BlobsFixture(p.data, p.blobs),
- params(p),
- centroids(this->handle),
- labels(this->handle)
- {
- }
-
- void run_benchmark(::benchmark::State& state) override
- {
- std::ostringstream label_stream;
- label_stream << params;
- state.SetLabel(label_stream.str());
-
- raft::device_matrix_view X_view = this->X.view();
- std::optional> opt_weights_view = std::nullopt;
- std::optional> centroids_view =
- std::make_optional>(centroids.view());
- raft::device_vector_view labels_view = labels.view();
- raft::host_scalar_view inertia_view = raft::make_host_scalar_view(&inertia);
- raft::host_scalar_view n_iter_view = raft::make_host_scalar_view(&n_iter);
-
- this->loop_on_state(state, [&]() {
- raft::cluster::kmeans_fit_predict(this->handle,
- params.kmeans,
- X_view,
- opt_weights_view,
- centroids_view,
- labels_view,
- inertia_view,
- n_iter_view);
- });
- }
-
- void allocate_temp_buffers(const ::benchmark::State& state) override
- {
- centroids =
- raft::make_device_matrix(this->handle, params.kmeans.n_clusters, params.data.cols);
- labels = raft::make_device_vector(this->handle, params.data.rows);
- }
-
- private:
- KMeansBenchParams params;
- raft::device_matrix centroids;
- raft::device_vector labels;
- T inertia;
- IndexT n_iter;
-}; // struct KMeans
-
-std::vector getKMeansInputs()
-{
- std::vector out;
- KMeansBenchParams p;
- p.data.row_major = true;
- p.blobs.cluster_std = 1.0;
- p.blobs.shuffle = false;
- p.blobs.center_box_min = -10.0;
- p.blobs.center_box_max = 10.0;
- p.blobs.seed = 12345ULL;
- p.kmeans.init = raft::cluster::KMeansParams::KMeansPlusPlus;
- p.kmeans.max_iter = 300;
- p.kmeans.tol = 1e-4;
- p.kmeans.verbosity = RAFT_LEVEL_INFO;
- p.kmeans.metric = raft::distance::DistanceType::L2Expanded;
- p.kmeans.inertia_check = true;
- std::vector> row_cols_k = {
- {1000000, 20, 1000},
- {3000000, 50, 20},
- {10000000, 50, 5},
- };
- for (auto& rck : row_cols_k) {
- p.data.rows = std::get<0>(rck);
- p.data.cols = std::get<1>(rck);
- p.blobs.n_clusters = std::get<2>(rck);
- p.kmeans.n_clusters = std::get<2>(rck);
- out.push_back(p);
- }
- return out;
-}
-
-// note(lsugy): commenting out int64_t because the templates are not compiled in the distance
-// library, resulting in long compilation times.
-RAFT_BENCH_REGISTER((KMeans), "", getKMeansInputs());
-RAFT_BENCH_REGISTER((KMeans), "", getKMeansInputs());
-// RAFT_BENCH_REGISTER((KMeans), "", getKMeansInputs());
-// RAFT_BENCH_REGISTER((KMeans), "", getKMeansInputs());
-
-} // namespace raft::bench::cluster
diff --git a/cpp/bench/prims/cluster/kmeans_balanced.cu b/cpp/bench/prims/cluster/kmeans_balanced.cu
deleted file mode 100644
index dc05783989..0000000000
--- a/cpp/bench/prims/cluster/kmeans_balanced.cu
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include
-
-#include
-#include
-#include
-
-namespace raft::bench::cluster {
-
-struct KMeansBalancedBenchParams {
- DatasetParams data;
- uint32_t n_lists;
- raft::cluster::kmeans_balanced_params kb_params;
-};
-
-template
-struct KMeansBalanced : public fixture {
- KMeansBalanced(const KMeansBalancedBenchParams& p) : params(p), X(handle), centroids(handle) {}
-
- void run_benchmark(::benchmark::State& state) override
- {
- this->loop_on_state(state, [this]() {
- raft::device_matrix_view X_view = this->X.view();
- raft::device_matrix_view centroids_view = this->centroids.view();
- raft::cluster::kmeans_balanced::fit(
- this->handle, this->params.kb_params, X_view, centroids_view);
- });
- }
-
- void allocate_data(const ::benchmark::State& state) override
- {
- X = raft::make_device_matrix(handle, params.data.rows, params.data.cols);
-
- raft::random::RngState rng{1234};
- constexpr T kRangeMax = std::is_integral_v ? std::numeric_limits::max() : T(1);
- constexpr T kRangeMin = std::is_integral_v ? std::numeric_limits::min() : T(-1);
- if constexpr (std::is_integral_v) {
- raft::random::uniformInt(
- handle, rng, X.data_handle(), params.data.rows * params.data.cols, kRangeMin, kRangeMax);
- } else {
- raft::random::uniform(
- handle, rng, X.data_handle(), params.data.rows * params.data.cols, kRangeMin, kRangeMax);
- }
- resource::sync_stream(handle, stream);
- }
-
- void allocate_temp_buffers(const ::benchmark::State& state) override
- {
- centroids =
- raft::make_device_matrix(this->handle, params.n_lists, params.data.cols);
- }
-
- private:
- KMeansBalancedBenchParams params;
- raft::device_matrix X;
- raft::device_matrix centroids;
-}; // struct KMeansBalanced
-
-std::vector getKMeansBalancedInputs()
-{
- std::vector out;
- KMeansBalancedBenchParams p;
- p.data.row_major = true;
- p.kb_params.n_iters = 20;
- p.kb_params.metric = raft::distance::DistanceType::L2Expanded;
- std::vector> row_cols = {
- {100000, 128}, {1000000, 128}, {10000000, 128},
- // The following dataset sizes are too large for most GPUs.
- // {100000000, 128},
- };
- for (auto& rc : row_cols) {
- p.data.rows = rc.first;
- p.data.cols = rc.second;
- for (auto n_lists : std::vector({1000, 10000, 100000})) {
- p.n_lists = n_lists;
- out.push_back(p);
- }
- }
- return out;
-}
-
-// Note: the datasets sizes are too large for 32-bit index types.
-RAFT_BENCH_REGISTER((KMeansBalanced), "", getKMeansBalancedInputs());
-
-} // namespace raft::bench::cluster
diff --git a/cpp/bench/prims/distance/distance_common.cuh b/cpp/bench/prims/distance/distance_common.cuh
deleted file mode 100644
index 8368062168..0000000000
--- a/cpp/bench/prims/distance/distance_common.cuh
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include
-
-#include
-#include
-
-#include
-
-namespace raft::bench::distance {
-
-struct distance_params {
- int m, n, k;
- bool isRowMajor;
-}; // struct distance_params
-
-template
-struct distance : public fixture {
- distance(const distance_params& p)
- : params(p),
- x(p.m * p.k, stream),
- y(p.n * p.k, stream),
- out(p.m * p.n, stream),
- workspace(0, stream)
- {
- RAFT_CUDA_TRY(cudaMemsetAsync(x.data(), 0, x.size() * sizeof(T), stream));
- RAFT_CUDA_TRY(cudaMemsetAsync(y.data(), 0, y.size() * sizeof(T), stream));
- RAFT_CUDA_TRY(cudaMemsetAsync(out.data(), 0, out.size() * sizeof(T), stream));
- worksize = raft::distance::getWorkspaceSize(
- x.data(), y.data(), params.m, params.n, params.k);
- workspace.resize(worksize, stream);
- }
-
- void run_benchmark(::benchmark::State& state) override
- {
- loop_on_state(state, [this]() {
- raft::distance::distance(handle,
- x.data(),
- y.data(),
- out.data(),
- params.m,
- params.n,
- params.k,
- (void*)workspace.data(),
- worksize,
- params.isRowMajor);
- });
- }
-
- private:
- distance_params params;
- rmm::device_uvector x, y, out;
- rmm::device_uvector workspace;
- size_t worksize;
-}; // struct Distance
-
-const std::vector dist_input_vecs{
- {32, 16384, 16384, true}, {64, 16384, 16384, true}, {128, 16384, 16384, true},
- {256, 16384, 16384, true}, {512, 16384, 16384, true}, {1024, 16384, 16384, true},
- {16384, 32, 16384, true}, {16384, 64, 16384, true}, {16384, 128, 16384, true},
- {16384, 256, 16384, true}, {16384, 512, 16384, true}, {16384, 1024, 16384, true},
- {16384, 16384, 32, true}, {16384, 16384, 64, true}, {16384, 16384, 128, true},
- {16384, 16384, 256, true}, {16384, 16384, 512, true}, {16384, 16384, 1024, true},
- {16384, 16384, 16384, true}, {32, 16384, 16384, false}, {64, 16384, 16384, false},
- {128, 16384, 16384, false}, {256, 16384, 16384, false}, {512, 16384, 16384, false},
- {1024, 16384, 16384, false}, {16384, 32, 16384, false}, {16384, 64, 16384, false},
- {16384, 128, 16384, false}, {16384, 256, 16384, false}, {16384, 512, 16384, false},
- {16384, 1024, 16384, false}, {16384, 16384, 32, false}, {16384, 16384, 64, false},
- {16384, 16384, 128, false}, {16384, 16384, 256, false}, {16384, 16384, 512, false},
- {16384, 16384, 1024, false}, {16384, 16384, 16384, false}
-
-};
-
-#define DIST_BENCH_REGISTER(Name, Metric) \
- using Name##F = distance; \
- RAFT_BENCH_REGISTER(Name##F, "", dist_input_vecs); \
- using Name##D = distance; \
- RAFT_BENCH_REGISTER(Name##D, "", dist_input_vecs);
-
-} // namespace raft::bench::distance
diff --git a/cpp/bench/prims/distance/distance_cosine.cu b/cpp/bench/prims/distance/distance_cosine.cu
deleted file mode 100644
index c8ac8067c8..0000000000
--- a/cpp/bench/prims/distance/distance_cosine.cu
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "distance_common.cuh"
-
-namespace raft::bench::distance {
-
-DIST_BENCH_REGISTER(DistanceCosine, raft::distance::DistanceType::CosineExpanded);
-
-} // namespace raft::bench::distance
diff --git a/cpp/bench/prims/distance/distance_exp_l2.cu b/cpp/bench/prims/distance/distance_exp_l2.cu
deleted file mode 100644
index 52b7fff05c..0000000000
--- a/cpp/bench/prims/distance/distance_exp_l2.cu
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "distance_common.cuh"
-
-namespace raft::bench::distance {
-
-DIST_BENCH_REGISTER(DistanceL2Sq, raft::distance::DistanceType::L2Expanded);
-DIST_BENCH_REGISTER(DistanceL2Sqrt, raft::distance::DistanceType::L2SqrtExpanded);
-
-} // namespace raft::bench::distance
diff --git a/cpp/bench/prims/distance/distance_l1.cu b/cpp/bench/prims/distance/distance_l1.cu
deleted file mode 100644
index e80db48ef0..0000000000
--- a/cpp/bench/prims/distance/distance_l1.cu
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "distance_common.cuh"
-
-namespace raft::bench::distance {
-
-DIST_BENCH_REGISTER(DistanceL1, raft::distance::DistanceType::L1);
-
-} // namespace raft::bench::distance
diff --git a/cpp/bench/prims/distance/distance_unexp_l2.cu b/cpp/bench/prims/distance/distance_unexp_l2.cu
deleted file mode 100644
index 7ac1a8a4b5..0000000000
--- a/cpp/bench/prims/distance/distance_unexp_l2.cu
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "distance_common.cuh"
-
-namespace raft::bench::distance {
-
-DIST_BENCH_REGISTER(DistanceUnexpL2Sq, raft::distance::DistanceType::L2Unexpanded);
-DIST_BENCH_REGISTER(DistanceUnexpL2Sqrt, raft::distance::DistanceType::L2SqrtUnexpanded);
-
-} // namespace raft::bench::distance
diff --git a/cpp/bench/prims/distance/fused_l2_nn.cu b/cpp/bench/prims/distance/fused_l2_nn.cu
deleted file mode 100644
index a263bef6ba..0000000000
--- a/cpp/bench/prims/distance/fused_l2_nn.cu
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include
-
-#include
-#include
-#include
-#include
-
-#include
-
-namespace raft::bench::distance {
-
-struct fusedl2nn_inputs {
- int64_t m, n, k;
-}; // struct fusedl2nn_inputs
-
-inline auto operator<<(std::ostream& os, const fusedl2nn_inputs& p) -> std::ostream&
-{
- os << p.m << "#" << p.n << "#" << p.k;
- return os;
-}
-
-template
-struct fusedl2nn : public fixture {
- fusedl2nn(const fusedl2nn_inputs& p)
- : params(p),
- workspace(this->handle),
- x(this->handle),
- y(this->handle),
- x_norm(this->handle),
- y_norm(this->handle),
- out(this->handle)
- {
- }
-
- void allocate_data(const ::benchmark::State& state) override
- {
- x = raft::make_device_matrix(handle, params.m, params.k);
- y = raft::make_device_matrix(handle, params.n, params.k);
- x_norm = raft::make_device_vector(handle, params.m);
- y_norm = raft::make_device_vector(handle, params.n);
- out = raft::make_device_vector(handle, params.m);
-
- raft::random::RngState rng{1234};
- raft::random::uniform(
- handle, rng, x.data_handle(), params.m * params.k, (DataT)-1.0, (DataT)1.0);
- raft::random::uniform(
- handle, rng, y.data_handle(), params.n * params.k, (DataT)-1.0, (DataT)1.0);
-
- // Pre-compute norms
- raft::linalg::rowNorm(x_norm.data_handle(),
- x.data_handle(),
- params.k,
- params.m,
- raft::linalg::L2Norm,
- true,
- stream);
- raft::linalg::rowNorm(y_norm.data_handle(),
- y.data_handle(),
- params.k,
- params.n,
- raft::linalg::L2Norm,
- true,
- stream);
- resource::sync_stream(handle, stream);
- }
-
- void allocate_temp_buffers(const ::benchmark::State& state) override
- {
- workspace = raft::make_device_vector(handle, params.m * sizeof(IdxT));
- }
-
- void run_benchmark(::benchmark::State& state) override
- {
- std::ostringstream label_stream;
- label_stream << params;
- state.SetLabel(label_stream.str());
-
- loop_on_state(state, [this]() {
- raft::distance::fusedL2NNMinReduce(out.data_handle(),
- x.data_handle(),
- y.data_handle(),
- x_norm.data_handle(),
- y_norm.data_handle(),
- static_cast(params.m),
- static_cast(params.n),
- static_cast(params.k),
- (void*)workspace.data_handle(),
- false,
- true,
- stream);
- });
-
- int64_t num_flops = 2 * params.m * params.n * params.k;
-
- int64_t read_elts = params.n * params.k + params.m * params.k;
- int64_t write_elts = params.m;
-
- state.counters["FLOP/s"] = benchmark::Counter(
- num_flops, benchmark::Counter::kIsIterationInvariantRate, benchmark::Counter::OneK::kIs1000);
-
- state.counters["BW Wr"] = benchmark::Counter(write_elts * sizeof(OutT),
- benchmark::Counter::kIsIterationInvariantRate,
- benchmark::Counter::OneK::kIs1000);
- state.counters["BW Rd"] = benchmark::Counter(read_elts * sizeof(DataT),
- benchmark::Counter::kIsIterationInvariantRate,
- benchmark::Counter::OneK::kIs1000);
- }
-
- private:
- fusedl2nn_inputs params;
- raft::device_matrix x, y;
- raft::device_vector x_norm, y_norm;
- raft::device_vector out;
- raft::device_vector workspace;
-}; // struct fusedl2nn
-
-template
-std::vector getFusedL2NNInputs()
-{
- std::vector inputs;
- std::vector m_list = {100000, 1000000};
- if constexpr (sizeof(IdxT) == 8) { m_list.push_back(10000000); }
- std::vector n_list = {100, 1000, 10000};
- std::vector k_list = {64, 128, 256};
- for (auto m : m_list) {
- for (auto n : n_list) {
- for (auto k : k_list) {
- inputs.push_back({m, n, k});
- }
- }
- }
- return inputs;
-}
-
-#define FUSEDL2NN_BENCH(DataT, IdxT, OutT) \
- RAFT_BENCH_REGISTER((fusedl2nn), "", getFusedL2NNInputs())
-
-FUSEDL2NN_BENCH(float, int, float);
-FUSEDL2NN_BENCH(double, int, double);
-FUSEDL2NN_BENCH(float, int, (raft::KeyValuePair));
-FUSEDL2NN_BENCH(double, int, (raft::KeyValuePair));
-FUSEDL2NN_BENCH(float, int64_t, float);
-FUSEDL2NN_BENCH(double, int64_t, double);
-FUSEDL2NN_BENCH(float, int64_t, (raft::KeyValuePair));
-FUSEDL2NN_BENCH(double, int64_t, (raft::KeyValuePair));
-
-} // namespace raft::bench::distance
diff --git a/cpp/bench/prims/distance/kernels.cu b/cpp/bench/prims/distance/kernels.cu
deleted file mode 100644
index eb86330637..0000000000
--- a/cpp/bench/prims/distance/kernels.cu
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include
-
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-
-namespace raft::bench::distance::kernels {
-
-using namespace raft::distance::kernels;
-struct GramTestParams {
- int m; // m parameter of the GEMM
- int k; // k parameter of the GEMM
- int n; // n parameter of the GEMM
- KernelParams kernel_params;
- bool is_row_major;
-}; // struct GramTestParams
-
-template
-struct GramMatrix : public fixture {
- GramMatrix(const GramTestParams& p)
- : params(p), handle(stream), A(0, stream), B(0, stream), C(0, stream)
- {
- kernel = std::unique_ptr>(
- KernelFactory::create(p.kernel_params, resource::get_cublas_handle(handle)));
-
- A.resize(params.m * params.k, stream);
- B.resize(params.k * params.n, stream);
- C.resize(params.m * params.n, stream);
- raft::random::RngState rng(123456ULL);
- raft::random::uniform(handle, rng, A.data(), params.m * params.k, T(-1.0), T(1.0));
- raft::random::uniform(handle, rng, B.data(), params.k * params.n, T(-1.0), T(1.0));
- }
-
- ~GramMatrix()
- {
- A.release();
- B.release();
- C.release();
- }
-
- void run_benchmark(::benchmark::State& state) override
- {
- if (!this->kernel) { state.SkipWithError("Kernel matrix is not initialized"); }
- loop_on_state(state, [this]() {
- (*this->kernel)(A.data(),
- this->params.m,
- this->params.k,
- B.data(),
- this->params.n,
- C.data(),
- this->params.is_row_major,
- this->stream);
- });
- }
-
- private:
- const raft::device_resources handle;
- std::unique_ptr> kernel;
- GramTestParams params;
-
- rmm::device_uvector A; // input matrix A, size [m * k]
- rmm::device_uvector B; // input matrix B, size [n * k]
- rmm::device_uvector C; // output matrix C, size [m*n]
-};
-
-static std::vector getInputs()
-{
- std::vector param_vec;
- std::vector kernel_params{KernelParams{LINEAR, 3, 1, 0},
- KernelParams{POLYNOMIAL, 2, 1.3, 1},
- KernelParams{TANH, 2, 0.5, 2.4},
- KernelParams{RBF, 2, 0.5, 0}};
- struct TestSize {
- int m;
- int k;
- int n;
- };
- std::vector data_size{{4096, 10, 1024},
- {4096, 100, 1024},
- {4096, 1000, 1024},
- {4096, 10000, 1024},
- {100000, 10, 1024},
- {100000, 100, 1024},
- {100000, 1000, 1024}};
-
- param_vec.reserve(kernel_params.size() * data_size.size());
- for (TestSize s : data_size) {
- for (auto kernel : kernel_params) {
- for (bool row_major : {false, true}) {
- param_vec.push_back(GramTestParams{s.m, s.k, s.n, kernel, row_major});
- }
- }
- }
- return param_vec;
-}
-
-RAFT_BENCH_REGISTER(GramMatrix, "", getInputs());
-RAFT_BENCH_REGISTER(GramMatrix, "", getInputs());
-
-} // namespace raft::bench::distance::kernels
diff --git a/cpp/bench/prims/distance/masked_nn.cu b/cpp/bench/prims/distance/masked_nn.cu
deleted file mode 100644
index 979d438b67..0000000000
--- a/cpp/bench/prims/distance/masked_nn.cu
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-namespace raft::bench::distance::masked_nn {
-
-// Introduce various sparsity patterns
-enum AdjacencyPattern {
- checkerboard = 0,
- checkerboard_4 = 1,
- checkerboard_64 = 2,
- all_true = 3,
- all_false = 4
-};
-
-struct Params {
- int m, n, k, num_groups;
- AdjacencyPattern pattern;
-}; // struct Params
-
-RAFT_KERNEL init_adj(AdjacencyPattern pattern,
- int n,
- raft::device_matrix_view adj,
- raft::device_vector_view group_idxs)
-{
- int m = adj.extent(0);
- int num_groups = adj.extent(1);
-
- for (int idx_m = blockIdx.y * blockDim.y + threadIdx.y; idx_m < m;
- idx_m += blockDim.y * gridDim.y) {
- for (int idx_g = blockIdx.x * blockDim.x + threadIdx.x; idx_g < num_groups;
- idx_g += blockDim.x * gridDim.x) {
- switch (pattern) {
- case checkerboard: adj(idx_m, idx_g) = (idx_m + idx_g) % 2; break;
- case checkerboard_4: adj(idx_m, idx_g) = (idx_m / 4 + idx_g) % 2; break;
- case checkerboard_64: adj(idx_m, idx_g) = (idx_m / 64 + idx_g) % 2; break;
- case all_true: adj(idx_m, idx_g) = true; break;
- case all_false: adj(idx_m, idx_g) = false; break;
- default: assert(false && "unknown pattern");
- }
- }
- }
- // Each group is of size n / num_groups.
- //
- // - group_idxs[j] indicates the start of group j + 1 (i.e. is the inclusive
- // scan of the group lengths)
- //
- // - The first group always starts at index zero, so we do not store it.
- //
- // - The group_idxs[num_groups - 1] should always equal n.
-
- if (blockIdx.y == 0 && threadIdx.y == 0) {
- const int g_stride = blockDim.x * gridDim.x;
- for (int idx_g = blockIdx.x * blockDim.x + threadIdx.x; idx_g < num_groups; idx_g += g_stride) {
- group_idxs(idx_g) = (idx_g + 1) * (n / num_groups);
- }
- group_idxs(num_groups - 1) = n;
- }
-}
-
-template
-struct masked_l2_nn : public fixture {
- using DataT = T;
- using IdxT = int;
- using OutT = raft::KeyValuePair;
- using RedOpT = raft::distance::MinAndDistanceReduceOp;
- using PairRedOpT = raft::distance::KVPMinReduce;
- using ParamT = raft::distance::masked_l2_nn_params;
-
- // Parameters
- Params params;
- // Data
- raft::device_vector out;
- raft::device_matrix x, y;
- raft::device_vector xn, yn;
- raft::device_matrix adj;
- raft::device_vector group_idxs;
-
- masked_l2_nn(const Params& p)
- : params(p),
- out{raft::make_device_vector(handle, p.m)},
- x{raft::make_device_matrix(handle, p.m, p.k)},
- y{raft::make_device_matrix(handle, p.n, p.k)},
- xn{raft::make_device_vector(handle, p.m)},
- yn{raft::make_device_vector(handle, p.n)},
- adj{raft::make_device_matrix(handle, p.m, p.num_groups)},
- group_idxs{raft::make_device_vector(handle, p.num_groups)}
- {
- raft::random::RngState r(123456ULL);
-
- uniform(handle, r, x.data_handle(), p.m * p.k, T(-1.0), T(1.0));
- uniform(handle, r, y.data_handle(), p.n * p.k, T(-1.0), T(1.0));
- raft::linalg::rowNorm(
- xn.data_handle(), x.data_handle(), p.k, p.m, raft::linalg::L2Norm, true, stream);
- raft::linalg::rowNorm(
- yn.data_handle(), y.data_handle(), p.k, p.n, raft::linalg::L2Norm, true, stream);
- raft::distance::initialize, int>(
- handle, out.data_handle(), p.m, std::numeric_limits::max(), RedOpT{});
-
- dim3 block(32, 32);
- dim3 grid(10, 10);
- init_adj<<>>(p.pattern, p.n, adj.view(), group_idxs.view());
- RAFT_CUDA_TRY(cudaGetLastError());
- }
-
- void run_benchmark(::benchmark::State& state) override
- {
- bool init_out = true;
- bool sqrt = false;
- ParamT masked_l2_params{RedOpT{}, PairRedOpT{}, sqrt, init_out};
-
- loop_on_state(state, [this, masked_l2_params]() {
- // It is sufficient to only benchmark the L2-squared metric
- raft::distance::masked_l2_nn(handle,
- masked_l2_params,
- x.view(),
- y.view(),
- xn.view(),
- yn.view(),
- adj.view(),
- group_idxs.view(),
- out.view());
- });
-
- // Virtual flop count if no skipping had occurred.
- size_t virtual_flops = size_t(2) * size_t(params.m) * size_t(params.n) * size_t(params.k);
-
- int64_t read_elts = params.n * params.k + params.m * params.k;
- int64_t write_elts = params.m;
-
- // Virtual min flops is the number of flops that would have been executed if
- // the algorithm had actually skipped each computation that it could have
- // skipped.
- size_t virtual_min_flops = 0;
- switch (params.pattern) {
- case checkerboard:
- case checkerboard_4:
- case checkerboard_64: virtual_min_flops = virtual_flops / 2; break;
- case all_true: virtual_min_flops = virtual_flops; break;
- case all_false: virtual_min_flops = 0; break;
- default: assert(false && "unknown pattern");
- }
-
- // VFLOP/s is the "virtual" flop count that would have executed if there was
- // no adjacency pattern. This is useful for comparing to fusedL2NN
- state.counters["VFLOP/s"] = benchmark::Counter(virtual_flops,
- benchmark::Counter::kIsIterationInvariantRate,
- benchmark::Counter::OneK::kIs1000);
- // Virtual min flops is the number of flops that would have been executed if
- // the algorithm had actually skipped each computation that it could have
- // skipped.
- state.counters["VminFLOP/s"] = benchmark::Counter(virtual_min_flops,
- benchmark::Counter::kIsIterationInvariantRate,
- benchmark::Counter::OneK::kIs1000);
-
- state.counters["BW Wr"] = benchmark::Counter(write_elts * sizeof(OutT),
- benchmark::Counter::kIsIterationInvariantRate,
- benchmark::Counter::OneK::kIs1000);
- state.counters["BW Rd"] = benchmark::Counter(read_elts * sizeof(DataT),
- benchmark::Counter::kIsIterationInvariantRate,
- benchmark::Counter::OneK::kIs1000);
-
- state.counters["m"] = benchmark::Counter(params.m);
- state.counters["n"] = benchmark::Counter(params.n);
- state.counters["k"] = benchmark::Counter(params.k);
- state.counters["num_groups"] = benchmark::Counter(params.num_groups);
- state.counters["group size"] = benchmark::Counter(params.n / params.num_groups);
- state.counters["Pat"] = benchmark::Counter(static_cast(params.pattern));
-
- state.counters["SM count"] = raft::getMultiProcessorCount();
- }
-};
-
-const std::vector masked_l2_nn_input_vecs = {
- // Very fat matrices...
- {32, 16384, 16384, 32, AdjacencyPattern::checkerboard},
- {64, 16384, 16384, 32, AdjacencyPattern::checkerboard},
- {128, 16384, 16384, 32, AdjacencyPattern::checkerboard},
- {256, 16384, 16384, 32, AdjacencyPattern::checkerboard},
- {512, 16384, 16384, 32, AdjacencyPattern::checkerboard},
- {1024, 16384, 16384, 32, AdjacencyPattern::checkerboard},
- {16384, 32, 16384, 32, AdjacencyPattern::checkerboard},
- {16384, 64, 16384, 32, AdjacencyPattern::checkerboard},
- {16384, 128, 16384, 32, AdjacencyPattern::checkerboard},
- {16384, 256, 16384, 32, AdjacencyPattern::checkerboard},
- {16384, 512, 16384, 32, AdjacencyPattern::checkerboard},
- {16384, 1024, 16384, 32, AdjacencyPattern::checkerboard},
-
- // Representative matrices...
- {16384, 16384, 32, 32, AdjacencyPattern::checkerboard},
- {16384, 16384, 64, 32, AdjacencyPattern::checkerboard},
- {16384, 16384, 128, 32, AdjacencyPattern::checkerboard},
- {16384, 16384, 256, 32, AdjacencyPattern::checkerboard},
- {16384, 16384, 512, 32, AdjacencyPattern::checkerboard},
- {16384, 16384, 1024, 32, AdjacencyPattern::checkerboard},
- {16384, 16384, 16384, 32, AdjacencyPattern::checkerboard},
-
- {16384, 16384, 32, 32, AdjacencyPattern::checkerboard_4},
- {16384, 16384, 64, 32, AdjacencyPattern::checkerboard_4},
- {16384, 16384, 128, 32, AdjacencyPattern::checkerboard_4},
- {16384, 16384, 256, 32, AdjacencyPattern::checkerboard_4},
- {16384, 16384, 512, 32, AdjacencyPattern::checkerboard_4},
- {16384, 16384, 1024, 32, AdjacencyPattern::checkerboard_4},
- {16384, 16384, 16384, 32, AdjacencyPattern::checkerboard_4},
-
- {16384, 16384, 32, 32, AdjacencyPattern::checkerboard_64},
- {16384, 16384, 64, 32, AdjacencyPattern::checkerboard_64},
- {16384, 16384, 128, 32, AdjacencyPattern::checkerboard_64},
- {16384, 16384, 256, 32, AdjacencyPattern::checkerboard_64},
- {16384, 16384, 512, 32, AdjacencyPattern::checkerboard_64},
- {16384, 16384, 1024, 32, AdjacencyPattern::checkerboard_64},
- {16384, 16384, 16384, 32, AdjacencyPattern::checkerboard_64},
-
- {16384, 16384, 32, 32, AdjacencyPattern::all_true},
- {16384, 16384, 64, 32, AdjacencyPattern::all_true},
- {16384, 16384, 128, 32, AdjacencyPattern::all_true},
- {16384, 16384, 256, 32, AdjacencyPattern::all_true},
- {16384, 16384, 512, 32, AdjacencyPattern::all_true},
- {16384, 16384, 1024, 32, AdjacencyPattern::all_true},
- {16384, 16384, 16384, 32, AdjacencyPattern::all_true},
-
- {16384, 16384, 32, 32, AdjacencyPattern::all_false},
- {16384, 16384, 64, 32, AdjacencyPattern::all_false},
- {16384, 16384, 128, 32, AdjacencyPattern::all_false},
- {16384, 16384, 256, 32, AdjacencyPattern::all_false},
- {16384, 16384, 512, 32, AdjacencyPattern::all_false},
- {16384, 16384, 1024, 32, AdjacencyPattern::all_false},
- {16384, 16384, 16384, 32, AdjacencyPattern::all_false},
-};
-
-RAFT_BENCH_REGISTER(masked_l2_nn, "", masked_l2_nn_input_vecs);
-// We don't benchmark double to keep compile times in check when not using the
-// distance library.
-
-} // namespace raft::bench::distance::masked_nn
diff --git a/cpp/bench/prims/distance/tune_pairwise/bench.cu b/cpp/bench/prims/distance/tune_pairwise/bench.cu
deleted file mode 100644
index 81105cdefe..0000000000
--- a/cpp/bench/prims/distance/tune_pairwise/bench.cu
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Tuning benchmarks.
-//
-// Goals:
-//
-// 1. Fast compile times to maintain iteration speed.
-// 2. Create benchmarks that can inform the design of the kernels.
-//
-// Non-goals:
-//
-// 1. Measure every distance operation. Instead measures just one distance
-// operation at the same time.
-// 2. Be useful for finding performance regressions. This is handled by the
-// normal benchmarks.
-//
-// So far, both goals are partly achieved.
-//
-// RE (1), COMPILE TIMES: kernel.cu is fast to compile. This file is not.
-// When the internals of a pairwise distance kernel is changed, this file is not
-// recompiled.
-//
-// RE 2, benchmarks with intent: this file contains a benchmark to check the
-// maximal throughput of a kernel. Measuring other things, like performance on
-// skinny or wide matrices is not yet implemented.
-
-#include "kernel.cuh" // launch_kernel
-
-#include // RAFT_BENCH_REGISTER
-
-#include // pairwise_matrix_params
-
-#include // rmm::device_uvector
-
-#include // std::min
-#include // std::vector
-
-namespace raft::bench::distance::tune {
-
-// Max throughput benchmark.
-//
-// Goal: Measure the maximum distances/sec that can be computed.
-//
-// To achieve this, we make sure that:
-//
-// - Input data size is a multiple of the block tile size.
-//
-// - Perfect distribution of work between SMs, i.e. the number of block tiles is
-// a large multiple (num_waves) of the number of blocks (#SMs * occupancy).
-//
-// - Multiple iterations over Kblk are executed (num_k_iters).
-struct throughput_param {
- int num_waves;
- int occupancy;
- int num_k_iters;
-};
-
-const std::vector throughput_params{
- // 32 waves, requested occupancy of 4, and 32 k iterations typically achieves
- // maximum throughput. No need to pick higher values.
- {32, 4, 32},
-};
-
-struct throughput_bench : public fixture {
- const throughput_param p;
-
- throughput_bench(const throughput_param& p_) : p(p_) {}
-
- void run_benchmark(::benchmark::State& state) override
- {
- // Get block size:
- int block_m, block_n, block_k;
- get_block_size(block_m, block_n, block_k);
-
- // Determine number of blocks that will be launched. This informs the size
- // of the inputs as well as the grid size.
- const int num_sms = raft::getMultiProcessorCount();
- const int max_occupancy = get_max_occupancy();
- const int occupancy = std::min(p.occupancy, max_occupancy);
- const int num_blocks = occupancy * num_sms;
- dim3 grid(num_blocks);
-
- // Create input sizes that are a multiple of the block tile size.
- size_t m = block_m;
- size_t n = block_n * p.num_waves * num_blocks;
- size_t k = block_k * p.num_k_iters;
-
- // DataT, OutT, IdxT, etc, are defined in tuned_kernel.cuh
- rmm::device_uvector x_vec(m * k, stream);
- rmm::device_uvector y_vec(n * k, stream);
- rmm::device_uvector x_norm_vec(m, stream);
- rmm::device_uvector y_norm_vec(n, stream);
- rmm::device_uvector out_vec(m * n, stream);
-
- auto x = x_vec.data();
- auto y = y_vec.data();
- auto x_norm = x_norm_vec.data();
- auto y_norm = y_norm_vec.data();
- auto out = out_vec.data();
- FinOpT fin_op{};
-
- // Create kernel parameter struct. Flip x and y if column major.
- IdxT ldx = row_major ? k : m;
- IdxT ldy = row_major ? k : n;
- IdxT ld_out = row_major ? n : m;
-
- // Template parameters of pairwise_matrix_params are defined in kernel.cuh
- pairwise_matrix_params kparams{
- IdxT(m), IdxT(n), IdxT(k), ldx, ldy, ld_out, x, y, x_norm, y_norm, out, fin_op, row_major};
-
- // Run benchmark
- loop_on_state(state, [&]() { launch_kernel(kparams, grid, stream); });
-
- // Report metrics. We don't report flop/s because we do not know for each
- // distance operation how many flops it costs. For L2_unexp and l1, we can
- // double this number to get the flop/s. For l2 expanded, core_ops/s should
- // equal flop/s (modulo the sqrt and subtracting from the norm).
- size_t num_core_ops = m * n * k;
- size_t read_elts = n * k + m * k;
- size_t write_elts = m * n;
-
- state.counters["m"] = benchmark::Counter(m);
- state.counters["n"] = benchmark::Counter(n);
- state.counters["k"] = benchmark::Counter(k);
- state.counters["occupancy"] = benchmark::Counter(occupancy);
- state.counters["# waves"] = benchmark::Counter(p.num_waves);
- state.counters["# k iters"] = benchmark::Counter(p.num_k_iters);
-
- state.counters["core_ops/s"] = benchmark::Counter(num_core_ops,
- benchmark::Counter::kIsIterationInvariantRate,
- benchmark::Counter::OneK::kIs1000);
-
- state.counters["BW"] = benchmark::Counter(write_elts * sizeof(OutT) + read_elts * sizeof(DataT),
- benchmark::Counter::kIsIterationInvariantRate,
- benchmark::Counter::OneK::kIs1000);
- }
-};
-
-RAFT_BENCH_REGISTER(throughput_bench, "", throughput_params);
-
-} // namespace raft::bench::distance::tune
diff --git a/cpp/bench/prims/distance/tune_pairwise/kernel.cu b/cpp/bench/prims/distance/tune_pairwise/kernel.cu
deleted file mode 100644
index 42173c51f5..0000000000
--- a/cpp/bench/prims/distance/tune_pairwise/kernel.cu
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "kernel.cuh"
-
-#include // pairwise_matrix_sm60_wrapper
-#include // raft::linalg::Policy4x4
-#include // raft::util::arch::SM_compute_arch
-
-namespace raft::bench::distance::tune {
-
-// Distance op
-using OpT = raft::distance::detail::ops::lp_unexp_distance_op;
-constexpr float metric_arg = 2.0;
-OpT distance_op{metric_arg};
-
-// Kernel policy
-constexpr int vec_len = 1;
-using Policy = typename raft::linalg::Policy4x4::Policy;
-
-// Architecture
-namespace arch = raft::util::arch;
-constexpr auto sm_compat_range = arch::SM_range(arch::SM_min(), arch::SM_future());
-
-void launch_kernel(pairwise_matrix_params params, dim3 grid, cudaStream_t stream)
-{
- dim3 block(Policy::Nthreads);
- int smem_size = OpT::shared_mem_size();
-
- // Obtain function pointer to kernel
- auto kernel = raft::distance::detail::pairwise_matrix_kernel;
-
- kernel<<>>(distance_op, params);
- RAFT_CUDA_TRY(cudaGetLastError());
-}
-
-void get_block_size(int& m, int& n, int& k)
-{
- m = Policy::Mblk;
- n = Policy::Nblk;
- k = Policy::Kblk;
-}
-
-void* get_kernel_ptr()
-{
- auto kernel = raft::distance::detail::pairwise_matrix_kernel;
- return reinterpret_cast(kernel);
-}
-
-int get_max_occupancy()
-{
- void* kernel_ptr = get_kernel_ptr();
- int max_occupancy;
- int smem_size = OpT::shared_mem_size();
-
- RAFT_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
- &max_occupancy, kernel_ptr, Policy::Nthreads, smem_size));
-
- return max_occupancy;
-}
-
-} // namespace raft::bench::distance::tune
diff --git a/cpp/bench/prims/distance/tune_pairwise/kernel.cuh b/cpp/bench/prims/distance/tune_pairwise/kernel.cuh
deleted file mode 100644
index 5da54a343c..0000000000
--- a/cpp/bench/prims/distance/tune_pairwise/kernel.cuh
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include // lp_unexp_distance_op
-#include // pairwise_matrix_params
-
-namespace raft::bench::distance::tune {
-
-// Launch one specific kernel with the following template parameters
-constexpr bool row_major = true;
-using DataT = float;
-using AccT = float;
-using OutT = DataT;
-using IdxT = int;
-
-using FinOpT = raft::identity_op;
-
-using pairwise_matrix_params =
- raft::distance::detail::pairwise_matrix_params;
-
-// Launches kernel
-void launch_kernel(pairwise_matrix_params, dim3, cudaStream_t);
-
-// Describes the block size that is decided by the policy
-void get_block_size(int& m, int& n, int& k);
-
-int get_max_occupancy();
-
-} // namespace raft::bench::distance::tune
diff --git a/cpp/bench/prims/neighbors/cagra_bench.cuh b/cpp/bench/prims/neighbors/cagra_bench.cuh
deleted file mode 100644
index acbeba375a..0000000000
--- a/cpp/bench/prims/neighbors/cagra_bench.cuh
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include
-
-#include
-#include
-#include
-#include
-
-#include
-
-#include
-
-namespace raft::bench::neighbors {
-
-struct params {
- /** Size of the dataset. */
- size_t n_samples;
- /** Number of dimensions in the dataset. */
- int n_dims;
- /** The batch size -- number of KNN searches. */
- int n_queries;
- /** Number of nearest neighbours to find for every probe. */
- int k;
- /** kNN graph degree*/
- int degree;
- int itopk_size;
- int block_size;
- int search_width;
- int max_iterations;
- /** Ratio of removed indices. */
- double removed_ratio;
-};
-
-template
-struct CagraBench : public fixture {
- explicit CagraBench(const params& ps)
- : fixture(true),
- params_(ps),
- queries_(make_device_matrix(handle, ps.n_queries, ps.n_dims)),
- dataset_(make_device_matrix(handle, ps.n_samples, ps.n_dims)),
- knn_graph_(make_device_matrix(handle, ps.n_samples, ps.degree)),
- removed_indices_bitset_(handle, ps.n_samples)
- {
- // Generate random dataset and queriees
- raft::random::RngState state{42};
- constexpr T kRangeMax = std::is_integral_v ? std::numeric_limits::max() : T(1);
- constexpr T kRangeMin = std::is_integral_v ? std::numeric_limits::min() : T(-1);
- if constexpr (std::is_integral_v) {
- raft::random::uniformInt(
- handle, state, dataset_.data_handle(), dataset_.size(), kRangeMin, kRangeMax);
- raft::random::uniformInt(
- handle, state, queries_.data_handle(), queries_.size(), kRangeMin, kRangeMax);
- } else {
- raft::random::uniform(
- handle, state, dataset_.data_handle(), dataset_.size(), kRangeMin, kRangeMax);
- raft::random::uniform(
- handle, state, queries_.data_handle(), queries_.size(), kRangeMin, kRangeMax);
- }
-
- // Generate random knn graph
-
- raft::random::uniformInt(
- handle, state, knn_graph_.data_handle(), knn_graph_.size(), 0, ps.n_samples - 1);
-
- auto metric = raft::distance::DistanceType::L2Expanded;
-
- auto removed_indices =
- raft::make_device_vector(handle, ps.removed_ratio * ps.n_samples);
- thrust::sequence(
- resource::get_thrust_policy(handle),
- thrust::device_pointer_cast(removed_indices.data_handle()),
- thrust::device_pointer_cast(removed_indices.data_handle() + removed_indices.extent(0)));
- removed_indices_bitset_.set(handle, removed_indices.view());
- index_.emplace(raft::neighbors::cagra::index(
- handle, metric, make_const_mdspan(dataset_.view()), make_const_mdspan(knn_graph_.view())));
- }
-
- void run_benchmark(::benchmark::State& state) override
- {
- raft::neighbors::cagra::search_params search_params;
- search_params.max_queries = 1024;
- search_params.itopk_size = params_.itopk_size;
- search_params.team_size = 0;
- search_params.thread_block_size = params_.block_size;
- search_params.search_width = params_.search_width;
-
- auto indices = make_device_matrix(handle, params_.n_queries, params_.k);
- auto distances = make_device_matrix(handle, params_.n_queries, params_.k);
- auto ind_v = make_device_matrix_view(
- indices.data_handle(), params_.n_queries, params_.k);
- auto dist_v = make_device_matrix_view(
- distances.data_handle(), params_.n_queries, params_.k);
-
- auto queries_v = make_const_mdspan(queries_.view());
- if (params_.removed_ratio > 0) {
- auto filter = raft::neighbors::filtering::bitset_filter(removed_indices_bitset_.view());
- loop_on_state(state, [&]() {
- raft::neighbors::cagra::search_with_filtering(
- this->handle, search_params, *this->index_, queries_v, ind_v, dist_v, filter);
- });
- } else {
- loop_on_state(state, [&]() {
- raft::neighbors::cagra::search(
- this->handle, search_params, *this->index_, queries_v, ind_v, dist_v);
- });
- }
-
- double data_size = params_.n_samples * params_.n_dims * sizeof(T);
- double graph_size = params_.n_samples * params_.degree * sizeof(IdxT);
-
- int iterations = params_.max_iterations;
- if (iterations == 0) {
- // see search_plan_impl::adjust_search_params()
- double r = params_.itopk_size / static_cast(params_.search_width);
- iterations = 1 + std::min(r * 1.1, r + 10);
- }
- state.counters["dataset (GiB)"] = data_size / (1 << 30);
- state.counters["graph (GiB)"] = graph_size / (1 << 30);
- state.counters["n_rows"] = params_.n_samples;
- state.counters["n_cols"] = params_.n_dims;
- state.counters["degree"] = params_.degree;
- state.counters["n_queries"] = params_.n_queries;
- state.counters["k"] = params_.k;
- state.counters["itopk_size"] = params_.itopk_size;
- state.counters["block_size"] = params_.block_size;
- state.counters["search_width"] = params_.search_width;
- state.counters["iterations"] = iterations;
- state.counters["removed_ratio"] = params_.removed_ratio;
- }
-
- private:
- const params params_;
- std::optional> index_;
- raft::device_matrix queries_;
- raft::device_matrix dataset_;
- raft::device_matrix knn_graph_;
- raft::core::bitset removed_indices_bitset_;
-};
-
-inline const std::vector generate_inputs()
-{
- std::vector inputs =
- raft::util::itertools::product({2000000ull}, // n_samples
- {128, 256, 512, 1024}, // dataset dim
- {1000}, // n_queries
- {32}, // k
- {64}, // knn graph degree
- {64}, // itopk_size
- {0}, // block_size
- {1}, // search_width
- {0}, // max_iterations
- {0.0} // removed_ratio
- );
- auto inputs2 = raft::util::itertools::product({2000000ull, 10000000ull}, // n_samples
- {128}, // dataset dim
- {1000}, // n_queries
- {32}, // k
- {64}, // knn graph degree
- {64}, // itopk_size
- {64, 128, 256, 512, 1024}, // block_size
- {1}, // search_width
- {0}, // max_iterations
- {0.0} // removed_ratio
- );
- inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());
-
- inputs2 = raft::util::itertools::product(
- {2000000ull, 10000000ull}, // n_samples
- {128}, // dataset dim
- {1, 10, 10000}, // n_queries
- {255}, // k
- {64}, // knn graph degree
- {300}, // itopk_size
- {256}, // block_size
- {2}, // search_width
- {0}, // max_iterations
- {0.0, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64} // removed_ratio
- );
- inputs.insert(inputs.end(), inputs2.begin(), inputs2.end());
- return inputs;
-}
-
-const std::vector kCagraInputs = generate_inputs();
-
-#define CAGRA_REGISTER(ValT, IdxT, inputs) \
- namespace BENCHMARK_PRIVATE_NAME(knn) { \
- using AnnCagra = CagraBench; \
- RAFT_BENCH_REGISTER(AnnCagra, #ValT "/" #IdxT, inputs); \
- }
-
-} // namespace raft::bench::neighbors
diff --git a/cpp/bench/prims/neighbors/knn.cuh b/cpp/bench/prims/neighbors/knn.cuh
deleted file mode 100644
index 6499078623..0000000000
--- a/cpp/bench/prims/neighbors/knn.cuh
+++ /dev/null
@@ -1,516 +0,0 @@
-/*
- * Copyright (c) 2022-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-
-#include