diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1330990c6..7ce4a14c3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,76 @@
+# cuvs 24.10.00 (9 Oct 2024)
+
+## 🐛 Bug Fixes
+
+- Use 64 bit types for dataset size calculation in CAGRA graph optimizer ([#380](https://github.com/rapidsai/cuvs/pull/380)) [@tfeher](https://github.com/tfeher)
+- Remove EXPLICIT_INSTANTIATE_ONLY macros ([#358](https://github.com/rapidsai/cuvs/pull/358)) [@achirkin](https://github.com/achirkin)
+- Fix order of operations for cosine IVF Flat ([#329](https://github.com/rapidsai/cuvs/pull/329)) [@lowener](https://github.com/lowener)
+- Exclude any kernel symbol that uses cutlass ([#314](https://github.com/rapidsai/cuvs/pull/314)) [@benfred](https://github.com/benfred)
+- [Fix] pin raft dependent to rapidsai ([#299](https://github.com/rapidsai/cuvs/pull/299)) [@rhdong](https://github.com/rhdong)
+- Fix dataset dimension in IVF-PQ C wrappers ([#292](https://github.com/rapidsai/cuvs/pull/292)) [@tfeher](https://github.com/tfeher)
+- Fix python ivf-pq for int8/uint8 dtypes ([#271](https://github.com/rapidsai/cuvs/pull/271)) [@benfred](https://github.com/benfred)
+- FP16 API for CAGRA and IVF-PQ ([#264](https://github.com/rapidsai/cuvs/pull/264)) [@tfeher](https://github.com/tfeher)
+
+## 📖 Documentation
+
+- More doc updates for 24.10 ([#396](https://github.com/rapidsai/cuvs/pull/396)) [@cjnolet](https://github.com/cjnolet)
+- fix 404 in documentation link in readme ([#395](https://github.com/rapidsai/cuvs/pull/395)) [@benfred](https://github.com/benfred)
+- Improving getting started materials ([#342](https://github.com/rapidsai/cuvs/pull/342)) [@cjnolet](https://github.com/cjnolet)
+- Fix broken examples link in README. ([#326](https://github.com/rapidsai/cuvs/pull/326)) [@bdice](https://github.com/bdice)
+- Recommend `miniforge` for conda install ([#325](https://github.com/rapidsai/cuvs/pull/325)) [@bdice](https://github.com/bdice)
+
+## 🚀 New Features
+
+- Port remaining scripts to `cuvs_bench` ([#368](https://github.com/rapidsai/cuvs/pull/368)) [@divyegala](https://github.com/divyegala)
+- [Feat] Relative change with `bitset` API feature #2439 in raft ([#350](https://github.com/rapidsai/cuvs/pull/350)) [@rhdong](https://github.com/rhdong)
+- cuvs_bench plotting functions ([#347](https://github.com/rapidsai/cuvs/pull/347)) [@dantegd](https://github.com/dantegd)
+- CosineExpanded Metric for IVF-PQ (normalize inputs) ([#346](https://github.com/rapidsai/cuvs/pull/346)) [@tarang-jain](https://github.com/tarang-jain)
+- Python API for CAGRA+HNSW ([#246](https://github.com/rapidsai/cuvs/pull/246)) [@divyegala](https://github.com/divyegala)
+- C API for CAGRA+HNSW ([#240](https://github.com/rapidsai/cuvs/pull/240)) [@divyegala](https://github.com/divyegala)
+- SNMG ANN ([#231](https://github.com/rapidsai/cuvs/pull/231)) [@viclafargue](https://github.com/viclafargue)
+- [FEA] Support for half-float mixed precise in brute-force ([#225](https://github.com/rapidsai/cuvs/pull/225)) [@rhdong](https://github.com/rhdong)
+
+## 🛠️ Improvements
+
+- Remove cuvs-cagra-search from cuvs_static link ([#388](https://github.com/rapidsai/cuvs/pull/388)) [@benfred](https://github.com/benfred)
+- Add a static library for cuvs ([#382](https://github.com/rapidsai/cuvs/pull/382)) [@benfred](https://github.com/benfred)
+- Put the ann-bench large_workspace_resource in managed memory ([#372](https://github.com/rapidsai/cuvs/pull/372)) [@achirkin](https://github.com/achirkin)
+- Add multigpu kmeans fit function ([#348](https://github.com/rapidsai/cuvs/pull/348)) [@benfred](https://github.com/benfred)
+- Update update-version.sh to use packaging lib ([#344](https://github.com/rapidsai/cuvs/pull/344)) [@AyodeAwe](https://github.com/AyodeAwe)
+- remove NCCL pins in build and test environments ([#341](https://github.com/rapidsai/cuvs/pull/341)) [@jameslamb](https://github.com/jameslamb)
+- Vamana/DiskANN index build ([#339](https://github.com/rapidsai/cuvs/pull/339)) [@bkarsin](https://github.com/bkarsin)
+- Use CI workflow branch 'branch-24.10' again ([#331](https://github.com/rapidsai/cuvs/pull/331)) [@jameslamb](https://github.com/jameslamb)
+- fix style checks on Python 3.12 ([#328](https://github.com/rapidsai/cuvs/pull/328)) [@jameslamb](https://github.com/jameslamb)
+- Update flake8 to 7.1.1. ([#327](https://github.com/rapidsai/cuvs/pull/327)) [@bdice](https://github.com/bdice)
+- Add function for calculating the mutual_reachability_graph ([#323](https://github.com/rapidsai/cuvs/pull/323)) [@benfred](https://github.com/benfred)
+- Simplify libcuvs conda recipe. ([#322](https://github.com/rapidsai/cuvs/pull/322)) [@bdice](https://github.com/bdice)
+- Refactor dependencies.yaml to use depends-on pattern. ([#321](https://github.com/rapidsai/cuvs/pull/321)) [@bdice](https://github.com/bdice)
+- Update Python versions in cuvs_bench pyproject.toml. ([#318](https://github.com/rapidsai/cuvs/pull/318)) [@bdice](https://github.com/bdice)
+- Brute force knn tile size heuristic ([#316](https://github.com/rapidsai/cuvs/pull/316)) [@mfoerste4](https://github.com/mfoerste4)
+- Euclidean distance example ([#315](https://github.com/rapidsai/cuvs/pull/315)) [@abner-ma](https://github.com/abner-ma)
+- Migrate trustworthiness and silhouette_score stats from RAFT ([#313](https://github.com/rapidsai/cuvs/pull/313)) [@benfred](https://github.com/benfred)
+- Add support for Python 3.12 ([#312](https://github.com/rapidsai/cuvs/pull/312)) [@jameslamb](https://github.com/jameslamb)
+- Add `managed` option for RMM Pool memory resource to C API ([#305](https://github.com/rapidsai/cuvs/pull/305)) [@ajit283](https://github.com/ajit283)
+- Update rapidsai/pre-commit-hooks ([#303](https://github.com/rapidsai/cuvs/pull/303)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Expose search function with pre-filter for ANN ([#302](https://github.com/rapidsai/cuvs/pull/302)) [@lowener](https://github.com/lowener)
+- Drop Python 3.9 support ([#301](https://github.com/rapidsai/cuvs/pull/301)) [@jameslamb](https://github.com/jameslamb)
+- Use CUDA math wheels ([#298](https://github.com/rapidsai/cuvs/pull/298)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Remove NumPy <2 pin ([#297](https://github.com/rapidsai/cuvs/pull/297)) [@seberg](https://github.com/seberg)
+- CAGRA - separable compilation for distance computation ([#296](https://github.com/rapidsai/cuvs/pull/296)) [@achirkin](https://github.com/achirkin)
+- Updating example notebooks ([#294](https://github.com/rapidsai/cuvs/pull/294)) [@cjnolet](https://github.com/cjnolet)
+- Add RMM Pool memory resource to C API ([#285](https://github.com/rapidsai/cuvs/pull/285)) [@ajit283](https://github.com/ajit283)
+- Update pre-commit hooks ([#283](https://github.com/rapidsai/cuvs/pull/283)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA)
+- Improve update-version.sh ([#282](https://github.com/rapidsai/cuvs/pull/282)) [@bdice](https://github.com/bdice)
+- Use tool.scikit-build.cmake.version, set scikit-build-core minimum-version ([#280](https://github.com/rapidsai/cuvs/pull/280)) [@jameslamb](https://github.com/jameslamb)
+- Add cuvs_bench.run python code and build ([#279](https://github.com/rapidsai/cuvs/pull/279)) [@dantegd](https://github.com/dantegd)
+- Add cuvs-bench to dependencies and conda environments ([#275](https://github.com/rapidsai/cuvs/pull/275)) [@dantegd](https://github.com/dantegd)
+- Update pip devcontainers to UCX v1.17.0 ([#262](https://github.com/rapidsai/cuvs/pull/262)) [@jameslamb](https://github.com/jameslamb)
+- Adding example for tuning build and search params using Optuna ([#257](https://github.com/rapidsai/cuvs/pull/257)) [@dpadmanabhan03](https://github.com/dpadmanabhan03)
+- Fixed link to build docs and corrected ivf_flat_example ([#255](https://github.com/rapidsai/cuvs/pull/255)) [@mmccarty](https://github.com/mmccarty)
+- Merge branch-24.08 into branch-24.10 ([#254](https://github.com/rapidsai/cuvs/pull/254)) [@jameslamb](https://github.com/jameslamb)
+- Persistent CAGRA kernel ([#215](https://github.com/rapidsai/cuvs/pull/215)) [@achirkin](https://github.com/achirkin)
+- [FEA] Support for Cosine distance in IVF-Flat ([#179](https://github.com/rapidsai/cuvs/pull/179)) [@lowener](https://github.com/lowener)
+
# cuvs 24.08.00 (7 Aug 2024)
## 🚨 Breaking Changes
diff --git a/README.md b/README.md
index e697c61ed..213fde632 100755
--- a/README.md
+++ b/README.md
@@ -1,11 +1,7 @@
#
cuVS: Vector Search and Clustering on the GPU
> [!note]
-> cuVS is a new library mostly derived from the approximate nearest neighbors and clustering algorithms in the [RAPIDS RAFT](https://github.com/rapidsai/raft) library of data mining primitives. RAPIDS RAFT currently contains the most fully-featured versions of the approximate nearest neighbors and clustering algorithms in cuVS. We are in the process of migrating the algorithms from RAFT to cuVS, but if you are unsure of which to use, please consider the following:
-> 1. RAFT contains C++ and Python APIs for all of the approximate nearest neighbors and clustering algorithms.
-> 2. cuVS contains a growing support for different languages, including C, C++, Python, and Rust. We will be adding more language support to cuVS in the future but will not be improving the language support for RAFT.
-> 3. Once all of RAFT's approximate nearest neighbors and clustering algorithms are moved to cuVS, the RAFT APIs will be deprecated and eventually removed altogether. Once removed, RAFT will become a lightweight header-only library. In the meantime, there's no harm in using RAFT if support for additional languages is not needed.
-
+> cuVS is a new library mostly derived from the approximate nearest neighbors and clustering algorithms in the [RAPIDS RAFT](https://github.com/rapidsai/raft) library of machine learning and data mining primitives. As of version 24.10 (released in October 2024), cuVS contains the most fully-featured versions of the approximate nearest neighbors and clustering algorithms from RAFT. The algorithms which have been migrated over to cuVS will be removed from RAFT in version 24.12 (released in December 2024).
## Contents
@@ -18,10 +14,11 @@
## Useful Resources
+- [Documentation](https://docs.rapids.ai/api/cuvs/nightly/): Library documentation.
+- [Build and Install Guide](https://docs.rapids.ai/api/cuvs/nightly/build): Instructions for installing and building cuVS.
+- [Getting Started Guide](https://docs.rapids.ai/api/cuvs/nightly/getting_started): Guide to getting started with cuVS.
- [Code Examples](https://github.com/rapidsai/cuvs/tree/HEAD/examples): Self-contained Code Examples.
- [API Reference Documentation](https://docs.rapids.ai/api/cuvs/nightly/api_docs): API Documentation.
-- [Getting Started Guide](https://docs.rapids.ai/api/cuvs/nightly/getting_started): Getting started with RAFT.
-- [Build and Install Guide](https://docs.rapids.ai/api/cuvs/nightly/build): Instructions for installing and building cuVS.
- [RAPIDS Community](https://rapids.ai/community.html): Get help, contribute, and collaborate.
- [GitHub repository](https://github.com/rapidsai/cuvs): Download the cuVS source code.
- [Issue tracker](https://github.com/rapidsai/cuvs/issues): Report issues or request features.
@@ -30,34 +27,87 @@
cuVS contains state-of-the-art implementations of several algorithms for running approximate nearest neighbors and clustering on the GPU. It can be used directly or through the various databases and other libraries that have integrated it. The primary goal of cuVS is to simplify the use of GPUs for vector similarity search and clustering.
+Vector search is an information retrieval method that has been growing in popularity over the past few years, partly because of the rising importance of multimedia embeddings created from unstructured data and the need to perform semantic search on the embeddings to find items which are semantically similar to each other.
+
+Vector search is also used in _data mining and machine learning_ tasks and comprises an important step in many _clustering_ and _visualization_ algorithms like [UMAP](https://arxiv.org/abs/2008.00325), [t-SNE](https://lvdmaaten.github.io/tsne/), K-means, and [HDBSCAN](https://hdbscan.readthedocs.io/en/latest/how_hdbscan_works.html).
+
+Finally, faster vector search enables interactions between dense vectors and graphs. Converting a pile of dense vectors into nearest neighbors graphs unlocks the entire world of graph analysis algorithms, such as those found in [GraphBLAS](https://graphblas.org/) and [cuGraph](https://github.com/rapidsai/cugraph).
+
+Below are some common use-cases for vector search:
+
+- ### Semantic search
+ - Generative AI & Retrieval augmented generation (RAG)
+ - Recommender systems
+ - Computer vision
+ - Image search
+ - Text search
+ - Audio search
+ - Molecular search
+ - Model training
+
+
+- ### Data mining
+ - Clustering algorithms
+ - Visualization algorithms
+ - Sampling algorithms
+ - Class balancing
+ - Ensemble methods
+ - k-NN graph construction
+
+## Why cuVS?
+
+There are several benefits to using cuVS and GPUs for vector search, including:
+
+1. Fast index build
+2. Latency critical and high throughput search
+3. Parameter tuning
+4. Cost savings
+5. Interoperability (build on GPU, deploy on CPU)
+6. Multiple language support
+7. Building blocks for composing new or accelerating existing algorithms
+
+In addition to the items above, cuVS takes on the burden of keeping non-trivial accelerated code up to date as new NVIDIA architectures and CUDA versions are released. This provides a delightful development experience, guaranteeing that any libraries, databases, or applications built on top of it will always be getting the best performance and scale.
+
## Installing cuVS
-cuVS comes with pre-built packages that can be installed through [conda](https://conda.io/projects/conda/en/latest/user-guide/getting-started.html#managing-python). Different packages are available for the different languages supported by cuVS:
+cuVS comes with pre-built packages that can be installed through [conda](https://conda.io/projects/conda/en/latest/user-guide/getting-started.html#managing-python) and [pip](https://pip.pypa.io/en/stable/). Different packages are available for the different languages supported by cuVS:
-| Python | C/C++ |
-|--------|-----------------------------|
-| `cuvs` | `libcuvs`, `libcuvs-static` |
+| Python | C/C++ |
+|--------|-----------|
+| `cuvs` | `libcuvs` |
### Stable release
-It is recommended to use [mamba](https://mamba.readthedocs.io/en/latest/installation/mamba-installation.html) to install the desired packages. The following command will install the Python package. You can substitute `cuvs` for any of the packages in the table above:
+It is recommended to use [mamba](https://conda.github.io/conda-libmamba-solver/user-guide/) to install the desired packages. The following command will install the Python package. You can substitute `cuvs` for any of the packages in the table above:
```bash
-mamba install -c conda-forge -c nvidia -c rapidsai cuvs
+conda install -c conda-forge -c nvidia -c rapidsai cuvs
```
+The cuVS Python package can also be installed through [pip](https://pip.pypa.io/en/stable/).
+
+For CUDA 11 packages:
+```bash
+pip install cuvs-cu11 --extra-index-url=https://pypi.nvidia.com
+```
+
+And CUDA 12 packages:
+```bash
+pip install cuvs-cu12 --extra-index-url=https://pypi.nvidia.com
+```
+
### Nightlies
If installing a version that has not yet been released, the `rapidsai` channel can be replaced with `rapidsai-nightly`:
```bash
-mamba install -c conda-forge -c nvidia -c rapidsai-nightly cuvs=24.10
+conda install -c conda-forge -c nvidia -c rapidsai-nightly cuvs=24.10
```
-Please see the [Build and Install Guide](https://docs.rapids.ai/api/cuvs/stable/build/) for more information on installing cuVS and building from source.
+cuVS also has `pip` wheel packages that can be installed. Please see the [Build and Install Guide](https://docs.rapids.ai/api/cuvs/nightly/build/) for more information on installing the available cuVS packages and building from source.
## Getting Started
-The following code snippets train an approximate nearest neighbors index for the CAGRA algorithm.
+The following code snippets train an approximate nearest neighbors index for the CAGRA algorithm in the various different languages supported by cuVS.
### Python API
@@ -85,7 +135,7 @@ cagra::index_params index_params;
auto index = cagra::build(res, index_params, dataset);
```
-For more examples of the C++ APIs, refer to the [examples](https://github.com/rapidsai/cuvs/tree/HEAD/examples) directory in the codebase.
+For more code examples of the C++ APIs, including drop-in CMake project templates, please refer to the [C++ examples](https://github.com/rapidsai/cuvs/tree/HEAD/examples) directory in the codebase.
### C API
@@ -110,6 +160,8 @@ cuvsCagraIndexParamsDestroy(index_params);
cuvsResourcesDestroy(res);
```
+For more code examples of the C APIs, including drop-in CMake project templates, please refer to the [C examples](https://github.com/rapidsai/cuvs/tree/branch-24.10/examples/c).
+
### Rust API
```rust
@@ -171,6 +223,7 @@ fn cagra_example() -> Result<()> {
}
```
+For more code examples of the Rust APIs, including drop-in project templates, please refer to the [Rust examples](https://github.com/rapidsai/cuvs/tree/branch-24.10/examples/rust).
## Contributing
@@ -178,60 +231,9 @@ If you are interested in contributing to the cuVS library, please read our [Cont
## References
-When citing cuVS generally, please consider referencing this Github repository.
-```bibtex
-@misc{rapidsai,
- title={Rapidsai/cuVS: Vector Search and Clustering on the GPU.},
- url={https://github.com/rapidsai/cuvs},
- journal={GitHub},
- publisher={Nvidia RAPIDS},
- author={Rapidsai},
- year={2024}
-}
-```
-
-If citing CAGRA, please consider the following bibtex:
-```bibtex
-@misc{ootomo2023cagra,
- title={CAGRA: Highly Parallel Graph Construction and Approximate Nearest Neighbor Search for GPUs},
- author={Hiroyuki Ootomo and Akira Naruse and Corey Nolet and Ray Wang and Tamas Feher and Yong Wang},
- year={2023},
- eprint={2308.15136},
- archivePrefix={arXiv},
- primaryClass={cs.DS}
-}
-```
-
-If citing the k-selection routines, please consider the following bibtex:
-```bibtex
-@proceedings{10.1145/3581784,
- title = {SC '23: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
- year = {2023},
- isbn = {9798400701092},
- publisher = {Association for Computing Machinery},
- address = {New York, NY, USA},
- abstract = {Started in 1988, the SC Conference has become the annual nexus for researchers and practitioners from academia, industry and government to share information and foster collaborations to advance the state of the art in High Performance Computing (HPC), Networking, Storage, and Analysis.},
- location = {, Denver, CO, USA, }
-}
-```
-
-If citing the nearest neighbors descent API, please consider the following bibtex:
-```bibtex
-@inproceedings{10.1145/3459637.3482344,
- author = {Wang, Hui and Zhao, Wan-Lei and Zeng, Xiangxiang and Yang, Jianye},
- title = {Fast K-NN Graph Construction by GPU Based NN-Descent},
- year = {2021},
- isbn = {9781450384469},
- publisher = {Association for Computing Machinery},
- address = {New York, NY, USA},
- url = {https://doi.org/10.1145/3459637.3482344},
- doi = {10.1145/3459637.3482344},
- abstract = {NN-Descent is a classic k-NN graph construction approach. It is still widely employed in machine learning, computer vision, and information retrieval tasks due to its efficiency and genericness. However, the current design only works well on CPU. In this paper, NN-Descent has been redesigned to adapt to the GPU architecture. A new graph update strategy called selective update is proposed. It reduces the data exchange between GPU cores and GPU global memory significantly, which is the processing bottleneck under GPU computation architecture. This redesign leads to full exploitation of the parallelism of the GPU hardware. In the meantime, the genericness, as well as the simplicity of NN-Descent, are well-preserved. Moreover, a procedure that allows to k-NN graph to be merged efficiently on GPU is proposed. It makes the construction of high-quality k-NN graphs for out-of-GPU-memory datasets tractable. Our approach is 100-250\texttimes{} faster than the single-thread NN-Descent and is 2.5-5\texttimes{} faster than the existing GPU-based approaches as we tested on million as well as billion scale datasets.},
- booktitle = {Proceedings of the 30th ACM International Conference on Information \& Knowledge Management},
- pages = {1929–1938},
- numpages = {10},
- keywords = {high-dimensional, nn-descent, gpu, k-nearest neighbor graph},
- location = {Virtual Event, Queensland, Australia},
- series = {CIKM '21}
-}
-```
+For the interested reader, many of the accelerated implementations in cuVS are also based on research papers which can provide a lot more background. We also ask you to please cite the corresponding algorithms by referencing them in your own research.
+- [CAGRA: Highly Parallel Graph Construction and Approximate Nearest Neighbor Search](https://arxiv.org/abs/2308.15136)
+- [Top-K Algorithms on GPU: A Comprehensive Study and New Methods](https://dl.acm.org/doi/10.1145/3581784.3607062)
+- [Fast K-NN Graph Construction by GPU Based NN-Descent](https://dl.acm.org/doi/abs/10.1145/3459637.3482344?casa_token=O_nan1B1F5cAAAAA:QHWDEhh0wmd6UUTLY9_Gv6c3XI-5DXM9mXVaUXOYeStlpxTPmV3nKvABRfoivZAaQ3n8FWyrkWw)
+- [cuSLINK: Single-linkage Agglomerative Clustering on the GPU](https://arxiv.org/abs/2306.16354)
+- [GPU Semiring Primitives for Sparse Neighborhood Methods](https://arxiv.org/abs/2104.06357)
diff --git a/build.sh b/build.sh
index b463f0f0d..b787d3a41 100755
--- a/build.sh
+++ b/build.sh
@@ -18,7 +18,7 @@ ARGS=$*
# scripts, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)
-VALIDARGS="clean libcuvs python rust docs tests bench-ann examples --uninstall -v -g -n --compile-static-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h"
+VALIDARGS="clean libcuvs python rust docs tests bench-ann examples --uninstall -v -g -n --compile-static-lib --allgpuarch --no-mg --no-cpu --cpu-only --no-shared-libs --no-nvtx --show_depr_warn --incl-cache-stats --time -h"
HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-ann=] [--build-metrics=]
where is:
clean - remove all existing build artifacts and configuration (start over)
@@ -37,10 +37,13 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool==0.0.0a0
- make
+- nccl>=2.19
- ninja
- numpy>=1.23,<3.0a0
- numpydoc
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index ce9a7f058..a25393050 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -37,6 +37,7 @@ dependencies:
- libcusparse=11.7.5.86
- librmm==24.10.*,>=0.0.0a0
- make
+- nccl>=2.19
- ninja
- numpy>=1.23,<3.0a0
- numpydoc
diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml
index 116e80ac2..bb4a96d48 100644
--- a/conda/environments/all_cuda-125_arch-aarch64.yaml
+++ b/conda/environments/all_cuda-125_arch-aarch64.yaml
@@ -34,6 +34,7 @@ dependencies:
- libcusparse-dev
- librmm==24.10.*,>=0.0.0a0
- make
+- nccl>=2.19
- ninja
- numpy>=1.23,<3.0a0
- numpydoc
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index 7f7ad045d..bd1b95ae8 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -34,6 +34,7 @@ dependencies:
- libcusparse-dev
- librmm==24.10.*,>=0.0.0a0
- make
+- nccl>=2.19
- ninja
- numpy>=1.23,<3.0a0
- numpydoc
diff --git a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml
index 73c42ca71..554ad41ab 100644
--- a/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml
+++ b/conda/environments/bench_ann_cuda-118_arch-aarch64.yaml
@@ -35,6 +35,7 @@ dependencies:
- libcusparse=11.7.5.86
- librmm==24.10.*,>=0.0.0a0
- matplotlib
+- nccl>=2.19
- ninja
- nlohmann_json>=3.11.2
- nvcc_linux-aarch64=11.8
diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
index 473e50bc6..dc38f3565 100644
--- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
@@ -35,6 +35,7 @@ dependencies:
- libcusparse=11.7.5.86
- librmm==24.10.*,>=0.0.0a0
- matplotlib
+- nccl>=2.19
- ninja
- nlohmann_json>=3.11.2
- nvcc_linux-64=11.8
diff --git a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml
index 8a877c4c0..aeb23a9ef 100644
--- a/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml
+++ b/conda/environments/bench_ann_cuda-125_arch-aarch64.yaml
@@ -32,6 +32,7 @@ dependencies:
- libcusparse-dev
- librmm==24.10.*,>=0.0.0a0
- matplotlib
+- nccl>=2.19
- ninja
- nlohmann_json>=3.11.2
- openblas
diff --git a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml
index 54859a77f..3a408cd64 100644
--- a/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/bench_ann_cuda-125_arch-x86_64.yaml
@@ -32,6 +32,7 @@ dependencies:
- libcusparse-dev
- librmm==24.10.*,>=0.0.0a0
- matplotlib
+- nccl>=2.19
- ninja
- nlohmann_json>=3.11.2
- openblas
diff --git a/conda/recipes/cuvs_bench/build.sh b/conda/recipes/cuvs_bench/build.sh
new file mode 100644
index 000000000..05fb7bada
--- /dev/null
+++ b/conda/recipes/cuvs_bench/build.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+./build.sh bench-ann --allgpuarch --no-nvtx --build-metrics=bench_ann --incl-cache-stats
+cmake --install cpp/build --component ann_bench
diff --git a/conda/recipes/cuvs_bench/conda_build_config.yaml b/conda/recipes/cuvs_bench/conda_build_config.yaml
new file mode 100644
index 000000000..47bd730da
--- /dev/null
+++ b/conda/recipes/cuvs_bench/conda_build_config.yaml
@@ -0,0 +1,70 @@
+c_compiler_version:
+ - 11
+
+cxx_compiler_version:
+ - 11
+
+cuda_compiler:
+ - cuda-nvcc
+
+cuda11_compiler:
+ - nvcc
+
+c_stdlib:
+ - sysroot
+
+c_stdlib_version:
+ - "2.17"
+
+cmake_version:
+ - ">=3.26.4,!=3.30.0"
+
+nccl_version:
+ - ">=2.19"
+
+glog_version:
+ - ">=0.6.0"
+
+h5py_version:
+ - ">=3.8.0"
+
+nlohmann_json_version:
+ - ">=3.11.2"
+
+# The CTK libraries below are missing from the conda-forge::cudatoolkit package
+# for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages
+# and the "*_run_*" version specifiers correspond to `11.x` packages.
+
+cuda11_libcublas_host_version:
+ - "=11.11.3.6"
+
+cuda11_libcublas_run_version:
+ - ">=11.5.2.43,<12.0.0"
+
+cuda11_libcurand_host_version:
+ - "=10.3.0.86"
+
+cuda11_libcurand_run_version:
+ - ">=10.2.5.43,<10.3.1"
+
+cuda11_libcusolver_host_version:
+ - "=11.4.1.48"
+
+cuda11_libcusolver_run_version:
+ - ">=11.2.0.43,<11.4.2"
+
+cuda11_libcusparse_host_version:
+ - "=11.7.5.86"
+
+cuda11_libcusparse_run_version:
+ - ">=11.6.0.43,<12.0.0"
+
+# `cuda-profiler-api` only has `11.8.0` and `12.0.0` packages for all
+# architectures. The "*_host_*" version specifiers correspond to `11.8` packages and the
+# "*_run_*" version specifiers correspond to `11.x` packages.
+
+cuda11_cuda_profiler_api_host_version:
+ - "=11.8.86"
+
+cuda11_cuda_profiler_api_run_version:
+ - ">=11.4.240,<12"
diff --git a/conda/recipes/cuvs_bench/meta.yaml b/conda/recipes/cuvs_bench/meta.yaml
new file mode 100644
index 000000000..9ecbf82bb
--- /dev/null
+++ b/conda/recipes/cuvs_bench/meta.yaml
@@ -0,0 +1,105 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+# Usage:
+# conda build . -c rapidsai -c conda-forge -c nvidia
+{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
+{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
+{% set py_version = environ['CONDA_PY'] %}
+{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
+{% set cuda_major = cuda_version.split('.')[0] %}
+{% set date_string = environ['RAPIDS_DATE_STRING'] %}
+
+package:
+ name: cuvs_bench
+ version: {{ version }}
+ script: build.sh
+
+source:
+ path: ../../..
+
+build:
+ script_env:
+ - AWS_ACCESS_KEY_ID
+ - AWS_SECRET_ACCESS_KEY
+ - AWS_SESSION_TOKEN
+ - CMAKE_C_COMPILER_LAUNCHER
+ - CMAKE_CUDA_COMPILER_LAUNCHER
+ - CMAKE_CXX_COMPILER_LAUNCHER
+ - CMAKE_GENERATOR
+ - PARALLEL_LEVEL
+ - RAPIDS_ARTIFACTS_DIR
+ - SCCACHE_BUCKET
+ - SCCACHE_IDLE_TIMEOUT
+ - SCCACHE_REGION
+ - SCCACHE_S3_KEY_PREFIX=cuvs-bench-aarch64 # [aarch64]
+ - SCCACHE_S3_KEY_PREFIX=cuvs-bench-linux64 # [linux64]
+ - SCCACHE_S3_USE_SSL
+ number: {{ GIT_DESCRIBE_NUMBER }}
+ string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+ ignore_run_exports_from:
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }}
+ {% else %}
+ - {{ compiler('cuda') }}
+ - cuda-cudart-dev
+ - libcublas-dev
+ {% endif %}
+
+requirements:
+ build:
+ - {{ compiler('c') }}
+ - {{ compiler('cxx') }}
+ {% if cuda_major == "11" %}
+ - {{ compiler('cuda11') }} ={{ cuda_version }}
+ {% else %}
+ - {{ compiler('cuda') }}
+ {% endif %}
+ - cuda-version ={{ cuda_version }}
+ - cmake {{ cmake_version }}
+ - ninja
+ - {{ stdlib("c") }}
+
+ host:
+ - benchmark
+ - cuda-version ={{ cuda_version }}
+ {% if cuda_major == "11" %}
+ - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }}
+ - libcublas {{ cuda11_libcublas_host_version }}
+ - libcublas-dev {{ cuda11_libcublas_host_version }}
+ {% else %}
+ - cuda-cudart-dev
+ - cuda-profiler-api
+ - libcublas-dev
+ {% endif %}
+ - glog {{ glog_version }}
+ - libcuvs {{ version }}
+ - nlohmann_json {{ nlohmann_json_version }}
+ - openblas
+ # rmm is needed to determine if package is gpu-enabled
+ - python
+ - rapids-build-backend>=0.3.0,<0.4.0.dev0
+ - rmm ={{ minor_version }}
+
+ run:
+ - benchmark
+ - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
+ {% if cuda_major == "11" %}
+ - cudatoolkit
+ {% else %}
+ - cuda-cudart
+ - libcublas
+ {% endif %}
+ - glog {{ glog_version }}
+ - libcuvs {{ version }}
+ - h5py {{ h5py_version }}
+ - matplotlib
+ - pandas
+ - pyyaml
+ # rmm is needed to determine if package is gpu-enabled
+ - pylibraft ={{ minor_version }}
+ - python
+ - rmm ={{ minor_version }}
+about:
+ home: https://rapids.ai/
+ license: Apache-2.0
+ summary: cuVS GPU and CPU benchmarks
diff --git a/conda/recipes/cuvs_bench_cpu/build.sh b/conda/recipes/cuvs_bench_cpu/build.sh
new file mode 100644
index 000000000..163872053
--- /dev/null
+++ b/conda/recipes/cuvs_bench_cpu/build.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+./build.sh bench-ann --cpu-only --no-nvtx --build-metrics=bench_ann_cpu --incl-cache-stats
+cmake --install cpp/build --component ann_bench
diff --git a/conda/recipes/cuvs_bench_cpu/conda_build_config.yaml b/conda/recipes/cuvs_bench_cpu/conda_build_config.yaml
new file mode 100644
index 000000000..ed6f708e1
--- /dev/null
+++ b/conda/recipes/cuvs_bench_cpu/conda_build_config.yaml
@@ -0,0 +1,29 @@
+c_compiler_version:
+ - 11
+
+cxx_compiler_version:
+ - 11
+
+c_stdlib:
+ - sysroot
+
+c_stdlib_version:
+ - "2.17"
+
+cmake_version:
+ - ">=3.26.4,!=3.30.0"
+
+glog_version:
+ - ">=0.6.0"
+
+h5py_version:
+ - ">=3.8.0"
+
+nlohmann_json_version:
+ - ">=3.11.2"
+
+spdlog_version:
+ - ">=1.14.1,<1.15"
+
+fmt_version:
+ - ">=11.0.2,<12"
diff --git a/conda/recipes/cuvs_bench_cpu/meta.yaml b/conda/recipes/cuvs_bench_cpu/meta.yaml
new file mode 100644
index 000000000..0ce5db744
--- /dev/null
+++ b/conda/recipes/cuvs_bench_cpu/meta.yaml
@@ -0,0 +1,67 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+# Usage:
+# conda build . -c rapidsai -c conda-forge -c nvidia
+{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
+{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
+{% set py_version = environ['CONDA_PY'] %}
+{% set date_string = environ['RAPIDS_DATE_STRING'] %}
+# version is RAPIDS_PACKAGE_VERSION with leading 'v' characters stripped plus VERSION_SUFFIX when set; minor_version keeps its first two dot-separated fields and is not referenced below.
+package:
+ name: cuvs_bench_cpu
+ version: {{ version }}
+ script: build.sh
+# Sources come from a relative path three directory levels up.
+source:
+ path: ../../..
+# script_env lists environment variables forwarded to the build script; NAME=value entries assign a value, and the trailing selector comments pick one entry per platform.
+build:
+ script_env:
+ - AWS_ACCESS_KEY_ID
+ - AWS_SECRET_ACCESS_KEY
+ - AWS_SESSION_TOKEN
+ - CMAKE_C_COMPILER_LAUNCHER
+ - CMAKE_CUDA_COMPILER_LAUNCHER
+ - CMAKE_CXX_COMPILER_LAUNCHER
+ - CMAKE_GENERATOR
+ - PARALLEL_LEVEL
+ - RAPIDS_ARTIFACTS_DIR
+ - SCCACHE_BUCKET
+ - SCCACHE_IDLE_TIMEOUT
+ - SCCACHE_REGION
+ - SCCACHE_S3_KEY_PREFIX=cuvs-bench-cpu-aarch64 # [aarch64]
+ - SCCACHE_S3_KEY_PREFIX=cuvs-bench-cpu-linux64 # [linux64]
+ - SCCACHE_S3_USE_SSL
+ number: {{ GIT_DESCRIBE_NUMBER }}
+ string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+# build = compilers and build tools, host = libraries needed while building, run = runtime dependencies of the installed package.
+requirements:
+ build:
+ - {{ compiler('c') }}
+ - {{ compiler('cxx') }}
+ - cmake {{ cmake_version }}
+ - ninja
+ - {{ stdlib("c") }}
+
+ host:
+ - benchmark
+ - fmt {{ fmt_version }}
+ - glog {{ glog_version }}
+ - nlohmann_json {{ nlohmann_json_version }}
+ - openblas
+ - python
+ - rapids-build-backend>=0.3.0,<0.4.0.dev0
+ - spdlog {{ spdlog_version }}
+
+ run:
+ - benchmark
+ - glog {{ glog_version }}
+ - h5py {{ h5py_version }}
+ - matplotlib
+ - pandas
+ - pyyaml
+ - python
+about:
+ home: https://rapids.ai/
+ license: Apache-2.0
+ summary: cuVS CPU benchmarks
diff --git a/conda/recipes/libcuvs/conda_build_config.yaml b/conda/recipes/libcuvs/conda_build_config.yaml
index e165f7ed9..b8c49943e 100644
--- a/conda/recipes/libcuvs/conda_build_config.yaml
+++ b/conda/recipes/libcuvs/conda_build_config.yaml
@@ -22,6 +22,9 @@ cmake_version:
h5py_version:
- ">=3.8.0"
+nccl_version:  # NCCL version constraint; substituted into the nccl host requirements added to libcuvs/meta.yaml
+ - ">=2.19"
+
# The CTK libraries below are missing from the conda-forge::cudatoolkit package
# for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages
# and the "*_run_*" version specifiers correspond to `11.x` packages.
diff --git a/conda/recipes/libcuvs/meta.yaml b/conda/recipes/libcuvs/meta.yaml
index e154ccf41..46552c397 100644
--- a/conda/recipes/libcuvs/meta.yaml
+++ b/conda/recipes/libcuvs/meta.yaml
@@ -65,6 +65,7 @@ outputs:
host:
- librmm ={{ minor_version }}
- libraft-headers ={{ minor_version }}
+ - nccl {{ nccl_version }}
- cuda-version ={{ cuda_version }}
{% if cuda_major == "11" %}
- cuda-profiler-api {{ cuda11_cuda_profiler_api_host_version }}
@@ -131,6 +132,7 @@ outputs:
host:
- librmm ={{ minor_version }}
- libraft-headers ={{ minor_version }}
+ - nccl {{ nccl_version }}
- cuda-version ={{ cuda_version }}
{% if cuda_major == "11" %}
- cuda-profiler-api {{ cuda11_cuda_profiler_api_host_version }}
@@ -159,6 +161,7 @@ outputs:
- libcusolver
- libcusparse
{% endif %}
+ - libraft-headers ={{ minor_version }}
about:
home: https://rapids.ai/
license: Apache-2.0
@@ -197,6 +200,7 @@ outputs:
host:
- librmm ={{ minor_version }}
- libraft-headers ={{ minor_version }}
+ - nccl {{ nccl_version }}
- {{ pin_subpackage('libcuvs', exact=True) }}
- cuda-version ={{ cuda_version }}
- openblas # required by some CPU algos in benchmarks
@@ -268,6 +272,7 @@ outputs:
host:
- librmm ={{ minor_version }}
- libraft-headers ={{ minor_version }}
+ - nccl {{ nccl_version }}
- {{ pin_subpackage('libcuvs', exact=True) }}
- cuda-version ={{ cuda_version }}
{% if cuda_major == "11" %}
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index b05030cef..3e98a247e 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -57,6 +57,7 @@ option(BUILD_C_LIBRARY "Build cuVS C API library" OFF)
option(BUILD_C_TESTS "Build cuVS C API tests" OFF)
option(BUILD_CUVS_BENCH "Build cuVS ann benchmarks" OFF)
option(BUILD_CAGRA_HNSWLIB "Build CAGRA+hnswlib interface" ON)
+option(BUILD_MG_ALGOS "Build with multi-GPU support" ON) # gates the CUVS_MG_ALGOS source list defined further down in this file
option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF)
option(CUDA_ENABLE_LINEINFO
"Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF
@@ -85,6 +86,12 @@ if(NOT BUILD_C_LIBRARY)
set(BUILD_C_TESTS OFF)
endif()
+if(NOT BUILD_SHARED_LIBS) # when shared libraries are not being built, force the three options below OFF (NOTE(review): rationale not visible here)
+ set(BUILD_TESTS OFF)
+ set(BUILD_C_LIBRARY OFF)
+ set(BUILD_CAGRA_HNSWLIB OFF)
+endif()
+
# Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to
# have different values for the `Threads::Threads` target. Setting this flag ensures
# `Threads::Threads` is the same value across all builds so that cache hits occur
@@ -175,6 +182,7 @@ rapids_cpm_init()
if(NOT BUILD_CPU_ONLY)
include(cmake/thirdparty/get_raft.cmake)
+ include(cmake/thirdparty/get_cutlass.cmake) # moved inside this guard: cutlass is no longer included for CPU-only builds
endif()
if(BUILD_C_LIBRARY)
@@ -186,8 +194,6 @@ if(BUILD_TESTS OR BUILD_C_TESTS)
rapids_cpm_gtest(BUILD_STATIC)
endif()
-include(cmake/thirdparty/get_cutlass.cmake)
-
if(BUILD_CUVS_BENCH)
include(${rapids-cmake-dir}/cpm/gbench.cmake)
rapids_cpm_gbench(BUILD_STATIC)
@@ -199,460 +205,568 @@ endif()
# ##################################################################################################
# * cuvs ---------------------------------------------------------------------
-add_library(
- cuvs-cagra-search STATIC
- src/neighbors/cagra_search_float.cu
- src/neighbors/cagra_search_half.cu
- src/neighbors/cagra_search_int8.cu
- src/neighbors/cagra_search_uint8.cu
- src/neighbors/detail/cagra/compute_distance.cu
- src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_half_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_half_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_half_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_int8_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_int8_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_int8_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_uint8_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_uint8_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_uint8_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_float_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_float_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_float_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_half_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_half_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_half_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_int8_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_int8_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_int8_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_uint8_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_uint8_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_uint8_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/search_multi_cta_float_uint32.cu
- src/neighbors/detail/cagra/search_multi_cta_half_uint32.cu
- src/neighbors/detail/cagra/search_multi_cta_int8_uint32.cu
- src/neighbors/detail/cagra/search_multi_cta_uint8_uint32.cu
- src/neighbors/detail/cagra/search_single_cta_float_uint32.cu
- src/neighbors/detail/cagra/search_single_cta_half_uint32.cu
- src/neighbors/detail/cagra/search_single_cta_int8_uint32.cu
- src/neighbors/detail/cagra/search_single_cta_uint8_uint32.cu
-)
+if(BUILD_SHARED_LIBS)
+ add_library(
+ cuvs-cagra-search STATIC
+ src/neighbors/cagra_search_float.cu
+ src/neighbors/cagra_search_half.cu
+ src/neighbors/cagra_search_int8.cu
+ src/neighbors/cagra_search_uint8.cu
+ src/neighbors/detail/cagra/compute_distance.cu
+ src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim128_t8.cu
+ src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim256_t16.cu
+ src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_float_uint32_dim512_t32.cu
+ src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_half_uint32_dim128_t8.cu
+ src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_half_uint32_dim256_t16.cu
+ src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_half_uint32_dim512_t32.cu
+ src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_int8_uint32_dim128_t8.cu
+ src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_int8_uint32_dim256_t16.cu
+ src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_int8_uint32_dim512_t32.cu
+ src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_uint8_uint32_dim128_t8.cu
+ src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_uint8_uint32_dim256_t16.cu
+ src/neighbors/detail/cagra/compute_distance_standard_InnerProduct_uint8_uint32_dim512_t32.cu
+ src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_float_uint32_dim128_t8.cu
+ src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_float_uint32_dim256_t16.cu
+ src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_float_uint32_dim512_t32.cu
+ src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_half_uint32_dim128_t8.cu
+ src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_half_uint32_dim256_t16.cu
+ src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_half_uint32_dim512_t32.cu
+ src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_int8_uint32_dim128_t8.cu
+ src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_int8_uint32_dim256_t16.cu
+ src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_int8_uint32_dim512_t32.cu
+ src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_uint8_uint32_dim128_t8.cu
+ src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_uint8_uint32_dim256_t16.cu
+ src/neighbors/detail/cagra/compute_distance_standard_L2Expanded_uint8_uint32_dim512_t32.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_2subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim128_t8_8pq_4subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_2subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim256_t16_8pq_4subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_2subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_float_uint32_dim512_t32_8pq_4subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_2subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim128_t8_8pq_4subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_2subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim256_t16_8pq_4subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_2subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_half_uint32_dim512_t32_8pq_4subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_2subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim128_t8_8pq_4subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_2subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim256_t16_8pq_4subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_2subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_int8_uint32_dim512_t32_8pq_4subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_2subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim128_t8_8pq_4subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_2subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim256_t16_8pq_4subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_2subd_half.cu
+ src/neighbors/detail/cagra/compute_distance_vpq_L2Expanded_uint8_uint32_dim512_t32_8pq_4subd_half.cu
+ src/neighbors/detail/cagra/search_multi_cta_float_uint32.cu
+ src/neighbors/detail/cagra/search_multi_cta_half_uint32.cu
+ src/neighbors/detail/cagra/search_multi_cta_int8_uint32.cu
+ src/neighbors/detail/cagra/search_multi_cta_uint8_uint32.cu
+ src/neighbors/detail/cagra/search_single_cta_float_uint32.cu
+ src/neighbors/detail/cagra/search_single_cta_half_uint32.cu
+ src/neighbors/detail/cagra/search_single_cta_int8_uint32.cu
+ src/neighbors/detail/cagra/search_single_cta_uint8_uint32.cu
+ )
-file(GLOB_RECURSE compute_distance_sources "src/neighbors/detail/cagra/compute_distance_*.cu")
-set_source_files_properties(${compute_distance_sources} PROPERTIES COMPILE_FLAGS -maxrregcount=64)
-
-set_target_properties(
- cuvs-cagra-search
- PROPERTIES BUILD_RPATH "\$ORIGIN"
- CXX_STANDARD 17
- CXX_STANDARD_REQUIRED ON
- CUDA_STANDARD 17
- CUDA_STANDARD_REQUIRED ON
- CUDA_SEPARABLE_COMPILATION ON
- INTERFACE_POSITION_INDEPENDENT_CODE ON
- POSITION_INDEPENDENT_CODE ON
-)
-target_link_libraries(cuvs-cagra-search PRIVATE raft::raft)
-target_include_directories(
- cuvs-cagra-search PRIVATE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
-)
-target_compile_options(
- cuvs-cagra-search PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${CUVS_CXX_FLAGS}>"
- "$<$<COMPILE_LANGUAGE:CUDA>:${CUVS_CUDA_FLAGS}>"
-)
+ file(GLOB_RECURSE compute_distance_sources "src/neighbors/detail/cagra/compute_distance_*.cu")
+ set_source_files_properties(${compute_distance_sources} PROPERTIES COMPILE_FLAGS -maxrregcount=64)
-add_library(
- cuvs SHARED
- src/cluster/kmeans_balanced_fit_float.cu
- src/cluster/kmeans_fit_mg_float.cu
- src/cluster/kmeans_fit_mg_double.cu
- src/cluster/kmeans_fit_double.cu
- src/cluster/kmeans_fit_float.cu
- src/cluster/kmeans_auto_find_k_float.cu
- src/cluster/kmeans_fit_predict_double.cu
- src/cluster/kmeans_fit_predict_float.cu
- src/cluster/kmeans_predict_double.cu
- src/cluster/kmeans_predict_float.cu
- src/cluster/kmeans_balanced_fit_float.cu
- src/cluster/kmeans_balanced_fit_predict_float.cu
- src/cluster/kmeans_balanced_predict_float.cu
- src/cluster/kmeans_balanced_fit_int8.cu
- src/cluster/kmeans_balanced_fit_predict_int8.cu
- src/cluster/kmeans_balanced_predict_int8.cu
- src/cluster/kmeans_transform_double.cu
- src/cluster/kmeans_transform_float.cu
- src/cluster/single_linkage_float.cu
- src/core/bitset.cu
- src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_canberra_half_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_correlation_half_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_cosine_half_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_half_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_half_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_half_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_kl_divergence_half_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l1_half_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l2_expanded_half_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_half_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l_inf_half_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_half_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_russel_rao_half_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_rbf.cu
- src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int64_t.cu
- src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int64_t.cu
- src/distance/detail/fused_distance_nn.cu
- src/distance/distance.cu
- src/distance/pairwise_distance.cu
- src/neighbors/brute_force.cu
- src/neighbors/cagra_build_float.cu
- src/neighbors/cagra_build_half.cu
- src/neighbors/cagra_build_int8.cu
- src/neighbors/cagra_build_uint8.cu
- src/neighbors/cagra_extend_float.cu
- src/neighbors/cagra_extend_int8.cu
- src/neighbors/cagra_extend_uint8.cu
- src/neighbors/cagra_optimize.cu
- src/neighbors/cagra_serialize_float.cu
- src/neighbors/cagra_serialize_half.cu
- src/neighbors/cagra_serialize_int8.cu
- src/neighbors/cagra_serialize_uint8.cu
- src/neighbors/detail/cagra/cagra_build.cpp
- src/neighbors/detail/cagra/topk_for_cagra/topk.cu
- $<$<BOOL:${BUILD_CAGRA_HNSWLIB}>:src/neighbors/hnsw.cpp>
- src/neighbors/ivf_flat_index.cpp
- src/neighbors/ivf_flat/ivf_flat_build_extend_float_int64_t.cu
- src/neighbors/ivf_flat/ivf_flat_build_extend_int8_t_int64_t.cu
- src/neighbors/ivf_flat/ivf_flat_build_extend_uint8_t_int64_t.cu
- src/neighbors/ivf_flat/ivf_flat_helpers.cu
- src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu
- src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu
- src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu
- src/neighbors/ivf_flat/ivf_flat_serialize_float_int64_t.cu
- src/neighbors/ivf_flat/ivf_flat_serialize_int8_t_int64_t.cu
- src/neighbors/ivf_flat/ivf_flat_serialize_uint8_t_int64_t.cu
- src/neighbors/ivf_pq_index.cpp
- src/neighbors/ivf_pq/ivf_pq_build_common.cu
- src/neighbors/ivf_pq/ivf_pq_serialize.cu
- src/neighbors/ivf_pq/ivf_pq_deserialize.cu
- src/neighbors/ivf_pq/detail/ivf_pq_build_extend_float_int64_t.cu
- src/neighbors/ivf_pq/detail/ivf_pq_build_extend_half_int64_t.cu
- src/neighbors/ivf_pq/detail/ivf_pq_build_extend_int8_t_int64_t.cu
- src/neighbors/ivf_pq/detail/ivf_pq_build_extend_uint8_t_int64_t.cu
- src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_false.cu
- src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_true.cu
- src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_half.cu
- src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_half.cu
- src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_float.cu
- src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_false.cu
- src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_true.cu
- src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu
- src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu
- src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_half_bitset64.cu
- src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_half_bitset64.cu
- src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_float_bitset64.cu
- src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu
- src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu
- src/neighbors/ivf_pq/detail/ivf_pq_search_float_int64_t.cu
- src/neighbors/ivf_pq/detail/ivf_pq_search_half_int64_t.cu
- src/neighbors/ivf_pq/detail/ivf_pq_search_int8_t_int64_t.cu
- src/neighbors/ivf_pq/detail/ivf_pq_search_uint8_t_int64_t.cu
- src/neighbors/nn_descent.cu
- src/neighbors/nn_descent_float.cu
- src/neighbors/nn_descent_half.cu
- src/neighbors/nn_descent_int8.cu
- src/neighbors/nn_descent_uint8.cu
- src/neighbors/reachability.cu
- src/neighbors/refine/detail/refine_device_float_float.cu
- src/neighbors/refine/detail/refine_device_half_float.cu
- src/neighbors/refine/detail/refine_device_int8_t_float.cu
- src/neighbors/refine/detail/refine_device_uint8_t_float.cu
- src/neighbors/refine/detail/refine_host_float_float.cpp
- src/neighbors/refine/detail/refine_host_half_float.cpp
- src/neighbors/refine/detail/refine_host_int8_t_float.cpp
- src/neighbors/refine/detail/refine_host_uint8_t_float.cpp
- src/neighbors/sample_filter.cu
- src/neighbors/vamana_build_float.cu
- src/neighbors/vamana_build_uint8.cu
- src/neighbors/vamana_build_int8.cu
- src/neighbors/vamana_serialize_float.cu
- src/neighbors/vamana_serialize_uint8.cu
- src/neighbors/vamana_serialize_int8.cu
- src/selection/select_k_float_int64_t.cu
- src/selection/select_k_float_int32_t.cu
- src/selection/select_k_float_uint32_t.cu
- src/selection/select_k_half_uint32_t.cu
- src/stats/silhouette_score.cu
- src/stats/trustworthiness_score.cu
-)
+ set_target_properties(
+ cuvs-cagra-search
+ PROPERTIES BUILD_RPATH "\$ORIGIN"
+ CXX_STANDARD 17
+ CXX_STANDARD_REQUIRED ON
+ CUDA_STANDARD 17
+ CUDA_STANDARD_REQUIRED ON
+ CUDA_SEPARABLE_COMPILATION ON
+ INTERFACE_POSITION_INDEPENDENT_CODE ON
+ POSITION_INDEPENDENT_CODE ON
+ )
+ target_link_libraries(cuvs-cagra-search PRIVATE raft::raft)
+ target_include_directories(
+ cuvs-cagra-search PRIVATE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
+ )
+ target_compile_options(
+ cuvs-cagra-search PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${CUVS_CXX_FLAGS}>"
+ "$<$<COMPILE_LANGUAGE:CUDA>:${CUVS_CUDA_FLAGS}>"
+ )
-target_compile_options(
- cuvs INTERFACE $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:--expt-extended-lambda
- --expt-relaxed-constexpr>
-)
+ if(BUILD_MG_ALGOS)
+ set(CUVS_MG_ALGOS
+ src/neighbors/mg/mg_flat_float_int64_t.cu
+ src/neighbors/mg/mg_flat_int8_t_int64_t.cu
+ src/neighbors/mg/mg_flat_uint8_t_int64_t.cu
+ src/neighbors/mg/mg_pq_float_int64_t.cu
+ src/neighbors/mg/mg_pq_half_int64_t.cu
+ src/neighbors/mg/mg_pq_int8_t_int64_t.cu
+ src/neighbors/mg/mg_pq_uint8_t_int64_t.cu
+ src/neighbors/mg/mg_cagra_float_uint32_t.cu
+ src/neighbors/mg/mg_cagra_half_uint32_t.cu
+ src/neighbors/mg/mg_cagra_int8_t_uint32_t.cu
+ src/neighbors/mg/mg_cagra_uint8_t_uint32_t.cu
+ src/neighbors/mg/omp_checks.cpp
+ src/neighbors/mg/nccl_comm.cpp
+ )
+ endif()
-add_library(cuvs::cuvs ALIAS cuvs)
+ add_library(
+ cuvs_objs OBJECT
+ src/cluster/kmeans_balanced_fit_float.cu
+ src/cluster/kmeans_fit_mg_float.cu
+ src/cluster/kmeans_fit_mg_double.cu
+ src/cluster/kmeans_fit_double.cu
+ src/cluster/kmeans_fit_float.cu
+ src/cluster/kmeans_auto_find_k_float.cu
+ src/cluster/kmeans_fit_predict_double.cu
+ src/cluster/kmeans_fit_predict_float.cu
+ src/cluster/kmeans_predict_double.cu
+ src/cluster/kmeans_predict_float.cu
+ src/cluster/kmeans_balanced_fit_float.cu
+ src/cluster/kmeans_balanced_fit_predict_float.cu
+ src/cluster/kmeans_balanced_predict_float.cu
+ src/cluster/kmeans_balanced_fit_int8.cu
+ src/cluster/kmeans_balanced_fit_predict_int8.cu
+ src/cluster/kmeans_balanced_predict_int8.cu
+ src/cluster/kmeans_transform_double.cu
+ src/cluster/kmeans_transform_float.cu
+ src/cluster/single_linkage_float.cu
+ src/core/bitset.cu
+ src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_canberra_half_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_correlation_half_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_cosine_half_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_half_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_half_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_half_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_kl_divergence_half_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_l1_half_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_l2_expanded_half_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_half_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_l_inf_half_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_half_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_russel_rao_half_float_float_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu
+ src/distance/detail/pairwise_matrix/dispatch_rbf.cu
+ src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int64_t.cu
+ src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int64_t.cu
+ src/distance/detail/fused_distance_nn.cu
+ src/distance/distance.cu
+ src/distance/pairwise_distance.cu
+ src/neighbors/brute_force.cu
+ src/neighbors/cagra_build_float.cu
+ src/neighbors/cagra_build_half.cu
+ src/neighbors/cagra_build_int8.cu
+ src/neighbors/cagra_build_uint8.cu
+ src/neighbors/cagra_extend_float.cu
+ src/neighbors/cagra_extend_int8.cu
+ src/neighbors/cagra_extend_uint8.cu
+ src/neighbors/cagra_optimize.cu
+ src/neighbors/cagra_serialize_float.cu
+ src/neighbors/cagra_serialize_half.cu
+ src/neighbors/cagra_serialize_int8.cu
+ src/neighbors/cagra_serialize_uint8.cu
+ src/neighbors/iface/iface_cagra_float_uint32_t.cu
+ src/neighbors/iface/iface_cagra_half_uint32_t.cu
+ src/neighbors/iface/iface_cagra_int8_t_uint32_t.cu
+ src/neighbors/iface/iface_cagra_uint8_t_uint32_t.cu
+ src/neighbors/iface/iface_flat_float_int64_t.cu
+ src/neighbors/iface/iface_flat_int8_t_int64_t.cu
+ src/neighbors/iface/iface_flat_uint8_t_int64_t.cu
+ src/neighbors/iface/iface_pq_float_int64_t.cu
+ src/neighbors/iface/iface_pq_half_int64_t.cu
+ src/neighbors/iface/iface_pq_int8_t_int64_t.cu
+ src/neighbors/iface/iface_pq_uint8_t_int64_t.cu
+ src/neighbors/detail/cagra/cagra_build.cpp
+ src/neighbors/detail/cagra/topk_for_cagra/topk.cu
+ $<$:src/neighbors/hnsw.cpp>
+ src/neighbors/ivf_flat_index.cpp
+ src/neighbors/ivf_flat/ivf_flat_build_extend_float_int64_t.cu
+ src/neighbors/ivf_flat/ivf_flat_build_extend_int8_t_int64_t.cu
+ src/neighbors/ivf_flat/ivf_flat_build_extend_uint8_t_int64_t.cu
+ src/neighbors/ivf_flat/ivf_flat_helpers.cu
+ src/neighbors/ivf_flat/ivf_flat_search_float_int64_t.cu
+ src/neighbors/ivf_flat/ivf_flat_search_int8_t_int64_t.cu
+ src/neighbors/ivf_flat/ivf_flat_search_uint8_t_int64_t.cu
+ src/neighbors/ivf_flat/ivf_flat_serialize_float_int64_t.cu
+ src/neighbors/ivf_flat/ivf_flat_serialize_int8_t_int64_t.cu
+ src/neighbors/ivf_flat/ivf_flat_serialize_uint8_t_int64_t.cu
+ src/neighbors/ivf_pq_index.cpp
+ src/neighbors/ivf_pq/ivf_pq_build_common.cu
+ src/neighbors/ivf_pq/ivf_pq_serialize.cu
+ src/neighbors/ivf_pq/ivf_pq_deserialize.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_build_extend_float_int64_t.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_build_extend_half_int64_t.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_build_extend_int8_t_int64_t.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_build_extend_uint8_t_int64_t.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_false.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_true.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_half.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_half.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_float.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_false.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_true.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_false_bitset64.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_fp8_true_bitset64.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_half_half_bitset64.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_half_bitset64.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_float_bitset64.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_false_bitset64.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_compute_similarity_float_fp8_true_bitset64.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_search_float_int64_t.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_search_half_int64_t.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_search_int8_t_int64_t.cu
+ src/neighbors/ivf_pq/detail/ivf_pq_search_uint8_t_int64_t.cu
+ src/neighbors/nn_descent.cu
+ src/neighbors/nn_descent_float.cu
+ src/neighbors/nn_descent_half.cu
+ src/neighbors/nn_descent_int8.cu
+ src/neighbors/nn_descent_uint8.cu
+ src/neighbors/reachability.cu
+ src/neighbors/refine/detail/refine_device_float_float.cu
+ src/neighbors/refine/detail/refine_device_half_float.cu
+ src/neighbors/refine/detail/refine_device_int8_t_float.cu
+ src/neighbors/refine/detail/refine_device_uint8_t_float.cu
+ src/neighbors/refine/detail/refine_host_float_float.cpp
+ src/neighbors/refine/detail/refine_host_half_float.cpp
+ src/neighbors/refine/detail/refine_host_int8_t_float.cpp
+ src/neighbors/refine/detail/refine_host_uint8_t_float.cpp
+ src/neighbors/sample_filter.cu
+ src/neighbors/vamana_build_float.cu
+ src/neighbors/vamana_build_uint8.cu
+ src/neighbors/vamana_build_int8.cu
+ src/neighbors/vamana_serialize_float.cu
+ src/neighbors/vamana_serialize_uint8.cu
+ src/neighbors/vamana_serialize_int8.cu
+ src/selection/select_k_float_int64_t.cu
+ src/selection/select_k_float_int32_t.cu
+ src/selection/select_k_float_uint32_t.cu
+ src/selection/select_k_half_uint32_t.cu
+ src/stats/silhouette_score.cu
+ src/stats/trustworthiness_score.cu
+ ${CUVS_MG_ALGOS}
+ )
-target_include_directories(
- cuvs PUBLIC "$"
- "$"
-)
+ set_target_properties(
+ cuvs_objs
+ PROPERTIES CXX_STANDARD 17
+ CXX_STANDARD_REQUIRED ON
+ CUDA_STANDARD 17
+ CUDA_STANDARD_REQUIRED ON
+ POSITION_INDEPENDENT_CODE ON
+ )
+ target_compile_options(
+ cuvs_objs PRIVATE "$<$:${CUVS_CXX_FLAGS}>"
+ "$<$:${CUVS_CUDA_FLAGS}>"
+ )
+ target_link_libraries(
+ cuvs_objs PUBLIC raft::raft rmm::rmm ${CUVS_CTK_MATH_DEPENDENCIES}
+ $
+ )
-rapids_find_package(
- OpenMP REQUIRED
- BUILD_EXPORT_SET cuvs-exports
- INSTALL_EXPORT_SET cuvs-exports
-)
+ add_library(cuvs SHARED $)
+ add_library(cuvs_static STATIC $)
-if(NOT BUILD_CPU_ONLY)
+ target_compile_options(
+ cuvs INTERFACE $<$:--expt-extended-lambda
+ --expt-relaxed-constexpr>
+ )
- set(CUVS_CUSOLVER_DEPENDENCY CUDA::cusolver${_ctk_static_suffix})
- set(CUVS_CUBLAS_DEPENDENCY CUDA::cublas${_ctk_static_suffix})
- set(CUVS_CURAND_DEPENDENCY CUDA::curand${_ctk_static_suffix})
- set(CUVS_CUSPARSE_DEPENDENCY CUDA::cusparse${_ctk_static_suffix})
+ add_library(cuvs::cuvs ALIAS cuvs)
+ add_library(cuvs::cuvs_static ALIAS cuvs_static)
- set(CUVS_CTK_MATH_DEPENDENCIES ${CUVS_CUBLAS_DEPENDENCY} ${CUVS_CUSOLVER_DEPENDENCY}
- ${CUVS_CUSPARSE_DEPENDENCY} ${CUVS_CURAND_DEPENDENCY}
+ set_target_properties(
+ cuvs_static
+ PROPERTIES BUILD_RPATH "\$ORIGIN"
+ INSTALL_RPATH "\$ORIGIN"
+ CXX_STANDARD 17
+ CXX_STANDARD_REQUIRED ON
+ POSITION_INDEPENDENT_CODE ON
+ INTERFACE_POSITION_INDEPENDENT_CODE ON
+ EXPORT_NAME cuvs_static
)
- # Keep cuVS as lightweight as possible. Only CUDA libs and rmm should be used in global target.
- target_link_libraries(
- cuvs
- PUBLIC rmm::rmm raft::raft ${CUVS_CTK_MATH_DEPENDENCIES}
- PRIVATE nvidia::cutlass::cutlass $ cuvs-cagra-search
+ target_compile_options(cuvs_static PRIVATE "$<$:${CUVS_CXX_FLAGS}>")
+
+ target_include_directories(
+ cuvs_objs
+ PUBLIC "$"
+ "$"
+ INTERFACE "$"
)
-endif()
-if(BUILD_CAGRA_HNSWLIB)
- target_link_libraries(cuvs PRIVATE hnswlib::hnswlib)
- target_compile_definitions(cuvs PUBLIC CUVS_BUILD_CAGRA_HNSWLIB)
-endif()
+ target_include_directories(
+ cuvs_static
+ PUBLIC "$"
+ INTERFACE "$"
+ )
-# Endian detection
-include(TestBigEndian)
-test_big_endian(BIG_ENDIAN)
-if(BIG_ENDIAN)
- target_compile_definitions(cuvs PRIVATE CUVS_SYSTEM_LITTLE_ENDIAN=0)
-else()
- target_compile_definitions(cuvs PRIVATE CUVS_SYSTEM_LITTLE_ENDIAN=1)
-endif()
+ # ensure CUDA symbols aren't relocated to the middle of the debug build binaries
+ target_link_options(cuvs_static PRIVATE $)
+
+ target_include_directories(
+ cuvs_static PUBLIC "$"
+ "$"
+ )
+
+ target_include_directories(
+ cuvs PUBLIC "$"
+ "$"
+ )
-file(
- WRITE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld"
- [=[
+ rapids_find_package(
+ OpenMP REQUIRED
+ BUILD_EXPORT_SET cuvs-exports
+ INSTALL_EXPORT_SET cuvs-exports
+ )
+
+ if(NOT BUILD_CPU_ONLY)
+
+ set(CUVS_CUSOLVER_DEPENDENCY CUDA::cusolver${_ctk_static_suffix})
+ set(CUVS_CUBLAS_DEPENDENCY CUDA::cublas${_ctk_static_suffix})
+ set(CUVS_CURAND_DEPENDENCY CUDA::curand${_ctk_static_suffix})
+ set(CUVS_CUSPARSE_DEPENDENCY CUDA::cusparse${_ctk_static_suffix})
+
+ set(CUVS_CTK_MATH_DEPENDENCIES ${CUVS_CUBLAS_DEPENDENCY} ${CUVS_CUSOLVER_DEPENDENCY}
+ ${CUVS_CUSPARSE_DEPENDENCY} ${CUVS_CURAND_DEPENDENCY}
+ )
+
+ if(BUILD_MG_ALGOS)
+ set(CUVS_COMMS_DEPENDENCY nccl)
+ endif()
+
+ # Keep cuVS as lightweight as possible. Only CUDA libs and rmm should be used in global target.
+ target_link_libraries(
+ cuvs
+ PUBLIC rmm::rmm raft::raft ${CUVS_CTK_MATH_DEPENDENCIES}
+ PRIVATE nvidia::cutlass::cutlass $
+ cuvs-cagra-search ${CUVS_COMMS_DEPENDENCY}
+ )
+
+ target_link_libraries(
+ cuvs_static
+ PUBLIC rmm::rmm raft::raft ${CUVS_CTK_MATH_DEPENDENCIES}
+ PRIVATE nvidia::cutlass::cutlass $
+ )
+ endif()
+
+ if(BUILD_MG_ALGOS)
+ target_compile_definitions(cuvs PUBLIC CUVS_BUILD_MG_ALGOS)
+ target_compile_definitions(cuvs_objs PUBLIC CUVS_BUILD_MG_ALGOS)
+ endif()
+
+ if(BUILD_CAGRA_HNSWLIB)
+ target_link_libraries(cuvs_objs PRIVATE hnswlib::hnswlib)
+ target_compile_definitions(cuvs_objs PUBLIC CUVS_BUILD_CAGRA_HNSWLIB)
+ endif()
+
+ # Endian detection; defined on cuvs_objs like the HNSW/MG definitions so it reaches the compiled sources (NOTE(review): confirm cuvs_objs is the OBJECT library)
+ include(TestBigEndian)
+ test_big_endian(BIG_ENDIAN)
+ if(BIG_ENDIAN)
+ target_compile_definitions(cuvs_objs PRIVATE CUVS_SYSTEM_LITTLE_ENDIAN=0)
+ else()
+ target_compile_definitions(cuvs_objs PRIVATE CUVS_SYSTEM_LITTLE_ENDIAN=1)
+ endif()
+
+ file(
+ WRITE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld"
+ [=[
SECTIONS
{
.nvFatBinSegment : { *(.nvFatBinSegment) }
.nv_fatbin : { *(.nv_fatbin) }
}
]=]
-)
-
-# ##################################################################################################
-# * NVTX support in cuvs -----------------------------------------------------
-
-if(CUVS_NVTX)
- # This enables NVTX within the project with no option to disable it downstream.
- target_link_libraries(cuvs PUBLIC CUDA::nvtx3)
- target_compile_definitions(cuvs PUBLIC NVTX_ENABLED)
-else()
- # Allow enable NVTX downstream if not set here. This creates a new option at build/install time,
- # which is set by default to OFF, but can be enabled in the dependent project.
- get_property(
- nvtx_option_help_string
- CACHE CUVS_NVTX
- PROPERTY HELPSTRING
)
- string(
- CONCAT
- nvtx_export_string
- "option(CUVS_NVTX \""
- ${nvtx_option_help_string}
- "\" OFF)"
- [=[
+
+ # ################################################################################################
+ # * NVTX support in cuvs -----------------------------------------------------
+
+ if(CUVS_NVTX)
+ # This enables NVTX within the project with no option to disable it downstream.
+ target_link_libraries(cuvs PUBLIC CUDA::nvtx3)
+ target_compile_definitions(cuvs PUBLIC NVTX_ENABLED)
+ else()
+ # Allow enable NVTX downstream if not set here. This creates a new option at build/install time,
+ # which is set by default to OFF, but can be enabled in the dependent project.
+ get_property(
+ nvtx_option_help_string
+ CACHE CUVS_NVTX
+ PROPERTY HELPSTRING
+ )
+ string(
+ CONCAT
+ nvtx_export_string
+ "option(CUVS_NVTX \""
+ ${nvtx_option_help_string}
+ "\" OFF)"
+ [=[
target_link_libraries(cuvs::cuvs INTERFACE $<$:CUDA::nvtx3>)
target_compile_definitions(cuvs::cuvs INTERFACE $<$:NVTX_ENABLED>)
]=]
- )
-endif()
-
-set_target_properties(
- cuvs
- PROPERTIES BUILD_RPATH "\$ORIGIN"
- INSTALL_RPATH "\$ORIGIN"
- CXX_STANDARD 17
- CXX_STANDARD_REQUIRED ON
- CUDA_STANDARD 17
- CUDA_STANDARD_REQUIRED ON
- INTERFACE_POSITION_INDEPENDENT_CODE ON
- POSITION_INDEPENDENT_CODE ON
-)
-
-target_compile_options(
- cuvs PRIVATE "$<$:${CUVS_CXX_FLAGS}>"
- "$<$:${CUVS_CUDA_FLAGS}>"
-)
-# ensure CUDA symbols aren't relocated to the middle of the debug build binaries
-target_link_options(cuvs PRIVATE $)
-
-# ##################################################################################################
-# * cuvs_c -------------------------------------------------------------------------------
-if(BUILD_C_LIBRARY)
- add_library(
- cuvs_c SHARED
- src/core/c_api.cpp
- src/neighbors/brute_force_c.cpp
- src/neighbors/ivf_flat_c.cpp
- src/neighbors/ivf_pq_c.cpp
- src/neighbors/cagra_c.cpp
- src/neighbors/hnsw_c.cpp
- src/neighbors/refine/refine_c.cpp
- src/distance/pairwise_distance_c.cpp
- )
-
- add_library(cuvs::c_api ALIAS cuvs_c)
+ )
+ endif()
set_target_properties(
- cuvs_c
+ cuvs
PROPERTIES BUILD_RPATH "\$ORIGIN"
INSTALL_RPATH "\$ORIGIN"
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
- POSITION_INDEPENDENT_CODE ON
+ CUDA_STANDARD 17
+ CUDA_STANDARD_REQUIRED ON
INTERFACE_POSITION_INDEPENDENT_CODE ON
- EXPORT_NAME c_api
+ POSITION_INDEPENDENT_CODE ON
)
- target_compile_options(cuvs_c PRIVATE "$<$:${CUVS_CXX_FLAGS}>")
-
- target_include_directories(
- cuvs_c
- PUBLIC "$"
- INTERFACE "$"
+ target_compile_options(
+ cuvs PRIVATE "$<$:${CUVS_CXX_FLAGS}>"
+ "$<$:${CUVS_CUDA_FLAGS}>"
)
+ # ensure CUDA symbols aren't relocated to the middle of the debug build binaries
+ target_link_options(cuvs PRIVATE $)
+
+ # ################################################################################################
+ # * cuvs_c -------------------------------------------------------------------------------
+ if(BUILD_C_LIBRARY)
+ add_library(
+ cuvs_c SHARED
+ src/core/c_api.cpp
+ src/neighbors/brute_force_c.cpp
+ src/neighbors/ivf_flat_c.cpp
+ src/neighbors/ivf_pq_c.cpp
+ src/neighbors/cagra_c.cpp
+ $<$:src/neighbors/hnsw_c.cpp>
+ src/neighbors/refine/refine_c.cpp
+ src/distance/pairwise_distance_c.cpp
+ )
- target_link_libraries(
- cuvs_c
- PUBLIC cuvs::cuvs ${CUVS_CTK_MATH_DEPENDENCIES}
- PRIVATE raft::raft
- )
+ if(BUILD_CAGRA_HNSWLIB)
+ target_link_libraries(cuvs_c PRIVATE hnswlib::hnswlib)
+ target_compile_definitions(cuvs_c PUBLIC CUVS_BUILD_CAGRA_HNSWLIB)
+ endif()
+
+ add_library(cuvs::c_api ALIAS cuvs_c)
+
+ set_target_properties(
+ cuvs_c
+ PROPERTIES BUILD_RPATH "\$ORIGIN"
+ INSTALL_RPATH "\$ORIGIN"
+ CXX_STANDARD 17
+ CXX_STANDARD_REQUIRED ON
+ POSITION_INDEPENDENT_CODE ON
+ INTERFACE_POSITION_INDEPENDENT_CODE ON
+ EXPORT_NAME c_api
+ )
- # ensure CUDA symbols aren't relocated to the middle of the debug build binaries
- target_link_options(cuvs_c PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
-endif()
+ target_compile_options(cuvs_c PRIVATE "$<$:${CUVS_CXX_FLAGS}>")
-# ##################################################################################################
-# * install targets-----------------------------------------------------------
-rapids_cmake_install_lib_dir(lib_dir)
-include(GNUInstallDirs)
-include(CPack)
-
-install(
- TARGETS cuvs
- DESTINATION ${lib_dir}
- COMPONENT cuvs
- EXPORT cuvs-exports
-)
+ target_include_directories(
+ cuvs_c
+ PUBLIC "$"
+ INTERFACE "$"
+ )
-install(
- DIRECTORY include/cuvs
- COMPONENT cuvs
- DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
-)
+ target_link_libraries(
+ cuvs_c
+ PUBLIC cuvs::cuvs ${CUVS_CTK_MATH_DEPENDENCIES}
+ PRIVATE raft::raft
+ )
+
+ # ensure CUDA symbols aren't relocated to the middle of the debug build binaries
+ target_link_options(cuvs_c PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
+ endif()
+
+ # ################################################################################################
+ # * install targets-----------------------------------------------------------
+ rapids_cmake_install_lib_dir(lib_dir)
+ include(GNUInstallDirs)
+ include(CPack)
-if(BUILD_C_LIBRARY)
install(
- TARGETS cuvs_c
+ TARGETS cuvs cuvs_static
DESTINATION ${lib_dir}
- COMPONENT c_api
- EXPORT cuvs-c-exports
+ COMPONENT cuvs
+ EXPORT cuvs-exports
)
-endif()
-install(
- FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cuvs/version_config.hpp
- COMPONENT cuvs
- DESTINATION include/cuvs
-)
+ install(
+ DIRECTORY include/cuvs
+ COMPONENT cuvs
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+ )
-if(TARGET cuvs_c)
- list(APPEND cuvs_components c_api)
- list(APPEND cuvs_export_sets cuvs-c-exports)
- set(CUVS_C_TARGET cuvs_c)
-endif()
+ if(BUILD_C_LIBRARY)
+ install(
+ TARGETS cuvs_c
+ DESTINATION ${lib_dir}
+ COMPONENT c_api
+ EXPORT cuvs-c-exports
+ )
+ endif()
-# Use `rapids_export` for 22.04 as it will have COMPONENT support
-rapids_export(
- INSTALL cuvs
- EXPORT_SET cuvs-exports
- COMPONENTS ${cuvs_components}
- COMPONENTS_EXPORT_SET ${cuvs_export_sets}
- GLOBAL_TARGETS cuvs ${CUVS_C_TARGET}
- NAMESPACE cuvs::
-)
+ install(
+ FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cuvs/version_config.hpp
+ COMPONENT cuvs
+ DESTINATION include/cuvs
+ )
-# ##################################################################################################
-# * build export -------------------------------------------------------------
-rapids_export(
- BUILD cuvs
- EXPORT_SET cuvs-exports
- COMPONENTS ${cuvs_components}
- COMPONENTS_EXPORT_SET ${cuvs_export_sets}
- GLOBAL_TARGETS cuvs ${CUVS_C_TARGET}
- NAMESPACE cuvs::
-)
+ if(TARGET cuvs_c)
+ list(APPEND cuvs_components c_api)
+ list(APPEND cuvs_export_sets cuvs-c-exports)
+ set(CUVS_C_TARGET cuvs_c)
+ endif()
+
+ # Use `rapids_export` for 22.04 as it will have COMPONENT support
+ rapids_export(
+ INSTALL cuvs
+ EXPORT_SET cuvs-exports
+ COMPONENTS ${cuvs_components}
+ COMPONENTS_EXPORT_SET ${cuvs_export_sets}
+ GLOBAL_TARGETS cuvs ${CUVS_C_TARGET}
+ NAMESPACE cuvs::
+ )
+
+ # ################################################################################################
+ # * build export -------------------------------------------------------------
+ rapids_export(
+ BUILD cuvs
+ EXPORT_SET cuvs-exports
+ COMPONENTS ${cuvs_components}
+ COMPONENTS_EXPORT_SET ${cuvs_export_sets}
+ GLOBAL_TARGETS cuvs ${CUVS_C_TARGET}
+ NAMESPACE cuvs::
+ )
+endif()
# ##################################################################################################
# * build test executable ----------------------------------------------------
diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt
index 8cbf8c8b3..c36e70ace 100644
--- a/cpp/bench/ann/CMakeLists.txt
+++ b/cpp/bench/ann/CMakeLists.txt
@@ -32,6 +32,7 @@ option(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE "Include cuVS brute force knn in benc
option(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB "Include cuVS CAGRA with HNSW search in benchmark" ON)
option(CUVS_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" OFF)
+option(CUVS_ANN_BENCH_USE_CUVS_MG "Include cuVS ann mg algorithm in benchmark" ${BUILD_MG_ALGOS})
option(CUVS_ANN_BENCH_SINGLE_EXE
"Make a single executable with benchmark as shared library modules" OFF
)
@@ -55,6 +56,7 @@ if(BUILD_CPU_ONLY)
set(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB OFF)
set(CUVS_ANN_BENCH_USE_GGNN OFF)
set(CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE OFF)
+ set(CUVS_ANN_BENCH_USE_CUVS_MG OFF)
else()
set(CUVS_FAISS_ENABLE_GPU ON)
endif()
@@ -66,6 +68,7 @@ if(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ
OR CUVS_ANN_BENCH_USE_CUVS_CAGRA
OR CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB
OR CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE
+ OR CUVS_ANN_BENCH_USE_CUVS_MG
)
set(CUVS_ANN_BENCH_USE_CUVS ON)
endif()
@@ -245,6 +248,21 @@ if(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB)
)
endif()
+if(CUVS_ANN_BENCH_USE_CUVS_MG)
+ ConfigureAnnBench(
+ NAME
+ CUVS_MG
+ PATH
+ src/cuvs/cuvs_benchmark.cu
+ $<$:src/cuvs/cuvs_mg_ivf_flat.cu>
+ $<$:src/cuvs/cuvs_mg_ivf_pq.cu>
+ $<$:src/cuvs/cuvs_mg_cagra.cu>
+ LINKS
+ cuvs
+ nccl
+ )
+endif()
+
message("CUVS_FAISS_TARGETS: ${CUVS_FAISS_TARGETS}")
message("CUDAToolkit_LIBRARY_DIR: ${CUDAToolkit_LIBRARY_DIR}")
if(CUVS_ANN_BENCH_USE_FAISS_CPU_FLAT)
diff --git a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h
index 22f0cab6f..57d5b1910 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h
@@ -45,7 +45,18 @@ extern template class cuvs::bench::cuvs_cagra;
extern template class cuvs::bench::cuvs_cagra;
#endif
-#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT
+#ifdef CUVS_ANN_BENCH_USE_CUVS_MG
+#include "cuvs_ivf_flat_wrapper.h"
+#include "cuvs_mg_ivf_flat_wrapper.h"
+
+#include "cuvs_ivf_pq_wrapper.h"
+#include "cuvs_mg_ivf_pq_wrapper.h"
+
+#include "cuvs_cagra_wrapper.h"
+#include "cuvs_mg_cagra_wrapper.h"
+#endif
+
+#if defined(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT) || defined(CUVS_ANN_BENCH_USE_CUVS_MG)
template
void parse_build_param(const nlohmann::json& conf,
typename cuvs::bench::cuvs_ivf_flat::build_param& param)
@@ -64,7 +75,7 @@ void parse_search_param(const nlohmann::json& conf,
#endif
#if defined(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) || \
- defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB)
+ defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) || defined(CUVS_ANN_BENCH_USE_CUVS_MG)
template
void parse_build_param(const nlohmann::json& conf,
typename cuvs::bench::cuvs_ivf_pq::build_param& param)
@@ -130,7 +141,8 @@ void parse_search_param(const nlohmann::json& conf,
}
#endif
-#if defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB)
+#if defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) || \
+ defined(CUVS_ANN_BENCH_USE_CUVS_MG)
template
void parse_build_param(const nlohmann::json& conf, cuvs::neighbors::nn_descent::index_params& param)
{
diff --git a/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu b/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu
index a956ab139..893097236 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu
+++ b/cpp/bench/ann/src/cuvs/cuvs_benchmark.cu
@@ -29,6 +29,43 @@
namespace cuvs::bench {
+#ifdef CUVS_ANN_BENCH_USE_CUVS_MG
+void add_distribution_mode(cuvs::neighbors::mg::distribution_mode* dist_mode,
+                           const nlohmann::json& conf)
+{  // Fills *dist_mode from the optional "distribution_mode" key ("replicated" or "sharded").
+  if (conf.contains("distribution_mode")) {
+    const std::string mode_name = conf.at("distribution_mode");
+    if (mode_name == "replicated") {
+      *dist_mode = cuvs::neighbors::mg::distribution_mode::REPLICATED;
+    } else if (mode_name == "sharded") {
+      *dist_mode = cuvs::neighbors::mg::distribution_mode::SHARDED;
+    } else {
+      throw std::runtime_error("invalid value for distribution_mode");  // unrecognized string
+    }
+  } else {
+    // Key absent: fall back to the sharded mode.
+    *dist_mode = cuvs::neighbors::mg::distribution_mode::SHARDED;
+  }
+}
+
+void add_merge_mode(cuvs::neighbors::mg::sharded_merge_mode* merge_mode, const nlohmann::json& conf)
+{  // Fills *merge_mode from the optional "merge_mode" key ("tree_merge" or "merge_on_root_rank").
+  if (conf.contains("merge_mode")) {
+    const std::string mode_name = conf.at("merge_mode");
+    if (mode_name == "tree_merge") {
+      *merge_mode = cuvs::neighbors::mg::sharded_merge_mode::TREE_MERGE;
+    } else if (mode_name == "merge_on_root_rank") {
+      *merge_mode = cuvs::neighbors::mg::sharded_merge_mode::MERGE_ON_ROOT_RANK;
+    } else {
+      throw std::runtime_error("invalid value for merge_mode");  // unrecognized string
+    }
+  } else {
+    // Key absent: fall back to tree merge.
+    *merge_mode = cuvs::neighbors::mg::sharded_merge_mode::TREE_MERGE;
+  }
+}
+#endif
+
template
auto create_algo(const std::string& algo_name,
const std::string& distance,
@@ -71,6 +108,32 @@ auto create_algo(const std::string& algo_name,
parse_build_param(conf, param);
a = std::make_unique>(metric, dim, param);
}
+#endif
+#ifdef CUVS_ANN_BENCH_USE_CUVS_MG  // MG algorithm variants, selected by algo_name
+  if constexpr (std::is_same_v || std::is_same_v ||
+                std::is_same_v) {
+    if (algo_name == "raft_mg_ivf_flat" || algo_name == "cuvs_mg_ivf_flat") {
+      typename cuvs::bench::cuvs_mg_ivf_flat::build_param param;
+      parse_build_param(conf, param);
+      add_distribution_mode(&param.mode, conf);  // writes param.mode from conf
+      a = std::make_unique>(metric, dim, param);
+    }
+  }
+
+  if (algo_name == "raft_mg_ivf_pq" || algo_name == "cuvs_mg_ivf_pq") {
+    typename cuvs::bench::cuvs_mg_ivf_pq::build_param param;
+    parse_build_param(conf, param);
+    add_distribution_mode(&param.mode, conf);  // writes param.mode from conf
+    a = std::make_unique>(metric, dim, param);
+  }
+
+  if (algo_name == "raft_mg_cagra" || algo_name == "cuvs_mg_cagra") {
+    typename cuvs::bench::cuvs_mg_cagra::build_param param;
+    parse_build_param(conf, param);
+    add_distribution_mode(&param.mode, conf);  // writes param.mode from conf
+    a = std::make_unique>(metric, dim, param);
+  }
+
#endif
if (!a) { throw std::runtime_error("invalid algo: '" + algo_name + "'"); }
@@ -113,6 +176,32 @@ auto create_search_param(const std::string& algo_name, const nlohmann::json& con
return param;
}
#endif
+#ifdef CUVS_ANN_BENCH_USE_CUVS_MG  // MG algorithm variants, selected by algo_name
+  if constexpr (std::is_same_v || std::is_same_v ||
+                std::is_same_v) {
+    if (algo_name == "raft_mg_ivf_flat" || algo_name == "cuvs_mg_ivf_flat") {
+      auto param =
+        std::make_unique::search_param>();
+      parse_search_param(conf, *param);
+      add_merge_mode(&param->merge_mode, conf);  // writes param->merge_mode from conf
+      return param;
+    }
+  }
+
+  if (algo_name == "raft_mg_ivf_pq" || algo_name == "cuvs_mg_ivf_pq") {
+    auto param = std::make_unique::search_param>();
+    parse_search_param(conf, *param);
+    add_merge_mode(&param->merge_mode, conf);  // writes param->merge_mode from conf
+    return param;
+  }
+
+  if (algo_name == "raft_mg_cagra" || algo_name == "cuvs_mg_cagra") {
+    auto param = std::make_unique::search_param>();
+    parse_search_param(conf, *param);
+    add_merge_mode(&param->merge_mode, conf);  // writes param->merge_mode from conf
+    return param;
+  }
+#endif
// else
throw std::runtime_error("invalid algo: '" + algo_name + "'");
diff --git a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
index ff854f890..b2ba35eee 100644
--- a/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
+++ b/cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
@@ -72,6 +72,23 @@ class cuvs_cagra : public algo, public algo_gpu {
std::optional ivf_pq_refine_rate = std::nullopt;
std::optional ivf_pq_build_params = std::nullopt;
std::optional ivf_pq_search_params = std::nullopt;
+
+  void prepare_build_params(const raft::extent_2d& dataset_extents)
+  {  // Fills cagra_params.graph_build_params according to `algo`; any other algo leaves it untouched.
+    if (algo == CagraBuildAlgo::kIvfPq) {
+      auto pq_params = cuvs::neighbors::cagra::graph_build_params::ivf_pq_params(
+        dataset_extents, cagra_params.metric);
+      if (ivf_pq_build_params) { pq_params.build_params = *ivf_pq_build_params; }  // override only when set
+      if (ivf_pq_search_params) { pq_params.search_params = *ivf_pq_search_params; }  // override only when set
+      if (ivf_pq_refine_rate) { pq_params.refinement_rate = *ivf_pq_refine_rate; }  // override only when set
+      cagra_params.graph_build_params = pq_params;
+    } else if (algo == CagraBuildAlgo::kNnDescent) {
+      auto nn_params = cuvs::neighbors::cagra::graph_build_params::nn_descent_params(
+        cagra_params.intermediate_graph_degree);
+      if (nn_descent_params) { nn_params = *nn_descent_params; }  // explicit params replace the derived ones
+      cagra_params.graph_build_params = nn_params;
+    }
+  }
};
cuvs_cagra(Metric metric, int dim, const build_param& param, int concurrent_searches = 1)
@@ -168,28 +185,9 @@ template
void cuvs_cagra::build(const T* dataset, size_t nrow)
{
auto dataset_extents = raft::make_extents(nrow, dimension_);
+ index_params_.prepare_build_params(dataset_extents);
auto& params = index_params_.cagra_params;
-
- if (index_params_.algo == CagraBuildAlgo::kIvfPq) {
- auto pq_params =
- cuvs::neighbors::cagra::graph_build_params::ivf_pq_params(dataset_extents, params.metric);
- if (index_params_.ivf_pq_build_params) {
- pq_params.build_params = *index_params_.ivf_pq_build_params;
- }
- if (index_params_.ivf_pq_search_params) {
- pq_params.search_params = *index_params_.ivf_pq_search_params;
- }
- if (index_params_.ivf_pq_refine_rate) {
- pq_params.refinement_rate = *index_params_.ivf_pq_refine_rate;
- }
- params.graph_build_params = pq_params;
- } else if (index_params_.algo == CagraBuildAlgo::kNnDescent) {
- auto nn_params = cuvs::neighbors::cagra::graph_build_params::nn_descent_params(
- params.intermediate_graph_degree);
- if (index_params_.nn_descent_params) { nn_params = *index_params_.nn_descent_params; }
- params.graph_build_params = nn_params;
- }
auto dataset_view_host =
raft::make_mdspan(dataset, dataset_extents);
auto dataset_view_device =
diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_cagra.cu b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra.cu
new file mode 100644
index 000000000..801caa85f
--- /dev/null
+++ b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra.cu
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "cuvs_mg_cagra_wrapper.h"
+
+namespace cuvs::bench {
+template class cuvs_mg_cagra;
+template class cuvs_mg_cagra;
+template class cuvs_mg_cagra;
+template class cuvs_mg_cagra;
+} // namespace cuvs::bench
diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h
new file mode 100644
index 000000000..50c1ff4db
--- /dev/null
+++ b/cpp/bench/ann/src/cuvs/cuvs_mg_cagra_wrapper.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "cuvs_ann_bench_utils.h"
+#include "cuvs_cagra_wrapper.h"
+#include
+#include
+
+namespace cuvs::bench {
+using namespace cuvs::neighbors;
+
+enum class AllocatorType;
+enum class CagraBuildAlgo;
+
+template
+class cuvs_mg_cagra : public algo, public algo_gpu {  // benchmark wrapper over cuvs::neighbors::mg
+ public:
+  using search_param_base = typename algo::search_param;
+  using algo::dim_;
+  // Build parameters of cuvs_cagra extended with the distribution mode.
+  struct build_param : public cuvs::bench::cuvs_cagra::build_param {
+    cuvs::neighbors::mg::distribution_mode mode;
+  };
+  // Search parameters of cuvs_cagra extended with the sharded merge mode.
+  struct search_param : public cuvs::bench::cuvs_cagra::search_param {
+    cuvs::neighbors::mg::sharded_merge_mode merge_mode;
+  };
+  // Stores a copy of `param`, applies the metric to it and requests the NCCL clique up front.
+  cuvs_mg_cagra(Metric metric, int dim, const build_param& param, int concurrent_searches = 1)
+    : algo(metric, dim), index_params_(param)
+  {
+    index_params_.cagra_params.metric = parse_metric_type(metric);
+    // ivf_pq_build_params is optional: only write through it when it holds a value.
+    if (auto& pq = index_params_.ivf_pq_build_params) { pq->metric = parse_metric_type(metric); }
+    // init nccl clique outside as to not affect benchmark
+    const raft::comms::nccl_clique& clique = raft::resource::get_nccl_clique(handle_);
+  }
+
+  void build(const T* dataset, size_t nrow) final;
+
+  void set_search_param(const search_param_base& param) override;
+
+  void set_search_dataset(const T* dataset, size_t nrow) override;
+
+  void search(const T* queries,
+              int batch_size,
+              int k,
+              algo_base::index_type* neighbors,
+              float* distances) const override;
+  void search_base(const T* queries,
+                   int batch_size,
+                   int k,
+                   algo_base::index_type* neighbors,
+                   float* distances) const;
+
+  [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override
+  {
+    auto stream = raft::resource::get_cuda_stream(handle_);
+    return stream;
+  }
+
+  // Both the dataset and the queries are requested in host memory.
+  [[nodiscard]] auto get_preference() const -> algo_property override
+  {
+    algo_property property;
+    property.dataset_memory_type = MemoryType::kHost;
+    property.query_memory_type = MemoryType::kHost;
+    return property;
+  }
+  void save(const std::string& file) const override;
+  void load(const std::string&) override;
+  void save_to_hnswlib(const std::string& file) const;  // NOTE(review): declared only, no definition in this header
+  std::unique_ptr> copy() override;
+
+ private:
+  raft::device_resources handle_;
+  float refine_ratio_{1.0f};  // overwritten by set_search_param(); 1.0 means no refinement is requested
+  build_param index_params_;
+  cuvs::neighbors::mg::search_params search_params_;
+  std::shared_ptr, T, IdxT>>
+    index_;
+};
+
+template
+void cuvs_mg_cagra::build(const T* dataset, size_t nrow)
+{
+ auto dataset_extents = raft::make_extents(nrow, dim_);
+ index_params_.prepare_build_params(dataset_extents);
+ cuvs::neighbors::mg::index_params build_params = index_params_.cagra_params;
+ build_params.mode = index_params_.mode;
+
+ auto dataset_view =
+ raft::make_host_matrix_view(dataset, nrow, dim_);
+ auto idx = cuvs::neighbors::mg::build(handle_, build_params, dataset_view);
+ index_ =
+ std::make_shared, T, IdxT>>(
+ std::move(idx));
+}
+
+inline auto allocator_to_string(AllocatorType mem_type) -> std::string;
+
+template
+void cuvs_mg_cagra::set_search_param(const search_param_base& param)
+{
+  const auto& sp = dynamic_cast(param);  // bind by reference: no copy of the parameter object
+  // Assign the CAGRA search parameters by addressing search_params_ as a cagra::search_params.
+  cagra::search_params* search_params_ptr_ = static_cast(&search_params_);
+  *search_params_ptr_ = sp.p;
+  search_params_.merge_mode = sp.merge_mode;
+  refine_ratio_ = sp.refine_ratio;
+}
+
+template
+void cuvs_mg_cagra::set_search_dataset(const T* dataset, size_t nrow)
+{
+}
+
+template
+void cuvs_mg_cagra::save(const std::string& file) const
+{
+ cuvs::neighbors::mg::serialize(handle_, *index_, file);
+}
+
+template
+void cuvs_mg_cagra::load(const std::string& file)
+{
+ index_ =
+ std::make_shared, T, IdxT>>(
+ std::move(cuvs::neighbors::mg::deserialize_cagra(handle_, file)));
+}
+
+template
+std::unique_ptr> cuvs_mg_cagra::copy()
+{
+ return std::make_unique>(*this); // use copy constructor
+}
+
+template
+void cuvs_mg_cagra::search_base(
+ const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const
+{
+ static_assert(std::is_integral_v);
+ static_assert(std::is_integral_v);
+
+ auto queries_view =
+ raft::make_host_matrix_view(queries, batch_size, dim_);
+ auto neighbors_view =
+ raft::make_host_matrix_view((IdxT*)neighbors, batch_size, k);
+ auto distances_view =
+ raft::make_host_matrix_view(distances, batch_size, k);
+
+ cuvs::neighbors::mg::search(
+ handle_, *index_, search_params_, queries_view, neighbors_view, distances_view);
+}
+
+template
+void cuvs_mg_cagra::search(
+  const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const
+{  // Runs search_base() unless refine_ratio_ asks for more than k candidates, which is rejected.
+  auto k0 = static_cast(refine_ratio_ * k);  // candidate count implied by the refine ratio
+  const bool disable_refinement = k0 <= static_cast(k);
+
+  if (disable_refinement) {
+    search_base(queries, batch_size, k, neighbors, distances);
+  } else {
+    throw std::runtime_error("refinement not supported");  // k0 > k: refinement would be required
+  }
+}
+} // namespace cuvs::bench
diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat.cu b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat.cu
new file mode 100644
index 000000000..20cdc41e3
--- /dev/null
+++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat.cu
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "cuvs_mg_ivf_flat_wrapper.h"
+
+namespace cuvs::bench {  // explicit instantiations of the cuvs_mg_ivf_flat class template
+template class cuvs_mg_ivf_flat;
+// template class cuvs_mg_ivf_flat;  (this instantiation is left disabled)
+template class cuvs_mg_ivf_flat;
+template class cuvs_mg_ivf_flat;
+} // namespace cuvs::bench
diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h
new file mode 100644
index 000000000..54a0d2fac
--- /dev/null
+++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_flat_wrapper.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "cuvs_ann_bench_utils.h"
+#include "cuvs_ivf_flat_wrapper.h"
+#include
+#include
+
+namespace cuvs::bench {
+using namespace cuvs::neighbors;  // NOTE(review): using-directive in a header; every includer sees cuvs::neighbors names inside cuvs::bench
+
+template
+class cuvs_mg_ivf_flat : public algo, public algo_gpu {  // benchmark wrapper around the cuvs::neighbors::mg IVF-Flat index
+ public:
+ using search_param_base = typename algo::search_param;
+ using algo::dim_;
+
+ using build_param = cuvs::neighbors::mg::index_params;
+
+ struct search_param : public cuvs::bench::cuvs_ivf_flat::search_param {  // cuvs_ivf_flat's search_param plus a merge_mode field
+ cuvs::neighbors::mg::sharded_merge_mode merge_mode;
+ };
+
+ cuvs_mg_ivf_flat(Metric metric, int dim, const build_param& param)
+ : algo(metric, dim), index_params_(param)
+ {
+ index_params_.metric = parse_metric_type(metric);  // the `metric` argument overrides the metric copied from `param`
+ // Fetch the NCCL clique here, in the constructor, so that doing so does not affect the benchmark.
+ [[maybe_unused]] const auto& clique = raft::resource::get_nccl_clique(handle_);
+ }
+
+ void build(const T* dataset, size_t nrow) final;
+ void set_search_param(const search_param_base& param) override;
+ void search(const T* queries,
+ int batch_size,
+ int k,
+ algo_base::index_type* neighbors,
+ float* distances) const override;
+
+ [[nodiscard]] auto get_preference() const -> algo_property override
+ {  // Requests host memory for both the dataset and the queries.
+ algo_property property;
+ property.dataset_memory_type = MemoryType::kHost;
+ property.query_memory_type = MemoryType::kHost;
+ return property;
+ }
+
+ [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override
+ {  // Returns the CUDA stream obtained from handle_.
+ auto stream = raft::resource::get_cuda_stream(handle_);
+ return stream;
+ }
+
+ [[nodiscard]] auto uses_stream() const noexcept -> bool override { return false; }
+
+ void save(const std::string& file) const override;
+ void load(const std::string&) override;
+ std::unique_ptr> copy() override;
+
+ private:
+ raft::device_resources handle_;  // resources handle passed to every mg:: call
+ build_param index_params_;  // copy of the constructor's `param`, with metric overwritten
+ cuvs::neighbors::mg::search_params search_params_;  // filled in by set_search_param()
+ std::shared_ptr, T, IdxT>>
+ index_;  // set by build() or load(); dereferenced unchecked by save() and search()
+};
+
+template
+void cuvs_mg_ivf_flat::build(const T* dataset, size_t nrow)
+{  // Calls mg::build on a host view of `dataset` and stores the result in index_.
+ auto dataset_view =
+ raft::make_host_matrix_view(dataset, IdxT(nrow), IdxT(dim_));  // extents: nrow x dim_
+ auto idx = cuvs::neighbors::mg::build(handle_, index_params_, dataset_view);
+ index_ = std::make_shared<
+ cuvs::neighbors::mg::index, T, IdxT>>(std::move(idx));  // idx is moved into the shared_ptr-owned object
+}
+
+template
+void cuvs_mg_ivf_flat::set_search_param(const search_param_base& param)
+{
+ const auto& sp = dynamic_cast(param);  // bound by reference: the parameter struct is not copied
+ // Overwrite only the ivf_flat::search_params part of search_params_, then set merge_mode.
+ ivf_flat::search_params* search_params_ptr_ =
+ static_cast(&search_params_);
+ *search_params_ptr_ = sp.ivf_flat_params;
+ search_params_.merge_mode = sp.merge_mode;
+ assert(search_params_.n_probes <= index_params_.n_lists);  // checked only when NDEBUG is not defined
+}
+
+template
+void cuvs_mg_ivf_flat::save(const std::string& file) const
+{  // Hands *index_ and `file` to mg::serialize; index_ is dereferenced without a null check.
+ cuvs::neighbors::mg::serialize(handle_, *index_, file);
+}
+
+template
+void cuvs_mg_ivf_flat::load(const std::string& file)
+{  // Rebuild index_ from the deserialized temporary; a call result needs no std::move.
+ index_ = std::make_shared<
+ cuvs::neighbors::mg::index, T, IdxT>>(
+ cuvs::neighbors::mg::deserialize_flat(handle_, file));
+}
+
+template
+std::unique_ptr> cuvs_mg_ivf_flat::copy()
+{
+ return std::make_unique>(*this); // copy-constructs a new wrapper; the copied shared_ptr index_ refers to the same index object
+}
+
+template
+void cuvs_mg_ivf_flat::search(
+ const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const
+{  // Wraps the raw buffers in host matrix views and forwards them to mg::search.
+ auto queries_view = raft::make_host_matrix_view(
+ queries, IdxT(batch_size), IdxT(dim_));  // extents: batch_size x dim_
+ auto neighbors_view = raft::make_host_matrix_view(
+ (IdxT*)neighbors, IdxT(batch_size), IdxT(k));  // NOTE(review): C-style cast index_type* -> IdxT*; presumably same-size types -- confirm
+ auto distances_view = raft::make_host_matrix_view(
+ distances, IdxT(batch_size), IdxT(k));  // extents: batch_size x k
+
+ cuvs::neighbors::mg::search(
+ handle_, *index_, search_params_, queries_view, neighbors_view, distances_view);
+}
+
+} // namespace cuvs::bench
\ No newline at end of file
diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq.cu b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq.cu
new file mode 100644
index 000000000..a74bab6f5
--- /dev/null
+++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq.cu
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "cuvs_mg_ivf_pq_wrapper.h"
+
+namespace cuvs::bench {  // explicit instantiations of the cuvs_mg_ivf_pq class template
+template class cuvs_mg_ivf_pq;
+template class cuvs_mg_ivf_pq;
+template class cuvs_mg_ivf_pq;
+template class cuvs_mg_ivf_pq;
+} // namespace cuvs::bench
diff --git a/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h
new file mode 100644
index 000000000..84aea7d4a
--- /dev/null
+++ b/cpp/bench/ann/src/cuvs/cuvs_mg_ivf_pq_wrapper.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "cuvs_ann_bench_utils.h"
+#include "cuvs_ivf_pq_wrapper.h"
+#include
+#include
+
+namespace cuvs::bench {
+using namespace cuvs::neighbors;  // NOTE(review): using-directive in a header; every includer sees cuvs::neighbors names inside cuvs::bench
+
+template
+class cuvs_mg_ivf_pq : public algo, public algo_gpu {  // benchmark wrapper around the cuvs::neighbors::mg IVF-PQ index
+ public:
+ using search_param_base = typename algo::search_param;
+ using algo::dim_;
+
+ using build_param = cuvs::neighbors::mg::index_params;
+
+ struct search_param : public cuvs::bench::cuvs_ivf_pq::search_param {  // cuvs_ivf_pq's search_param plus a merge_mode field
+ cuvs::neighbors::mg::sharded_merge_mode merge_mode;
+ };
+
+ cuvs_mg_ivf_pq(Metric metric, int dim, const build_param& param)
+ : algo(metric, dim), index_params_(param)
+ {
+ index_params_.metric = parse_metric_type(metric);  // the `metric` argument overrides the metric copied from `param`
+ // Fetch the NCCL clique here, in the constructor, so that doing so does not affect the benchmark.
+ [[maybe_unused]] const auto& clique = raft::resource::get_nccl_clique(handle_);
+ }
+
+ void build(const T* dataset, size_t nrow) final;
+ void set_search_param(const search_param_base& param) override;
+ void search(const T* queries,
+ int batch_size,
+ int k,
+ algo_base::index_type* neighbors,
+ float* distances) const override;
+
+ [[nodiscard]] auto get_preference() const -> algo_property override
+ {  // Requests host memory for both the dataset and the queries.
+ algo_property property;
+ property.dataset_memory_type = MemoryType::kHost;
+ property.query_memory_type = MemoryType::kHost;
+ return property;
+ }
+
+ [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override
+ {  // Returns the CUDA stream obtained from handle_.
+ auto stream = raft::resource::get_cuda_stream(handle_);
+ return stream;
+ }
+
+ [[nodiscard]] auto uses_stream() const noexcept -> bool override { return false; }
+
+ void save(const std::string& file) const override;
+ void load(const std::string&) override;
+ std::unique_ptr> copy() override;
+
+ private:
+ raft::device_resources handle_;  // resources handle passed to every mg:: call
+ build_param index_params_;  // copy of the constructor's `param`, with metric overwritten
+ cuvs::neighbors::mg::search_params search_params_;  // filled in by set_search_param()
+ std::shared_ptr, T, IdxT>> index_;  // set by build() or load(); dereferenced unchecked by save() and search()
+};
+
+template
+void cuvs_mg_ivf_pq::build(const T* dataset, size_t nrow)
+{  // Calls mg::build on a host view of `dataset` and stores the result in index_.
+ auto dataset_view =
+ raft::make_host_matrix_view(dataset, IdxT(nrow), IdxT(dim_));  // extents: nrow x dim_
+ auto idx = cuvs::neighbors::mg::build(handle_, index_params_, dataset_view);
+ index_ =
+ std::make_shared, T, IdxT>>(
+ std::move(idx));  // idx is moved into the shared_ptr-owned object
+}
+
+template
+void cuvs_mg_ivf_pq::set_search_param(const search_param_base& param)
+{
+ const auto& sp = dynamic_cast(param);  // bound by reference: the parameter struct is not copied
+ // Overwrite only the ivf_pq::search_params part of search_params_, then set merge_mode.
+ ivf_pq::search_params* search_params_ptr_ = static_cast(&search_params_);
+ *search_params_ptr_ = sp.pq_param;
+ search_params_.merge_mode = sp.merge_mode;
+ assert(search_params_.n_probes <= index_params_.n_lists);  // checked only when NDEBUG is not defined
+}
+
+template
+void cuvs_mg_ivf_pq::save(const std::string& file) const
+{  // Hands *index_ and `file` to mg::serialize; index_ is dereferenced without a null check.
+ cuvs::neighbors::mg::serialize(handle_, *index_, file);
+}
+
+template
+void cuvs_mg_ivf_pq::load(const std::string& file)
+{  // Rebuild index_ from the deserialized temporary; a call result needs no std::move.
+ index_ =
+ std::make_shared, T, IdxT>>(
+ cuvs::neighbors::mg::deserialize_pq(handle_, file));
+}
+
+template
+std::unique_ptr> cuvs_mg_ivf_pq::copy()
+{
+ return std::make_unique>(*this); // copy-constructs a new wrapper; the copied shared_ptr index_ refers to the same index object
+}
+
+template
+void cuvs_mg_ivf_pq::search(
+ const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const
+{  // Wraps the raw buffers in host matrix views and forwards them to mg::search.
+ auto queries_view = raft::make_host_matrix_view(
+ queries, IdxT(batch_size), IdxT(dim_));  // extents: batch_size x dim_
+ auto neighbors_view = raft::make_host_matrix_view(
+ (IdxT*)neighbors, IdxT(batch_size), IdxT(k));  // NOTE(review): C-style cast index_type* -> IdxT*; presumably same-size types -- confirm
+ auto distances_view = raft::make_host_matrix_view(
+ distances, IdxT(batch_size), IdxT(k));  // extents: batch_size x k
+
+ cuvs::neighbors::mg::search(
+ handle_, *index_, search_params_, queries_view, neighbors_view, distances_view);
+}
+
+} // namespace cuvs::bench
\ No newline at end of file
diff --git a/cpp/cmake/thirdparty/get_cuvs.cmake b/cpp/cmake/thirdparty/get_cuvs.cmake
new file mode 100644
index 000000000..c21cccbcc
--- /dev/null
+++ b/cpp/cmake/thirdparty/get_cuvs.cmake
@@ -0,0 +1,64 @@
+# =============================================================================
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+
+# Derive the cuVS version and default branch from RAPIDS_VERSION_MAJOR_MINOR (rapids_config.cmake)
+set(CUVS_VERSION "${RAPIDS_VERSION_MAJOR_MINOR}")
+set(CUVS_FORK "rapidsai")  # GitHub account the repository URL is built from
+set(CUVS_PINNED_TAG "branch-${RAPIDS_VERSION_MAJOR_MINOR}")  # default tag: the branch matching CUVS_VERSION
+
+function(find_and_configure_cuvs)  # wraps rapids_cpm_find for the cuvs package
+ set(oneValueArgs VERSION FORK PINNED_TAG ENABLE_NVTX CLONE_ON_PIN BUILD_CPU_ONLY BUILD_SHARED_LIBS)
+ cmake_parse_arguments(PKG "${options}" "${oneValueArgs}"  # options / multiValueArgs are not set in this function
+ "${multiValueArgs}" ${ARGN} )
+
+ if(PKG_CLONE_ON_PIN AND NOT PKG_PINNED_TAG STREQUAL "branch-${CUVS_VERSION}")  # non-default tag requested
+ message(STATUS "cuVS: pinned tag found: ${PKG_PINNED_TAG}. Cloning cuVS locally.")
+ set(CPM_DOWNLOAD_cuvs ON)
+ endif()
+
+ #-----------------------------------------------------
+ # Invoke CPM find_package(). NOTE(review): PKG_ENABLE_NVTX is parsed above but not forwarded below.
+ #-----------------------------------------------------
+ rapids_cpm_find(cuvs ${PKG_VERSION}
+ GLOBAL_TARGETS cuvs::cuvs
+ BUILD_EXPORT_SET cuvs-bench-exports
+ INSTALL_EXPORT_SET cuvs-bench-exports
+ COMPONENTS cuvs
+ CPM_ARGS
+ GIT_REPOSITORY https://github.com/${PKG_FORK}/cuvs.git
+ GIT_TAG ${PKG_PINNED_TAG}
+ SOURCE_SUBDIR cpp
+ OPTIONS
+ "BUILD_SHARED_LIBS ${PKG_BUILD_SHARED_LIBS}"
+ "BUILD_CPU_ONLY ${PKG_BUILD_CPU_ONLY}"
+ "BUILD_TESTS OFF"
+ "BUILD_CAGRA_HNSWLIB OFF"
+ "CUVS_CLONE_ON_PIN ${PKG_CLONE_ON_PIN}"
+ )
+endfunction()
+
+
+# To test a specific commit in CI, change CUVS_FORK / CUVS_PINNED_TAG above.
+# To use a different cuVS locally, set the CMake variable
+# CPM_cuvs_SOURCE=/path/to/local/cuvs
+find_and_configure_cuvs(VERSION ${CUVS_VERSION}.00
+ FORK ${CUVS_FORK}
+ PINNED_TAG ${CUVS_PINNED_TAG}
+ ENABLE_NVTX OFF
+ # When PINNED_TAG above doesn't match the default rapids branch
+ # and CLONE_ON_PIN is set, force a local cuvs clone in the build
+ # directory even if cuvs is already installed.
+ CLONE_ON_PIN ${CUVS_CLONE_ON_PIN}
+ BUILD_CPU_ONLY ${BUILD_CPU_ONLY}
+ BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}
+)
diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile
index 2459d521d..e28572457 100644
--- a/cpp/doxygen/Doxyfile
+++ b/cpp/doxygen/Doxyfile
@@ -2149,7 +2149,7 @@ INCLUDE_FILE_PATTERNS =
# recursively expanded use the := operator instead of the = operator.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-PREDEFINED =
+PREDEFINED = "CUVS_BUILD_MG_ALGOS=1"
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
# tag can be used to specify a list of macro names that should be expanded. The
diff --git a/cpp/include/cuvs/neighbors/cagra.h b/cpp/include/cuvs/neighbors/cagra.h
index 241f5d8b0..14331ebbc 100644
--- a/cpp/include/cuvs/neighbors/cagra.h
+++ b/cpp/include/cuvs/neighbors/cagra.h
@@ -267,6 +267,15 @@ cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t* index);
*/
cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index);
+/**
+ * @brief Get the dimension of a CAGRA index
+ *
+ * @param[in] index CAGRA index to query
+ * @param[out] dim pointer that receives the dimension of the index
+ * @return cuvsError_t
+ */
+cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int* dim);
+
/**
* @}
*/
@@ -338,7 +347,7 @@ cuvsError_t cuvsCagraBuild(cuvsResources_t res,
* with the same type of `queries`, such that `index.dtype.code ==
* queries.dl_tensor.dtype.code` Types for input are:
* 1. `queries`:
- *` a. kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
+ * a. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32`
* b. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8`
* c. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8`
* 2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32`
diff --git a/cpp/include/cuvs/neighbors/common.hpp b/cpp/include/cuvs/neighbors/common.hpp
index 73ce80b41..60b8cc122 100644
--- a/cpp/include/cuvs/neighbors/common.hpp
+++ b/cpp/include/cuvs/neighbors/common.hpp
@@ -19,7 +19,8 @@
#include
#include
#include
-#include
+#include
+#include
#include
#include
#include // get_device_for_address
@@ -636,5 +637,56 @@ enable_if_valid_list_t deserialize_list(const raft::resources& handle,
const typename ListT::spec_type& store_spec,
const typename ListT::spec_type& device_spec);
} // namespace ivf
+} // namespace cuvs::neighbors
+
+namespace cuvs::neighbors {
+using namespace raft;  // NOTE(review): using-directive in a public header; raft names become visible in cuvs::neighbors for every includer
+
+template
+struct iface {  // holds an optional index together with a shared mutex handle
+ iface() : mutex_(std::make_shared()) {}
+
+ IdxT size() const { return index_.value().size(); }  // throws std::bad_optional_access while index_ is empty
+
+ std::optional index_;  // empty until an index is stored in it
+ std::shared_ptr mutex_;  // allocated in the constructor; copies of an iface refer to the same object
+};
+
+template  // declaration only (no body here)
+void build(const raft::device_resources& handle,
+ cuvs::neighbors::iface& interface,  // taken by mutable reference
+ const cuvs::neighbors::index_params* index_params,  // taken by pointer to const
+ raft::mdspan, row_major, Accessor> index_dataset);
+
+template  // declaration only (no body here)
+void extend(
+ const raft::device_resources& handle,
+ cuvs::neighbors::iface& interface,  // taken by mutable reference
+ raft::mdspan, row_major, Accessor1> new_vectors,
+ std::optional, layout_c_contiguous, Accessor2>>
+ new_indices);  // optional: callers may pass std::nullopt
+
+template
+void search(const raft::device_resources& handle,
+ const cuvs::neighbors::iface