diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index fe8e730921..82e56cd95d 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -151,5 +151,5 @@ jobs:
cuda: '["12.5"]'
build_command: |
sccache -z;
- build-all -DBUILD_PRIMS_BENCH=ON -DBUILD_ANN_BENCH=ON --verbose;
+ build-all -DBUILD_PRIMS_BENCH=ON --verbose;
sccache -s;
diff --git a/README.md b/README.md
index 3b63014f88..55aa99d0b2 100755
--- a/README.md
+++ b/README.md
@@ -223,7 +223,7 @@ pairwise_distance(in1, in2, out=output, metric="euclidean")
## Installing
-RAFT's C++ and Python libraries can both be installed through Conda and the Python libraries through Pip.
+RAFT's C++ and Python libraries can both be installed through Conda and the Python libraries through Pip.
### Installing C++ and Python through Conda
diff --git a/build.sh b/build.sh
index 6ccfada555..e5a14051a3 100755
--- a/build.sh
+++ b/build.sh
@@ -67,8 +67,6 @@ BUILD_ALL_GPU_ARCH=0
BUILD_TESTS=OFF
BUILD_TYPE=Release
BUILD_PRIMS_BENCH=OFF
-BUILD_ANN_BENCH=OFF
-BUILD_CPU_ONLY=OFF
COMPILE_LIBRARY=OFF
INSTALL_TARGET=install
BUILD_REPORT_METRICS=""
@@ -200,7 +198,6 @@ if (( ${NUMARGS} != 0 )); then
cacheTool
limitTests
limitBench
- limitAnnBench
buildMetrics
for a in ${ARGS}; do
if ! (echo " ${VALIDARGS} " | grep -q " ${a} "); then
@@ -401,8 +398,6 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has
-DDISABLE_DEPRECATION_WARNINGS=${DISABLE_DEPRECATION_WARNINGS} \
-DBUILD_TESTS=${BUILD_TESTS} \
-DBUILD_PRIMS_BENCH=${BUILD_PRIMS_BENCH} \
- -DBUILD_ANN_BENCH=${BUILD_ANN_BENCH} \
- -DBUILD_CPU_ONLY=${BUILD_CPU_ONLY} \
-DCMAKE_MESSAGE_LOG_LEVEL=${CMAKE_LOG_LEVEL} \
${CACHE_ARGS} \
${EXTRA_CMAKE_ARGS}
diff --git a/ci/build_python.sh b/ci/build_python.sh
index dc303de4f5..7da665075f 100755
--- a/ci/build_python.sh
+++ b/ci/build_python.sh
@@ -41,30 +41,5 @@ rapids-conda-retry mambabuild \
conda/recipes/raft-dask
sccache --show-adv-stats
-sccache --zero-stats
-
-# Build ann-bench for each cuda and python version
-rapids-conda-retry mambabuild \
- --no-test \
- --channel "${CPP_CHANNEL}" \
- --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
- conda/recipes/raft-ann-bench
-
-sccache --show-adv-stats
-
-# Build ann-bench-cpu only in CUDA 11 jobs since it only depends on python
-# version
-RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}"
-if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then
- sccache --zero-stats
-
- rapids-conda-retry mambabuild \
- --no-test \
- --channel "${CPP_CHANNEL}" \
- --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
- conda/recipes/raft-ann-bench-cpu
-
- sccache --show-adv-stats
-fi
rapids-upload-conda-to-s3 python
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 032b88b4aa..a118becb4b 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -81,8 +81,6 @@ sed_runner "/^set(RAFT_VERSION/ s|\".*\"|\"${NEXT_SHORT_TAG}\"|g" docs/source/bu
sed_runner "s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" docs/source/build.md
sed_runner "/rapidsai\/raft/ s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" docs/source/developer_guide.md
-sed_runner "s|:[0-9][0-9].[0-9][0-9]|:${NEXT_SHORT_TAG}|g" docs/source/raft_ann_benchmarks.md
-
sed_runner "s|branch-[0-9][0-9].[0-9][0-9]|branch-${NEXT_SHORT_TAG}|g" README.md
# .devcontainer files
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 303537acaa..4ed9529a36 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -62,7 +62,7 @@ option(DISABLE_OPENMP "Disable OpenMP" OFF)
option(RAFT_NVTX "Enable nvtx markers" OFF)
set(RAFT_COMPILE_LIBRARY_DEFAULT OFF)
-if((BUILD_TESTS OR BUILD_PRIMS_BENCH))
+if(BUILD_TESTS OR BUILD_PRIMS_BENCH)
set(RAFT_COMPILE_LIBRARY_DEFAULT ON)
endif()
option(RAFT_COMPILE_LIBRARY "Enable building raft shared library instantiations"
@@ -81,7 +81,6 @@ include(CMakeDependentOption)
message(VERBOSE "RAFT: Building optional components: ${raft_FIND_COMPONENTS}")
message(VERBOSE "RAFT: Build RAFT unit-tests: ${BUILD_TESTS}")
message(VERBOSE "RAFT: Building raft C++ benchmarks: ${BUILD_PRIMS_BENCH}")
-message(VERBOSE "RAFT: Building ANN benchmarks: ${BUILD_ANN_BENCH}")
message(VERBOSE "RAFT: Enable detection of conda environment for dependencies: ${DETECT_CONDA_ENV}")
message(VERBOSE "RAFT: Disable depreaction warnings " ${DISABLE_DEPRECATION_WARNINGS})
message(VERBOSE "RAFT: Disable OpenMP: ${DISABLE_OPENMP}")
diff --git a/cpp/template/cmake/thirdparty/get_raft.cmake b/cpp/template/cmake/thirdparty/get_raft.cmake
new file mode 100644
index 0000000000..4474fd2875
--- /dev/null
+++ b/cpp/template/cmake/thirdparty/get_raft.cmake
@@ -0,0 +1,67 @@
+# =============================================================================
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+
+# Use RAPIDS_VERSION from cmake/thirdparty/fetch_rapids.cmake
+set(RAFT_VERSION "${RAPIDS_VERSION}")
+set(RAFT_FORK "rapidsai")
+set(RAFT_PINNED_TAG "branch-${RAPIDS_VERSION}")
+
+function(find_and_configure_raft)
+ set(oneValueArgs VERSION FORK PINNED_TAG COMPILE_LIBRARY ENABLE_NVTX ENABLE_MNMG_DEPENDENCIES)
+ cmake_parse_arguments(PKG "${options}" "${oneValueArgs}"
+ "${multiValueArgs}" ${ARGN} )
+
+ set(RAFT_COMPONENTS "")
+ if(PKG_COMPILE_LIBRARY)
+ string(APPEND RAFT_COMPONENTS " compiled")
+ endif()
+
+ if(PKG_ENABLE_MNMG_DEPENDENCIES)
+ string(APPEND RAFT_COMPONENTS " distributed")
+ endif()
+
+ #-----------------------------------------------------
+ # Invoke CPM find_package()
+ #-----------------------------------------------------
+ # Since the RAFT_NVTX option is used by targets generated by
+ # find_package(raft) and when building from source we want to
+ # make `RAFT_NVTX` a cache variable so we get consistent
+ # behavior
+ #
+ set(RAFT_NVTX ${PKG_ENABLE_NVTX} CACHE BOOL "Enable raft nvtx logging" FORCE)
+ rapids_cpm_find(raft ${PKG_VERSION}
+ GLOBAL_TARGETS raft::raft
+ BUILD_EXPORT_SET raft-template-exports
+ INSTALL_EXPORT_SET raft-template-exports
+ COMPONENTS ${RAFT_COMPONENTS}
+ CPM_ARGS
+ GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git
+ GIT_TAG ${PKG_PINNED_TAG}
+ SOURCE_SUBDIR cpp
+ OPTIONS
+ "BUILD_TESTS OFF"
+ "BUILD_PRIMS_BENCH OFF"
+ "RAFT_COMPILE_LIBRARY ${PKG_COMPILE_LIBRARY}"
+ )
+endfunction()
+
+# Change pinned tag here to test a commit in CI
+# To use a different RAFT locally, set the CMake variable
+# CPM_raft_SOURCE=/path/to/local/raft
+find_and_configure_raft(VERSION ${RAFT_VERSION}.00
+ FORK ${RAFT_FORK}
+ PINNED_TAG ${RAFT_PINNED_TAG}
+ COMPILE_LIBRARY ON
+ ENABLE_MNMG_DEPENDENCIES OFF
+ ENABLE_NVTX OFF
+)
diff --git a/docs/source/ann_benchmarks_build.md b/docs/source/ann_benchmarks_build.md
deleted file mode 100644
index 56af8e555c..0000000000
--- a/docs/source/ann_benchmarks_build.md
+++ /dev/null
@@ -1,51 +0,0 @@
-### Dependencies
-
-CUDA 11 and a GPU with Pascal architecture or later are required to run the benchmarks.
-
-Please refer to the [installation docs](https://docs.rapids.ai/api/raft/stable/build.html#cuda-gpu-requirements) for the base requirements to build RAFT.
-
-In addition to the base requirements for building RAFT, additional dependencies needed to build the ANN benchmarks include:
-1. FAISS GPU >= 1.7.1
-2. Google Logging (GLog)
-3. H5Py
-4. HNSWLib
-5. nlohmann_json
-6. GGNN
-
-[rapids-cmake](https://github.com/rapidsai/rapids-cmake) is used to build the ANN benchmarks so the code for dependencies not already supplied in the CUDA toolkit will be downloaded and built automatically.
-
-The easiest (and most reproducible) way to install the dependencies needed to build the ANN benchmarks is to use the conda environment file located in the `conda/environments` directory of the RAFT repository. The following command will use `mamba` (which is preferred over `conda`) to build and activate a new environment for compiling the benchmarks:
-
-```bash
-mamba env create --name raft_ann_benchmarks -f conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
-conda activate raft_ann_benchmarks
-```
-
-The above conda environment will also reduce the compile times as dependencies like FAISS will already be installed and not need to be compiled with `rapids-cmake`.
-
-### Compiling the Benchmarks
-
-After the needed dependencies are satisfied, the easiest way to compile ANN benchmarks is through the `build.sh` script in the root of the RAFT source code repository. The following will build the executables for all the support algorithms:
-```bash
-./build.sh bench-ann
-```
-
-You can limit the algorithms that are built by providing a semicolon-delimited list of executable names (each algorithm is suffixed with `_ANN_BENCH`):
-```bash
-./build.sh bench-ann -n --limit-bench-ann=HNSWLIB_ANN_BENCH;RAFT_IVF_PQ_ANN_BENCH
-```
-
-Available targets to use with `--limit-bench-ann` are:
-- FAISS_GPU_IVF_FLAT_ANN_BENCH
-- FAISS_GPU_IVF_PQ_ANN_BENCH
-- FAISS_CPU_IVF_FLAT_ANN_BENCH
-- FAISS_CPU_IVF_PQ_ANN_BENCH
-- FAISS_GPU_FLAT_ANN_BENCH
-- FAISS_CPU_FLAT_ANN_BENCH
-- GGNN_ANN_BENCH
-- HNSWLIB_ANN_BENCH
-- RAFT_CAGRA_ANN_BENCH
-- RAFT_IVF_PQ_ANN_BENCH
-- RAFT_IVF_FLAT_ANN_BENCH
-
-By default, the `*_ANN_BENCH` executables program infer the dataset's datatype from the filename's extension. For example, an extension of `fbin` uses a `float` datatype, `f16bin` uses a `float16` datatype, extension of `i8bin` uses `int8_t` datatype, and `u8bin` uses `uint8_t` type. Currently, only `float`, `float16`, int8_t`, and `unit8_t` are supported.
\ No newline at end of file
diff --git a/docs/source/ann_benchmarks_dataset.md b/docs/source/ann_benchmarks_dataset.md
deleted file mode 100644
index 26c1559504..0000000000
--- a/docs/source/ann_benchmarks_dataset.md
+++ /dev/null
@@ -1,63 +0,0 @@
-# ANN Benchmarks Datasets
-
-A dataset usually has 4 binary files containing database vectors, query vectors, ground truth neighbors and their corresponding distances. For example, Glove-100 dataset has files `base.fbin` (database vectors), `query.fbin` (query vectors), `groundtruth.neighbors.ibin` (ground truth neighbors), and `groundtruth.distances.fbin` (ground truth distances). The first two files are for index building and searching, while the other two are associated with a particular distance and are used for evaluation.
-
-The file suffixes `.fbin`, `.f16bin`, `.ibin`, `.u8bin`, and `.i8bin` denote that the data type of vectors stored in the file are `float32`, `float16`(a.k.a `half`), `int`, `uint8`, and `int8`, respectively.
-These binary files are little-endian and the format is: the first 8 bytes are `num_vectors` (`uint32_t`) and `num_dimensions` (`uint32_t`), and the following `num_vectors * num_dimensions * sizeof(type)` bytes are vectors stored in row-major order.
-
-Some implementation can take `float16` database and query vectors as inputs and will have better performance. Use `script/fbin_to_f16bin.py` to transform dataset from `float32` to `float16` type.
-
-Commonly used datasets can be downloaded from two websites:
-1. Million-scale datasets can be found at the [Data sets](https://github.com/erikbern/ann-benchmarks#data-sets) section of [`ann-benchmarks`](https://github.com/erikbern/ann-benchmarks).
-
- However, these datasets are in HDF5 format. Use `cpp/bench/ann/scripts/hdf5_to_fbin.py` to transform the format. A few Python packages are required to run it:
- ```bash
- pip3 install numpy h5py
- ```
- The usage of this script is:
- ```bash
- $ cpp/bench/ann/scripts/hdf5_to_fbin.py
- usage: scripts/hdf5_to_fbin.py [-n] .hdf5
- -n: normalize base/query set
- outputs: .base.fbin
- .query.fbin
- .groundtruth.neighbors.ibin
- .groundtruth.distances.fbin
- ```
- So for an input `.hdf5` file, four output binary files will be produced. See previous section for an example of prepossessing GloVe dataset.
-
- Most datasets provided by `ann-benchmarks` use `Angular` or `Euclidean` distance. `Angular` denotes cosine distance. However, computing cosine distance reduces to computing inner product by normalizing vectors beforehand. In practice, we can always do the normalization to decrease computation cost, so it's better to measure the performance of inner product rather than cosine distance. The `-n` option of `hdf5_to_fbin.py` can be used to normalize the dataset.
-
-2. Billion-scale datasets can be found at [`big-ann-benchmarks`](http://big-ann-benchmarks.com). The ground truth file contains both neighbors and distances, thus should be split. A script is provided for this:
- ```bash
- $ cpp/bench/ann/scripts/split_groundtruth.pl
- usage: script/split_groundtruth.pl input output_prefix
- ```
- Take Deep-1B dataset as an example:
- ```bash
- pushd
- cd cpp/bench/ann
- mkdir -p data/deep-1B && cd data/deep-1B
- # download manually "Ground Truth" file of "Yandex DEEP"
- # suppose the file name is deep_new_groundtruth.public.10K.bin
- ../../scripts/split_groundtruth.pl deep_new_groundtruth.public.10K.bin groundtruth
- # two files 'groundtruth.neighbors.ibin' and 'groundtruth.distances.fbin' should be produced
- popd
- ```
- Besides ground truth files for the whole billion-scale datasets, this site also provides ground truth files for the first 10M or 100M vectors of the base sets. This mean we can use these billion-scale datasets as million-scale datasets. To facilitate this, an optional parameter `subset_size` for dataset can be used. See the next step for further explanation.
-
-## Generate ground truth
-
-If you have a dataset, but no corresponding ground truth file, then you can generate ground trunth using the `generate_groundtruth` utility. Example usage:
-
-```bash
-# With existing query file
-python -m raft_ann_bench.generate_groundtruth --dataset /dataset/base.fbin --output=groundtruth_dir --queries=/dataset/query.public.10K.fbin
-
-# With randomly generated queries
-python -m raft_ann_bench.generate_groundtruth --dataset /dataset/base.fbin --output=groundtruth_dir --queries=random --n_queries=10000
-
-# Using only a subset of the dataset. Define queries by randomly
-# selecting vectors from the (subset of the) dataset.
-python -m raft_ann_bench.generate_groundtruth --dataset /dataset/base.fbin --nrows=2000000 --output=groundtruth_dir --queries=random-choice --n_queries=10000
-```
\ No newline at end of file
diff --git a/docs/source/ann_benchmarks_low_level.md b/docs/source/ann_benchmarks_low_level.md
deleted file mode 100644
index 7ba13dec8d..0000000000
--- a/docs/source/ann_benchmarks_low_level.md
+++ /dev/null
@@ -1,219 +0,0 @@
-### Low-level Scripts and Executables
-#### End-to-end Example
-An end-to-end example (run from the RAFT source code root directory):
-```bash
-# (0) get raft sources
-git clone https://github.com/rapidsai/raft.git
-cd raft
-
-# (1) prepare a dataset
-export PYTHONPATH=python/raft-ann-bench/src:$PYTHONPATH
-python -m raft_ann_bench.get_dataset --dataset glove-100-angular --normalize
-
-# option --normalize is used here to normalize vectors so cosine distance is converted
-# to inner product; don't use -n for l2 distance
-
-# (2) build index
-$CONDA_PREFIX/bin/ann/RAFT_IVF_FLAT_ANN_BENCH \
- --data_prefix=datasets \
- --build \
- --benchmark_filter="raft_ivf_flat\..*" \
- python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-100-inner.json
-
-# (3) search
-$CONDA_PREFIX/bin/ann/RAFT_IVF_FLAT_ANN_BENCH\
- --data_prefix=datasets \
- --benchmark_min_time=2s \
- --benchmark_out=ivf_flat_search.csv \
- --benchmark_out_format=csv \
- --benchmark_counters_tabular \
- --search \
- --benchmark_filter="raft_ivf_flat\..*" \
- python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-100-inner.json
-
-
-# optional step: plot QPS-Recall figure using data in ivf_flat_search.csv with your favorite tool
-```
-
-##### Step 1: Prepare Dataset
-Note: the preferred way to download and process smaller (million scale) datasets is to use the `get_dataset` script as demonstrated in the example above.
-
-A dataset usually has 4 binary files containing database vectors, query vectors, ground truth neighbors and their corresponding distances. For example, Glove-100 dataset has files `base.fbin` (database vectors), `query.fbin` (query vectors), `groundtruth.neighbors.ibin` (ground truth neighbors), and `groundtruth.distances.fbin` (ground truth distances). The first two files are for index building and searching, while the other two are associated with a particular distance and are used for evaluation.
-
-The file suffixes `.fbin`, `.f16bin`, `.ibin`, `.u8bin`, and `.i8bin` denote that the data type of vectors stored in the file are `float32`, `float16`(a.k.a `half`), `int`, `uint8`, and `int8`, respectively.
-These binary files are little-endian and the format is: the first 8 bytes are `num_vectors` (`uint32_t`) and `num_dimensions` (`uint32_t`), and the following `num_vectors * num_dimensions * sizeof(type)` bytes are vectors stored in row-major order.
-
-Some implementation can take `float16` database and query vectors as inputs and will have better performance. Use `python/raft-ann-bench/src/raft_ann_bench/get_dataset/fbin_to_f16bin.py` to transform dataset from `float32` to `float16` type.
-
-Commonly used datasets can be downloaded from two websites:
-1. Million-scale datasets can be found at the [Data sets](https://github.com/erikbern/ann-benchmarks#data-sets) section of [`ann-benchmarks`](https://github.com/erikbern/ann-benchmarks).
-
- However, these datasets are in HDF5 format. Use `python/raft-ann-bench/src/raft_ann_bench/get_dataset/fbin_to_f16bin.py/hdf5_to_fbin.py` to transform the format. A few Python packages are required to run it:
- ```bash
- pip3 install numpy h5py
- ```
- The usage of this script is:
- ```bash
- $ cpp/bench/ann/scripts/hdf5_to_fbin.py
- usage: scripts/hdf5_to_fbin.py [-n] .hdf5
- -n: normalize base/query set
- outputs: .base.fbin
- .query.fbin
- .groundtruth.neighbors.ibin
- .groundtruth.distances.fbin
- ```
- So for an input `.hdf5` file, four output binary files will be produced. See previous section for an example of prepossessing GloVe dataset.
-
- Most datasets provided by `ann-benchmarks` use `Angular` or `Euclidean` distance. `Angular` denotes cosine distance. However, computing cosine distance reduces to computing inner product by normalizing vectors beforehand. In practice, we can always do the normalization to decrease computation cost, so it's better to measure the performance of inner product rather than cosine distance. The `-n` option of `hdf5_to_fbin.py` can be used to normalize the dataset.
-
-2. Billion-scale datasets can be found at [`big-ann-benchmarks`](http://big-ann-benchmarks.com). The ground truth file contains both neighbors and distances, thus should be split. A script is provided for this:
- ```bash
- $ python/raft-ann-bench/src/raft_ann_bench/split_groundtruth/split_groundtruth.pl
- usage: split_groundtruth.pl input output_prefix
- ```
- Take Deep-1B dataset as an example:
- ```bash
- pushd
- cd cpp/bench/ann
- mkdir -p data/deep-1B && cd data/deep-1B
- # download manually "Ground Truth" file of "Yandex DEEP"
- # suppose the file name is deep_new_groundtruth.public.10K.bin
- /path/to/raft/python/raft-ann-bench/src/raft_ann_bench/split_groundtruth/split_groundtruth.pl deep_new_groundtruth.public.10K.bin groundtruth
- # two files 'groundtruth.neighbors.ibin' and 'groundtruth.distances.fbin' should be produced
- popd
- ```
- Besides ground truth files for the whole billion-scale datasets, this site also provides ground truth files for the first 10M or 100M vectors of the base sets. This mean we can use these billion-scale datasets as million-scale datasets. To facilitate this, an optional parameter `subset_size` for dataset can be used. See the next step for further explanation.
-
-
-##### Step 2: Build Index
-An index is a data structure to facilitate searching. Different algorithms may use different data structures for their index. We can use `RAFT_IVF_FLAT_ANN_BENCH --build` to build an index and save it to disk.
-
-To run a benchmark executable, like `RAFT_IVF_FLAT_ANN_BENCH`, a JSON configuration file is required. Refer to [`cpp/bench/ann/conf/glove-100-inner.json`](../../cpp/cpp/bench/ann/conf/glove-100-inner.json) as an example. Configuration file has 3 sections:
-* `dataset` section specifies the name and files of a dataset, and also the distance in use. Since the `*_ANN_BENCH` programs are for index building and searching, only `base_file` for database vectors and `query_file` for query vectors are needed. Ground truth files are for evaluation thus not needed.
- - To use only a subset of the base dataset, an optional parameter `subset_size` can be specified. It means using only the first `subset_size` vectors of `base_file` as the base dataset.
-* `search_basic_param` section specifies basic parameters for searching:
- - `k` is the "k" in "k-nn", that is, the number of neighbors (or results) we want from the searching.
-* `index` section specifies an array of configurations for index building and searching:
- - `build_param` and `search_params` are parameters for building and searching, respectively. `search_params` is an array since we will search with different parameters to get different recall values.
- - `file` is the file name of index. Building will save built index to this file, while searching will load this file.
- - if `refine_ratio` is specified, refinement, as a post-processing step of search, will be done. It's for algorithms that compress vectors. For example, if `"refine_ratio" : 2` is set, 2`k` results are first computed, then exact distances of them are computed using original uncompressed vectors, and finally top `k` results among them are kept.
-
-
-The usage of `*_ANN_BENCH` can be found by running `*_ANN_BENCH --help` on one of the executables:
-```bash
-$ ./cpp/build/*_ANN_BENCH --help
-benchmark [--benchmark_list_tests={true|false}]
- [--benchmark_filter=]
- [--benchmark_min_time=`x` OR `s` ]
- [--benchmark_min_warmup_time=]
- [--benchmark_repetitions=]
- [--benchmark_enable_random_interleaving={true|false}]
- [--benchmark_report_aggregates_only={true|false}]
- [--benchmark_display_aggregates_only={true|false}]
- [--benchmark_format=]
- [--benchmark_out=]
- [--benchmark_out_format=]
- [--benchmark_color={auto|true|false}]
- [--benchmark_counters_tabular={true|false}]
- [--benchmark_context==,...]
- [--benchmark_time_unit={ns|us|ms|s}]
- [--v=]
- [--build|--search]
- [--overwrite]
- [--data_prefix=]
- .json
-
-Note the non-standard benchmark parameters:
- --build: build mode, will build index
- --search: search mode, will search using the built index
- one and only one of --build and --search should be specified
- --overwrite: force overwriting existing index files
- --data_prefix=: prepend to dataset file paths specified in the .json.
- --override_kv=: override a build/search key one or more times multiplying the number of configurations; you can use this parameter multiple times to get the Cartesian product of benchmark configs.
-```
-* `--build`: build index.
-* `--search`: do the searching with built index.
-* `--overwrite`: by default, the building mode skips building an index if it find out it already exists. This is useful when adding more configurations to the config; only new indices are build without the need to specify an elaborate filtering regex. By supplying `overwrite` flag, you disable this behavior; all indices are build regardless whether they are already stored on disk.
-* `--data_prefix`: prepend an arbitrary path to the data file paths. By default, it is equal to `data`. Note, this does not apply to index file paths.
-* `--override_kv`: override a build/search key one or more times multiplying the number of configurations.
-
-In addition to these ANN-specific flags, you can use all of the standard google benchmark flags. Some of the useful flags:
-* `--benchmark_filter`: specify subset of benchmarks to run
-* `--benchmark_out`, `--benchmark_out_format`: store the output to a file
-* `--benchmark_list_tests`: check the available configurations
-* `--benchmark_min_time`: specify the minimum duration or number of iterations per case to improve accuracy of the benchmarks.
-
-Refer to the google benchmark [user guide](https://github.com/google/benchmark/blob/main/docs/user_guide.md#command-line) for more information about the command-line usage.
-
-##### Step 3: Searching
-Use the `--search` flag on any of the `*_ANN_BENCH` executables. Other options are the same as in step 2.
-
-## Adding a new ANN algorithm
-Implementation of a new algorithm should be a class that inherits `class ANN` (defined in `cpp/bench/ann/src/ann.h`) and implements all the pure virtual functions.
-
-In addition, it should define two `struct`s for building and searching parameters. The searching parameter class should inherit `struct ANN::AnnSearchParam`. Take `class HnswLib` as an example, its definition is:
-```c++
-template
-class HnswLib : public ANN {
-public:
- struct BuildParam {
- int M;
- int ef_construction;
- int num_threads;
- };
-
- using typename ANN::AnnSearchParam;
- struct SearchParam : public AnnSearchParam {
- int ef;
- int num_threads;
- };
-
- // ...
-};
-```
-
-The benchmark program uses JSON configuration file. To add the new algorithm to the benchmark, need be able to specify `build_param`, whose value is a JSON object, and `search_params`, whose value is an array of JSON objects, for this algorithm in configuration file. Still take the configuration for `HnswLib` as an example:
-```json
-{
- "name" : "...",
- "algo" : "hnswlib",
- "build_param": {"M":12, "efConstruction":500, "numThreads":32},
- "file" : "/path/to/file",
- "search_params" : [
- {"ef":10, "numThreads":1},
- {"ef":20, "numThreads":1},
- {"ef":40, "numThreads":1}
- ]
-},
-```
-
-How to interpret these JSON objects is totally left to the implementation and should be specified in `cpp/bench/ann/src/factory.cuh`:
-1. First, add two functions for parsing JSON object to `struct BuildParam` and `struct SearchParam`, respectively:
- ```c++
- template
- void parse_build_param(const nlohmann::json& conf,
- typename cuann::HnswLib::BuildParam& param) {
- param.ef_construction = conf.at("efConstruction");
- param.M = conf.at("M");
- if (conf.contains("numThreads")) {
- param.num_threads = conf.at("numThreads");
- }
- }
-
- template
- void parse_search_param(const nlohmann::json& conf,
- typename cuann::HnswLib::SearchParam& param) {
- param.ef = conf.at("ef");
- if (conf.contains("numThreads")) {
- param.num_threads = conf.at("numThreads");
- }
- }
- ```
-
-2. Next, add corresponding `if` case to functions `create_algo()` and `create_search_param()` by calling parsing functions. The string literal in `if` condition statement must be the same as the value of `algo` in configuration file. For example,
- ```c++
- // JSON configuration file contains a line like: "algo" : "hnswlib"
- if (algo == "hnswlib") {
- // ...
- }
- ```
diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md
deleted file mode 100644
index afb4ed18ea..0000000000
--- a/docs/source/ann_benchmarks_param_tuning.md
+++ /dev/null
@@ -1,178 +0,0 @@
-# ANN Benchmarks Parameter Tuning Guide
-
-This guide outlines the various parameter settings that can be specified in [RAFT ANN Benchmark](raft_ann_benchmarks.md) json configuration files and explains the impact they have on corresponding algorithms to help inform their settings for benchmarking across desired levels of recall.
-
-
-## RAFT Indexes
-
-### `raft_brute_force`
-
-Use RAFT brute-force index for exact search. Brute-force has no further build or search parameters.
-
-### `raft_ivf_flat`
-
-IVF-flat uses an inverted-file index, which partitions the vectors into a series of clusters, or lists, storing them in an interleaved format which is optimized for fast distance computation. The searching of an IVF-flat index reduces the total vectors in the index to those within some user-specified nearest clusters called probes.
-
-IVF-flat is a simple algorithm which won't save any space, but it provides competitive search times even at higher levels of recall.
-
-| Parameter | Type | Required | Data Type | Default | Description |
-|----------------------|------------------|----------|----------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `nlist` | `build` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. |
-| `niter` | `build` | N | Positive Integer >0 | 20 | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. |
-| `ratio` | `build` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. |
-| `dataset_memory_type` | `build` | N | ["device", "host", "mmap"] | "mmap" | What memory type should the dataset reside? |
-| `query_memory_type` | `search` | N | ["device", "host", "mmap"] | "device | What memory type should the queries reside? |
-| `nprobe` | `search` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. |
-
-
-### `raft_ivf_pq`
-
-IVF-pq is an inverted-file index, which partitions the vectors into a series of clusters, or lists, in a similar way to IVF-flat above. The difference is that IVF-PQ uses product quantization to also compress the vectors, giving the index a smaller memory footprint. Unfortunately, higher levels of compression can also shrink recall, which a refinement step can improve when the original vectors are still available.
-
-| Parameter | Type | Required | Data Type | Default | Description |
-|------------------------|----------------|---|----------------------------------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `nlist` | `build` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. |
-| `niter` | `build` | N | Positive Integer >0 | 20 | Number of k-means iterations to use when training the clusters. |
-| `ratio` | `build` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. |
-| `pq_dim` | `build` | N | Positive Integer. Multiple of 8. | 0 | Dimensionality of the vector after product quantization. When 0, a heuristic is used to select this value. `pq_dim` * `pq_bits` must be a multiple of 8. |
-| `pq_bits` | `build` | N | Positive Integer. [4-8] | 8 | Bit length of the vector element after quantization. |
-| `codebook_kind` | `build` | N | ["cluster", "subspace"] | "subspace" | Type of codebook. See the [API docs](https://docs.rapids.ai/api/raft/nightly/cpp_api/neighbors_ivf_pq/#_CPPv412codebook_gen) for more detail |
-| `dataset_memory_type` | `build` | N | ["device", "host", "mmap"] | "host" | What memory type should the dataset reside? |
-| `query_memory_type` | `search` | N | ["device", "host", "mmap"] | "device | What memory type should the queries reside? |
-| `nprobe` | `search` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. |
-| `internalDistanceDtype` | `search` | N | [`float`, `half`] | `half` | The precision to use for the distance computations. Lower precision can increase performance at the cost of accuracy. |
-| `smemLutDtype` | `search` | N | [`float`, `half`, `fp8`] | `half` | The precision to use for the lookup table in shared memory. Lower precision can increase performance at the cost of accuracy. |
-| `refine_ratio` | `search` | N| Positive Number >=1 | 1 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. |
-
-
-### `raft_cagra`
-CAGRA uses a graph-based index, which creates an intermediate, approximate kNN graph using IVF-PQ and then further refining and optimizing to create a final kNN graph. This kNN graph is used by CAGRA as an index for search.
-
-| Parameter | Type | Required | Data Type | Default | Description |
-|-----------------------------|----------------|----------|----------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `graph_degree` | `build` | N | Positive Integer >0 | 64 | Degree of the final kNN graph index. |
-| `intermediate_graph_degree` | `build` | N | Positive Integer >0 | 128 | Degree of the intermediate kNN graph. |
-| `graph_build_algo` | `build` | N | ["IVF_PQ", "NN_DESCENT"] | "IVF_PQ" | Algorithm to use for search |
-| `dataset_memory_type` | `build` | N | ["device", "host", "mmap"] | "mmap" | What memory type should the dataset reside while constructing the index? |
-| `query_memory_type` | `search` | N | ["device", "host", "mmap"] | "device | What memory type should the queries reside? |
-| `itopk` | `search_wdith` | N | Positive Integer >0 | 64 | Number of intermediate search results retained during the search. Higher values improve search accuracy at the cost of speed. |
-| `search_width` | `search` | N | Positive Integer >0 | 1 | Number of graph nodes to select as the starting point for the search in each iteration. |
-| `max_iterations` | `search` | N | Integer >=0 | 0 | Upper limit of search iterations. Auto select when 0. |
-| `algo` | `search` | N | string | "auto" | Algorithm to use for search. Possible values: {"auto", "single_cta", "multi_cta", "multi_kernel"} |
-| `graph_memory_type` | `search` | N | string | "device" | Memory type to store gaph. Must be one of {"device", "host_pinned", "host_huge_page"}. |
-| `internal_dataset_memory_type` | `search` | N | string | "device" | Memory type to store dataset in the index. Must be one of {"device", "host_pinned", "host_huge_page"}. |
-
-The `graph_memory_type` or `internal_dataset_memory_type` options can be useful for large datasets that do not fit the device memory. Setting `internal_dataset_memory_type` other than `device` has negative impact on search speed. Using `host_huge_page` option is only supported on systems with Heterogeneous Memory Management or on platforms that natively support GPU access to system allocated memory, for example Grace Hopper.
-
-To fine tune CAGRA index building we can customize IVF-PQ index builder options using the following settings. These take effect only if `graph_build_algo == "IVF_PQ"`. It is recommended to experiment using a separate IVF-PQ index to find the config that gives the largest QPS for large batch. Recall does not need to be very high, since CAGRA further optimizes the kNN neighbor graph. Some of the default values are derived from the dataset size which is assumed to be [n_vecs, dim].
-
-| Parameter | Type | Required | Data Type | Default | Description |
-|------------------------|----------------|---|----------------------------------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `ivf_pq_build_nlist` | `build` | N | Positive Integer >0 | n_vecs / 2500 | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. |
-| `ivf_pq_build_niter` | `build` | N | Positive Integer >0 | 25 | Number of k-means iterations to use when training the clusters. |
-| `ivf_pq_build_ratio` | `build` | N | Positive Integer >0 | 10 | `1/ratio` is the number of training points which should be used to train the clusters. |
-| `ivf_pq_build_pq_dim` | `build` | N | Positive Integer. Multiple of 8. | dim/2 rounded up to 8 | Dimensionality of the vector after product quantization. When 0, a heuristic is used to select this value. `pq_dim` * `pq_bits` must be a multiple of 8. |
-| `ivf_pq_build_pq_bits` | `build` | N | Positive Integer. [4-8] | 8 | Bit length of the vector element after quantization. |
-| `ivf_pq_build_codebook_kind` | `build` | N | ["cluster", "subspace"] | "subspace" | Type of codebook. See the [API docs](https://docs.rapids.ai/api/raft/nightly/cpp_api/neighbors_ivf_pq/#_CPPv412codebook_gen) for more detail |
-| `ivf_pq_search_nprobe` | `build` | N | Positive Integer >0 | min(2*dim, nlist) | The closest number of clusters to search for each query vector. |
-| `ivf_pq_search_internalDistanceDtype` | `build` | N | [`float`, `half`] | `fp8` | The precision to use for the distance computations. Lower precision can increase performance at the cost of accuracy. |
-| `ivf_pq_search_smemLutDtype` | `build` | N | [`float`, `half`, `fp8`] | `half` | The precision to use for the lookup table in shared memory. Lower precision can increase performance at the cost of accuracy. |
-| `ivf_pq_search_refine_ratio` | `build` | N| Positive Number >=1 | 2 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. |
-
-Alternatively, if `graph_build_algo == "NN_DESCENT"`, then we can customize the following parameters
-
-| Parameter | Type | Required | Data Type | Default | Description |
-|-----------------------------|----------------|----------|----------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `nn_descent_niter` | `build` | N | Positive Integer>0 | 20 | Number of NN Descent iterations. |
-| `nn_descent_intermediate_graph_degree` | `build` | N | Positive Integer>0 | `intermediate_graph_degree` * 1.5 | Intermadiate graph degree during NN descent iterations |
-| `nn_descent_max_iterations` | `build` | N | Positive Integer>0 | 20 | Alias for `nn_descent_niter` |
-| `nn_descent_termination_threshold` | `build` | N | Positive float>0 | 0.0001 | Termination threshold for NN descent. |
-
-### `raft_cagra_hnswlib`
-This is a benchmark that enables interoperability between `CAGRA` built `HNSW` search. It uses the `CAGRA` built graph as the base layer of an `hnswlib` index to search queries only within the base layer (this is enabled with a simple patch to `hnswlib`).
-
-`build` : Same as `build` of [CAGRA](#raft-cagra)
-
-`search` : Same as `search` of [hnswlib](#hnswlib)
-
-## FAISS Indexes
-
-### `faiss_gpu_flat`
-
-Use FAISS flat index on the GPU, which performs an exact search using brute-force and doesn't have any further build or search parameters.
-
-### `faiss_gpu_ivf_flat`
-
-IVF-flat uses an inverted-file index, which partitions the vectors into a series of clusters, or lists, storing them in an interleaved format which is optimized for fast distance computation. The searching of an IVF-flat index reduces the total vectors in the index to those within some user-specified nearest clusters called probes.
-
-IVF-flat is a simple algorithm which won't save any space, but it provides competitive search times even at higher levels of recall.
-
-| Parameter | Type | Required | Data Type | Default | Description |
-|-----------|----------------|----------|---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `nlists` | `build` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. |
-| `ratio` | `build` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. |
-| `nprobe` | `search` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. |
-
-### `faiss_gpu_ivf_pq`
-
-IVF-pq is an inverted-file index, which partitions the vectors into a series of clusters, or lists, in a similar way to IVF-flat above. The difference is that IVF-PQ uses product quantization to also compress the vectors, giving the index a smaller memory footprint. Unfortunately, higher levels of compression can also shrink recall, which a refinement step can improve when the original vectors are still available.
-
-| Parameter | Type | Required | Data Type | Default | Description |
-|------------------|----------------|----------|----------------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `nlist` | `build` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. |
-| `ratio` | `build` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. |
-| `M_ratio` | `build` | Y | Positive Integer Power of 2 [8-64] | | Ratio of numbeer of chunks or subquantizers for each vector. Computed by `dims` / `M_ratio` |
-| `usePrecomputed` | `build` | N | Boolean. Default=`false` | `false` | Use pre-computed lookup tables to speed up search at the cost of increased memory usage. |
-| `useFloat16` | `build` | N | Boolean. Default=`false` | `false` | Use half-precision floats for clustering step. |
-| `nprobe` | `search` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. |
-| `refine_ratio` | `search` | N| Positive Number >=1 | 1 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. |
-
-### `faiss_cpu_flat`
-
-Use FAISS flat index on the CPU, which performs an exact search using brute-force and doesn't have any further build or search parameters.
-
-
-| Parameter | Type | Required | Data Type | Default | Description |
-|-----------|----------------|----------|---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `numThreads` | `search` | N | Positive Integer >0 | 1 | Number of threads to use for queries. |
-
-### `faiss_cpu_ivf_flat`
-
-Use FAISS IVF-Flat index on CPU
-
-| Parameter | Type | Required | Data Type | Default | Description |
-|----------|----------------|----------|---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `nlist` | `build` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. |
-| `ratio` | `build` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. |
-| `nprobe` | `search` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. |
-| `numThreads` | `search` | N | Positive Integer >0 | 1 | Number of threads to use for queries. |
-
-### `faiss_cpu_ivf_pq`
-
-Use FAISS IVF-PQ index on CPU
-
-| Parameter | Type | Required | Data Type | Default | Description |
-|------------------|----------------|----------|------------------------------------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `nlist` | `build` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. |
-| `ratio` | `build` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. |
-| `M` | `build` | Y | Positive Integer Power of 2 [8-64] | | Number of chunks or subquantizers for each vector. |
-| `usePrecomputed` | `build` | N | Boolean. Default=`false` | `false` | Use pre-computed lookup tables to speed up search at the cost of increased memory usage. |
-| `bitsPerCode` | `build` | N | Positive Integer [4-8] | 8 | Number of bits to use for each code. |
-| `nprobe` | `search` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. |
-| `refine_ratio` | `search` | N| Positive Number >=1 | 1 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. |
-| `numThreads` | `search` | N | Positive Integer >0 | 1 | Number of threads to use for queries. |
-
-
-## HNSW
-
-### `hnswlib`
-
-| Parameter | Type | Required | Data Type | Default | Description |
-|------------------|-----------|----------|--------------------------------------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `efConstruction` | `build` | Y | Positive Integer >0 | | Controls index time and accuracy. Bigger values increase the index quality. At some point, increasing this will no longer improve the quality. |
-| `M` | `build` | Y | Positive Integer often between 2-100 | | Number of bi-directional links create for every new element during construction. Higher values work for higher intrinsic dimensionality and/or high recall, low values can work for datasets with low intrinsic dimensionality and/or low recalls. Also affects the algorithm's memory consumption. |
-| `numThreads` | `build` | N | Positive Integer >0 | 1 | Number of threads to use to build the index. |
-| `ef` | `search` | Y | Positive Integer >0 | | Size of the dynamic list for the nearest neighbors used for search. Higher value leads to more accurate but slower search. Cannot be lower than `k`. |
-| `numThreads` | `search` | N | Positive Integer >0 | 1 | Number of threads to use for queries. |
-
-Please refer to [HNSW algorithm parameters guide](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md) from `hnswlib` to learn more about these arguments.
\ No newline at end of file
diff --git a/docs/source/build.md b/docs/source/build.md
index b9a1832b02..3d059d5a69 100644
--- a/docs/source/build.md
+++ b/docs/source/build.md
@@ -1,6 +1,6 @@
# Installation
-RAFT currently provides libraries for C++ and Python. The C++ libraries, including the header-only and optional shared library, can be installed with Conda.
+RAFT currently provides libraries for C++ and Python. The C++ libraries, including the header-only and optional shared library, can be installed with Conda.
Both the C++ and Python APIs require CMake to build from source.
@@ -34,8 +34,6 @@ The easiest way to install RAFT is through conda and several packages are provid
- `libraft` (optional) C++ shared library containing pre-compiled template instantiations and runtime API.
- `pylibraft` (optional) Python library
- `raft-dask` (optional) Python library for deployment of multi-node multi-GPU algorithms that use the RAFT `raft::comms` abstraction layer in Dask clusters.
-- `raft-ann-bench` (optional) Benchmarking tool for easily producing benchmarks that compare RAFT's vector search algorithms against other state-of-the-art implementations.
-- `raft-ann-bench-cpu` (optional) Reproducible benchmarking tool similar to above, but doesn't require CUDA to be installed on the machine. Can be used to test in environments with competitive CPUs.
Use the following command, depending on your CUDA version, to install all of the RAFT packages with conda (replace `rapidsai` with `rapidsai-nightly` to install more up-to-date but less stable nightly packages). `mamba` is preferred over the `conda` command.
```bash
@@ -60,7 +58,7 @@ If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api
## Installing Python through Pip
-`pylibraft` and `raft-dask` both have packages that can be [installed through pip](https://rapids.ai/pip.html#install).
+`pylibraft` and `raft-dask` both have packages that can be [installed through pip](https://rapids.ai/pip.html#install).
For CUDA 11 packages:
```bash
@@ -74,7 +72,7 @@ pip install pylibraft-cu12 --extra-index-url=https://pypi.nvidia.com
pip install raft-dask-cu12 --extra-index-url=https://pypi.nvidia.com
```
-These packages statically build RAFT's pre-compiled instantiations, so the C++ headers and pre-compiled shared library won't be readily available to use in your code.
+These packages statically build RAFT's pre-compiled instantiations, so the C++ headers and pre-compiled shared library won't be readily available to use in your code.
## Building C++ and Python from source
@@ -124,7 +122,7 @@ The recommended way to build and install RAFT from source is to use the `build.s
`build.sh` uses [rapids-cmake](https://github.com/rapidsai/rapids-cmake), which will automatically download any dependencies which are not already installed. It's important to note that while all the headers will be installed and available, some parts of the RAFT API depend on libraries like CUTLASS, which will need to be explicitly enabled in `build.sh`.
-The following example will download the needed dependencies and install the RAFT headers into `$INSTALL_PREFIX/include/raft`.
+The following example will download the needed dependencies and install the RAFT headers into `$INSTALL_PREFIX/include/raft`.
```bash
./build.sh libraft
```
@@ -201,8 +199,6 @@ It can take sometime to compile all of the benchmarks. You can build individual
./build.sh libraft bench-prims -n --limit-bench=NEIGHBORS_PRIMS_BENCH;DISTANCE_PRIMS_BENCH;LINALG_PRIMS_BENCH
```
-In addition to microbenchmarks for individual primitives, RAFT contains a reproducible benchmarking tool for evaluating the performance of RAFT's vector search algorithms against the existing state-of-the-art. Please refer to the [RAFT ANN Benchmarks](https://docs.rapids.ai/api/raft/nightly/raft_ann_benchmarks/) guide for more information on this tool.
-
### Python libraries
The Python libraries can be built and installed using the `build.sh` script:
@@ -242,7 +238,7 @@ The Python packages can also be uninstalled using the `build.sh` script:
### Using CMake directly
-When building RAFT from source, the `build.sh` script offers a nice wrapper around the `cmake` commands to ease the burdens of manually configuring the various available cmake options. When more fine-grained control over the CMake configuration is desired, the `cmake` command can be invoked directly as the below example demonstrates.
+When building RAFT from source, the `build.sh` script offers a nice wrapper around the `cmake` commands to ease the burdens of manually configuring the various available cmake options. When more fine-grained control over the CMake configuration is desired, the `cmake` command can be invoked directly as the below example demonstrates.
The `CMAKE_INSTALL_PREFIX` installs RAFT into a specific location. The example below installs RAFT into the current Conda environment:
```bash
@@ -259,7 +255,6 @@ RAFT's CMake has the following configurable flags available:
|---------------------------------|----------------------| --- |------------------------------------------------------------------------------|
| BUILD_TESTS | ON, OFF | ON | Compile Googletests |
| BUILD_PRIMS_BENCH | ON, OFF | OFF | Compile benchmarks |
-| BUILD_ANN_BENCH | ON, OFF | OFF | Compile end-to-end ANN benchmarks |
| CUDA_ENABLE_KERNELINFO | ON, OFF | OFF | Enables `kernelinfo` in nvcc. This is useful for `compute-sanitizer` |
| CUDA_ENABLE_LINEINFO | ON, OFF | OFF | Enable the -lineinfo option for nvcc |
| CUDA_STATIC_RUNTIME | ON, OFF | OFF | Statically link the CUDA runtime |
@@ -267,10 +262,10 @@ RAFT's CMake has the following configurable flags available:
| DETECT_CONDA_ENV | ON, OFF | ON | Enable detection of conda environment for dependencies |
| raft_FIND_COMPONENTS | compiled distributed | | Configures the optional components as a space-separated list |
| RAFT_COMPILE_LIBRARY | ON, OFF | ON if either BUILD_TESTS or BUILD_PRIMS_BENCH is ON; otherwise OFF | Compiles all `libraft` shared libraries (these are required for Googletests) |
-| RAFT_ENABLE_CUBLAS_DEPENDENCY | ON, OFF | ON | Link against cublas library in `raft::raft` |
-| RAFT_ENABLE_CUSOLVER_DEPENDENCY | ON, OFF | ON | Link against cusolver library in `raft::raft` |
-| RAFT_ENABLE_CUSPARSE_DEPENDENCY | ON, OFF | ON | Link against cusparse library in `raft::raft` |
-| RAFT_ENABLE_CUSOLVER_DEPENDENCY | ON, OFF | ON | Link against curand library in `raft::raft` |
+| RAFT_ENABLE_CUBLAS_DEPENDENCY | ON, OFF | ON | Link against cublas library in `raft::raft` |
+| RAFT_ENABLE_CUSOLVER_DEPENDENCY | ON, OFF | ON | Link against cusolver library in `raft::raft` |
+| RAFT_ENABLE_CUSPARSE_DEPENDENCY | ON, OFF | ON | Link against cusparse library in `raft::raft` |
+| RAFT_ENABLE_CUSOLVER_DEPENDENCY | ON, OFF | ON | Link against curand library in `raft::raft` |
| RAFT_NVTX | ON, OFF | OFF | Enable NVTX Markers |
### Build documentation
@@ -316,4 +311,4 @@ The `raft::raft` CMake target is made available when including RAFT into your CM
|-------------|---------------------|----------------------------------------------------------|----------------------------------------|
| n/a | `raft::raft` | Full RAFT header library | CUDA toolkit, RMM, NVTX, CCCL, CUTLASS |
| compiled | `raft::compiled` | Pre-compiled template instantiations and runtime library | raft::raft |
-| distributed | `raft::distributed` | Dependencies for `raft::comms` APIs | raft::raft, UCX, NCCL
\ No newline at end of file
+| distributed | `raft::distributed` | Dependencies for `raft::comms` APIs | raft::raft, UCX, NCCL
diff --git a/docs/source/index.rst b/docs/source/index.rst
index bee0e948ff..46ebd1b737 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -63,7 +63,6 @@ While not exhaustive, the following general categories help summarize the accele
pylibraft_api.rst
using_libraft.md
vector_search_tutorial.md
- raft_ann_benchmarks.md
raft_dask_api.rst
using_raft_comms.rst
developer_guide.md
diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md
deleted file mode 100644
index 12a94e45ce..0000000000
--- a/docs/source/raft_ann_benchmarks.md
+++ /dev/null
@@ -1,597 +0,0 @@
-# RAFT ANN Benchmarks
-
-This project provides a benchmark program for various ANN search implementations. It's especially suitable for comparing GPU implementations as well as comparing GPU against CPU.
-
-> [!IMPORTANT]
-> The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called [cuVS](https://github.com/rapidsai/cuvs). As a result, `raft-ann-bench` is being migrated to `cuvs-bench` and will be removed from RAFT altogether in the 24.12 (December) release.
-
-
-## Table of Contents
-
-- [Installing the benchmarks](#installing-the-benchmarks)
- - [Conda](#conda)
- - [Docker](#docker)
-- [How to run the benchmarks](#how-to-run-the-benchmarks)
- - [Step 1: prepare dataset](#step-1-prepare-dataset)
- - [Step 2: build and search index](#step-2-build-and-search-index)
- - [Step 3: data export](#step-3-data-export)
- - [Step 4: plot results](#step-4-plot-results)
-- [Running the benchmarks](#running-the-benchmarks)
- - [End to end: small-scale (<1M to 10M)](#end-to-end-small-scale-benchmarks-1m-to-10m)
- - [End to end: large-scale (>10M)](#end-to-end-large-scale-benchmarks-10m-vectors)
- - [Running with Docker containers](#running-with-docker-containers)
- - [Evaluating the results](#evaluating-the-results)
-- [Creating and customizing dataset configurations](#creating-and-customizing-dataset-configurations)
-- [Adding a new ANN algorithm](#adding-a-new-ann-algorithm)
-- [Parameter tuning guide](https://docs.rapids.ai/api/raft/nightly/ann_benchmarks_param_tuning/)
-- [Wiki-all RAG/LLM Dataset](https://docs.rapids.ai/api/raft/nightly/wiki_all_dataset/)
-
-## Installing the benchmarks
-
-There are two main ways pre-compiled benchmarks are distributed:
-
-- [Conda](#Conda): For users not using containers but want an easy to install and use Python package. Pip wheels are planned to be added as an alternative for users that cannot use conda and prefer to not use containers.
-- [Docker](#Docker): Only needs docker and [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker) to use. Provides a single docker run command for basic dataset benchmarking, as well as all the functionality of the conda solution inside the containers.
-
-## Conda
-
-If containers are not an option or not preferred, the easiest way to install the ANN benchmarks is through conda. We provide packages for GPU enabled systems, as well for systems without a GPU. We suggest using mamba as it generally leads to a faster install time:
-
-```bash
-
-mamba create --name raft_ann_benchmarks
-conda activate raft_ann_benchmarks
-
-# to install GPU package:
-mamba install -c rapidsai -c conda-forge -c nvidia raft-ann-bench= cuda-version=11.8*
-
-# to install CPU package for usage in CPU-only systems:
-mamba install -c rapidsai -c conda-forge raft-ann-bench-cpu
-```
-
-The channel `rapidsai` can easily be substituted `rapidsai-nightly` if nightly benchmarks are desired. The CPU package currently allows to run the HNSW benchmarks.
-
-Please see the [build instructions](ann_benchmarks_build.md) to build the benchmarks from source.
-
-## Docker
-
-We provide images for GPU enabled systems, as well as systems without a GPU. The following images are available:
-
-- `raft-ann-bench`: Contains GPU and CPU benchmarks, can run all algorithms supported. Will download million-scale datasets as required. Best suited for users that prefer a smaller container size for GPU based systems. Requires the NVIDIA Container Toolkit to run GPU algorithms, can run CPU algorithms without it.
-- `raft-ann-bench-datasets`: Contains the GPU and CPU benchmarks with million-scale datasets already included in the container. Best suited for users that want to run multiple million scale datasets already included in the image.
-- `raft-ann-bench-cpu`: Contains only CPU benchmarks with minimal size. Best suited for users that want the smallest containers to reproduce benchmarks on systems without a GPU.
-
-Nightly images are located in [dockerhub](https://hub.docker.com/r/rapidsai/raft-ann-bench/tags), meanwhile release (stable) versions are located in [NGC](https://hub.docker.com/r/rapidsai/raft-ann-bench), starting with release 23.12.
-
-- The following command pulls the nightly container for python version 10, cuda version 12, and RAFT version 23.10:
-
-```bash
-docker pull rapidsai/raft-ann-bench:24.12a-cuda12.0-py3.10 #substitute raft-ann-bench for the exact desired container.
-```
-
-The CUDA and python versions can be changed for the supported values:
-
-Supported CUDA versions: 11.2 and 12.0
-Supported Python versions: 3.9 and 3.10.
-
-You can see the exact versions as well in the dockerhub site:
-
-- [RAFT ANN Benchmark images](https://hub.docker.com/r/rapidsai/raft-ann-bench/tags)
-- [RAFT ANN Benchmark with datasets preloaded images](https://hub.docker.com/r/rapidsai/raft-ann-bench-cpu/tags)
-- [RAFT ANN Benchmark CPU only images](https://hub.docker.com/r/rapidsai/raft-ann-bench-datasets/tags)
-
-**Note:** GPU containers use the CUDA toolkit from inside the container, the only requirement is a driver installed on the host machine that supports that version. So, for example, CUDA 11.8 containers can run in systems with a CUDA 12.x capable driver. Please also note that the Nvidia-Docker runtime from the [Nvidia Container Toolkit](https://github.com/NVIDIA/nvidia-docker) is required to use GPUs inside docker containers.
-
-[//]: # (- The following command (only available after RAPIDS 23.10 release) pulls the container:)
-
-[//]: # ()
-[//]: # (```bash)
-
-[//]: # (docker pull nvcr.io/nvidia/rapidsai/raft-ann-bench:24.12-cuda11.8-py3.10 #substitute raft-ann-bench for the exact desired container.)
-
-[//]: # (```)
-
-## How to run the benchmarks
-
-We provide a collection of lightweight Python scripts to run the benchmarks. There are 4 general steps to running the benchmarks and visualizing the results.
-1. Prepare Dataset
-2. Build Index and Search Index
-3. Data Export
-4. Plot Results
-
-### Step 1: Prepare Dataset
-The script `raft_ann_bench.get_dataset` will download and unpack the dataset in directory
-that the user provides. As of now, only million-scale datasets are supported by this
-script. For more information on [datasets and formats](ann_benchmarks_dataset.md).
-
-The usage of this script is:
-```bash
-usage: get_dataset.py [-h] [--name NAME] [--dataset-path DATASET_PATH] [--normalize]
-
-options:
- -h, --help show this help message and exit
- --dataset DATASET dataset to download (default: glove-100-angular)
- --dataset-path DATASET_PATH
- path to download dataset (default: ${RAPIDS_DATASET_ROOT_DIR})
- --normalize normalize cosine distance to inner product (default: False)
-```
-
-When option `normalize` is provided to the script, any dataset that has cosine distances
-will be normalized to inner product. So, for example, the dataset `glove-100-angular`
-will be written at location `datasets/glove-100-inner/`.
-
-### Step 2: Build and Search Index
-The script `raft_ann_bench.run` will build and search indices for a given dataset and its
-specified configuration.
-
-The usage of the script `raft_ann_bench.run` is:
-```bash
-usage: __main__.py [-h] [--subset-size SUBSET_SIZE] [-k COUNT] [-bs BATCH_SIZE] [--dataset-configuration DATASET_CONFIGURATION] [--configuration CONFIGURATION] [--dataset DATASET]
- [--dataset-path DATASET_PATH] [--build] [--search] [--algorithms ALGORITHMS] [--groups GROUPS] [--algo-groups ALGO_GROUPS] [-f] [-m SEARCH_MODE]
-
-options:
- -h, --help show this help message and exit
- --subset-size SUBSET_SIZE
- the number of subset rows of the dataset to build the index (default: None)
- -k COUNT, --count COUNT
- the number of nearest neighbors to search for (default: 10)
- -bs BATCH_SIZE, --batch-size BATCH_SIZE
- number of query vectors to use in each query trial (default: 10000)
- --dataset-configuration DATASET_CONFIGURATION
- path to YAML configuration file for datasets (default: None)
- --configuration CONFIGURATION
- path to YAML configuration file or directory for algorithms Any run groups found in the specified file/directory will automatically override groups of the same name
- present in the default configurations, including `base` (default: None)
- --dataset DATASET name of dataset (default: glove-100-inner)
- --dataset-path DATASET_PATH
- path to dataset folder, by default will look in RAPIDS_DATASET_ROOT_DIR if defined, otherwise a datasets subdirectory from the calling directory (default:
- os.getcwd()/datasets/)
- --build
- --search
- --algorithms ALGORITHMS
- run only comma separated list of named algorithms. If parameters `groups` and `algo-groups are both undefined, then group `base` is run by default (default: None)
- --groups GROUPS run only comma separated groups of parameters (default: base)
- --algo-groups ALGO_GROUPS
- add comma separated . to run. Example usage: "--algo-groups=raft_cagra.large,hnswlib.large" (default: None)
- -f, --force re-run algorithms even if their results already exist (default: False)
- -m SEARCH_MODE, --search-mode SEARCH_MODE
- run search in 'latency' (measure individual batches) or 'throughput' (pipeline batches and measure end-to-end) mode (default: throughput)
- -t SEARCH_THREADS, --search-threads SEARCH_THREADS
- specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. Example --search-threads=1:4. Power of 2 values between 'min' and 'max' will be used. If only 'min' is
- specified, then a single test is run with 'min' threads. By default min=1, max=. (default: None)
- -r, --dry-run dry-run mode will convert the yaml config for the specified algorithms and datasets to the json format that's consumed by the lower-level c++ binaries and then print the command to run execute the benchmarks but
- will not actually execute the command. (default: False)
-```
-
-`dataset`: name of the dataset to be searched in [datasets.yaml](#yaml-dataset-config)
-
-`dataset-configuration`: optional filepath to custom dataset YAML config which has an entry for arg `dataset`
-
-`configuration`: optional filepath to YAML configuration for an algorithm or to directory that contains YAML configurations for several algorithms. [Here's how to configure an algorithm.](#yaml-algo-config)
-
-`algorithms`: runs all algorithms that it can find in YAML configs found by `configuration`. By default, only `base` group will be run.
-
-`groups`: run only specific groups of parameters configurations for an algorithm. Groups are defined in YAML configs (see `configuration`), and by default run `base` group
-
-`algo-groups`: this parameter is helpful to append any specific algorithm+group combination to run the benchmark for in addition to all the arguments from `algorithms` and `groups`. It is of the format `.`, or for example, `raft_cagra.large`
-
-For every algorithm run by this script, it outputs an index build statistics JSON file in `/result/build/<{algo},{group}.json>`
-and an index search statistics JSON file in `/result/search/<{algo},{group},k{k},bs{batch_size}.json>`. NOTE: The filenames will not have ",{group}" if `group = "base"`.
-
-`dataset-path` :
-1. data is read from `/`
-2. indices are built in `//index`
-3. build/search results are stored in `//result`
-
-`build` and `search` : if both parameters are not supplied to the script then
-it is assumed both are `True`.
-
-`indices` and `algorithms` : these parameters ensure that the algorithm specified for an index
-is available in `algos.yaml` and not disabled, as well as having an associated executable.
-
-### Step 3: Data Export
-The script `raft_ann_bench.data_export` will convert the intermediate JSON outputs produced by `raft_ann_bench.run` to more
-easily readable CSV files, which are needed to build charts made by `raft_ann_bench.plot`.
-
-```bash
-usage: data_export.py [-h] [--dataset DATASET] [--dataset-path DATASET_PATH]
-
-options:
- -h, --help show this help message and exit
- --dataset DATASET dataset to download (default: glove-100-inner)
- --dataset-path DATASET_PATH
- path to dataset folder (default: ${RAPIDS_DATASET_ROOT_DIR})
-```
-Build statistics CSV file is stored in `/result/build/<{algo},{group}.csv>`
-and index search statistics CSV file in `/result/search/<{algo},{group},k{k},bs{batch_size},{suffix}.csv>`, where suffix has three values:
-1. `raw`: All search results are exported
-2. `throughput`: Pareto frontier of throughput results is exported
-3. `latency`: Pareto frontier of latency results is exported
-
-
-### Step 4: Plot Results
-The script `raft_ann_bench.plot` will plot results for all algorithms found in index search statistics
-CSV files `/result/search/*.csv`.
-
-The usage of this script is:
-```bash
-usage: [-h] [--dataset DATASET] [--dataset-path DATASET_PATH] [--output-filepath OUTPUT_FILEPATH] [--algorithms ALGORITHMS] [--groups GROUPS] [--algo-groups ALGO_GROUPS]
- [-k COUNT] [-bs BATCH_SIZE] [--build] [--search] [--x-scale X_SCALE] [--y-scale {linear,log,symlog,logit}] [--x-start X_START] [--mode {throughput,latency}]
- [--time-unit {s,ms,us}] [--raw]
-
-options:
- -h, --help show this help message and exit
- --dataset DATASET dataset to plot (default: glove-100-inner)
- --dataset-path DATASET_PATH
- path to dataset folder (default: /home/coder/raft/datasets/)
- --output-filepath OUTPUT_FILEPATH
- directory for PNG to be saved (default: /home/coder/raft)
- --algorithms ALGORITHMS
- plot only comma separated list of named algorithms. If parameters `groups` and `algo-groups are both undefined, then group `base` is plot by default
- (default: None)
- --groups GROUPS plot only comma separated groups of parameters (default: base)
- --algo-groups ALGO_GROUPS, --algo-groups ALGO_GROUPS
- add comma separated . to plot. Example usage: "--algo-groups=raft_cagra.large,hnswlib.large" (default: None)
- -k COUNT, --count COUNT
- the number of nearest neighbors to search for (default: 10)
- -bs BATCH_SIZE, --batch-size BATCH_SIZE
- number of query vectors to use in each query trial (default: 10000)
- --build
- --search
- --x-scale X_SCALE Scale to use when drawing the X-axis. Typically linear, logit or a2 (default: linear)
- --y-scale {linear,log,symlog,logit}
- Scale to use when drawing the Y-axis (default: linear)
- --x-start X_START Recall values to start the x-axis from (default: 0.8)
- --mode {throughput,latency}
- search mode whose Pareto frontier is used on the y-axis (default: throughput)
- --time-unit {s,ms,us}
- time unit to plot when mode is latency (default: ms)
- --raw Show raw results (not just Pareto frontier) of mode arg (default: False)
-```
-`mode`: plots pareto frontier of `throughput` or `latency` results exported in the previous step
-
-`algorithms`: plots all algorithms that it can find results for the specified `dataset`. By default, only `base` group will be plotted.
-
-`groups`: plot only specific groups of parameters configurations for an algorithm. Groups are defined in YAML configs (see `configuration`), and by default run `base` group
-
-`algo-groups`: this parameter is helpful to append any specific algorithm+group combination to plot results for in addition to all the arguments from `algorithms` and `groups`. It is of the format `.`, or for example, `raft_cagra.large`
-
-The figure below is the resulting plot of running our benchmarks as of August 2023 for a batch size of 10, on an NVIDIA H100 GPU and an Intel Xeon Platinum 8480CL CPU. It presents the throughput (in Queries-Per-Second) performance for every level of recall.
-
-![Throughput vs recall plot comparing popular ANN algorithms with RAFT's at batch size 10](../../img/raft-vector-search-batch-10.png)
-
-## Running the benchmarks
-
-### End to end: small-scale benchmarks (<1M to 10M)
-
-The steps below demonstrate how to download, install, and run benchmarks on a subset of 10M vectors from the Yandex Deep-1B dataset By default the datasets will be stored and used from the folder indicated by the `RAPIDS_DATASET_ROOT_DIR` environment variable if defined, otherwise a datasets sub-folder from where the script is being called:
-
-```bash
-
-# (1) prepare dataset.
-python -m raft_ann_bench.get_dataset --dataset deep-image-96-angular --normalize
-
-# (2) build and search index
-python -m raft_ann_bench.run --dataset deep-image-96-inner --algorithms raft_cagra --batch-size 10 -k 10
-
-# (3) export data
-python -m raft_ann_bench.data_export --dataset deep-image-96-inner
-
-# (4) plot results
-python -m raft_ann_bench.plot --dataset deep-image-96-inner
-```
-
-Configuration files already exist for the following list of the million-scale datasets. Please refer to [ann-benchmarks datasets](https://github.com/erikbern/ann-benchmarks/#data-sets) for more information, including actual train and sizes. These all work out-of-the-box with the `--dataset` argument. Other million-scale datasets from `ann-benchmarks.com` will work, but will require a json configuration file to be created in `$CONDA_PREFIX/lib/python3.xx/site-packages/raft_ann_bench/run/conf`, or you can specify the `--configuration` option to use a specific file.
-
-| Dataset Name | Train Rows | Columns | Test Rows | Distance |
-|-----|------------|----|----------------|------------|
-| `deep-image-96-angular` | 10M | 96 | 10K | Angular |
-| `fashion-mnist-784-euclidean` | 60K | 784 | 10K | Euclidean |
-| `glove-50-angular` | 1.1M | 50 | 10K | Angular |
-| `glove-100-angular` | 1.1M | 100 | 10K | Angular |
-| `mnist-784-euclidean` | 60K | 784 | 10K | Euclidean |
-| `nytimes-256-angular` | 290K | 256 | 10K | Angular |
-| `sift-128-euclidean` | 1M | 128 | 10K | Euclidean|
-
-All of the datasets above contain ground test datasets with 100 neighbors. Thus `k` for these datasets must be less than or equal to 100.
-
-### End to end: large-scale benchmarks (>10M vectors)
-
-`raft_ann_bench.get_dataset` cannot be used to download the [billion-scale datasets](ann_benchmarks_dataset.md#billion-scale)
-due to their size. You should instead use our billion-scale datasets guide to download and prepare them.
-All other python commands mentioned below work as intended once the
-billion-scale dataset has been downloaded.
-To download billion-scale datasets, visit [big-ann-benchmarks](http://big-ann-benchmarks.com/neurips21.html)
-
-We also provide a new dataset called `wiki-all` containing 88 million 768-dimensional vectors. This dataset is meant for benchmarking a realistic retrieval-augmented generation (RAG)/LLM embedding size at scale. It also contains 1M and 10M vector subsets for smaller-scale experiments. See our [Wiki-all Dataset Guide](https://docs.rapids.ai/api/raft/nightly/wiki_all_dataset/) for more information and to download the dataset.
-
-The steps below demonstrate how to download, install, and run benchmarks on a subset of 100M vectors from the Yandex Deep-1B dataset. Please note that datasets of this scale are recommended for GPUs with larger amounts of memory, such as the A100 or H100.
-```bash
-
-mkdir -p datasets/deep-1B
-# (1) prepare dataset
-# download manually "Ground Truth" file of "Yandex DEEP"
-# suppose the file name is deep_new_groundtruth.public.10K.bin
-python -m raft_ann_bench.split_groundtruth --groundtruth datasets/deep-1B/deep_new_groundtruth.public.10K.bin
-# two files 'groundtruth.neighbors.ibin' and 'groundtruth.distances.fbin' should be produced
-
-# (2) build and search index
-python -m raft_ann_bench.run --dataset deep-1B --algorithms raft_cagra --batch-size 10 -k 10
-
-# (3) export data
-python -m raft_ann_bench.data_export --dataset deep-1B
-
-# (4) plot results
-python -m raft_ann_bench.plot --dataset deep-1B
-```
-
-The usage of `python -m raft_ann_bench.split_groundtruth` is:
-```bash
-usage: split_groundtruth.py [-h] --groundtruth GROUNDTRUTH
-
-options:
- -h, --help show this help message and exit
- --groundtruth GROUNDTRUTH
- Path to billion-scale dataset groundtruth file (default: None)
-```
-
-### Running with Docker containers
-
-Two methods are provided for running the benchmarks with the Docker containers.
-
-#### End-to-end run on GPU
-
-When no other entrypoint is provided, an end-to-end script will run through all the steps in [Running the benchmarks](#running-the-benchmarks) above.
-
-For GPU-enabled systems, the `DATA_FOLDER` variable should be a local folder where you want datasets stored in `$DATA_FOLDER/datasets` and results in `$DATA_FOLDER/result` (we highly recommend `$DATA_FOLDER` to be a dedicated folder for the datasets and results of the containers):
-```bash
-export DATA_FOLDER=path/to/store/datasets/and/results
-docker run --gpus all --rm -it -u $(id -u) \
- -v $DATA_FOLDER:/data/benchmarks \
- rapidsai/raft-ann-bench:24.12a-cuda11.8-py3.10 \
- "--dataset deep-image-96-angular" \
- "--normalize" \
- "--algorithms raft_cagra,raft_ivf_pq --batch-size 10 -k 10" \
- ""
-```
-
-Usage of the above command is as follows:
-
-| Argument | Description |
-|-----------------------------------------------------------|----------------------------------------------------------------------------------------------------|
-| `rapidsai/raft-ann-bench:24.12a-cuda11.8-py3.10` | Image to use. Can be either `raft-ann-bench` or `raft-ann-bench-datasets` |
-| `"--dataset deep-image-96-angular"` | Dataset name |
-| `"--normalize"` | Whether to normalize the dataset |
-| `"--algorithms raft_cagra,hnswlib --batch-size 10 -k 10"` | Arguments passed to the `run` script, such as the algorithms to benchmark, the batch size, and `k` |
-| `""` | Additional (optional) arguments that will be passed to the `plot` script. |
-
-***Note about user and file permissions:*** The flag `-u $(id -u)` allows the user inside the container to match the `uid` of the user outside the container, allowing the container to read and write to the mounted volume indicated by the `$DATA_FOLDER` variable.
-
-#### End-to-end run on CPU
-
-The container arguments in the above section also be used for the CPU-only container, which can be used on systems that don't have a GPU installed.
-
-***Note:*** the image changes to `raft-ann-bench-cpu` container and the `--gpus all` argument is no longer used:
-```bash
-export DATA_FOLDER=path/to/store/datasets/and/results
-docker run --rm -it -u $(id -u) \
- -v $DATA_FOLDER:/data/benchmarks \
- rapidsai/raft-ann-bench-cpu:24.12a-py3.10 \
- "--dataset deep-image-96-angular" \
- "--normalize" \
- "--algorithms hnswlib --batch-size 10 -k 10" \
- ""
-```
-
-#### Manually run the scripts inside the container
-
-All of the `raft-ann-bench` images contain the Conda packages, so they can be used directly by logging directly into the container itself:
-
-```bash
-export DATA_FOLDER=path/to/store/datasets/and/results
-docker run --gpus all --rm -it -u $(id -u) \
- --entrypoint /bin/bash \
- --workdir /data/benchmarks \
- -v $DATA_FOLDER:/data/benchmarks \
- rapidsai/raft-ann-bench:24.12a-cuda11.8-py3.10
-```
-
-This will drop you into a command line in the container, with the `raft-ann-bench` python package ready to use, as described in the [Running the benchmarks](#running-the-benchmarks) section above:
-
-```
-(base) root@00b068fbb862:/data/benchmarks# python -m raft_ann_bench.get_dataset --dataset deep-image-96-angular --normalize
-```
-
-Additionally, the containers can be run in detached mode without any issue.
-
-
-### Evaluating the results
-
-The benchmarks capture several different measurements. The table below describes each of the measurements for index build benchmarks:
-
-| Name | Description |
-|------------|--------------------------------------------------------|
-| Benchmark | A name that uniquely identifies the benchmark instance |
-| Time | Wall-time spent training the index |
-| CPU | CPU time spent training the index |
-| Iterations | Number of iterations (this is usually 1) |
-| GPU | GPU time spent building |
-| index_size | Number of vectors used to train index |
-
-
-The table below describes each of the measurements for the index search benchmarks. The most important measurements `Latency`, `items_per_second`, `end_to_end`.
-
-| Name | Description |
-|------------|-------------------------------------------------------------------------------------------------------------------------------------------------------|
-| Benchmark | A name that uniquely identifies the benchmark instance |
-| Time | The wall-clock time of a single iteration (batch) divided by the number of threads. |
-| CPU | The average CPU time (user + sys time). This does not include idle time (which can also happen while waiting for GPU sync). |
-| Iterations | Total number of batches. This is going to be `total_queries` / `n_queries`. |
-| GPU | GPU latency of a single batch (seconds). In throughput mode this is averaged over multiple threads. |
-| Latency | Latency of a single batch (seconds), calculated from wall-clock time. In throughput mode this is averaged over multiple threads. |
-| Recall | Proportion of correct neighbors to ground truth neighbors. Note this column is only present if groundtruth file is specified in dataset configuration.|
-| items_per_second | Total throughput, a.k.a Queries per second (QPS). This is approximately `total_queries` / `end_to_end`. |
-| k | Number of neighbors being queried in each iteration |
-| end_to_end | Total time taken to run all batches for all iterations |
-| n_queries | Total number of query vectors in each batch |
-| total_queries | Total number of vectors queries across all iterations ( = `iterations` * `n_queries`) |
-
-Note the following:
-- A slightly different method is used to measure `Time` and `end_to_end`. That is why `end_to_end` = `Time` * `Iterations` holds only approximately.
-- The actual table displayed on the screen may differ slightly as the hyper-parameters will also be displayed for each different combination being benchmarked.
-- Recall calculation: the number of queries processed per test depends on the number of iterations. Because of this, recall can show slight fluctuations if less neighbors are processed then it is available for the benchmark.
-
-## Creating and customizing dataset configurations
-
-A single configuration will often define a set of algorithms, with associated index and search parameters, that can be generalize across datasets. We use YAML to define dataset specific and algorithm specific configurations.
-
-A default `datasets.yaml` is provided by RAFT in `${RAFT_HOME}/python/raft-ann-bench/src/raft_ann_bench/run/conf` with configurations available for several datasets. Here's a simple example entry for the `sift-128-euclidean` dataset:
-
-```yaml
-- name: sift-128-euclidean
- base_file: sift-128-euclidean/base.fbin
- query_file: sift-128-euclidean/query.fbin
- groundtruth_neighbors_file: sift-128-euclidean/groundtruth.neighbors.ibin
- dims: 128
- distance: euclidean
-```
-
-Configuration files for ANN algorithms supported by `raft-ann-bench` are provided in `${RAFT_HOME}/python/raft-ann-bench/src/raft_ann_bench/run/conf`. `raft_cagra` algorithm configuration looks like:
-```yaml
-name: raft_cagra
-groups:
- base:
- build:
- graph_degree: [32, 64]
- intermediate_graph_degree: [64, 96]
- graph_build_algo: ["NN_DESCENT"]
- search:
- itopk: [32, 64, 128]
-
- large:
- build:
- graph_degree: [32, 64]
- search:
- itopk: [32, 64, 128]
-```
-The default parameters for which the benchmarks are run can be overridden by creating a custom YAML file for algorithms with a `base` group.
-
-There config above has 2 fields:
-1. `name` - define the name of the algorithm for which the parameters are being specified.
-2. `groups` - define a run group which has a particular set of parameters. Each group helps create a cross-product of all hyper-parameter fields for `build` and `search`.
-
-The table below contains all algorithms supported by RAFT. Each unique algorithm will have its own set of `build` and `search` settings. The [ANN Algorithm Parameter Tuning Guide](ann_benchmarks_param_tuning.md) contains detailed instructions on choosing build and search parameters for each supported algorithm.
-
-| Library | Algorithms |
-|-----------|---------------------------------------------------------------------------------------|
-| FAISS GPU | `faiss_gpu_flat`, `faiss_gpu_ivf_flat`, `faiss_gpu_ivf_pq` |
-| FAISS CPU | `faiss_cpu_flat`, `faiss_cpu_ivf_flat`, `faiss_cpu_ivf_pq` |
-| GGNN | `ggnn` |
-| HNSWlib | `hnswlib` |
-| RAFT | `raft_brute_force`, `raft_cagra`, `raft_ivf_flat`, `raft_ivf_pq`, `raft_cagra_hnswlib`|
-
-## Adding a new ANN algorithm
-
-### Implementation and Configuration
-Implementation of a new algorithm should be a C++ class that inherits `class ANN` (defined in `cpp/bench/ann/src/ann.h`) and implements all the pure virtual functions.
-
-In addition, it should define two `struct`s for building and searching parameters. The searching parameter class should inherit `struct ANN::AnnSearchParam`. Take `class HnswLib` as an example, its definition is:
-```c++
-template
-class HnswLib : public ANN {
-public:
- struct BuildParam {
- int M;
- int ef_construction;
- int num_threads;
- };
-
- using typename ANN::AnnSearchParam;
- struct SearchParam : public AnnSearchParam {
- int ef;
- int num_threads;
- };
-
- // ...
-};
-```
-
-The benchmark program uses JSON format in a configuration file to specify indexes to build, along with the build and search parameters. To add the new algorithm to the benchmark, need be able to specify `build_param`, whose value is a JSON object, and `search_params`, whose value is an array of JSON objects, for this algorithm in configuration file. The `build_param` and `search_param` arguments will vary depending on the algorithm. Take the configuration for `HnswLib` as an example:
-```json
-{
- "name" : "hnswlib.M12.ef500.th32",
- "algo" : "hnswlib",
- "build_param": {"M":12, "efConstruction":500, "numThreads":32},
- "file" : "/path/to/file",
- "search_params" : [
- {"ef":10, "numThreads":1},
- {"ef":20, "numThreads":1},
- {"ef":40, "numThreads":1},
- ],
- "search_result_file" : "/path/to/file"
-},
-```
-How to interpret these JSON objects is totally left to the implementation and should be specified in `cpp/bench/ann/src/factory.cuh`:
-1. First, add two functions for parsing JSON object to `struct BuildParam` and `struct SearchParam`, respectively:
- ```c++
- template
- void parse_build_param(const nlohmann::json& conf,
- typename cuann::HnswLib::BuildParam& param) {
- param.ef_construction = conf.at("efConstruction");
- param.M = conf.at("M");
- if (conf.contains("numThreads")) {
- param.num_threads = conf.at("numThreads");
- }
- }
-
- template
- void parse_search_param(const nlohmann::json& conf,
- typename cuann::HnswLib::SearchParam& param) {
- param.ef = conf.at("ef");
- if (conf.contains("numThreads")) {
- param.num_threads = conf.at("numThreads");
- }
- }
- ```
-
-2. Next, add corresponding `if` case to functions `create_algo()` (in `cpp/bench/ann/) and `create_search_param()` by calling parsing functions. The string literal in `if` condition statement must be the same as the value of `algo` in configuration file. For example,
- ```c++
- // JSON configuration file contains a line like: "algo" : "hnswlib"
- if (algo == "hnswlib") {
- // ...
- }
- ```
-
-
-### Adding a CMake Target
-In `raft/cpp/bench/ann/CMakeLists.txt`, we provide a `CMake` function to configure a new Benchmark target with the following signature:
-```
-ConfigureAnnBench(
- NAME
- PATH
- INCLUDES
- CXXFLAGS
- LINKS
-)
-```
-
-To add a target for `HNSWLIB`, we would call the function as:
-```
-ConfigureAnnBench(
- NAME HNSWLIB PATH bench/ann/src/hnswlib/hnswlib_benchmark.cpp INCLUDES
- ${CMAKE_CURRENT_BINARY_DIR}/_deps/hnswlib-src/hnswlib CXXFLAGS "${HNSW_CXX_FLAGS}"
-)
-```
-
-This will create an executable called `HNSWLIB_ANN_BENCH`, which can then be used to run `HNSWLIB` benchmarks.
-
-Add a new entry to `algos.yaml` to map the name of the algorithm to its binary executable and specify whether the algorithm requires GPU support.
-```yaml
-raft_ivf_pq:
- executable: RAFT_IVF_PQ_ANN_BENCH
- requires_gpu: true
-```
-
-`executable` : specifies the name of the binary that will build/search the index. It is assumed to be
-available in `raft/cpp/build/`.
-`requires_gpu` : denotes whether an algorithm requires GPU to run.
diff --git a/docs/source/vector_search_tutorial.md b/docs/source/vector_search_tutorial.md
index d1d5c57700..8f7b2d1bfd 100644
--- a/docs/source/vector_search_tutorial.md
+++ b/docs/source/vector_search_tutorial.md
@@ -17,7 +17,7 @@
RAFT has several important algorithms for performing vector search on the GPU and this tutorial walks through the primary vector search APIs from start to finish to provide a reference for quick setup and C++ API usage.
-This tutorial assumes RAFT has been installed and/or added to your build so that you are able to compile and run RAFT code. If not done already, please follow the [build and install instructions](build.md) and consider taking a look at the [example c++ template project](https://github.com/rapidsai/raft/tree/HEAD/cpp/template) for ready-to-go examples that you can immediately build and start playing with. Also take a look at RAFT's library of [reproducible vector search benchmarks](raft_ann_benchmarks.md) to run benchmarks that compare RAFT against other state-of-the-art nearest neighbors algorithms at scale.
+This tutorial assumes RAFT has been installed and/or added to your build so that you are able to compile and run RAFT code. If not done already, please follow the [build and install instructions](build.md) and consider taking a look at the [example c++ template project](https://github.com/rapidsai/raft/tree/HEAD/cpp/template) for ready-to-go examples that you can immediately build and start playing with.
For more information about the various APIs demonstrated in this tutorial, along with comprehensive usage examples of all the APIs offered by RAFT, please refer to the [RAFT's C++ API Documentation](https://docs.rapids.ai/api/raft/nightly/cpp_api/).
@@ -271,7 +271,7 @@ auto removed_indices = raft::make_device_vector(res, n_removed_indices);
raft::core::bitset removed_indices_bitset(
res, removed_indices.view(), dataset.extent(0));
-// ... Populate the bitset ...
+// ... Populate the bitset ...
// search K nearest neighbours according to a bitset filter
auto neighbors = raft::make_device_matrix(res, n_queries, k);
@@ -406,4 +406,4 @@ The below example specifies the total number of bytes that RAFT can use for temp
std::shared_ptr managed_resource;
raft::device_resource res(managed_resource, std::make_optional(3 * 1024^3));
-```
\ No newline at end of file
+```
diff --git a/docs/source/wiki_all_dataset.md b/docs/source/wiki_all_dataset.md
deleted file mode 100644
index c001bdc409..0000000000
--- a/docs/source/wiki_all_dataset.md
+++ /dev/null
@@ -1,47 +0,0 @@
-# Wiki-all Dataset
-
-The `wiki-all` dataset was created to stress vector search algorithms at scale with both a large number of vectors and dimensions. The entire dataset contains 88M vectors with 768 dimensions and is meant for testing the types of vectors one would typically encounter in retrieval augmented generation (RAG) workloads. The full dataset is ~251GB in size, which is intentionally larger than the typical memory of GPUs. The massive scale is intended to promote the use of compression and efficient out-of-core methods for both indexing and search.
-
-The dataset is composed of English wiki texts from [Kaggle](https://www.kaggle.com/datasets/jjinho/wikipedia-20230701) and multi-lingual wiki texts from [Cohere Wikipedia](https://huggingface.co/datasets/Cohere/wikipedia-22-12).
-
-Cohere's English Texts are older (2022) and smaller than the Kaggle English Wiki texts (2023) so the English texts have been removed from Cohere completely. The final Wiki texts include English Wiki from Kaggle and the other languages from Cohere. The English texts constitute 50% of the total text size.
-
-To form the final dataset, the Wiki texts were chunked into 85 million 128-token pieces. For reference, Cohere chunks Wiki texts into 104-token pieces. Finally, the embeddings of each chunk were computed using the [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) embedding model. The resulting dataset is an embedding matrix of size 88 million by 768. Also included with the dataset is a query file containing 10k query vectors and a groundtruth file to evaluate nearest neighbors algorithms.
-
-## Getting the dataset
-
-### Full dataset
-
-A version of the dataset is made available in the binary format that can be used directly by the [raft-ann-bench](https://docs.rapids.ai/api/raft/nightly/raft_ann_benchmarks/) tool. The full 88M dataset is ~251GB and the download link below contains tarballs that have been split into multiple parts.
-
-The following will download all 10 parts and untar them to a `wiki_all_88M` directory:
-```bash
-curl -s https://data.rapids.ai/raft/datasets/wiki_all/wiki_all.tar.{00..9} | tar -xf - -C wiki_all_88M/
-```
-
-The above has the unfortunate drawback that if the command should fail for any reason, all the parts need to be re-downloaded. The files can also be downloaded individually and then untarred to the directory. Each file is ~27GB and there are 10 of them.
-
-```bash
-curl -s https://data.rapids.ai/raft/datasets/wiki_all/wiki_all.tar.00
-...
-curl -s https://data.rapids.ai/raft/datasets/wiki_all/wiki_all.tar.09
-
-cat wiki_all.tar.* | tar -xf - -C wiki_all_88M/
-```
-
-### 1M and 10M subsets
-
-Also available are 1M and 10M subsets of the full dataset which are 2.9GB and 29GB, respectively. These subsets also include query sets of 10k vectors and corresponding groundtruth files.
-
-```bash
-curl -s https://data.rapids.ai/raft/datasets/wiki_all_1M/wiki_all_1M.tar
-curl -s https://data.rapids.ai/raft/datasets/wiki_all_10M/wiki_all_10M.tar
-```
-
-## Using the dataset
-
-After the dataset is downloaded and extracted to the `wiki_all_88M` directory (or `wiki_all_1M`/`wiki_all_10M` depending on whether the subsets are used), the files can be used in the benchmarking tool. The dataset name is `wiki_all` (or `wiki_all_1M`/`wiki_all_10M`), and the benchmarking tool can be used by specifying the appropriate name `--dataset wiki_all_88M` in the scripts.
-
-## License info
-
-The English wiki texts available on Kaggle come with the [CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license and the Cohere wikipedia data set comes with the [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) license.
\ No newline at end of file
diff --git a/python/raft-dask/CMakeLists.txt b/python/raft-dask/CMakeLists.txt
index 197ddae05f..9ebbaa5298 100644
--- a/python/raft-dask/CMakeLists.txt
+++ b/python/raft-dask/CMakeLists.txt
@@ -45,7 +45,6 @@ if(NOT raft_FOUND)
# raft-dask doesn't actually use raft libraries, it just needs the headers, so we can turn off all
# library compilation and we don't need to install anything here.
set(BUILD_TESTS OFF)
- set(BUILD_ANN_BENCH OFF)
set(BUILD_PRIMS_BENCH OFF)
set(RAFT_COMPILE_LIBRARIES OFF)
set(RAFT_COMPILE_DIST_LIBRARY OFF)