From fccf33e367e40f3b459973025aef0dc60ea08df8 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 15 Nov 2024 21:04:10 -0500 Subject: [PATCH] Remove raft-ann-bench (#2497) This PR removes raft-ann-bench from the conda packages, build system, and documentation. This removal was previously announced for the 24.12 release in #2448. Authors: - Corey J. Nolet (https://github.com/cjnolet) - Bradley Dice (https://github.com/bdice) Approvers: - Ben Frederickson (https://github.com/benfred) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/raft/pull/2497 --- .github/workflows/pr.yaml | 2 +- .gitignore | 1 - .pre-commit-config.yaml | 3 +- README.md | 4 +- build.sh | 46 +- ci/build_python.sh | 25 - ci/release/update-version.sh | 2 - .../bench_ann_cuda-118_arch-aarch64.yaml | 46 - .../bench_ann_cuda-118_arch-x86_64.yaml | 46 - .../bench_ann_cuda-120_arch-aarch64.yaml | 42 - .../bench_ann_cuda-120_arch-x86_64.yaml | 42 - conda/recipes/libraft/conda_build_config.yaml | 15 - conda/recipes/raft-ann-bench-cpu/build.sh | 5 - .../conda_build_config.yaml | 29 - conda/recipes/raft-ann-bench-cpu/meta.yaml | 69 - conda/recipes/raft-ann-bench/build.sh | 5 - .../raft-ann-bench/conda_build_config.yaml | 70 - conda/recipes/raft-ann-bench/meta.yaml | 110 -- cpp/CMakeLists.txt | 88 +- cpp/bench/ann/CMakeLists.txt | 349 ----- cpp/bench/ann/README.md | 3 - cpp/bench/ann/src/common/ann_types.hpp | 168 -- cpp/bench/ann/src/common/benchmark.cpp | 111 -- cpp/bench/ann/src/common/benchmark.hpp | 736 --------- cpp/bench/ann/src/common/conf.hpp | 161 -- .../src/common/cuda_huge_page_resource.hpp | 104 -- .../ann/src/common/cuda_pinned_resource.hpp | 98 -- cpp/bench/ann/src/common/cuda_stub.hpp | 240 --- cpp/bench/ann/src/common/dataset.hpp | 495 ------ cpp/bench/ann/src/common/thread_pool.hpp | 134 -- cpp/bench/ann/src/common/util.hpp | 557 ------- .../ann/src/faiss/faiss_cpu_benchmark.cpp | 163 -- cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h | 326 ---- 
.../ann/src/faiss/faiss_gpu_benchmark.cu | 192 --- cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h | 515 ------- cpp/bench/ann/src/ggnn/ggnn_benchmark.cu | 130 -- cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh | 322 ---- .../ann/src/hnswlib/hnswlib_benchmark.cpp | 120 -- cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h | 239 --- .../src/raft/raft_ann_bench_param_parser.h | 275 ---- cpp/bench/ann/src/raft/raft_ann_bench_utils.h | 255 ---- cpp/bench/ann/src/raft/raft_benchmark.cu | 140 -- cpp/bench/ann/src/raft/raft_cagra_float.cu | 20 - cpp/bench/ann/src/raft/raft_cagra_half.cu | 20 - cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu | 100 -- .../ann/src/raft/raft_cagra_hnswlib_wrapper.h | 108 -- cpp/bench/ann/src/raft/raft_cagra_int8_t.cu | 20 - cpp/bench/ann/src/raft/raft_cagra_uint8_t.cu | 20 - cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 339 ----- cpp/bench/ann/src/raft/raft_ivf_flat.cu | 22 - .../ann/src/raft/raft_ivf_flat_wrapper.h | 165 -- cpp/bench/ann/src/raft/raft_ivf_pq.cu | 23 - cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h | 203 --- cpp/bench/ann/src/raft/raft_wrapper.h | 156 -- cpp/cmake/config.json | 2 +- cpp/cmake/modules/FindAVX.cmake | 110 -- cpp/cmake/patches/faiss_override.json | 9 - cpp/cmake/patches/ggnn.diff | 230 --- cpp/cmake/patches/ggnn_override.json | 16 - cpp/cmake/thirdparty/get_faiss.cmake | 119 -- cpp/cmake/thirdparty/get_fmt.cmake | 22 - cpp/cmake/thirdparty/get_ggnn.cmake | 50 - cpp/cmake/thirdparty/get_glog.cmake | 48 - cpp/cmake/thirdparty/get_nlohmann_json.cmake | 39 - cpp/template/cmake/thirdparty/get_raft.cmake | 1 - dependencies.yaml | 46 - docs/source/ann_benchmarks_build.md | 51 - docs/source/ann_benchmarks_dataset.md | 63 - docs/source/ann_benchmarks_low_level.md | 219 --- docs/source/ann_benchmarks_param_tuning.md | 178 --- docs/source/build.md | 25 +- docs/source/index.rst | 1 - docs/source/raft_ann_benchmarks.md | 597 -------- docs/source/vector_search_tutorial.md | 6 +- docs/source/wiki_all_dataset.md | 47 - python/pylibraft/CMakeLists.txt 
| 1 - python/raft-ann-bench/LICENSE | 1 - python/raft-ann-bench/pyproject.toml | 71 - .../raft-ann-bench/src/raft_ann_bench/VERSION | 1 - .../src/raft_ann_bench/__init__.py | 16 - .../src/raft_ann_bench/_version.py | 34 - .../raft_ann_bench/constraints/__init__.py | 77 - .../raft_ann_bench/data_export/__main__.py | 257 ---- .../generate_groundtruth/__main__.py | 240 --- .../generate_groundtruth/utils.py | 103 -- .../raft_ann_bench/get_dataset/__main__.py | 115 -- .../get_dataset/fbin_to_f16bin.py | 49 - .../get_dataset/hdf5_to_fbin.py | 90 -- .../src/raft_ann_bench/plot/__main__.py | 623 -------- .../src/raft_ann_bench/run/__main__.py | 614 -------- .../src/raft_ann_bench/run/algos.yaml | 42 - .../run/conf/algos/faiss_cpu_flat.yaml | 5 - .../run/conf/algos/faiss_cpu_ivf_flat.yaml | 10 - .../run/conf/algos/faiss_cpu_ivf_pq.yaml | 18 - .../run/conf/algos/faiss_gpu_flat.yaml | 5 - .../run/conf/algos/faiss_gpu_ivf_flat.yaml | 21 - .../run/conf/algos/faiss_gpu_ivf_pq.yaml | 77 - .../run/conf/algos/hnswlib.yaml | 10 - .../run/conf/algos/raft_brute_force.yaml | 5 - .../run/conf/algos/raft_cagra.yaml | 13 - .../run/conf/algos/raft_cagra_hnswlib.yaml | 11 - .../run/conf/algos/raft_ivf_flat.yaml | 9 - .../run/conf/algos/raft_ivf_pq.yaml | 41 - .../raft_ann_bench/run/conf/bigann-100M.json | 192 --- .../src/raft_ann_bench/run/conf/datasets.yaml | 127 -- .../raft_ann_bench/run/conf/deep-100M.json | 458 ------ .../src/raft_ann_bench/run/conf/deep-1B.json | 34 - .../run/conf/deep-image-96-inner.json | 1013 ------------ .../run/conf/fashion-mnist-784-euclidean.json | 1352 ----------------- .../run/conf/gist-960-euclidean.json | 1351 ---------------- .../run/conf/glove-100-angular.json | 1351 ---------------- .../run/conf/glove-100-inner.json | 1314 ---------------- .../run/conf/glove-50-angular.json | 1351 ---------------- .../run/conf/glove-50-inner.json | 1351 ---------------- .../run/conf/lastfm-65-angular.json | 1351 ---------------- .../run/conf/mnist-784-euclidean.json | 
1352 ----------------- .../run/conf/nytimes-256-angular.json | 1352 ----------------- .../run/conf/nytimes-256-inner.json | 1352 ----------------- .../run/conf/sift-128-euclidean.json | 498 ------ .../raft_ann_bench/run/conf/wiki_all_10M.json | 200 --- .../raft_ann_bench/run/conf/wiki_all_1M.json | 216 --- .../raft_ann_bench/run/conf/wiki_all_88M.json | 200 --- .../split_groundtruth/__main__.py | 57 - .../split_groundtruth/split_groundtruth.pl | 45 - python/raft-dask/CMakeLists.txt | 1 - 125 files changed, 46 insertions(+), 28589 deletions(-) delete mode 100644 conda/environments/bench_ann_cuda-118_arch-aarch64.yaml delete mode 100644 conda/environments/bench_ann_cuda-118_arch-x86_64.yaml delete mode 100644 conda/environments/bench_ann_cuda-120_arch-aarch64.yaml delete mode 100644 conda/environments/bench_ann_cuda-120_arch-x86_64.yaml delete mode 100644 conda/recipes/raft-ann-bench-cpu/build.sh delete mode 100644 conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml delete mode 100644 conda/recipes/raft-ann-bench-cpu/meta.yaml delete mode 100644 conda/recipes/raft-ann-bench/build.sh delete mode 100644 conda/recipes/raft-ann-bench/conda_build_config.yaml delete mode 100644 conda/recipes/raft-ann-bench/meta.yaml delete mode 100644 cpp/bench/ann/CMakeLists.txt delete mode 100644 cpp/bench/ann/README.md delete mode 100644 cpp/bench/ann/src/common/ann_types.hpp delete mode 100644 cpp/bench/ann/src/common/benchmark.cpp delete mode 100644 cpp/bench/ann/src/common/benchmark.hpp delete mode 100644 cpp/bench/ann/src/common/conf.hpp delete mode 100644 cpp/bench/ann/src/common/cuda_huge_page_resource.hpp delete mode 100644 cpp/bench/ann/src/common/cuda_pinned_resource.hpp delete mode 100644 cpp/bench/ann/src/common/cuda_stub.hpp delete mode 100644 cpp/bench/ann/src/common/dataset.hpp delete mode 100644 cpp/bench/ann/src/common/thread_pool.hpp delete mode 100644 cpp/bench/ann/src/common/util.hpp delete mode 100644 cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp delete mode 
100644 cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h delete mode 100644 cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu delete mode 100644 cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h delete mode 100644 cpp/bench/ann/src/ggnn/ggnn_benchmark.cu delete mode 100644 cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh delete mode 100644 cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp delete mode 100644 cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h delete mode 100644 cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h delete mode 100644 cpp/bench/ann/src/raft/raft_ann_bench_utils.h delete mode 100644 cpp/bench/ann/src/raft/raft_benchmark.cu delete mode 100644 cpp/bench/ann/src/raft/raft_cagra_float.cu delete mode 100644 cpp/bench/ann/src/raft/raft_cagra_half.cu delete mode 100644 cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu delete mode 100644 cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h delete mode 100644 cpp/bench/ann/src/raft/raft_cagra_int8_t.cu delete mode 100644 cpp/bench/ann/src/raft/raft_cagra_uint8_t.cu delete mode 100644 cpp/bench/ann/src/raft/raft_cagra_wrapper.h delete mode 100644 cpp/bench/ann/src/raft/raft_ivf_flat.cu delete mode 100644 cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h delete mode 100644 cpp/bench/ann/src/raft/raft_ivf_pq.cu delete mode 100644 cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h delete mode 100644 cpp/bench/ann/src/raft/raft_wrapper.h delete mode 100644 cpp/cmake/modules/FindAVX.cmake delete mode 100644 cpp/cmake/patches/faiss_override.json delete mode 100644 cpp/cmake/patches/ggnn.diff delete mode 100644 cpp/cmake/patches/ggnn_override.json delete mode 100644 cpp/cmake/thirdparty/get_faiss.cmake delete mode 100644 cpp/cmake/thirdparty/get_fmt.cmake delete mode 100644 cpp/cmake/thirdparty/get_ggnn.cmake delete mode 100644 cpp/cmake/thirdparty/get_glog.cmake delete mode 100644 cpp/cmake/thirdparty/get_nlohmann_json.cmake delete mode 100644 docs/source/ann_benchmarks_build.md delete mode 100644 docs/source/ann_benchmarks_dataset.md delete mode 
100644 docs/source/ann_benchmarks_low_level.md delete mode 100644 docs/source/ann_benchmarks_param_tuning.md delete mode 100644 docs/source/raft_ann_benchmarks.md delete mode 100644 docs/source/wiki_all_dataset.md delete mode 120000 python/raft-ann-bench/LICENSE delete mode 100644 python/raft-ann-bench/pyproject.toml delete mode 120000 python/raft-ann-bench/src/raft_ann_bench/VERSION delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/__init__.py delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/_version.py delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/constraints/__init__.py delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/data_export/__main__.py delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/generate_groundtruth/__main__.py delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/generate_groundtruth/utils.py delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/get_dataset/__main__.py delete mode 100755 python/raft-ann-bench/src/raft_ann_bench/get_dataset/fbin_to_f16bin.py delete mode 100755 python/raft-ann-bench/src/raft_ann_bench/get_dataset/hdf5_to_fbin.py delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/plot/__main__.py delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/__main__.py delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/algos.yaml delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_flat.yaml delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_flat.yaml delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_pq.yaml delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_flat.yaml delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_flat.yaml delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_pq.yaml delete mode 100644 
python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/hnswlib.yaml delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_brute_force.yaml delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_cagra.yaml delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_cagra_hnswlib.yaml delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_ivf_flat.yaml delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_ivf_pq.yaml delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/bigann-100M.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/datasets.yaml delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/deep-100M.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/deep-1B.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/deep-image-96-inner.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/fashion-mnist-784-euclidean.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/gist-960-euclidean.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-100-angular.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-100-inner.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-50-angular.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-50-inner.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/lastfm-65-angular.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/mnist-784-euclidean.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/nytimes-256-angular.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/nytimes-256-inner.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/sift-128-euclidean.json delete 
mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/wiki_all_10M.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/wiki_all_1M.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/run/conf/wiki_all_88M.json delete mode 100644 python/raft-ann-bench/src/raft_ann_bench/split_groundtruth/__main__.py delete mode 100755 python/raft-ann-bench/src/raft_ann_bench/split_groundtruth/split_groundtruth.pl diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index fe8e730921..82e56cd95d 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -151,5 +151,5 @@ jobs: cuda: '["12.5"]' build_command: | sccache -z; - build-all -DBUILD_PRIMS_BENCH=ON -DBUILD_ANN_BENCH=ON --verbose; + build-all -DBUILD_PRIMS_BENCH=ON --verbose; sccache -s; diff --git a/.gitignore b/.gitignore index 11b7bc3eba..3d6c84a83f 100644 --- a/.gitignore +++ b/.gitignore @@ -25,7 +25,6 @@ log dask-worker-space/ *.egg-info/ *.bin -bench/ann/data temporary_*.json ## scikit-build diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5a5342a74e..d8ccf92ce5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -62,7 +62,7 @@ repos: entry: ./cpp/scripts/run-cmake-format.sh cmake-format language: python types: [cmake] - exclude: .*/thirdparty/.*|.*FindAVX.cmake.* + exclude: .*/thirdparty/.* # Note that pre-commit autoupdate does not update the versions # of dependencies, so we'll have to update this manually. 
additional_dependencies: @@ -114,7 +114,6 @@ repos: cpp/include/raft/neighbors/detail/faiss_select/| cpp/include/raft/thirdparty/| docs/source/sphinxext/github_link[.]py| - cpp/cmake/modules/FindAVX[.]cmake - id: verify-alpha-spec - repo: https://github.com/rapidsai/dependency-file-generator rev: v1.16.0 diff --git a/README.md b/README.md index 8870e9385e..7f43eb89dc 100755 --- a/README.md +++ b/README.md @@ -223,7 +223,7 @@ pairwise_distance(in1, in2, out=output, metric="euclidean") ## Installing -RAFT's C++ and Python libraries can both be installed through Conda and the Python libraries through Pip. +RAFT's C++ and Python libraries can both be installed through Conda and the Python libraries through Pip. ### Installing C++ and Python through Conda @@ -233,8 +233,6 @@ The easiest way to install RAFT is through conda and several packages are provid - `libraft` (optional) C++ shared library containing pre-compiled template instantiations and runtime API. - `pylibraft` (optional) Python library - `raft-dask` (optional) Python library for deployment of multi-node multi-GPU algorithms that use the RAFT `raft::comms` abstraction layer in Dask clusters. -- `raft-ann-bench` (optional) Benchmarking tool for easily producing benchmarks that compare RAFT's vector search algorithms against other state-of-the-art implementations. -- `raft-ann-bench-cpu` (optional) Reproducible benchmarking tool similar to above, but doesn't require CUDA to be installed on the machine. Can be used to test in environments with competitive CPUs. Use the following command, depending on your CUDA version, to install all of the RAFT packages with conda (replace `rapidsai` with `rapidsai-nightly` to install more up-to-date but less stable nightly packages). `mamba` is preferred over the `conda` command. ```bash diff --git a/build.sh b/build.sh index feb2d7256e..d54a8895a3 100755 --- a/build.sh +++ b/build.sh @@ -18,8 +18,8 @@ ARGS=$* # scripts, and that this script resides in the repo dir! 
REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libraft pylibraft raft-dask docs tests template bench-prims bench-ann clean --uninstall -v -g -n --compile-lib --compile-static-lib --allgpuarch --no-nvtx --cpu-only --show_depr_warn --incl-cache-stats --time -h" -HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-prims=] [--limit-bench-ann=] [--build-metrics=] +VALIDARGS="clean libraft pylibraft raft-dask docs tests template bench-prims clean --uninstall -v -g -n --compile-lib --compile-static-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h" +HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-prims=] [--build-metrics=] where is: clean - remove all existing build artifacts and configuration (start over) libraft - build the raft C++ code only. Also builds the C-wrapper library @@ -29,7 +29,6 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool= is: @@ -39,10 +38,8 @@ HELP="$0 [ ...] [ ...] 
[--cmake-args=\"\"] [--cache-tool==1.8.2 -- c-compiler -- clang-tools=16.0.6 -- clang==16.0.6 -- cmake>=3.26.4,!=3.30.0 -- cuda-nvtx=11.8 -- cuda-profiler-api=11.8.86 -- cuda-version=11.8 -- cudatoolkit -- cxx-compiler -- cython>=3.0.0,<3.1.0a0 -- gcc_linux-aarch64=11.* -- glog>=0.6.0 -- h5py>=3.8.0 -- hnswlib=0.7.0 -- libcublas-dev=11.11.3.6 -- libcublas=11.11.3.6 -- libcurand-dev=10.3.0.86 -- libcurand=10.3.0.86 -- libcusolver-dev=11.4.1.48 -- libcusolver=11.4.1.48 -- libcusparse-dev=11.7.5.86 -- libcusparse=11.7.5.86 -- libucxx==0.41.*,>=0.0.0a0 -- matplotlib -- nccl>=2.19 -- ninja -- nlohmann_json>=3.11.2 -- nvcc_linux-aarch64=11.8 -- openblas -- pandas -- pyyaml -- rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rmm==24.12.*,>=0.0.0a0 -- scikit-build-core>=0.10.0 -- sysroot_linux-aarch64==2.17 -name: bench_ann_cuda-118_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml deleted file mode 100644 index 56004fa818..0000000000 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
-channels: -- rapidsai -- rapidsai-nightly -- dask/label/dev -- conda-forge -- nvidia -dependencies: -- benchmark>=1.8.2 -- c-compiler -- clang-tools=16.0.6 -- clang==16.0.6 -- cmake>=3.26.4,!=3.30.0 -- cuda-nvtx=11.8 -- cuda-profiler-api=11.8.86 -- cuda-version=11.8 -- cudatoolkit -- cxx-compiler -- cython>=3.0.0,<3.1.0a0 -- gcc_linux-64=11.* -- glog>=0.6.0 -- h5py>=3.8.0 -- hnswlib=0.7.0 -- libcublas-dev=11.11.3.6 -- libcublas=11.11.3.6 -- libcurand-dev=10.3.0.86 -- libcurand=10.3.0.86 -- libcusolver-dev=11.4.1.48 -- libcusolver=11.4.1.48 -- libcusparse-dev=11.7.5.86 -- libcusparse=11.7.5.86 -- libucxx==0.41.*,>=0.0.0a0 -- matplotlib -- nccl>=2.19 -- ninja -- nlohmann_json>=3.11.2 -- nvcc_linux-64=11.8 -- openblas -- pandas -- pyyaml -- rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rmm==24.12.*,>=0.0.0a0 -- scikit-build-core>=0.10.0 -- sysroot_linux-64==2.17 -name: bench_ann_cuda-118_arch-x86_64 diff --git a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml deleted file mode 100644 index 5f0599d9ae..0000000000 --- a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
-channels: -- rapidsai -- rapidsai-nightly -- dask/label/dev -- conda-forge -- nvidia -dependencies: -- benchmark>=1.8.2 -- c-compiler -- clang-tools=16.0.6 -- clang==16.0.6 -- cmake>=3.26.4,!=3.30.0 -- cuda-cudart-dev -- cuda-nvcc -- cuda-nvtx-dev -- cuda-profiler-api -- cuda-version=12.0 -- cxx-compiler -- cython>=3.0.0,<3.1.0a0 -- gcc_linux-aarch64=11.* -- glog>=0.6.0 -- h5py>=3.8.0 -- hnswlib=0.7.0 -- libcublas-dev -- libcurand-dev -- libcusolver-dev -- libcusparse-dev -- libucxx==0.41.*,>=0.0.0a0 -- matplotlib -- nccl>=2.19 -- ninja -- nlohmann_json>=3.11.2 -- openblas -- pandas -- pyyaml -- rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rmm==24.12.*,>=0.0.0a0 -- scikit-build-core>=0.10.0 -- sysroot_linux-aarch64==2.17 -name: bench_ann_cuda-120_arch-aarch64 diff --git a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml deleted file mode 100644 index 849e6c1412..0000000000 --- a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# This file is generated by `rapids-dependency-file-generator`. -# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
-channels: -- rapidsai -- rapidsai-nightly -- dask/label/dev -- conda-forge -- nvidia -dependencies: -- benchmark>=1.8.2 -- c-compiler -- clang-tools=16.0.6 -- clang==16.0.6 -- cmake>=3.26.4,!=3.30.0 -- cuda-cudart-dev -- cuda-nvcc -- cuda-nvtx-dev -- cuda-profiler-api -- cuda-version=12.0 -- cxx-compiler -- cython>=3.0.0,<3.1.0a0 -- gcc_linux-64=11.* -- glog>=0.6.0 -- h5py>=3.8.0 -- hnswlib=0.7.0 -- libcublas-dev -- libcurand-dev -- libcusolver-dev -- libcusparse-dev -- libucxx==0.41.*,>=0.0.0a0 -- matplotlib -- nccl>=2.19 -- ninja -- nlohmann_json>=3.11.2 -- openblas -- pandas -- pyyaml -- rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rmm==24.12.*,>=0.0.0a0 -- scikit-build-core>=0.10.0 -- sysroot_linux-64==2.17 -name: bench_ann_cuda-120_arch-x86_64 diff --git a/conda/recipes/libraft/conda_build_config.yaml b/conda/recipes/libraft/conda_build_config.yaml index bc0ff1fae7..4857f12cd1 100644 --- a/conda/recipes/libraft/conda_build_config.yaml +++ b/conda/recipes/libraft/conda_build_config.yaml @@ -19,21 +19,6 @@ c_stdlib_version: cmake_version: - ">=3.26.4,!=3.30.0" -nccl_version: - - ">=2.19" - -glog_version: - - ">=0.6.0" - -faiss_version: - - ">=1.7.1" - -h5py_version: - - ">=3.8.0" - -nlohmann_json_version: - - ">=3.11.2" - # The CTK libraries below are missing from the conda-forge::cudatoolkit package # for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages # and the "*_run_*" version specifiers correspond to `11.x` packages. diff --git a/conda/recipes/raft-ann-bench-cpu/build.sh b/conda/recipes/raft-ann-bench-cpu/build.sh deleted file mode 100644 index 4462d5124b..0000000000 --- a/conda/recipes/raft-ann-bench-cpu/build.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) 2023, NVIDIA CORPORATION. 
- -./build.sh bench-ann --cpu-only --no-nvtx --build-metrics=bench_ann_cpu --incl-cache-stats -cmake --install cpp/build --component ann_bench diff --git a/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml b/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml deleted file mode 100644 index ed6f708e14..0000000000 --- a/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml +++ /dev/null @@ -1,29 +0,0 @@ -c_compiler_version: - - 11 - -cxx_compiler_version: - - 11 - -c_stdlib: - - sysroot - -c_stdlib_version: - - "2.17" - -cmake_version: - - ">=3.26.4,!=3.30.0" - -glog_version: - - ">=0.6.0" - -h5py_version: - - ">=3.8.0" - -nlohmann_json_version: - - ">=3.11.2" - -spdlog_version: - - ">=1.14.1,<1.15" - -fmt_version: - - ">=11.0.2,<12" diff --git a/conda/recipes/raft-ann-bench-cpu/meta.yaml b/conda/recipes/raft-ann-bench-cpu/meta.yaml deleted file mode 100644 index 94f7102726..0000000000 --- a/conda/recipes/raft-ann-bench-cpu/meta.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. - -# Usage: -# conda build . -c conda-forge -c nvidia -c rapidsai -{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} -{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} -{% set py_version = environ['CONDA_PY'] %} -{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} -{% set date_string = environ['RAPIDS_DATE_STRING'] %} - -package: - name: raft-ann-bench-cpu - version: {{ version }} - script: build.sh - -source: - path: ../../.. 
- -build: - script_env: - - AWS_ACCESS_KEY_ID - - AWS_SECRET_ACCESS_KEY - - AWS_SESSION_TOKEN - - CMAKE_C_COMPILER_LAUNCHER - - CMAKE_CUDA_COMPILER_LAUNCHER - - CMAKE_CXX_COMPILER_LAUNCHER - - CMAKE_GENERATOR - - PARALLEL_LEVEL - - RAPIDS_ARTIFACTS_DIR - - SCCACHE_BUCKET - - SCCACHE_IDLE_TIMEOUT - - SCCACHE_REGION - - SCCACHE_S3_KEY_PREFIX=libraft-aarch64 # [aarch64] - - SCCACHE_S3_KEY_PREFIX=libraft-linux64 # [linux64] - - SCCACHE_S3_USE_SSL - number: {{ GIT_DESCRIBE_NUMBER }} - string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} - -requirements: - build: - - {{ compiler('c') }} - - {{ compiler('cxx') }} - - cmake {{ cmake_version }} - - ninja - - {{ stdlib("c") }} - - host: - - glog {{ glog_version }} - - matplotlib - - nlohmann_json {{ nlohmann_json_version }} - - spdlog {{ spdlog_version }} - - fmt {{ fmt_version }} - - python - - pyyaml - - pandas - - rapids-build-backend>=0.3.0,<0.4.0.dev0 - - run: - - glog {{ glog_version }} - - h5py {{ h5py_version }} - - matplotlib - - python - - pyyaml - - pandas - - benchmark -about: - home: https://rapids.ai/ - license: Apache-2.0 - summary: RAFT ANN CPU benchmarks diff --git a/conda/recipes/raft-ann-bench/build.sh b/conda/recipes/raft-ann-bench/build.sh deleted file mode 100644 index 00078792a1..0000000000 --- a/conda/recipes/raft-ann-bench/build.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) 2023, NVIDIA CORPORATION. 
- -./build.sh bench-ann --allgpuarch --no-nvtx --build-metrics=bench_ann --incl-cache-stats -cmake --install cpp/build --component ann_bench diff --git a/conda/recipes/raft-ann-bench/conda_build_config.yaml b/conda/recipes/raft-ann-bench/conda_build_config.yaml deleted file mode 100644 index 47bd730daf..0000000000 --- a/conda/recipes/raft-ann-bench/conda_build_config.yaml +++ /dev/null @@ -1,70 +0,0 @@ -c_compiler_version: - - 11 - -cxx_compiler_version: - - 11 - -cuda_compiler: - - cuda-nvcc - -cuda11_compiler: - - nvcc - -c_stdlib: - - sysroot - -c_stdlib_version: - - "2.17" - -cmake_version: - - ">=3.26.4,!=3.30.0" - -nccl_version: - - ">=2.19" - -glog_version: - - ">=0.6.0" - -h5py_version: - - ">=3.8.0" - -nlohmann_json_version: - - ">=3.11.2" - -# The CTK libraries below are missing from the conda-forge::cudatoolkit package -# for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages -# and the "*_run_*" version specifiers correspond to `11.x` packages. - -cuda11_libcublas_host_version: - - "=11.11.3.6" - -cuda11_libcublas_run_version: - - ">=11.5.2.43,<12.0.0" - -cuda11_libcurand_host_version: - - "=10.3.0.86" - -cuda11_libcurand_run_version: - - ">=10.2.5.43,<10.3.1" - -cuda11_libcusolver_host_version: - - "=11.4.1.48" - -cuda11_libcusolver_run_version: - - ">=11.2.0.43,<11.4.2" - -cuda11_libcusparse_host_version: - - "=11.7.5.86" - -cuda11_libcusparse_run_version: - - ">=11.6.0.43,<12.0.0" - -# `cuda-profiler-api` only has `11.8.0` and `12.0.0` packages for all -# architectures. The "*_host_*" version specifiers correspond to `11.8` packages and the -# "*_run_*" version specifiers correspond to `11.x` packages. 
- -cuda11_cuda_profiler_api_host_version: - - "=11.8.86" - -cuda11_cuda_profiler_api_run_version: - - ">=11.4.240,<12" diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml deleted file mode 100644 index d6aeb5f860..0000000000 --- a/conda/recipes/raft-ann-bench/meta.yaml +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) 2022-2024, NVIDIA CORPORATION. - -# Usage: -# conda build . -c conda-forge -c nvidia -c rapidsai -{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %} -{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} -{% set py_version = environ['CONDA_PY'] %} -{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} -{% set cuda_major = cuda_version.split('.')[0] %} -{% set date_string = environ['RAPIDS_DATE_STRING'] %} - -package: - name: raft-ann-bench - version: {{ version }} - script: build.sh - -source: - path: ../../.. - -build: - script_env: - - AWS_ACCESS_KEY_ID - - AWS_SECRET_ACCESS_KEY - - AWS_SESSION_TOKEN - - CMAKE_C_COMPILER_LAUNCHER - - CMAKE_CUDA_COMPILER_LAUNCHER - - CMAKE_CXX_COMPILER_LAUNCHER - - CMAKE_GENERATOR - - PARALLEL_LEVEL - - RAPIDS_ARTIFACTS_DIR - - SCCACHE_BUCKET - - SCCACHE_IDLE_TIMEOUT - - SCCACHE_REGION - - SCCACHE_S3_KEY_PREFIX=libraft-aarch64 # [aarch64] - - SCCACHE_S3_KEY_PREFIX=libraft-linux64 # [linux64] - - SCCACHE_S3_USE_SSL - number: {{ GIT_DESCRIBE_NUMBER }} - string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} - ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% else %} - - {{ compiler('cuda') }} - - cuda-cudart-dev - - libcublas-dev - {% endif %} - -requirements: - build: - - {{ compiler('c') }} - - {{ compiler('cxx') }} - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} - {% else %} - - {{ compiler('cuda') }} - {% endif %} - - cuda-version ={{ cuda_version 
}} - - cmake {{ cmake_version }} - - ninja - - {{ stdlib("c") }} - - host: - - python - - libraft {{ version }} - - cuda-version ={{ cuda_version }} - {% if cuda_major == "11" %} - - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }} - - libcublas {{ cuda11_libcublas_host_version }} - - libcublas-dev {{ cuda11_libcublas_host_version }} - {% else %} - - cuda-cudart-dev - - cuda-profiler-api - - libcublas-dev - {% endif %} - - glog {{ glog_version }} - - nlohmann_json {{ nlohmann_json_version }} - - h5py {{ h5py_version }} - - benchmark - - matplotlib - - python - - pandas - - pyyaml - # rmm is needed to determine if package is gpu-enabled - - rmm ={{ minor_version }} - - rapids-build-backend>=0.3.0,<0.4.0.dev0 - - run: - - python - - libraft {{ version }} - - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - {% if cuda_major == "11" %} - - cudatoolkit - {% else %} - - cuda-cudart - - libcublas - {% endif %} - - glog {{ glog_version }} - - h5py {{ h5py_version }} - - benchmark - - glog {{ glog_version }} - - matplotlib - - python - - pandas - - pyyaml - # rmm is needed to determine if package is gpu-enabled - - rmm ={{ minor_version }} -about: - home: https://rapids.ai/ - license: Apache-2.0 - summary: RAFT ANN GPU and CPU benchmarks diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d7eeb60b27..f4c18d53a8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -17,17 +17,13 @@ include(rapids-cpm) include(rapids-export) include(rapids-find) -option(BUILD_CPU_ONLY "Build CPU only components. 
Applies to RAFT ANN benchmarks currently" OFF) - # workaround for rapids_cuda_init_architectures not working for arch detection with # enable_language(CUDA) set(lang_list "CXX") -if(NOT BUILD_CPU_ONLY) - include(rapids-cuda) - rapids_cuda_init_architectures(RAFT) - list(APPEND lang_list "CUDA") -endif() +include(rapids-cuda) +rapids_cuda_init_architectures(RAFT) +list(APPEND lang_list "CUDA") project( RAFT @@ -53,7 +49,6 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) option(BUILD_SHARED_LIBS "Build raft shared libraries" ON) option(BUILD_TESTS "Build raft unit-tests" ON) option(BUILD_PRIMS_BENCH "Build raft C++ benchmark tests" OFF) -option(BUILD_ANN_BENCH "Build raft ann benchmarks" OFF) option(BUILD_CAGRA_HNSWLIB "Build CAGRA+hnswlib interface" ON) option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF) option(CUDA_ENABLE_LINEINFO @@ -68,23 +63,13 @@ option(DISABLE_OPENMP "Disable OpenMP" OFF) option(RAFT_NVTX "Enable nvtx markers" OFF) set(RAFT_COMPILE_LIBRARY_DEFAULT OFF) -if((BUILD_TESTS - OR BUILD_PRIMS_BENCH - OR BUILD_ANN_BENCH - ) - AND NOT BUILD_CPU_ONLY -) +if(BUILD_TESTS OR BUILD_PRIMS_BENCH) set(RAFT_COMPILE_LIBRARY_DEFAULT ON) endif() option(RAFT_COMPILE_LIBRARY "Enable building raft shared library instantiations" ${RAFT_COMPILE_LIBRARY_DEFAULT} ) -if(BUILD_CPU_ONLY) - set(BUILD_SHARED_LIBS OFF) - set(BUILD_TESTS OFF) -endif() - # Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to # have different values for the `Threads::Threads` target. 
Setting this flag ensures # `Threads::Threads` is the same value across all builds so that cache hits occur @@ -97,20 +82,14 @@ include(CMakeDependentOption) message(VERBOSE "RAFT: Building optional components: ${raft_FIND_COMPONENTS}") message(VERBOSE "RAFT: Build RAFT unit-tests: ${BUILD_TESTS}") message(VERBOSE "RAFT: Building raft C++ benchmarks: ${BUILD_PRIMS_BENCH}") -message(VERBOSE "RAFT: Building ANN benchmarks: ${BUILD_ANN_BENCH}") -message(VERBOSE "RAFT: Build CPU only components: ${BUILD_CPU_ONLY}") message(VERBOSE "RAFT: Enable detection of conda environment for dependencies: ${DETECT_CONDA_ENV}") message(VERBOSE "RAFT: Disable depreaction warnings " ${DISABLE_DEPRECATION_WARNINGS}) message(VERBOSE "RAFT: Disable OpenMP: ${DISABLE_OPENMP}") message(VERBOSE "RAFT: Enable kernel resource usage info: ${CUDA_ENABLE_KERNELINFO}") message(VERBOSE "RAFT: Enable lineinfo in nvcc: ${CUDA_ENABLE_LINEINFO}") message(VERBOSE "RAFT: Enable nvtx markers: ${RAFT_NVTX}") -message(VERBOSE - "RAFT: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}" -) -message(VERBOSE - "RAFT: Statically link the CUDA math libraries: ${CUDA_STATIC_MATH_LIBRARIES}" -) +message(VERBOSE "RAFT: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}") +message(VERBOSE "RAFT: Statically link the CUDA math libraries: ${CUDA_STATIC_MATH_LIBRARIES}") # Set RMM logging level set(RMM_LOGGING_LEVEL @@ -143,21 +122,17 @@ if(CUDA_STATIC_MATH_LIBRARIES) set(_ctk_static_suffix "_static") endif() -if(NOT BUILD_CPU_ONLY) - # CUDA runtime - rapids_cuda_init_runtime(USE_STATIC ${CUDA_STATIC_RUNTIME}) - # * find CUDAToolkit package - # * determine GPU architectures - # * enable the CMake CUDA language - # * set other CUDA compilation flags - rapids_find_package( - CUDAToolkit REQUIRED - BUILD_EXPORT_SET raft-exports - INSTALL_EXPORT_SET raft-exports - ) -else() - add_compile_definitions(BUILD_CPU_ONLY) -endif() +# CUDA runtime +rapids_cuda_init_runtime(USE_STATIC ${CUDA_STATIC_RUNTIME}) +# * find 
CUDAToolkit package +# * determine GPU architectures +# * enable the CMake CUDA language +# * set other CUDA compilation flags +rapids_find_package( + CUDAToolkit REQUIRED + BUILD_EXPORT_SET raft-exports + INSTALL_EXPORT_SET raft-exports +) if(NOT DISABLE_OPENMP) rapids_find_package( @@ -178,22 +153,20 @@ include(cmake/modules/ConfigureCUDA.cmake) # add third party dependencies using CPM rapids_cpm_init() -if(NOT BUILD_CPU_ONLY) - # CCCL before rmm/cuco so we get the right version of CCCL - include(cmake/thirdparty/get_cccl.cmake) - include(cmake/thirdparty/get_rmm.cmake) - include(cmake/thirdparty/get_cutlass.cmake) +# CCCL before rmm/cuco so we get the right version of CCCL +include(cmake/thirdparty/get_cccl.cmake) +include(cmake/thirdparty/get_rmm.cmake) +include(cmake/thirdparty/get_cutlass.cmake) - include(${rapids-cmake-dir}/cpm/cuco.cmake) - rapids_cpm_cuco(BUILD_EXPORT_SET raft-exports INSTALL_EXPORT_SET raft-exports) -endif() +include(${rapids-cmake-dir}/cpm/cuco.cmake) +rapids_cpm_cuco(BUILD_EXPORT_SET raft-exports INSTALL_EXPORT_SET raft-exports) if(BUILD_TESTS) include(${rapids-cmake-dir}/cpm/gtest.cmake) rapids_cpm_gtest(BUILD_STATIC) endif() -if(BUILD_PRIMS_BENCH OR BUILD_ANN_BENCH) +if(BUILD_PRIMS_BENCH) include(${rapids-cmake-dir}/cpm/gbench.cmake) rapids_cpm_gbench(BUILD_STATIC) endif() @@ -214,10 +187,8 @@ if(BUILD_CAGRA_HNSWLIB) target_link_libraries(raft INTERFACE hnswlib::hnswlib) endif() -if(NOT BUILD_CPU_ONLY) - # Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target. - target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass CCCL::CCCL) -endif() +# Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target. 
+target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass CCCL::CCCL) target_compile_features(raft INTERFACE cxx_std_17 $) target_compile_options( @@ -847,10 +818,3 @@ endif() if(BUILD_PRIMS_BENCH) add_subdirectory(bench/prims/) endif() - -# ################################################################################################## -# * build ann benchmark executable ----------------------------------------------- - -if(BUILD_ANN_BENCH) - add_subdirectory(bench/ann/) -endif() diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt deleted file mode 100644 index 35df378438..0000000000 --- a/cpp/bench/ann/CMakeLists.txt +++ /dev/null @@ -1,349 +0,0 @@ -# ============================================================================= -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. 
-# ============================================================================= - -list(APPEND CMAKE_MODULE_PATH "${RAFT_SOURCE_DIR}") - -# ################################################################################################## -# * benchmark options ------------------------------------------------------------------------------ - -option(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT "Include faiss' brute-force knn algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ "Include faiss' ivf pq algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT "Include faiss' cpu brute-force algorithm in benchmark" ON) - -option(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT "Include faiss' cpu ivf flat algorithm in benchmark" - ON -) -option(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ "Include faiss' cpu ivf pq algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT "Include raft's ivf flat algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ "Include raft's ivf pq algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_RAFT_CAGRA "Include raft's CAGRA in benchmark" ON) -option(RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE "Include raft's brute force knn in benchmark" ON) -option(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB "Include raft's CAGRA in benchmark" ON) -option(RAFT_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_SINGLE_EXE - "Make a single executable with benchmark as shared library modules" OFF -) - -# ################################################################################################## -# * Process options ---------------------------------------------------------- - -find_package(Threads REQUIRED) - -set(RAFT_ANN_BENCH_USE_FAISS ON) -set(RAFT_FAISS_ENABLE_GPU ON) -set(RAFT_USE_FAISS_STATIC ON) - -if(BUILD_CPU_ONLY) - - # 
Include necessary logging dependencies - include(cmake/thirdparty/get_fmt) - include(cmake/thirdparty/get_spdlog) - set(RAFT_FAISS_ENABLE_GPU OFF) - set(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT OFF) - set(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ OFF) - set(RAFT_ANN_BENCH_USE_RAFT_CAGRA OFF) - set(RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE OFF) - set(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB OFF) - set(RAFT_ANN_BENCH_USE_GGNN OFF) -endif() - -set(RAFT_ANN_BENCH_USE_RAFT OFF) -if(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ - OR RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE - OR RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT - OR RAFT_ANN_BENCH_USE_RAFT_CAGRA - OR RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB -) - set(RAFT_ANN_BENCH_USE_RAFT ON) -endif() - -# ################################################################################################## -# * Fetch requirements ------------------------------------------------------------- - -if(RAFT_ANN_BENCH_USE_HNSWLIB OR RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) - include(cmake/thirdparty/get_hnswlib) -endif() - -include(cmake/thirdparty/get_nlohmann_json) - -if(RAFT_ANN_BENCH_USE_GGNN) - include(cmake/thirdparty/get_ggnn) -endif() - -if(RAFT_ANN_BENCH_USE_FAISS) - include(cmake/thirdparty/get_faiss) -endif() - -# ################################################################################################## -# * Enable NVTX if available - -# Note: ANN_BENCH wrappers have extra NVTX code not related to raft::nvtx.They track gbench -# benchmark cases and iterations. This is to make limited NVTX available to all algos, not just -# raft. 
-if(TARGET CUDA::nvtx3) - set(_CMAKE_REQUIRED_INCLUDES_ORIG ${CMAKE_REQUIRED_INCLUDES}) - get_target_property(CMAKE_REQUIRED_INCLUDES CUDA::nvtx3 INTERFACE_INCLUDE_DIRECTORIES) - unset(NVTX3_HEADERS_FOUND CACHE) - # Check the headers explicitly to make sure the cpu-only build succeeds - CHECK_INCLUDE_FILE_CXX(nvtx3/nvToolsExt.h NVTX3_HEADERS_FOUND) - set(CMAKE_REQUIRED_INCLUDES ${_CMAKE_REQUIRED_INCLUDES_ORIG}) -endif() - -# ################################################################################################## -# * Configure tests function------------------------------------------------------------- - -function(ConfigureAnnBench) - - set(oneValueArgs NAME) - set(multiValueArgs PATH LINKS CXXFLAGS) - - if(NOT BUILD_CPU_ONLY) - set(GPU_BUILD ON) - endif() - - cmake_parse_arguments( - ConfigureAnnBench "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} - ) - - set(BENCH_NAME ${ConfigureAnnBench_NAME}_ANN_BENCH) - - if(RAFT_ANN_BENCH_SINGLE_EXE) - add_library(${BENCH_NAME} SHARED ${ConfigureAnnBench_PATH}) - string(TOLOWER ${BENCH_NAME} BENCH_LIB_NAME) - set_target_properties(${BENCH_NAME} PROPERTIES OUTPUT_NAME ${BENCH_LIB_NAME}) - add_dependencies(${BENCH_NAME} ANN_BENCH) - else() - add_executable(${BENCH_NAME} ${ConfigureAnnBench_PATH}) - target_compile_definitions( - ${BENCH_NAME} PRIVATE ANN_BENCH_BUILD_MAIN - $<$:ANN_BENCH_NVTX3_HEADERS_FOUND> - ) - target_link_libraries( - ${BENCH_NAME} PRIVATE benchmark::benchmark $<$:CUDA::nvtx3> - ) - endif() - - target_link_libraries( - ${BENCH_NAME} - PRIVATE raft::raft - nlohmann_json::nlohmann_json - ${ConfigureAnnBench_LINKS} - Threads::Threads - $<$:${RAFT_CTK_MATH_DEPENDENCIES}> - $ - $ - $<$:fmt::fmt-header-only> - $<$:spdlog::spdlog_header_only> - ) - - set_target_properties( - ${BENCH_NAME} - PROPERTIES # set target compile options - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON - INTERFACE_POSITION_INDEPENDENT_CODE ON - 
BUILD_RPATH "\$ORIGIN" - INSTALL_RPATH "\$ORIGIN" - ) - - set(${ConfigureAnnBench_CXXFLAGS} ${RAFT_CXX_FLAGS} ${ConfigureAnnBench_CXXFLAGS}) - - target_compile_options( - ${BENCH_NAME} PRIVATE "$<$:${ConfigureAnnBench_CXXFLAGS}>" - "$<$:${RAFT_CUDA_FLAGS}>" - ) - - if(RAFT_ANN_BENCH_USE_${ConfigureAnnBench_NAME}) - target_compile_definitions( - ${BENCH_NAME} - PUBLIC - RAFT_ANN_BENCH_USE_${ConfigureAnnBench_NAME}=RAFT_ANN_BENCH_USE_${ConfigureAnnBench_NAME} - ) - endif() - - target_include_directories( - ${BENCH_NAME} - PUBLIC "$" - PRIVATE ${ConfigureAnnBench_INCLUDES} - ) - - install( - TARGETS ${BENCH_NAME} - COMPONENT ann_bench - DESTINATION bin/ann - ) -endfunction() - -# ################################################################################################## -# * Configure tests------------------------------------------------------------- - -if(RAFT_ANN_BENCH_USE_HNSWLIB) - ConfigureAnnBench( - NAME HNSWLIB PATH src/hnswlib/hnswlib_benchmark.cpp LINKS hnswlib::hnswlib - ) - -endif() - -if(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ) - ConfigureAnnBench( - NAME - RAFT_IVF_PQ - PATH - src/raft/raft_benchmark.cu - src/raft/raft_ivf_pq.cu - LINKS - raft::compiled - ) -endif() - -if(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT) - ConfigureAnnBench( - NAME - RAFT_IVF_FLAT - PATH - src/raft/raft_benchmark.cu - src/raft/raft_ivf_flat.cu - LINKS - raft::compiled - ) -endif() - -if(RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE) - ConfigureAnnBench( - NAME RAFT_BRUTE_FORCE PATH src/raft/raft_benchmark.cu LINKS raft::compiled - ) -endif() - -if(RAFT_ANN_BENCH_USE_RAFT_CAGRA) - ConfigureAnnBench( - NAME - RAFT_CAGRA - PATH - src/raft/raft_benchmark.cu - src/raft/raft_cagra_float.cu - src/raft/raft_cagra_half.cu - src/raft/raft_cagra_int8_t.cu - src/raft/raft_cagra_uint8_t.cu - LINKS - raft::compiled - ) -endif() - -if(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) - ConfigureAnnBench( - NAME RAFT_CAGRA_HNSWLIB PATH src/raft/raft_cagra_hnswlib.cu LINKS raft::compiled - hnswlib::hnswlib - ) -endif() - 
-message("RAFT_FAISS_TARGETS: ${RAFT_FAISS_TARGETS}") -message("CUDAToolkit_LIBRARY_DIR: ${CUDAToolkit_LIBRARY_DIR}") -if(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT) - ConfigureAnnBench( - NAME FAISS_CPU_FLAT PATH src/faiss/faiss_cpu_benchmark.cpp LINKS - ${RAFT_FAISS_TARGETS} - ) -endif() - -if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT) - ConfigureAnnBench( - NAME FAISS_CPU_IVF_FLAT PATH src/faiss/faiss_cpu_benchmark.cpp LINKS - ${RAFT_FAISS_TARGETS} - ) -endif() - -if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ) - ConfigureAnnBench( - NAME FAISS_CPU_IVF_PQ PATH src/faiss/faiss_cpu_benchmark.cpp LINKS - ${RAFT_FAISS_TARGETS} - ) -endif() - -if(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT AND RAFT_FAISS_ENABLE_GPU) - ConfigureAnnBench( - NAME FAISS_GPU_IVF_FLAT PATH src/faiss/faiss_gpu_benchmark.cu LINKS - ${RAFT_FAISS_TARGETS} - ) -endif() - -if(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ AND RAFT_FAISS_ENABLE_GPU) - ConfigureAnnBench( - NAME FAISS_GPU_IVF_PQ PATH src/faiss/faiss_gpu_benchmark.cu LINKS - ${RAFT_FAISS_TARGETS} - ) -endif() - -if(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT AND RAFT_FAISS_ENABLE_GPU) - ConfigureAnnBench( - NAME FAISS_GPU_FLAT PATH src/faiss/faiss_gpu_benchmark.cu LINKS ${RAFT_FAISS_TARGETS} - ) -endif() - -if(RAFT_ANN_BENCH_USE_GGNN) - include(cmake/thirdparty/get_glog) - ConfigureAnnBench(NAME GGNN PATH src/ggnn/ggnn_benchmark.cu LINKS glog::glog ggnn::ggnn) -endif() - -# ################################################################################################## -# * Dynamically-loading ANN_BENCH executable ------------------------------------------------------- -if(RAFT_ANN_BENCH_SINGLE_EXE) - add_executable(ANN_BENCH src/common/benchmark.cpp) - - target_include_directories(ANN_BENCH PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) - - target_link_libraries( - ANN_BENCH - PRIVATE raft::raft - nlohmann_json::nlohmann_json - benchmark::benchmark - dl - -static-libgcc - fmt::fmt-header-only - spdlog::spdlog_header_only - -static-libstdc++ - $<$:CUDA::nvtx3> - ) - 
set_target_properties( - ANN_BENCH - PROPERTIES # set target compile options - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON - INTERFACE_POSITION_INDEPENDENT_CODE ON - BUILD_RPATH "\$ORIGIN" - INSTALL_RPATH "\$ORIGIN" - ) - target_compile_definitions( - ANN_BENCH - PRIVATE - $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}.${CUDAToolkit_VERSION_PATCH}"> - $<$:ANN_BENCH_NVTX3_HEADERS_FOUND> - ) - - target_link_options(ANN_BENCH PRIVATE -export-dynamic) - - install( - TARGETS ANN_BENCH - COMPONENT ann_bench - DESTINATION bin/ann - EXCLUDE_FROM_ALL - ) -endif() diff --git a/cpp/bench/ann/README.md b/cpp/bench/ann/README.md deleted file mode 100644 index 1a8af2e448..0000000000 --- a/cpp/bench/ann/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# RAFT CUDA ANN Benchmarks - -Please see the [ANN Benchmarks](https://docs.rapids.ai/api/raft/stable/cuda_ann_benchmarks.html) section of the RAFT documentation for instructions on building and using the ANN benchmarks. \ No newline at end of file diff --git a/cpp/bench/ann/src/common/ann_types.hpp b/cpp/bench/ann/src/common/ann_types.hpp deleted file mode 100644 index b010063dee..0000000000 --- a/cpp/bench/ann/src/common/ann_types.hpp +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include "cuda_stub.hpp" // cudaStream_t - -#include -#include -#include -#include - -namespace raft::bench::ann { - -enum Objective { - THROUGHPUT, // See how many vectors we can push through - LATENCY // See how fast we can push a vector through -}; - -enum class MemoryType { - Host, - HostMmap, - Device, -}; - -enum class Metric { - kInnerProduct, - kEuclidean, -}; - -inline auto parse_metric(const std::string& metric_str) -> Metric -{ - if (metric_str == "inner_product") { - return raft::bench::ann::Metric::kInnerProduct; - } else if (metric_str == "euclidean") { - return raft::bench::ann::Metric::kEuclidean; - } else { - throw std::runtime_error("invalid metric: '" + metric_str + "'"); - } -} - -inline auto parse_memory_type(const std::string& memory_type) -> MemoryType -{ - if (memory_type == "host") { - return MemoryType::Host; - } else if (memory_type == "mmap") { - return MemoryType::HostMmap; - } else if (memory_type == "device") { - return MemoryType::Device; - } else { - throw std::runtime_error("invalid memory type: '" + memory_type + "'"); - } -} - -struct AlgoProperty { - MemoryType dataset_memory_type; - // neighbors/distances should have same memory type as queries - MemoryType query_memory_type; -}; - -class AnnBase { - public: - using index_type = size_t; - - inline AnnBase(Metric metric, int dim) : metric_(metric), dim_(dim) {} - virtual ~AnnBase() noexcept = default; - - protected: - Metric metric_; - int dim_; -}; - -/** - * The GPU-based algorithms, which do not perform CPU synchronization at the end of their build or - * search methods, must implement this interface. - * - * The `cuda_timer` / `cuda_lap` from `util.hpp` uses this stream to record GPU times with events - * and, if necessary, also synchronize (via events) between iterations. - * - * If the algo does not implement this interface, GPU timings are disabled. - */ -class AnnGPU { - public: - /** - * Return the main cuda stream for this algorithm. 
- * If any work is done in multiple streams, they should synchornize with the main stream at the - * end. - */ - [[nodiscard]] virtual auto get_sync_stream() const noexcept -> cudaStream_t = 0; - /** - * By default a GPU algorithm uses a fixed stream to order GPU operations. - * However, an algorithm may need to synchronize with the host at the end of its execution. - * In that case, also synchronizing with a benchmark event would put it at disadvantage. - * - * We can disable event sync by passing `false` here - * - ONLY IF THE ALGORITHM HAS PRODUCED ITS OUTPUT BY THE TIME IT SYNCHRONIZES WITH CPU. - */ - [[nodiscard]] virtual auto uses_stream() const noexcept -> bool { return true; } - virtual ~AnnGPU() noexcept = default; -}; - -template -class ANN : public AnnBase { - public: - struct AnnSearchParam { - Objective metric_objective = Objective::LATENCY; - virtual ~AnnSearchParam() = default; - [[nodiscard]] virtual auto needs_dataset() const -> bool { return false; }; - }; - - inline ANN(Metric metric, int dim) : AnnBase(metric, dim) {} - virtual ~ANN() noexcept override = default; - - virtual void build(const T* dataset, size_t nrow) = 0; - - virtual void set_search_param(const AnnSearchParam& param) = 0; - // TODO: this assumes that an algorithm can always return k results. - // This is not always possible. - virtual void search(const T* queries, - int batch_size, - int k, - AnnBase::index_type* neighbors, - float* distances) const = 0; - - virtual void save(const std::string& file) const = 0; - virtual void load(const std::string& file) = 0; - - virtual AlgoProperty get_preference() const = 0; - - // Some algorithms don't save the building dataset in their indices. - // So they should be given the access to that dataset during searching. - // The advantage of this way is that index has smaller size - // and many indices can share one dataset. 
- // - // SearchParam::needs_dataset() of such algorithm should be true, - // and set_search_dataset() should save the passed-in pointer somewhere. - // The client code should call set_search_dataset() before searching, - // and should not release dataset before searching is finished. - virtual void set_search_dataset(const T* /*dataset*/, size_t /*nrow*/){}; - - /** - * Make a shallow copy of the ANN wrapper that shares the resources and ensures thread-safe access - * to them. */ - virtual auto copy() -> std::unique_ptr> = 0; -}; - -} // namespace raft::bench::ann - -#define REGISTER_ALGO_INSTANCE(DataT) \ - template auto raft::bench::ann::create_algo( \ - const std::string&, const std::string&, int, const nlohmann::json&, const std::vector&) \ - ->std::unique_ptr>; \ - template auto raft::bench::ann::create_search_param(const std::string&, \ - const nlohmann::json&) \ - ->std::unique_ptr::AnnSearchParam>; diff --git a/cpp/bench/ann/src/common/benchmark.cpp b/cpp/bench/ann/src/common/benchmark.cpp deleted file mode 100644 index 5510abf42f..0000000000 --- a/cpp/bench/ann/src/common/benchmark.cpp +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -// clang-format off -#include "cuda_stub.hpp" // must go first -// clang-format on - -#include "ann_types.hpp" - -#include -#define JSON_DIAGNOSTICS 1 -#include - -#include -#include -#include - -namespace raft::bench::ann { - -struct lib_handle { - void* handle{nullptr}; - explicit lib_handle(const std::string& name) - { - handle = dlopen(name.c_str(), RTLD_LAZY | RTLD_LOCAL); - if (handle == nullptr) { - auto error_msg = "Failed to load " + name; - auto err = dlerror(); - if (err != nullptr && err[0] != '\0') { error_msg += ": " + std::string(err); } - throw std::runtime_error(error_msg); - } - } - ~lib_handle() noexcept - { - if (handle != nullptr) { dlclose(handle); } - } -}; - -auto load_lib(const std::string& algo) -> void* -{ - static std::unordered_map libs{}; - auto found = libs.find(algo); - - if (found != libs.end()) { return found->second.handle; } - auto lib_name = "lib" + algo + "_ann_bench.so"; - return libs.emplace(algo, lib_name).first->second.handle; -} - -auto get_fun_name(void* addr) -> std::string -{ - Dl_info dl_info; - if (dladdr(addr, &dl_info) != 0) { - if (dl_info.dli_sname != nullptr && dl_info.dli_sname[0] != '\0') { - return std::string{dl_info.dli_sname}; - } - } - throw std::logic_error("Failed to find out name of the looked up function"); -} - -template -auto create_algo(const std::string& algo, - const std::string& distance, - int dim, - const nlohmann::json& conf, - const std::vector& dev_list) -> std::unique_ptr> -{ - static auto fname = get_fun_name(reinterpret_cast(&create_algo)); - auto handle = load_lib(algo); - auto fun_addr = dlsym(handle, fname.c_str()); - if (fun_addr == nullptr) { - throw std::runtime_error("Couldn't load the create_algo function (" + algo + ")"); - } - auto fun = reinterpret_cast)>(fun_addr); - return fun(algo, distance, dim, conf, dev_list); -} - -template -std::unique_ptr::AnnSearchParam> create_search_param( - const std::string& algo, const nlohmann::json& conf) -{ - static auto fname = 
get_fun_name(reinterpret_cast(&create_search_param)); - auto handle = load_lib(algo); - auto fun_addr = dlsym(handle, fname.c_str()); - if (fun_addr == nullptr) { - throw std::runtime_error("Couldn't load the create_search_param function (" + algo + ")"); - } - auto fun = reinterpret_cast)>(fun_addr); - return fun(algo, conf); -} - -}; // namespace raft::bench::ann - -REGISTER_ALGO_INSTANCE(float); -REGISTER_ALGO_INSTANCE(std::int8_t); -REGISTER_ALGO_INSTANCE(std::uint8_t); - -#include "benchmark.hpp" - -int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp deleted file mode 100644 index 185d54a0a3..0000000000 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ /dev/null @@ -1,736 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -#include "ann_types.hpp" -#include "conf.hpp" -#include "dataset.hpp" -#include "util.hpp" - -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace raft::bench::ann { - -static inline std::unique_ptr current_algo{nullptr}; -static inline std::unique_ptr current_algo_props{nullptr}; - -using kv_series = std::vector>>; - -inline auto apply_overrides(const std::vector& configs, - const kv_series& overrides, - std::size_t override_idx = 0) -> std::vector -{ - std::vector results{}; - if (override_idx >= overrides.size()) { - auto n = configs.size(); - for (size_t i = 0; i < n; i++) { - auto c = configs[i]; - c["override_suffix"] = n > 1 ? "/" + std::to_string(i) : ""; - results.push_back(c); - } - return results; - } - auto rec_configs = apply_overrides(configs, overrides, override_idx + 1); - auto [key, vals] = overrides[override_idx]; - auto n = vals.size(); - for (size_t i = 0; i < n; i++) { - const auto& val = vals[i]; - for (auto rc : rec_configs) { - if (n > 1) { - rc["override_suffix"] = - static_cast(rc["override_suffix"]) + "/" + std::to_string(i); - } - rc[key] = val; - results.push_back(rc); - } - } - return results; -} - -inline auto apply_overrides(const nlohmann::json& config, - const kv_series& overrides, - std::size_t override_idx = 0) -{ - return apply_overrides(std::vector{config}, overrides, 0); -} - -inline void dump_parameters(::benchmark::State& state, nlohmann::json params) -{ - std::string label = ""; - bool label_empty = true; - for (auto& [key, val] : params.items()) { - if (val.is_number()) { - state.counters.insert({{key, val}}); - } else if (val.is_boolean()) { - state.counters.insert({{key, val ? 
1.0 : 0.0}}); - } else { - auto kv = key + "=" + val.dump(); - if (label_empty) { - label = kv; - } else { - label += "#" + kv; - } - label_empty = false; - } - } - if (!label_empty) { state.SetLabel(label); } -} - -inline auto parse_algo_property(AlgoProperty prop, const nlohmann::json& conf) -> AlgoProperty -{ - if (conf.contains("dataset_memory_type")) { - prop.dataset_memory_type = parse_memory_type(conf.at("dataset_memory_type")); - } - if (conf.contains("query_memory_type")) { - prop.query_memory_type = parse_memory_type(conf.at("query_memory_type")); - } - return prop; -}; - -template -void bench_build(::benchmark::State& state, - std::shared_ptr> dataset, - Configuration::Index index, - bool force_overwrite) -{ - // NB: these two thread-local vars can be used within algo wrappers - raft::bench::ann::benchmark_thread_id = state.thread_index(); - raft::bench::ann::benchmark_n_threads = state.threads(); - dump_parameters(state, index.build_param); - if (file_exists(index.file)) { - if (force_overwrite) { - log_info("Overwriting file: %s", index.file.c_str()); - } else { - return state.SkipWithMessage( - "Index file already exists (use --force to overwrite the index)."); - } - } - - std::unique_ptr> algo; - try { - algo = ann::create_algo( - index.algo, dataset->distance(), dataset->dim(), index.build_param, index.dev_list); - } catch (const std::exception& e) { - return state.SkipWithError("Failed to create an algo: " + std::string(e.what())); - } - - const auto algo_property = parse_algo_property(algo->get_preference(), index.build_param); - - const T* base_set = dataset->base_set(algo_property.dataset_memory_type); - std::size_t index_size = dataset->base_set_size(); - - cuda_timer gpu_timer{algo}; - { - nvtx_case nvtx{state.name()}; - for (auto _ : state) { - [[maybe_unused]] auto ntx_lap = nvtx.lap(); - [[maybe_unused]] auto gpu_lap = gpu_timer.lap(); - try { - algo->build(base_set, index_size); - } catch (const std::exception& e) { - 
state.SkipWithError(std::string(e.what())); - } - } - } - if (gpu_timer.active()) { - state.counters.insert({"GPU", {gpu_timer.total_time(), benchmark::Counter::kAvgIterations}}); - } - state.counters.insert({{"index_size", index_size}}); - - if (state.skipped()) { return; } - make_sure_parent_dir_exists(index.file); - algo->save(index.file); -} - -template -void bench_search(::benchmark::State& state, - Configuration::Index index, - std::size_t search_param_ix, - std::shared_ptr> dataset, - Objective metric_objective) -{ - // NB: these two thread-local vars can be used within algo wrappers - raft::bench::ann::benchmark_thread_id = state.thread_index(); - raft::bench::ann::benchmark_n_threads = state.threads(); - std::size_t queries_processed = 0; - - const auto& sp_json = index.search_params[search_param_ix]; - - if (state.thread_index() == 0) { dump_parameters(state, sp_json); } - - // NB: `k` and `n_queries` are guaranteed to be populated in conf.cpp - const std::uint32_t k = sp_json["k"]; - // Amount of data processes in one go - const std::size_t n_queries = sp_json["n_queries"]; - // Round down the query data to a multiple of the batch size to loop over full batches of data - const std::size_t query_set_size = (dataset->query_set_size() / n_queries) * n_queries; - - if (dataset->query_set_size() < n_queries) { - std::stringstream msg; - msg << "Not enough queries in benchmark set. Expected " << n_queries << ", actual " - << dataset->query_set_size(); - state.SkipWithError(msg.str()); - return; - } - - // Each thread start from a different offset, so that the queries that they process do not - // overlap. - std::ptrdiff_t batch_offset = (state.thread_index() * n_queries) % query_set_size; - std::ptrdiff_t queries_stride = state.threads() * n_queries; - // Output is saved into a contiguous buffer (separate buffers for each thread). 
- std::ptrdiff_t out_offset = 0; - - const T* query_set = nullptr; - - if (!file_exists(index.file)) { - state.SkipWithError("Index file is missing. Run the benchmark in the build mode first."); - return; - } - - /** - * Make sure the first thread loads the algo and dataset - */ - progress_barrier load_barrier{}; - if (load_barrier.arrive(1) == 0) { - // algo is static to cache it between close search runs to save time on index loading - static std::string index_file = ""; - if (index.file != index_file) { - current_algo.reset(); - index_file = index.file; - } - - std::unique_ptr::AnnSearchParam> search_param; - ANN* algo; - try { - if (!current_algo || (algo = dynamic_cast*>(current_algo.get())) == nullptr) { - auto ualgo = ann::create_algo( - index.algo, dataset->distance(), dataset->dim(), index.build_param, index.dev_list); - algo = ualgo.get(); - algo->load(index_file); - current_algo = std::move(ualgo); - } - search_param = ann::create_search_param(index.algo, sp_json); - search_param->metric_objective = metric_objective; - } catch (const std::exception& e) { - state.SkipWithError("Failed to create an algo: " + std::string(e.what())); - return; - } - - current_algo_props = std::make_unique( - std::move(parse_algo_property(algo->get_preference(), sp_json))); - - if (search_param->needs_dataset()) { - try { - algo->set_search_dataset(dataset->base_set(current_algo_props->dataset_memory_type), - dataset->base_set_size()); - } catch (const std::exception& ex) { - state.SkipWithError("The algorithm '" + index.name + - "' requires the base set, but it's not available. 
" + - "Exception: " + std::string(ex.what())); - return; - } - } - try { - algo->set_search_param(*search_param); - } catch (const std::exception& ex) { - state.SkipWithError("An error occurred setting search parameters: " + std::string(ex.what())); - return; - } - - query_set = dataset->query_set(current_algo_props->query_memory_type); - load_barrier.arrive(state.threads()); - } else { - // All other threads will wait for the first thread to initialize the algo. - load_barrier.wait(state.threads() * 2); - // gbench ensures that all threads are synchronized at the start of the benchmark loop. - // We are accessing shared variables (like current_algo, current_algo_probs) before the - // benchmark loop, therefore the synchronization here is necessary. - } - query_set = dataset->query_set(current_algo_props->query_memory_type); - - /** - * Each thread will manage its own outputs - */ - using index_type = AnnBase::index_type; - constexpr size_t kAlignResultBuf = 64; - size_t result_elem_count = k * query_set_size; - result_elem_count = - ((result_elem_count + kAlignResultBuf - 1) / kAlignResultBuf) * kAlignResultBuf; - auto& result_buf = - get_result_buffer_from_global_pool(result_elem_count * (sizeof(float) + sizeof(index_type))); - auto* neighbors_ptr = - reinterpret_cast(result_buf.data(current_algo_props->query_memory_type)); - auto* distances_ptr = reinterpret_cast(neighbors_ptr + result_elem_count); - - { - nvtx_case nvtx{state.name()}; - - std::unique_ptr> algo{nullptr}; - try { - dynamic_cast*>(current_algo.get())->copy().swap(algo); - } catch (const std::exception& e) { - state.SkipWithError("Algo::copy: " + std::string(e.what())); - return; - } - // Initialize with algo, so that the timer.lap() object can sync with algo::get_sync_stream() - cuda_timer gpu_timer{algo}; - auto start = std::chrono::high_resolution_clock::now(); - for (auto _ : state) { - [[maybe_unused]] auto ntx_lap = nvtx.lap(); - [[maybe_unused]] auto gpu_lap = gpu_timer.lap(); - try { - 
algo->search(query_set + batch_offset * dataset->dim(), - n_queries, - k, - neighbors_ptr + out_offset * k, - distances_ptr + out_offset * k); - } catch (const std::exception& e) { - state.SkipWithError("Benchmark loop: " + std::string(e.what())); - break; - } - - // advance to the next batch - batch_offset = (batch_offset + queries_stride) % query_set_size; - out_offset = (out_offset + n_queries) % query_set_size; - - queries_processed += n_queries; - } - auto end = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast>(end - start).count(); - if (state.thread_index() == 0) { state.counters.insert({{"end_to_end", duration}}); } - state.counters.insert({"Latency", {duration, benchmark::Counter::kAvgIterations}}); - - if (gpu_timer.active()) { - state.counters.insert({"GPU", {gpu_timer.total_time(), benchmark::Counter::kAvgIterations}}); - } - } - - state.SetItemsProcessed(queries_processed); - - // This will be the total number of queries across all threads - state.counters.insert({{"total_queries", queries_processed}}); - - if (state.skipped()) { return; } - - // Each thread calculates recall on their partition of queries. - // evaluate recall - if (dataset->max_k() >= k) { - const std::int32_t* gt = dataset->gt_set(); - const std::uint32_t max_k = dataset->max_k(); - result_buf.transfer_data(MemoryType::Host, current_algo_props->query_memory_type); - auto* neighbors_host = reinterpret_cast(result_buf.data(MemoryType::Host)); - std::size_t rows = std::min(queries_processed, query_set_size); - std::size_t match_count = 0; - std::size_t total_count = rows * static_cast(k); - - // We go through the groundtruth with same stride as the benchmark loop. 
- size_t out_offset = 0; - size_t batch_offset = (state.thread_index() * n_queries) % query_set_size; - while (out_offset < rows) { - for (std::size_t i = 0; i < n_queries; i++) { - size_t i_orig_idx = batch_offset + i; - size_t i_out_idx = out_offset + i; - if (i_out_idx < rows) { - for (std::uint32_t j = 0; j < k; j++) { - auto act_idx = std::int32_t(neighbors_host[i_out_idx * k + j]); - for (std::uint32_t l = 0; l < k; l++) { - auto exp_idx = gt[i_orig_idx * max_k + l]; - if (act_idx == exp_idx) { - match_count++; - break; - } - } - } - } - } - out_offset += n_queries; - batch_offset = (batch_offset + queries_stride) % query_set_size; - } - double actual_recall = static_cast(match_count) / static_cast(total_count); - state.counters.insert({"Recall", {actual_recall, benchmark::Counter::kAvgThreads}}); - } -} - -inline void printf_usage() -{ - ::benchmark::PrintDefaultHelp(); - fprintf(stdout, - " [--build|--search] \n" - " [--force]\n" - " [--data_prefix=]\n" - " [--index_prefix=]\n" - " [--override_kv=]\n" - " [--mode=\n" - " [--threads=min[:max]]\n" - " .json\n" - "\n" - "Note the non-standard benchmark parameters:\n" - " --build: build mode, will build index\n" - " --search: search mode, will search using the built index\n" - " one and only one of --build and --search should be specified\n" - " --force: force overwriting existing index files\n" - " --data_prefix=:" - " prepend to dataset file paths specified in the .json (default = " - "'data/').\n" - " --index_prefix=:" - " prepend to index file paths specified in the .json (default = " - "'index/').\n" - " --override_kv=:" - " override a build/search key one or more times multiplying the number of configurations;" - " you can use this parameter multiple times to get the Cartesian product of benchmark" - " configs.\n" - " --mode=" - " run the benchmarks in latency (accumulate times spent in each batch) or " - " throughput (pipeline batches and measure end-to-end) mode\n" - " --threads=min[:max] specify the 
number threads to use for throughput benchmark." - " Power of 2 values between 'min' and 'max' will be used. If only 'min' is specified," - " then a single test is run with 'min' threads. By default min=1, max=.\n"); -} - -template -void register_build(std::shared_ptr> dataset, - std::vector indices, - bool force_overwrite) -{ - for (auto index : indices) { - auto suf = static_cast(index.build_param["override_suffix"]); - auto file_suf = suf; - index.build_param.erase("override_suffix"); - std::replace(file_suf.begin(), file_suf.end(), '/', '-'); - index.file += file_suf; - auto* b = ::benchmark::RegisterBenchmark( - index.name + suf, bench_build, dataset, index, force_overwrite); - b->Unit(benchmark::kSecond); - b->MeasureProcessCPUTime(); - b->UseRealTime(); - } -} - -template -void register_search(std::shared_ptr> dataset, - std::vector indices, - Objective metric_objective, - const std::vector& threads) -{ - for (auto index : indices) { - for (std::size_t i = 0; i < index.search_params.size(); i++) { - auto suf = static_cast(index.search_params[i]["override_suffix"]); - index.search_params[i].erase("override_suffix"); - - auto* b = ::benchmark::RegisterBenchmark( - index.name + suf, bench_search, index, i, dataset, metric_objective) - ->Unit(benchmark::kMillisecond) - /** - * The following are important for getting accuracy QPS measurements on both CPU - * and GPU These make sure that - * - `end_to_end` ~ (`Time` * `Iterations`) - * - `items_per_second` ~ (`total_queries` / `end_to_end`) - * - Throughput = `items_per_second` - */ - ->MeasureProcessCPUTime() - ->UseRealTime(); - if (metric_objective == Objective::THROUGHPUT) { - if (index.algo.find("faiss_gpu") != std::string::npos) { - log_warn( - "FAISS GPU does not work in throughput mode because the underlying " - "StandardGpuResources object is not thread-safe. 
This will cause unexpected results"); - } - b->ThreadRange(threads[0], threads[1]); - } - } - } -} - -template -void dispatch_benchmark(const Configuration& conf, - bool force_overwrite, - bool build_mode, - bool search_mode, - std::string data_prefix, - std::string index_prefix, - kv_series override_kv, - Objective metric_objective, - const std::vector& threads) -{ - if (cudart.found()) { - for (auto [key, value] : cuda_info()) { - ::benchmark::AddCustomContext(key, value); - } - } - const auto dataset_conf = conf.get_dataset_conf(); - auto base_file = combine_path(data_prefix, dataset_conf.base_file); - auto query_file = combine_path(data_prefix, dataset_conf.query_file); - auto gt_file = dataset_conf.groundtruth_neighbors_file; - if (gt_file.has_value()) { gt_file.emplace(combine_path(data_prefix, gt_file.value())); } - auto dataset = std::make_shared>(dataset_conf.name, - base_file, - dataset_conf.subset_first_row, - dataset_conf.subset_size, - query_file, - dataset_conf.distance, - gt_file); - ::benchmark::AddCustomContext("dataset", dataset_conf.name); - ::benchmark::AddCustomContext("distance", dataset_conf.distance); - std::vector indices = conf.get_indices(); - if (build_mode) { - if (file_exists(base_file)) { - log_info("Using the dataset file '%s'", base_file.c_str()); - ::benchmark::AddCustomContext("n_records", std::to_string(dataset->base_set_size())); - ::benchmark::AddCustomContext("dim", std::to_string(dataset->dim())); - } else { - log_warn("Dataset file '%s' does not exist; benchmarking index building is impossible.", - base_file.c_str()); - } - std::vector more_indices{}; - for (auto& index : indices) { - for (auto param : apply_overrides(index.build_param, override_kv)) { - auto modified_index = index; - modified_index.build_param = param; - modified_index.file = combine_path(index_prefix, modified_index.file); - more_indices.push_back(modified_index); - } - } - register_build(dataset, more_indices, force_overwrite); - } else if (search_mode) { 
- if (file_exists(query_file)) { - log_info("Using the query file '%s'", query_file.c_str()); - ::benchmark::AddCustomContext("max_n_queries", std::to_string(dataset->query_set_size())); - ::benchmark::AddCustomContext("dim", std::to_string(dataset->dim())); - if (gt_file.has_value()) { - if (file_exists(*gt_file)) { - log_info("Using the ground truth file '%s'", gt_file->c_str()); - ::benchmark::AddCustomContext("max_k", std::to_string(dataset->max_k())); - } else { - log_warn("Ground truth file '%s' does not exist; the recall won't be reported.", - gt_file->c_str()); - } - } else { - log_warn( - "Ground truth file is not provided; the recall won't be reported. NB: use " - "the 'groundtruth_neighbors_file' alongside the 'query_file' key to specify the " - "path to " - "the ground truth in your conf.json."); - } - } else { - log_warn("Query file '%s' does not exist; benchmarking search is impossible.", - query_file.c_str()); - } - for (auto& index : indices) { - index.search_params = apply_overrides(index.search_params, override_kv); - index.file = combine_path(index_prefix, index.file); - } - register_search(dataset, indices, metric_objective, threads); - } -} - -inline auto parse_bool_flag(const char* arg, const char* pat, bool& result) -> bool -{ - if (strcmp(arg, pat) == 0) { - result = true; - return true; - } - return false; -} - -inline auto parse_string_flag(const char* arg, const char* pat, std::string& result) -> bool -{ - auto n = strlen(pat); - if (strncmp(pat, arg, strlen(pat)) == 0) { - result = arg + n + 1; - return true; - } - return false; -} - -inline auto run_main(int argc, char** argv) -> int -{ - bool force_overwrite = false; - bool build_mode = false; - bool search_mode = false; - std::string data_prefix = "data"; - std::string index_prefix = "index"; - std::string new_override_kv = ""; - std::string mode = "latency"; - std::string threads_arg_txt = ""; - std::vector threads = {1, -1}; // min_thread, max_thread - std::string log_level_str = 
""; - int raft_log_level = raft::logger::get(RAFT_NAME).get_level(); - kv_series override_kv{}; - - char arg0_default[] = "benchmark"; // NOLINT - char* args_default = arg0_default; - if (!argv) { - argc = 1; - argv = &args_default; - } - if (argc == 1) { - printf_usage(); - return -1; - } - - char* conf_path = argv[--argc]; - std::ifstream conf_stream(conf_path); - - for (int i = 1; i < argc; i++) { - if (parse_bool_flag(argv[i], "--force", force_overwrite) || - parse_bool_flag(argv[i], "--build", build_mode) || - parse_bool_flag(argv[i], "--search", search_mode) || - parse_string_flag(argv[i], "--data_prefix", data_prefix) || - parse_string_flag(argv[i], "--index_prefix", index_prefix) || - parse_string_flag(argv[i], "--mode", mode) || - parse_string_flag(argv[i], "--override_kv", new_override_kv) || - parse_string_flag(argv[i], "--threads", threads_arg_txt) || - parse_string_flag(argv[i], "--raft_log_level", log_level_str)) { - if (!log_level_str.empty()) { - raft_log_level = std::stoi(log_level_str); - log_level_str = ""; - } - if (!threads_arg_txt.empty()) { - auto threads_arg = split(threads_arg_txt, ':'); - threads[0] = std::stoi(threads_arg[0]); - if (threads_arg.size() > 1) { - threads[1] = std::stoi(threads_arg[1]); - } else { - threads[1] = threads[0]; - } - threads_arg_txt = ""; - } - if (!new_override_kv.empty()) { - auto kvv = split(new_override_kv, ':'); - auto key = kvv[0]; - std::vector vals{}; - for (std::size_t j = 1; j < kvv.size(); j++) { - vals.push_back(nlohmann::json::parse(kvv[j])); - } - override_kv.emplace_back(key, vals); - new_override_kv = ""; - } - for (int j = i; j < argc - 1; j++) { - argv[j] = argv[j + 1]; - } - argc--; - i--; - } - } - - raft::logger::get(RAFT_NAME).set_level(raft_log_level); - - Objective metric_objective = Objective::LATENCY; - if (mode == "throughput") { metric_objective = Objective::THROUGHPUT; } - - int max_threads = - (metric_objective == Objective::THROUGHPUT) ? 
std::thread::hardware_concurrency() : 1; - if (threads[1] == -1) threads[1] = max_threads; - - if (metric_objective == Objective::LATENCY) { - if (threads[0] != 1 || threads[1] != 1) { - log_warn("Latency mode enabled. Overriding threads arg, running with single thread."); - threads = {1, 1}; - } - } - - if (build_mode == search_mode) { - log_error("One and only one of --build and --search should be specified"); - printf_usage(); - return -1; - } - - if (!conf_stream) { - log_error("Can't open configuration file: %s", conf_path); - return -1; - } - - if (cudart.needed() && !cudart.found()) { - log_warn("cudart library is not found, GPU-based indices won't work."); - } - - Configuration conf(conf_stream); - std::string dtype = conf.get_dataset_conf().dtype; - - if (dtype == "float") { - dispatch_benchmark(conf, - force_overwrite, - build_mode, - search_mode, - data_prefix, - index_prefix, - override_kv, - metric_objective, - threads); - } else if (dtype == "half") { - dispatch_benchmark(conf, - force_overwrite, - build_mode, - search_mode, - data_prefix, - index_prefix, - override_kv, - metric_objective, - threads); - } else if (dtype == "uint8") { - dispatch_benchmark(conf, - force_overwrite, - build_mode, - search_mode, - data_prefix, - index_prefix, - override_kv, - metric_objective, - threads); - } else if (dtype == "int8") { - dispatch_benchmark(conf, - force_overwrite, - build_mode, - search_mode, - data_prefix, - index_prefix, - override_kv, - metric_objective, - threads); - } else { - log_error("datatype '%s' is not supported", dtype.c_str()); - return -1; - } - - ::benchmark::Initialize(&argc, argv, printf_usage); - if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return -1; - ::benchmark::RunSpecifiedBenchmarks(); - ::benchmark::Shutdown(); - // Release a possibly cached ANN object, so that it cannot be alive longer than the handle - // to a shared library it depends on (dynamic benchmark executable). 
- current_algo.reset(); - current_algo_props.reset(); - reset_global_device_resources(); - return 0; -} -}; // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/conf.hpp b/cpp/bench/ann/src/common/conf.hpp deleted file mode 100644 index 92ba86c6cf..0000000000 --- a/cpp/bench/ann/src/common/conf.hpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "util.hpp" - -#include -#include -#include -#include -#include - -#define JSON_DIAGNOSTICS 1 -#include - -namespace raft::bench::ann { - -class Configuration { - public: - struct Index { - std::string name; - std::string algo; - nlohmann::json build_param; - std::string file; - std::vector dev_list; - - int batch_size; - int k; - std::vector search_params; - }; - - struct DatasetConf { - std::string name; - std::string base_file; - // use only a subset of base_file, - // the range of rows is [subset_first_row, subset_first_row + subset_size) - // however, subset_size = 0 means using all rows after subset_first_row - // that is, the subset is [subset_first_row, #rows in base_file) - size_t subset_first_row{0}; - size_t subset_size{0}; - std::string query_file; - std::string distance; - std::optional groundtruth_neighbors_file{std::nullopt}; - - // data type of input dataset, possible values ["float", "int8", "uint8"] - std::string dtype; - }; - - explicit inline 
Configuration(std::istream& conf_stream) - { - // to enable comments in json - auto conf = nlohmann::json::parse(conf_stream, nullptr, true, true); - - parse_dataset_(conf.at("dataset")); - parse_index_(conf.at("index"), conf.at("search_basic_param")); - } - - [[nodiscard]] inline auto get_dataset_conf() const -> DatasetConf { return dataset_conf_; } - [[nodiscard]] inline auto get_indices() const -> std::vector { return indices_; }; - - private: - inline void parse_dataset_(const nlohmann::json& conf) - { - dataset_conf_.name = conf.at("name"); - dataset_conf_.base_file = conf.at("base_file"); - dataset_conf_.query_file = conf.at("query_file"); - dataset_conf_.distance = conf.at("distance"); - - if (conf.contains("groundtruth_neighbors_file")) { - dataset_conf_.groundtruth_neighbors_file = conf.at("groundtruth_neighbors_file"); - } - if (conf.contains("subset_first_row")) { - dataset_conf_.subset_first_row = conf.at("subset_first_row"); - } - if (conf.contains("subset_size")) { dataset_conf_.subset_size = conf.at("subset_size"); } - - if (conf.contains("dtype")) { - dataset_conf_.dtype = conf.at("dtype"); - } else { - auto filename = dataset_conf_.base_file; - if (filename.size() > 6 && filename.compare(filename.size() - 6, 6, "f16bin") == 0) { - dataset_conf_.dtype = "half"; - } else if (filename.size() > 9 && - filename.compare(filename.size() - 9, 9, "fp16.fbin") == 0) { - dataset_conf_.dtype = "half"; - } else if (filename.size() > 4 && filename.compare(filename.size() - 4, 4, "fbin") == 0) { - dataset_conf_.dtype = "float"; - } else if (filename.size() > 5 && filename.compare(filename.size() - 5, 5, "u8bin") == 0) { - dataset_conf_.dtype = "uint8"; - } else if (filename.size() > 5 && filename.compare(filename.size() - 5, 5, "i8bin") == 0) { - dataset_conf_.dtype = "int8"; - } else { - log_error("Could not determine data type of the dataset %s", filename.c_str()); - } - } - } - inline void parse_index_(const nlohmann::json& index_conf, - const nlohmann::json& 
search_basic_conf) - { - const int batch_size = search_basic_conf.at("batch_size"); - const int k = search_basic_conf.at("k"); - - for (const auto& conf : index_conf) { - Index index; - index.name = conf.at("name"); - index.algo = conf.at("algo"); - index.build_param = conf.at("build_param"); - index.file = conf.at("file"); - index.batch_size = batch_size; - index.k = k; - - if (conf.contains("multigpu")) { - for (auto it : conf.at("multigpu")) { - index.dev_list.push_back(it); - } - if (index.dev_list.empty()) { throw std::runtime_error("dev_list shouln't be empty!"); } - index.dev_list.shrink_to_fit(); - index.build_param["multigpu"] = conf["multigpu"]; - } - - for (auto param : conf.at("search_params")) { - /* ### Special parameters for backward compatibility ### - - - Local values of `k` and `n_queries` take priority. - - The legacy "batch_size" renamed to `n_queries`. - - Basic search params are used otherwise. - */ - if (!param.contains("k")) { param["k"] = k; } - if (!param.contains("n_queries")) { - if (param.contains("batch_size")) { - param["n_queries"] = param["batch_size"]; - param.erase("batch_size"); - } else { - param["n_queries"] = batch_size; - } - } - index.search_params.push_back(param); - } - - indices_.push_back(index); - } - } - - DatasetConf dataset_conf_; - std::vector indices_; -}; - -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp deleted file mode 100644 index 27be26dfe9..0000000000 --- a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include - -#include -#include - -#include - -#include -#include - -namespace raft::mr { -/** - * @brief `device_memory_resource` derived class that uses mmap to allocate memory. - * This class enables memory allocation using huge pages. - * It is assumed that the allocated memory is directly accessible on device. This currently only - * works on GH systems. - * - * TODO(tfeher): consider improving or removing this helper once we made progress with - * https://github.com/rapidsai/raft/issues/1819 - */ -class cuda_huge_page_resource final : public rmm::mr::device_memory_resource { - public: - cuda_huge_page_resource() = default; - ~cuda_huge_page_resource() override = default; - cuda_huge_page_resource(cuda_huge_page_resource const&) = default; - cuda_huge_page_resource(cuda_huge_page_resource&&) = default; - cuda_huge_page_resource& operator=(cuda_huge_page_resource const&) = default; - cuda_huge_page_resource& operator=(cuda_huge_page_resource&&) = default; - - private: - /** - * @brief Allocates memory of size at least `bytes` using cudaMalloc. - * - * The returned pointer has at least 256B alignment. 
- * - * @note Stream argument is ignored - * - * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled - * - * @param bytes The size, in bytes, of the allocation - * @return void* Pointer to the newly allocated memory - */ - void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override - { - void* _addr{nullptr}; - _addr = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (_addr == MAP_FAILED) { RAFT_FAIL("huge_page_resource::MAP FAILED"); } - if (madvise(_addr, bytes, MADV_HUGEPAGE) == -1) { - munmap(_addr, bytes); - RAFT_FAIL("huge_page_resource::madvise MADV_HUGEPAGE"); - } - memset(_addr, 0, bytes); - return _addr; - } - - /** - * @brief Deallocate memory pointed to by \p p. - * - * @note Stream argument is ignored. - * - * @throws Nothing. - * - * @param p Pointer to be deallocated - */ - void do_deallocate(void* ptr, std::size_t size, rmm::cuda_stream_view) override - { - if (munmap(ptr, size) == -1) { RAFT_FAIL("huge_page_resource::munmap"); } - } - - /** - * @brief Compare this resource to another. - * - * Two cuda_huge_page_resources always compare equal, because they can each - * deallocate memory allocated by the other. - * - * @throws Nothing. - * - * @param other The other resource to compare to - * @return true If the two resources are equivalent - * @return false If the two resources are not equal - */ - [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override - { - return dynamic_cast(&other) != nullptr; - } -}; -} // namespace raft::mr diff --git a/cpp/bench/ann/src/common/cuda_pinned_resource.hpp b/cpp/bench/ann/src/common/cuda_pinned_resource.hpp deleted file mode 100644 index 3256fc293c..0000000000 --- a/cpp/bench/ann/src/common/cuda_pinned_resource.hpp +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include - -#include - -namespace raft::mr { -/** - * @brief `device_memory_resource` derived class that uses cudaMallocHost/Free for - * allocation/deallocation. - * - * This is almost the same as rmm::mr::host::pinned_memory_resource, but it has - * device_memory_resource as base class. Pinned memory can be accessed from device, - * and using this allocator we can create device_mdarray backed by pinned allocator. - * - * TODO(tfeher): it would be preferred to just rely on the existing allocator from rmm - * (pinned_memory_resource), but that is incompatible with the container_policy class - * for device matrix, because the latter expects a device_memory_resource. We shall - * revise this once we progress with Issue https://github.com/rapidsai/raft/issues/1819 - */ -class cuda_pinned_resource final : public rmm::mr::device_memory_resource { - public: - cuda_pinned_resource() = default; - ~cuda_pinned_resource() override = default; - cuda_pinned_resource(cuda_pinned_resource const&) = default; - cuda_pinned_resource(cuda_pinned_resource&&) = default; - cuda_pinned_resource& operator=(cuda_pinned_resource const&) = default; - cuda_pinned_resource& operator=(cuda_pinned_resource&&) = default; - - private: - /** - * @brief Allocates memory of size at least `bytes` using cudaMalloc. - * - * The returned pointer has at least 256B alignment. 
- * - * @note Stream argument is ignored - * - * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled - * - * @param bytes The size, in bytes, of the allocation - * @return void* Pointer to the newly allocated memory - */ - void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override - { - void* ptr{nullptr}; - RMM_CUDA_TRY_ALLOC(cudaMallocHost(&ptr, bytes)); - return ptr; - } - - /** - * @brief Deallocate memory pointed to by \p p. - * - * @note Stream argument is ignored. - * - * @throws Nothing. - * - * @param p Pointer to be deallocated - */ - void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view) override - { - RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); - } - - /** - * @brief Compare this resource to another. - * - * Two cuda_pinned_resources always compare equal, because they can each - * deallocate memory allocated by the other. - * - * @throws Nothing. - * - * @param other The other resource to compare to - * @return true If the two resources are equivalent - * @return false If the two resources are not equal - */ - [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override - { - return dynamic_cast(&other) != nullptr; - } -}; -} // namespace raft::mr diff --git a/cpp/bench/ann/src/common/cuda_stub.hpp b/cpp/bench/ann/src/common/cuda_stub.hpp deleted file mode 100644 index 5ed138a86d..0000000000 --- a/cpp/bench/ann/src/common/cuda_stub.hpp +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* -The content of this header is governed by two preprocessor definitions: - - - BUILD_CPU_ONLY - whether none of the CUDA functions are used. - - ANN_BENCH_LINK_CUDART - dynamically link against this string if defined. - -___________________________________________________________________________________ -|BUILD_CPU_ONLY | ANN_BENCH_LINK_CUDART | cudart | cuda_runtime_api.h | -| | | found | needed | included | -|---------------|-----------------------|-----------|---------|--------------------| -| ON | | false | false | NO | -| ON | "cudart.so.xx.xx" | false | false | NO | -| OFF | | true | true | YES | -| OFF | "cudart.so.xx.xx" | | true | YES | ------------------------------------------------------------------------------------- -*/ - -#pragma once - -#ifndef BUILD_CPU_ONLY -#include -#include -#ifdef ANN_BENCH_LINK_CUDART -#include - -#include -#endif -#else -#include - -typedef void* cudaStream_t; -typedef void* cudaEvent_t; -typedef uint16_t half; -#endif - -namespace raft::bench::ann { - -struct cuda_lib_handle { - void* handle{nullptr}; - explicit cuda_lib_handle() - { -#ifdef ANN_BENCH_LINK_CUDART - constexpr int kFlags = RTLD_NOW | RTLD_GLOBAL | RTLD_DEEPBIND | RTLD_NODELETE; - // The full name of the linked cudart library 'cudart.so.MAJOR.MINOR.PATCH' - char libname[] = ANN_BENCH_LINK_CUDART; // NOLINT - handle = dlopen(ANN_BENCH_LINK_CUDART, kFlags); - if (handle != nullptr) { return; } - // try strip the PATCH - auto p = strrchr(libname, '.'); - p[0] = 0; - handle = dlopen(libname, kFlags); - if (handle != nullptr) { return; } - // try set the MINOR version to 0 - p = strrchr(libname, '.'); - p[1] = '0'; - p[2] = 0; - handle = dlopen(libname, kFlags); - if (handle != nullptr) { return; } - // try strip the MINOR - p[0] = 0; - handle = dlopen(libname, kFlags); - if (handle != nullptr) { return; } - // try strip the MAJOR - p = 
strrchr(libname, '.'); - p[0] = 0; - handle = dlopen(libname, kFlags); -#endif - } - ~cuda_lib_handle() noexcept - { -#ifdef ANN_BENCH_LINK_CUDART - if (handle != nullptr) { dlclose(handle); } -#endif - } - - template - auto sym(const char* name) -> Symbol - { -#ifdef ANN_BENCH_LINK_CUDART - return reinterpret_cast(dlsym(handle, name)); -#else - return nullptr; -#endif - } - - /** Whether this is NOT a cpu-only package. */ - [[nodiscard]] constexpr inline auto needed() const -> bool - { -#if defined(BUILD_CPU_ONLY) - return false; -#else - return true; -#endif - } - - /** CUDA found, either at compile time or at runtime. */ - [[nodiscard]] inline auto found() const -> bool - { -#if defined(BUILD_CPU_ONLY) - return false; -#elif defined(ANN_BENCH_LINK_CUDART) - return handle != nullptr; -#else - return true; -#endif - } -}; - -static inline cuda_lib_handle cudart{}; - -#ifdef ANN_BENCH_LINK_CUDART -namespace stub { - -[[gnu::weak, gnu::noinline]] cudaError_t cudaMemcpy(void* dst, - const void* src, - size_t count, - enum cudaMemcpyKind kind) -{ - return cudaSuccess; -} - -[[gnu::weak, gnu::noinline]] cudaError_t cudaMalloc(void** ptr, size_t size) -{ - *ptr = nullptr; - return cudaSuccess; -} -[[gnu::weak, gnu::noinline]] cudaError_t cudaMemset(void* devPtr, int value, size_t count) -{ - return cudaSuccess; -} -[[gnu::weak, gnu::noinline]] cudaError_t cudaFree(void* devPtr) { return cudaSuccess; } -[[gnu::weak, gnu::noinline]] cudaError_t cudaStreamCreate(cudaStream_t* pStream) -{ - *pStream = 0; - return cudaSuccess; -} -[[gnu::weak, gnu::noinline]] cudaError_t cudaStreamCreateWithFlags(cudaStream_t* pStream, - unsigned int flags) -{ - *pStream = 0; - return cudaSuccess; -} -[[gnu::weak, gnu::noinline]] cudaError_t cudaStreamDestroy(cudaStream_t pStream) -{ - return cudaSuccess; -} -[[gnu::weak, gnu::noinline]] cudaError_t cudaDeviceSynchronize() { return cudaSuccess; } - -[[gnu::weak, gnu::noinline]] cudaError_t cudaStreamSynchronize(cudaStream_t pStream) -{ - 
return cudaSuccess; -} -[[gnu::weak, gnu::noinline]] cudaError_t cudaEventCreate(cudaEvent_t* event) -{ - *event = 0; - return cudaSuccess; -} -[[gnu::weak, gnu::noinline]] cudaError_t cudaEventRecord(cudaEvent_t event, cudaStream_t stream) -{ - return cudaSuccess; -} -[[gnu::weak, gnu::noinline]] cudaError_t cudaEventSynchronize(cudaEvent_t event) -{ - return cudaSuccess; -} -[[gnu::weak, gnu::noinline]] cudaError_t cudaEventElapsedTime(float* ms, - cudaEvent_t start, - cudaEvent_t end) -{ - *ms = 0; - return cudaSuccess; -} -[[gnu::weak, gnu::noinline]] cudaError_t cudaEventDestroy(cudaEvent_t event) { return cudaSuccess; } -[[gnu::weak, gnu::noinline]] cudaError_t cudaGetDevice(int* device) -{ - *device = 0; - return cudaSuccess; -}; -[[gnu::weak, gnu::noinline]] cudaError_t cudaDriverGetVersion(int* driver) -{ - *driver = 0; - return cudaSuccess; -}; -[[gnu::weak, gnu::noinline]] cudaError_t cudaRuntimeGetVersion(int* runtime) -{ - *runtime = 0; - return cudaSuccess; -}; -[[gnu::weak, gnu::noinline]] cudaError_t cudaGetDeviceProperties(struct cudaDeviceProp* prop, - int device) -{ - *prop = cudaDeviceProp{}; - return cudaSuccess; -} - -} // namespace stub - -#define RAFT_DECLARE_CUDART(fun) \ - static inline decltype(&stub::fun) fun = \ - cudart.found() ? 
cudart.sym(#fun) : &stub::fun - -RAFT_DECLARE_CUDART(cudaMemcpy); -RAFT_DECLARE_CUDART(cudaMalloc); -RAFT_DECLARE_CUDART(cudaMemset); -RAFT_DECLARE_CUDART(cudaFree); -RAFT_DECLARE_CUDART(cudaStreamCreate); -RAFT_DECLARE_CUDART(cudaStreamCreateWithFlags); -RAFT_DECLARE_CUDART(cudaStreamDestroy); -RAFT_DECLARE_CUDART(cudaDeviceSynchronize); -RAFT_DECLARE_CUDART(cudaStreamSynchronize); -RAFT_DECLARE_CUDART(cudaEventCreate); -RAFT_DECLARE_CUDART(cudaEventRecord); -RAFT_DECLARE_CUDART(cudaEventSynchronize); -RAFT_DECLARE_CUDART(cudaEventElapsedTime); -RAFT_DECLARE_CUDART(cudaEventDestroy); -RAFT_DECLARE_CUDART(cudaGetDevice); -RAFT_DECLARE_CUDART(cudaDriverGetVersion); -RAFT_DECLARE_CUDART(cudaRuntimeGetVersion); -RAFT_DECLARE_CUDART(cudaGetDeviceProperties); - -#undef RAFT_DECLARE_CUDART -#endif - -}; // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/dataset.hpp b/cpp/bench/ann/src/common/dataset.hpp deleted file mode 100644 index 8fcff77d3c..0000000000 --- a/cpp/bench/ann/src/common/dataset.hpp +++ /dev/null @@ -1,495 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -#include "util.hpp" - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace raft::bench::ann { - -// http://big-ann-benchmarks.com/index.html: -// binary format that starts with 8 bytes of data consisting of num_points(uint32_t) -// num_dimensions(uint32) followed by num_pts x num_dimensions x sizeof(type) bytes of -// data stored one vector after another. -// Data files will have suffixes .fbin, .u8bin, and .i8bin to represent float32, uint8 -// and int8 type data. -// As extensions for this benchmark, half and int data files will have suffixes .f16bin -// and .ibin, respectively. -template -class BinFile { - public: - BinFile(const std::string& file, - const std::string& mode, - uint32_t subset_first_row = 0, - uint32_t subset_size = 0); - ~BinFile() - { - if (mapped_ptr_ != nullptr) { unmap(); } - if (fp_ != nullptr) { fclose(fp_); } - } - BinFile(const BinFile&) = delete; - BinFile& operator=(const BinFile&) = delete; - - void get_shape(size_t* nrows, int* ndims) const - { - assert(read_mode_); - if (!fp_) { open_file_(); } - *nrows = nrows_; - *ndims = ndims_; - } - - void read(T* data) const - { - assert(read_mode_); - if (!fp_) { open_file_(); } - size_t total = static_cast(nrows_) * ndims_; - if (fread(data, sizeof(T), total, fp_) != total) { - throw std::runtime_error("fread() BinFile " + file_ + " failed"); - } - } - - void write(const T* data, uint32_t nrows, uint32_t ndims) - { - assert(!read_mode_); - if (!fp_) { open_file_(); } - if (fwrite(&nrows, sizeof(uint32_t), 1, fp_) != 1) { - throw std::runtime_error("fwrite() BinFile " + file_ + " failed"); - } - if (fwrite(&ndims, sizeof(uint32_t), 1, fp_) != 1) { - throw std::runtime_error("fwrite() BinFile " + file_ + " failed"); - } - - size_t total = static_cast(nrows) * ndims; - if (fwrite(data, sizeof(T), total, fp_) != total) { - throw std::runtime_error("fwrite() BinFile " + file_ + " failed"); - } - } - - T* 
map() const - { - assert(read_mode_); - if (!fp_) { open_file_(); } - int fid = fileno(fp_); - mapped_ptr_ = mmap(nullptr, file_size_, PROT_READ, MAP_PRIVATE, fid, 0); - if (mapped_ptr_ == MAP_FAILED) { - mapped_ptr_ = nullptr; - throw std::runtime_error("mmap error: Value of errno " + std::to_string(errno) + ", " + - std::string(strerror(errno))); - } - return reinterpret_cast(reinterpret_cast(mapped_ptr_) + 2 * sizeof(uint32_t) + - subset_first_row_ * ndims_ * sizeof(T)); - } - - void unmap() const - { - if (munmap(mapped_ptr_, file_size_) == -1) { - throw std::runtime_error("munmap error: " + std::string(strerror(errno))); - } - } - - private: - void check_suffix_(); - void open_file_() const; - - std::string file_; - bool read_mode_; - uint32_t subset_first_row_; - uint32_t subset_size_; - - mutable FILE* fp_{nullptr}; - mutable uint32_t nrows_; - mutable uint32_t ndims_; - mutable size_t file_size_; - mutable void* mapped_ptr_{nullptr}; -}; - -template -BinFile::BinFile(const std::string& file, - const std::string& mode, - uint32_t subset_first_row, - uint32_t subset_size) - : file_(file), - read_mode_(mode == "r"), - subset_first_row_(subset_first_row), - subset_size_(subset_size), - fp_(nullptr) -{ - check_suffix_(); - - if (!read_mode_) { - if (mode == "w") { - if (subset_first_row != 0) { - throw std::runtime_error("subset_first_row should be zero for write mode"); - } - if (subset_size != 0) { - throw std::runtime_error("subset_size should be zero for write mode"); - } - } else { - throw std::runtime_error("BinFile's mode must be either 'r' or 'w': " + file_); - } - } -} - -template -void BinFile::open_file_() const -{ - fp_ = fopen(file_.c_str(), read_mode_ ? 
"r" : "w"); - if (!fp_) { throw std::runtime_error("open BinFile failed: " + file_); } - - if (read_mode_) { - struct stat statbuf; - if (stat(file_.c_str(), &statbuf) != 0) { throw std::runtime_error("stat() failed: " + file_); } - file_size_ = statbuf.st_size; - - uint32_t header[2]; - if (fread(header, sizeof(uint32_t), 2, fp_) != 2) { - throw std::runtime_error("read header of BinFile failed: " + file_); - } - nrows_ = header[0]; - ndims_ = header[1]; - - size_t expected_file_size = - 2 * sizeof(uint32_t) + static_cast(nrows_) * ndims_ * sizeof(T); - if (file_size_ != expected_file_size) { - throw std::runtime_error("expected file size of " + file_ + " is " + - std::to_string(expected_file_size) + ", however, actual size is " + - std::to_string(file_size_)); - } - - if (subset_first_row_ >= nrows_) { - throw std::runtime_error(file_ + ": subset_first_row (" + std::to_string(subset_first_row_) + - ") >= nrows (" + std::to_string(nrows_) + ")"); - } - if (subset_first_row_ + subset_size_ > nrows_) { - throw std::runtime_error(file_ + ": subset_first_row (" + std::to_string(subset_first_row_) + - ") + subset_size (" + std::to_string(subset_size_) + ") > nrows (" + - std::to_string(nrows_) + ")"); - } - - if (subset_first_row_) { - static_assert(sizeof(long) == 8, "fseek() don't support 64-bit offset"); - if (fseek(fp_, sizeof(T) * subset_first_row_ * ndims_, SEEK_CUR) == -1) { - throw std::runtime_error(file_ + ": fseek failed"); - } - nrows_ -= subset_first_row_; - } - if (subset_size_) { nrows_ = subset_size_; } - } -} - -template -void BinFile::check_suffix_() -{ - auto pos = file_.rfind('.'); - if (pos == std::string::npos) { - throw std::runtime_error("name of BinFile doesn't have a suffix: " + file_); - } - std::string suffix = file_.substr(pos + 1); - - if constexpr (std::is_same_v) { - if (suffix != "fbin") { - throw std::runtime_error("BinFile should has .fbin suffix: " + file_); - } - } else if constexpr (std::is_same_v) { - if (suffix != "f16bin" && 
suffix != "fbin") { - throw std::runtime_error("BinFile should has .f16bin suffix: " + file_); - } - } else if constexpr (std::is_same_v) { - if (suffix != "ibin") { - throw std::runtime_error("BinFile should has .ibin suffix: " + file_); - } - } else if constexpr (std::is_same_v) { - if (suffix != "u8bin") { - throw std::runtime_error("BinFile should has .u8bin suffix: " + file_); - } - } else if constexpr (std::is_same_v) { - if (suffix != "i8bin") { - throw std::runtime_error("BinFile should has .i8bin suffix: " + file_); - } - } else { - throw std::runtime_error( - "T of BinFile should be one of float, half, int, uint8_t, or int8_t"); - } -} - -template -class Dataset { - public: - Dataset(const std::string& name) : name_(name) {} - Dataset(const std::string& name, const std::string& distance) : name_(name), distance_(distance) - { - } - Dataset(const Dataset&) = delete; - Dataset& operator=(const Dataset&) = delete; - virtual ~Dataset(); - - std::string name() const { return name_; } - std::string distance() const { return distance_; } - virtual int dim() const = 0; - virtual uint32_t max_k() const = 0; - virtual size_t base_set_size() const = 0; - virtual size_t query_set_size() const = 0; - - // load data lazily, so don't pay the overhead of reading unneeded set - // e.g. 
don't load base set when searching - const T* base_set() const - { - if (!base_set_) { load_base_set_(); } - return base_set_; - } - - const T* query_set() const - { - if (!query_set_) { load_query_set_(); } - return query_set_; - } - - const int32_t* gt_set() const - { - if (!gt_set_) { load_gt_set_(); } - return gt_set_; - } - - const T* base_set_on_gpu() const; - const T* query_set_on_gpu() const; - const T* mapped_base_set() const; - - auto query_set(MemoryType memory_type) const -> const T* - { - switch (memory_type) { - case MemoryType::Device: return query_set_on_gpu(); - default: return query_set(); - } - } - - auto base_set(MemoryType memory_type) const -> const T* - { - switch (memory_type) { - case MemoryType::Device: return base_set_on_gpu(); - case MemoryType::Host: return base_set(); - case MemoryType::HostMmap: return mapped_base_set(); - default: return nullptr; - } - } - - protected: - virtual void load_base_set_() const = 0; - virtual void load_gt_set_() const = 0; - virtual void load_query_set_() const = 0; - virtual void map_base_set_() const = 0; - - std::string name_; - std::string distance_; - - mutable T* base_set_ = nullptr; - mutable T* query_set_ = nullptr; - mutable T* d_base_set_ = nullptr; - mutable T* d_query_set_ = nullptr; - mutable T* mapped_base_set_ = nullptr; - mutable int32_t* gt_set_ = nullptr; -}; - -template -Dataset::~Dataset() -{ - delete[] base_set_; - delete[] query_set_; - delete[] gt_set_; -#ifndef BUILD_CPU_ONLY - if (d_base_set_) { cudaFree(d_base_set_); } - if (d_query_set_) { cudaFree(d_query_set_); } -#endif -} - -template -const T* Dataset::base_set_on_gpu() const -{ -#ifndef BUILD_CPU_ONLY - if (!d_base_set_) { - base_set(); - cudaMalloc((void**)&d_base_set_, base_set_size() * dim() * sizeof(T)); - cudaMemcpy(d_base_set_, base_set_, base_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice); - } -#endif - return d_base_set_; -} - -template -const T* Dataset::query_set_on_gpu() const -{ -#ifndef BUILD_CPU_ONLY 
- if (!d_query_set_) { - query_set(); - cudaMalloc((void**)&d_query_set_, query_set_size() * dim() * sizeof(T)); - cudaMemcpy( - d_query_set_, query_set_, query_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice); - } -#endif - return d_query_set_; -} - -template -const T* Dataset::mapped_base_set() const -{ - if (!mapped_base_set_) { map_base_set_(); } - return mapped_base_set_; -} - -template -class BinDataset : public Dataset { - public: - BinDataset(const std::string& name, - const std::string& base_file, - size_t subset_first_row, - size_t subset_size, - const std::string& query_file, - const std::string& distance, - const std::optional& groundtruth_neighbors_file); - - int dim() const override; - uint32_t max_k() const override; - size_t base_set_size() const override; - size_t query_set_size() const override; - - private: - void load_base_set_() const override; - void load_query_set_() const override; - void load_gt_set_() const override; - void map_base_set_() const override; - - mutable int dim_ = 0; - mutable uint32_t max_k_ = 0; - mutable size_t base_set_size_ = 0; - mutable size_t query_set_size_ = 0; - - BinFile base_file_; - BinFile query_file_; - std::optional> gt_file_{std::nullopt}; -}; - -template -BinDataset::BinDataset(const std::string& name, - const std::string& base_file, - size_t subset_first_row, - size_t subset_size, - const std::string& query_file, - const std::string& distance, - const std::optional& groundtruth_neighbors_file) - : Dataset(name, distance), - base_file_(base_file, "r", subset_first_row, subset_size), - query_file_(query_file, "r") -{ - if (groundtruth_neighbors_file.has_value()) { - gt_file_.emplace(groundtruth_neighbors_file.value(), "r"); - } -} - -template -int BinDataset::dim() const -{ - if (dim_ > 0) { return dim_; } - if (base_set_size() > 0) { return dim_; } - if (query_set_size() > 0) { return dim_; } - return dim_; -} - -template -uint32_t BinDataset::max_k() const -{ - if (!this->gt_set_) { load_gt_set_(); 
} - return max_k_; -} - -template -size_t BinDataset::query_set_size() const -{ - if (query_set_size_ > 0) { return query_set_size_; } - int dim; - query_file_.get_shape(&query_set_size_, &dim); - if (query_set_size_ == 0) { throw std::runtime_error("Zero query set size"); } - if (dim == 0) { throw std::runtime_error("Zero query set dim"); } - if (dim_ == 0) { - dim_ = dim; - } else if (dim_ != dim) { - throw std::runtime_error("base set dim (" + std::to_string(dim_) + ") != query set dim (" + - std::to_string(dim)); - } - return query_set_size_; -} - -template -size_t BinDataset::base_set_size() const -{ - if (base_set_size_ > 0) { return base_set_size_; } - int dim; - base_file_.get_shape(&base_set_size_, &dim); - if (base_set_size_ == 0) { throw std::runtime_error("Zero base set size"); } - if (dim == 0) { throw std::runtime_error("Zero base set dim"); } - if (dim_ == 0) { - dim_ = dim; - } else if (dim_ != dim) { - throw std::runtime_error("base set dim (" + std::to_string(dim) + ") != query set dim (" + - std::to_string(dim_)); - } - return base_set_size_; -} - -template -void BinDataset::load_base_set_() const -{ - this->base_set_ = new T[base_set_size() * dim()]; - base_file_.read(this->base_set_); -} - -template -void BinDataset::load_query_set_() const -{ - this->query_set_ = new T[query_set_size() * dim()]; - query_file_.read(this->query_set_); -} - -template -void BinDataset::load_gt_set_() const -{ - if (gt_file_.has_value()) { - size_t queries; - int k; - gt_file_->get_shape(&queries, &k); - this->gt_set_ = new std::int32_t[queries * k]; - gt_file_->read(this->gt_set_); - max_k_ = k; - } -} - -template -void BinDataset::map_base_set_() const -{ - this->mapped_base_set_ = base_file_.map(); -} - -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/thread_pool.hpp b/cpp/bench/ann/src/common/thread_pool.hpp deleted file mode 100644 index 4a5684ecb3..0000000000 --- a/cpp/bench/ann/src/common/thread_pool.hpp +++ /dev/null @@ -1,134 +0,0 @@ 
-/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include - -#include -#include -#include -#include -#include -#include -#include - -class FixedThreadPool { - public: - FixedThreadPool(int num_threads) - { - if (num_threads < 1) { - throw std::runtime_error("num_threads must >= 1"); - } else if (num_threads == 1) { - return; - } - - tasks_ = new Task_[num_threads]; - - threads_.reserve(num_threads); - for (int i = 0; i < num_threads; ++i) { - threads_.emplace_back([&, i] { - auto& task = tasks_[i]; - while (true) { - std::unique_lock lock(task.mtx); - task.cv.wait(lock, - [&] { return task.has_task || finished_.load(std::memory_order_relaxed); }); - if (finished_.load(std::memory_order_relaxed)) { break; } - - task.task(); - task.has_task = false; - } - }); - } - } - - ~FixedThreadPool() - { - if (threads_.empty()) { return; } - - finished_.store(true, std::memory_order_relaxed); - for (unsigned i = 0; i < threads_.size(); ++i) { - auto& task = tasks_[i]; - std::lock_guard(task.mtx); - - task.cv.notify_one(); - threads_[i].join(); - } - - delete[] tasks_; - } - - template - void submit(Func f, IdxT len) - { - // Run functions in main thread if thread pool has no threads - if (threads_.empty()) { - for (IdxT i = 0; i < len; ++i) { - f(i); - } - return; - } - - const int num_threads = threads_.size(); - // one extra part for competition among threads - const IdxT items_per_thread = len 
/ (num_threads + 1); - std::atomic cnt(items_per_thread * num_threads); - - // Wrap function - auto wrapped_f = [&](IdxT start, IdxT end) { - for (IdxT i = start; i < end; ++i) { - f(i); - } - - while (true) { - IdxT i = cnt.fetch_add(1, std::memory_order_relaxed); - if (i >= len) { break; } - f(i); - } - }; - - std::vector> futures; - futures.reserve(num_threads); - for (int i = 0; i < num_threads; ++i) { - IdxT start = i * items_per_thread; - auto& task = tasks_[i]; - { - std::lock_guard lock(task.mtx); - (void)lock; // stop nvcc warning - task.task = std::packaged_task([=] { wrapped_f(start, start + items_per_thread); }); - futures.push_back(task.task.get_future()); - task.has_task = true; - } - task.cv.notify_one(); - } - - for (auto& fut : futures) { - fut.wait(); - } - return; - } - - private: - struct alignas(64) Task_ { - std::mutex mtx; - std::condition_variable cv; - bool has_task = false; - std::packaged_task task; - }; - - Task_* tasks_; - std::vector threads_; - std::atomic finished_{false}; -}; diff --git a/cpp/bench/ann/src/common/util.hpp b/cpp/bench/ann/src/common/util.hpp deleted file mode 100644 index 96185c79eb..0000000000 --- a/cpp/bench/ann/src/common/util.hpp +++ /dev/null @@ -1,557 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -#include "ann_types.hpp" -#include "cuda_stub.hpp" // cuda-related utils - -#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND -#include -#endif - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace raft::bench::ann { - -/** - * Current thread id as given by the benchmark State. - * It's populated on every call of a benchmark case. - * It's relevant in the 'throughput' mode of the search benchmarks, - * where some algorithms might want to coordinate allocation of the resources. - */ -inline thread_local int benchmark_thread_id = 0; -/** - * Total concurrent thread count as given by the benchmark State. - * It's populated on every call of a benchmark case. - * It's relevant in the 'throughput' mode of the search benchmarks, - * where some algorithms might want to coordinate allocation of the resources. - */ -inline thread_local int benchmark_n_threads = 1; - -struct cuda_timer { - private: - std::optional stream_; - cudaEvent_t start_{nullptr}; - cudaEvent_t stop_{nullptr}; - double total_time_{0}; - - template - static inline auto extract_stream(AnnT* algo) -> std::optional - { - auto gpu_ann = dynamic_cast(algo); - if (gpu_ann != nullptr && gpu_ann->uses_stream()) { - return std::make_optional(gpu_ann->get_sync_stream()); - } - return std::nullopt; - } - - public: - struct cuda_lap { - private: - cudaStream_t stream_; - cudaEvent_t start_; - cudaEvent_t stop_; - double& total_time_; - - public: - cuda_lap(cudaStream_t stream, cudaEvent_t start, cudaEvent_t stop, double& total_time) - : start_(start), stop_(stop), stream_(stream), total_time_(total_time) - { -#ifndef BUILD_CPU_ONLY - cudaEventRecord(start_, stream_); -#endif - } - cuda_lap() = delete; - - ~cuda_lap() noexcept - { -#ifndef BUILD_CPU_ONLY - cudaEventRecord(stop_, stream_); - cudaEventSynchronize(stop_); - float milliseconds = 0.0f; - cudaEventElapsedTime(&milliseconds, start_, 
stop_); - total_time_ += milliseconds / 1000.0; -#endif - } - }; - - explicit cuda_timer(std::optional stream) : stream_{stream} - { -#ifndef BUILD_CPU_ONLY - if (stream_.has_value()) { - cudaEventCreate(&stop_); - cudaEventCreate(&start_); - } -#endif - } - - template - explicit cuda_timer(const std::unique_ptr& algo) : cuda_timer{extract_stream(algo.get())} - { - } - - ~cuda_timer() noexcept - { -#ifndef BUILD_CPU_ONLY - if (stream_.has_value()) { - cudaStreamSynchronize(stream_.value()); - cudaEventDestroy(start_); - cudaEventDestroy(stop_); - } -#endif - } - - cuda_timer() = delete; - cuda_timer(cuda_timer const&) = delete; - cuda_timer(cuda_timer&&) = delete; - auto operator=(cuda_timer const&) -> cuda_timer& = delete; - auto operator=(cuda_timer&&) -> cuda_timer& = delete; - - [[nodiscard]] auto stream() const -> std::optional { return stream_; } - - [[nodiscard]] auto active() const -> bool { return stream_.has_value(); } - - [[nodiscard]] auto total_time() const -> double { return total_time_; } - - [[nodiscard]] auto lap(bool enabled = true) -> std::optional - { - return enabled && stream_.has_value() - ? std::make_optional(stream_.value(), start_, stop_, total_time_) - : std::nullopt; - } -}; - -#ifndef BUILD_CPU_ONLY -// ATM, rmm::stream does not support passing in flags; hence this helper type. 
-struct non_blocking_stream { - non_blocking_stream() { cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking); } - ~non_blocking_stream() noexcept - { - if (stream_ != nullptr) { cudaStreamDestroy(stream_); } - } - non_blocking_stream(non_blocking_stream const&) = delete; - non_blocking_stream(non_blocking_stream&& other) noexcept { std::swap(stream_, other.stream_); } - auto operator=(non_blocking_stream const&) -> non_blocking_stream& = delete; - auto operator=(non_blocking_stream&&) -> non_blocking_stream& = delete; - [[nodiscard]] auto view() const noexcept -> cudaStream_t { return stream_; } - - private: - cudaStream_t stream_{nullptr}; -}; - -namespace detail { -inline std::vector global_stream_pool(0); -inline std::mutex gsp_mutex; -} // namespace detail -#endif - -/** - * Get a stream associated with the current benchmark thread. - * - * Note, the streams are reused between the benchmark cases. - * This makes it easier to profile and analyse multiple benchmark cases in one timeline using tools - * like nsys. 
- */ -inline auto get_stream_from_global_pool() -> cudaStream_t -{ -#ifndef BUILD_CPU_ONLY - std::lock_guard guard(detail::gsp_mutex); - if (int(detail::global_stream_pool.size()) < benchmark_n_threads) { - detail::global_stream_pool.resize(benchmark_n_threads); - } - return detail::global_stream_pool[benchmark_thread_id].view(); -#else - return nullptr; -#endif -} - -struct result_buffer { - explicit result_buffer(size_t size, cudaStream_t stream) : size_{size}, stream_{stream} - { - if (size_ == 0) { return; } - data_host_ = malloc(size_); -#ifndef BUILD_CPU_ONLY - cudaMallocAsync(&data_device_, size_, stream_); - cudaStreamSynchronize(stream_); -#endif - } - result_buffer() = delete; - result_buffer(result_buffer&&) = delete; - result_buffer& operator=(result_buffer&&) = delete; - result_buffer(const result_buffer&) = delete; - result_buffer& operator=(const result_buffer&) = delete; - ~result_buffer() noexcept - { - if (size_ == 0) { return; } -#ifndef BUILD_CPU_ONLY - cudaFreeAsync(data_device_, stream_); - cudaStreamSynchronize(stream_); -#endif - free(data_host_); - } - - [[nodiscard]] auto size() const noexcept { return size_; } - [[nodiscard]] auto data(ann::MemoryType loc) const noexcept - { - switch (loc) { - case MemoryType::Device: return data_device_; - default: return data_host_; - } - } - - void transfer_data(ann::MemoryType dst, ann::MemoryType src) - { - auto dst_ptr = data(dst); - auto src_ptr = data(src); - if (dst_ptr == src_ptr) { return; } -#ifndef BUILD_CPU_ONLY - cudaMemcpyAsync(dst_ptr, src_ptr, size_, cudaMemcpyDefault, stream_); - cudaStreamSynchronize(stream_); -#endif - } - - private: - size_t size_{0}; - cudaStream_t stream_ = nullptr; - void* data_host_ = nullptr; - void* data_device_ = nullptr; -}; - -namespace detail { -inline std::vector> global_result_buffer_pool(0); -inline std::mutex grp_mutex; -} // namespace detail - -/** - * Get a result buffer associated with the current benchmark thread. 
- * - * Note, the allocations are reused between the benchmark cases. - * This reduces the setup overhead and number of times the context is being blocked - * (this is relevant if there is a persistent kernel running across multiples benchmark cases). - */ -inline auto get_result_buffer_from_global_pool(size_t size) -> result_buffer& -{ - auto stream = get_stream_from_global_pool(); - auto& rb = [stream, size]() -> result_buffer& { - std::lock_guard guard(detail::grp_mutex); - if (static_cast(detail::global_result_buffer_pool.size()) < benchmark_n_threads) { - detail::global_result_buffer_pool.resize(benchmark_n_threads); - } - auto& rb = detail::global_result_buffer_pool[benchmark_thread_id]; - if (!rb || rb->size() < size) { rb = std::make_unique(size, stream); } - return *rb; - }(); - - memset(rb.data(MemoryType::Host), 0, size); -#ifndef BUILD_CPU_ONLY - cudaMemsetAsync(rb.data(MemoryType::Device), 0, size, stream); - cudaStreamSynchronize(stream); -#endif - return rb; -} - -/** - * Delete all streams and memory allocations in the global pool. - * It's called at the end of the `main` function - before global/static variables and cuda context - * is destroyed - to make sure they are destroyed gracefully and correctly seen by analysis tools - * such as nsys. 
- */ -inline void reset_global_device_resources() -{ -#ifndef BUILD_CPU_ONLY - std::lock_guard guard(detail::gsp_mutex); - detail::global_result_buffer_pool.resize(0); - detail::global_stream_pool.resize(0); -#endif -} - -inline auto cuda_info() -{ - std::vector> props; -#ifndef BUILD_CPU_ONLY - int dev, driver = 0, runtime = 0; - cudaDriverGetVersion(&driver); - cudaRuntimeGetVersion(&runtime); - - cudaDeviceProp device_prop; - cudaGetDevice(&dev); - cudaGetDeviceProperties(&device_prop, dev); - props.emplace_back("gpu_name", std::string(device_prop.name)); - props.emplace_back("gpu_sm_count", std::to_string(device_prop.multiProcessorCount)); - props.emplace_back("gpu_sm_freq", std::to_string(device_prop.clockRate * 1e3)); - props.emplace_back("gpu_mem_freq", std::to_string(device_prop.memoryClockRate * 1e3)); - props.emplace_back("gpu_mem_bus_width", std::to_string(device_prop.memoryBusWidth)); - props.emplace_back("gpu_mem_global_size", std::to_string(device_prop.totalGlobalMem)); - props.emplace_back("gpu_mem_shared_size", std::to_string(device_prop.sharedMemPerMultiprocessor)); - props.emplace_back("gpu_driver_version", - std::to_string(driver / 1000) + "." + std::to_string((driver % 100) / 10)); - props.emplace_back("gpu_runtime_version", - std::to_string(runtime / 1000) + "." 
+ std::to_string((runtime % 100) / 10)); -#endif - return props; -} - -struct nvtx_case { -#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND - private: - std::string case_name_; - std::array iter_name_{0}; - nvtxDomainHandle_t domain_; - int64_t iteration_ = 0; - nvtxEventAttributes_t case_attrib_{0}; - nvtxEventAttributes_t iter_attrib_{0}; -#endif - - public: - struct nvtx_lap { -#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND - private: - nvtxDomainHandle_t domain_; - - public: - nvtx_lap(nvtxDomainHandle_t domain, nvtxEventAttributes_t* attr) : domain_(domain) - { - nvtxDomainRangePushEx(domain_, attr); - } - nvtx_lap() = delete; - ~nvtx_lap() noexcept { nvtxDomainRangePop(domain_); } -#endif - }; - -#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND - explicit nvtx_case(std::string case_name) - : case_name_(std::move(case_name)), domain_(nvtxDomainCreateA("ANN benchmark")) - { - case_attrib_.version = NVTX_VERSION; - iter_attrib_.version = NVTX_VERSION; - case_attrib_.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; - iter_attrib_.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; - case_attrib_.colorType = NVTX_COLOR_ARGB; - iter_attrib_.colorType = NVTX_COLOR_ARGB; - case_attrib_.messageType = NVTX_MESSAGE_TYPE_ASCII; - iter_attrib_.messageType = NVTX_MESSAGE_TYPE_ASCII; - case_attrib_.message.ascii = case_name_.c_str(); - auto c = std::hash{}(case_name_); - case_attrib_.color = c | 0xA0A0A0; - nvtxDomainRangePushEx(domain_, &case_attrib_); - } - - ~nvtx_case() - { - nvtxDomainRangePop(domain_); - nvtxDomainDestroy(domain_); - } -#else - explicit nvtx_case(std::string) {} -#endif - - [[nodiscard]] auto lap() -> nvtx_case::nvtx_lap - { -#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND - auto i = iteration_++; - uint32_t c = (i % 5); - uint32_t r = 150 + c * 20; - uint32_t g = 200 + c * 10; - uint32_t b = 220 + c * 5; - std::snprintf(iter_name_.data(), iter_name_.size(), "Lap %zd", i); - iter_attrib_.message.ascii = iter_name_.data(); - iter_attrib_.color = (r << 16) + (g << 8) + b; - return nvtx_lap{domain_, &iter_attrib_}; -#else 
- return nvtx_lap{}; -#endif - } -}; - -/** - * A progress tracker that allows syncing threads multiple times and resets the global - * progress once the threads are done. - */ -struct progress_barrier { - progress_barrier() = default; - ~progress_barrier() noexcept - { - { - // Lock makes sure the notified threads see the updates to `done_`. - std::unique_lock lk(mutex_); - done_.store(true, std::memory_order_relaxed); - cv_.notify_all(); - } - // This is the only place where the order of the updates to thread_progress_ and done_ is - // important. They are not guarded by the mutex, and `done_` must not be reset to `true` by - // other threads after the `total_progress_` is zero. - // Hence the default memory order (std::memory_order_seq_cst). - auto rem = total_progress_.fetch_sub(thread_progress_); - if (rem == thread_progress_) { - // the last thread to exit clears the progress state. - done_.store(false); - } - } - - /** - * Advance the progress counter by `n` and return the previous `progress` value. - * - * This can be used to track which thread arrives on the call site first. - * - * @return the previous progress counter value (before incrementing it by `n`). - */ - auto arrive(int n) - { - thread_progress_ += n; - // Lock makes sure the notified threads see the updates to `total_progress_`. - std::unique_lock lk(mutex_); - auto prev = total_progress_.fetch_add(n, std::memory_order_relaxed); - cv_.notify_all(); - return prev; - } - - /** - * Wait till the progress counter reaches `n` or finishes abnormally. - * - * @return the latest observed value of the progress counter. 
- */ - auto wait(int limit) - { - int cur = total_progress_.load(std::memory_order_relaxed); - if (cur >= limit) { return cur; } - auto done = done_.load(std::memory_order_relaxed); - if (done) { return cur; } - std::unique_lock lk(mutex_); - while (cur < limit && !done) { - using namespace std::chrono_literals; - cv_.wait_for(lk, 10ms); - cur = total_progress_.load(std::memory_order_relaxed); - done = done_.load(std::memory_order_relaxed); - } - return cur; - } - - private: - static inline std::atomic total_progress_; - static inline std::atomic done_; - static inline std::mutex mutex_; - static inline std::condition_variable cv_; - int thread_progress_{0}; -}; - -inline std::vector split(const std::string& s, char delimiter) -{ - std::vector tokens; - std::string token; - std::istringstream iss(s); - while (getline(iss, token, delimiter)) { - if (!token.empty()) { tokens.push_back(token); } - } - return tokens; -} - -inline bool file_exists(const std::string& filename) -{ - struct stat statbuf; - if (stat(filename.c_str(), &statbuf) != 0) { return false; } - return S_ISREG(statbuf.st_mode); -} - -inline bool dir_exists(const std::string& dir) -{ - struct stat statbuf; - if (stat(dir.c_str(), &statbuf) != 0) { return false; } - return S_ISDIR(statbuf.st_mode); -} - -inline bool create_dir(const std::string& dir) -{ - const auto path = split(dir, '/'); - - std::string cwd; - if (!dir.empty() && dir[0] == '/') { cwd += '/'; } - - for (const auto& p : path) { - cwd += p + "/"; - if (!dir_exists(cwd)) { - int ret = mkdir(cwd.c_str(), S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); - if (ret != 0) { return false; } - } - } - return true; -} - -inline void make_sure_parent_dir_exists(const std::string& file_path) -{ - const auto pos = file_path.rfind('/'); - if (pos != std::string::npos) { - auto dir = file_path.substr(0, pos); - if (!dir_exists(dir)) { create_dir(dir); } - } -} - -inline auto combine_path(const std::string& dir, const std::string& path) -{ - 
std::filesystem::path p_dir(dir); - std::filesystem::path p_suf(path); - return (p_dir / p_suf).string(); -} - -template -void log_(const char* level, const Ts&... vs) -{ - char buf[20]; - std::time_t now = std::time(nullptr); - std::strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", std::localtime(&now)); - printf("%s [%s] ", buf, level); - if constexpr (sizeof...(Ts) == 1) { - printf("%s", vs...); - } else { - printf(vs...); - } - printf("\n"); - fflush(stdout); -} - -template -void log_info(Ts&&... vs) -{ - log_("info", std::forward(vs)...); -} - -template -void log_warn(Ts&&... vs) -{ - log_("warn", std::forward(vs)...); -} - -template -void log_error(Ts&&... vs) -{ - log_("error", std::forward(vs)...); -} - -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp deleted file mode 100644 index 234b33d80a..0000000000 --- a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../common/ann_types.hpp" -#include "faiss_cpu_wrapper.h" - -#define JSON_DIAGNOSTICS 1 -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace raft::bench::ann { - -template -void parse_base_build_param(const nlohmann::json& conf, - typename raft::bench::ann::FaissCpu::BuildParam& param) -{ - param.nlist = conf.at("nlist"); - if (conf.contains("ratio")) { param.ratio = conf.at("ratio"); } -} - -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::FaissCpuIVFFlat::BuildParam& param) -{ - parse_base_build_param(conf, param); -} - -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::FaissCpuIVFPQ::BuildParam& param) -{ - parse_base_build_param(conf, param); - param.M = conf.at("M"); - if (conf.contains("use_precomputed_table")) { - param.use_precomputed_table = conf.at("use_precomputed_table"); - } else { - param.use_precomputed_table = false; - } - if (conf.contains("bitsPerCode")) { - param.bitsPerCode = conf.at("bitsPerCode"); - } else { - param.bitsPerCode = 8; - } -} - -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::FaissCpuIVFSQ::BuildParam& param) -{ - parse_base_build_param(conf, param); - param.quantizer_type = conf.at("quantizer_type"); -} - -template -void parse_search_param(const nlohmann::json& conf, - typename raft::bench::ann::FaissCpu::SearchParam& param) -{ - param.nprobe = conf.at("nprobe"); - if (conf.contains("refine_ratio")) { param.refine_ratio = conf.at("refine_ratio"); } - if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); } -} - -template class Algo> -std::unique_ptr> make_algo(raft::bench::ann::Metric metric, - int dim, - const nlohmann::json& conf) -{ - typename Algo::BuildParam param; - parse_build_param(conf, param); - return std::make_unique>(metric, dim, param); -} - -template class Algo> -std::unique_ptr> 
make_algo(raft::bench::ann::Metric metric, - int dim, - const nlohmann::json& conf, - const std::vector& dev_list) -{ - typename Algo::BuildParam param; - parse_build_param(conf, param); - - (void)dev_list; - return std::make_unique>(metric, dim, param); -} - -template -std::unique_ptr> create_algo(const std::string& algo, - const std::string& distance, - int dim, - const nlohmann::json& conf, - const std::vector& dev_list) -{ - // stop compiler warning; not all algorithms support multi-GPU so it may not be used - (void)dev_list; - - std::unique_ptr> ann; - - if constexpr (std::is_same_v) { - raft::bench::ann::Metric metric = parse_metric(distance); - if (algo == "faiss_cpu_ivf_flat") { - ann = make_algo(metric, dim, conf, dev_list); - } else if (algo == "faiss_cpu_ivf_pq") { - ann = make_algo(metric, dim, conf); - } else if (algo == "faiss_cpu_ivf_sq") { - ann = make_algo(metric, dim, conf); - } else if (algo == "faiss_cpu_flat") { - ann = std::make_unique>(metric, dim); - } - } - - if constexpr (std::is_same_v) {} - - if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } - - return ann; -} - -template -std::unique_ptr::AnnSearchParam> create_search_param( - const std::string& algo, const nlohmann::json& conf) -{ - if (algo == "faiss_cpu_ivf_flat" || algo == "faiss_cpu_ivf_pq" || algo == "faiss_cpu_ivf_sq") { - auto param = std::make_unique::SearchParam>(); - parse_search_param(conf, *param); - return param; - } else if (algo == "faiss_cpu_flat") { - auto param = std::make_unique::SearchParam>(); - return param; - } - // else - throw std::runtime_error("invalid algo: '" + algo + "'"); -} - -} // namespace raft::bench::ann - -REGISTER_ALGO_INSTANCE(float); -REGISTER_ALGO_INSTANCE(std::int8_t); -REGISTER_ALGO_INSTANCE(std::uint8_t); - -#ifdef ANN_BENCH_BUILD_MAIN -#include "../common/benchmark.hpp" -int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } -#endif diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h 
b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h deleted file mode 100644 index c7ce4595b5..0000000000 --- a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h +++ /dev/null @@ -1,326 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "../common/ann_types.hpp" -#include "../common/thread_pool.hpp" - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -namespace { - -faiss::MetricType parse_metric_type(raft::bench::ann::Metric metric) -{ - if (metric == raft::bench::ann::Metric::kInnerProduct) { - return faiss::METRIC_INNER_PRODUCT; - } else if (metric == raft::bench::ann::Metric::kEuclidean) { - return faiss::METRIC_L2; - } else { - throw std::runtime_error("faiss supports only metric type of inner product and L2"); - } -} -} // namespace - -namespace raft::bench::ann { - -template -class FaissCpu : public ANN { - public: - using typename ANN::AnnSearchParam; - struct SearchParam : public AnnSearchParam { - int nprobe; - float refine_ratio = 1.0; - int num_threads = omp_get_num_procs(); - }; - - struct BuildParam { - int nlist = 1; - int ratio = 2; - }; - - FaissCpu(Metric metric, int dim, const BuildParam& param) - : ANN(metric, dim), - metric_type_(parse_metric_type(metric)), - nlist_{param.nlist}, - training_sample_fraction_{1.0 / double(param.ratio)} - { - static_assert(std::is_same_v, "faiss support 
only float type"); - } - - void build(const T* dataset, size_t nrow) final; - - void set_search_param(const AnnSearchParam& param) override; - - void init_quantizer(int dim) - { - if (this->metric_type_ == faiss::MetricType::METRIC_L2) { - this->quantizer_ = std::make_shared(dim); - } else if (this->metric_type_ == faiss::MetricType::METRIC_INNER_PRODUCT) { - this->quantizer_ = std::make_shared(dim); - } - } - - // TODO: if the number of results is less than k, the remaining elements of 'neighbors' - // will be filled with (size_t)-1 - void search(const T* queries, - int batch_size, - int k, - AnnBase::index_type* neighbors, - float* distances) const final; - - AlgoProperty get_preference() const override - { - AlgoProperty property; - // to enable building big dataset which is larger than memory - property.dataset_memory_type = MemoryType::Host; - property.query_memory_type = MemoryType::Host; - return property; - } - - protected: - template - void save_(const std::string& file) const; - - template - void load_(const std::string& file); - - std::shared_ptr index_; - std::shared_ptr quantizer_; - std::shared_ptr index_refine_; - faiss::MetricType metric_type_; - int nlist_; - double training_sample_fraction_; - - int num_threads_; - std::shared_ptr thread_pool_; -}; - -template -void FaissCpu::build(const T* dataset, size_t nrow) -{ - auto index_ivf = dynamic_cast(index_.get()); - if (index_ivf != nullptr) { - // set the min/max training size for clustering to use the whole provided training set. - double trainset_size = training_sample_fraction_ * static_cast(nrow); - double points_per_centroid = trainset_size / static_cast(nlist_); - int max_ppc = std::ceil(points_per_centroid); - int min_ppc = std::floor(points_per_centroid); - if (min_ppc < index_ivf->cp.min_points_per_centroid) { - RAFT_LOG_WARN( - "The suggested training set size %zu (data size %zu, training sample ratio %f) yields %d " - "points per cluster (n_lists = %d). 
This is smaller than the FAISS default " - "min_points_per_centroid = %d.", - static_cast(trainset_size), - nrow, - training_sample_fraction_, - min_ppc, - nlist_, - index_ivf->cp.min_points_per_centroid); - } - index_ivf->cp.max_points_per_centroid = max_ppc; - index_ivf->cp.min_points_per_centroid = min_ppc; - } - index_->train(nrow, dataset); // faiss::IndexFlat::train() will do nothing - assert(index_->is_trained); - index_->add(nrow, dataset); - index_refine_ = std::make_shared(this->index_.get(), dataset); -} - -template -void FaissCpu::set_search_param(const AnnSearchParam& param) -{ - auto search_param = dynamic_cast(param); - int nprobe = search_param.nprobe; - assert(nprobe <= nlist_); - dynamic_cast(index_.get())->nprobe = nprobe; - - if (search_param.refine_ratio > 1.0) { - this->index_refine_.get()->k_factor = search_param.refine_ratio; - } - - if (!thread_pool_ || num_threads_ != search_param.num_threads) { - num_threads_ = search_param.num_threads; - thread_pool_ = std::make_shared(num_threads_); - } -} - -template -void FaissCpu::search( - const T* queries, int batch_size, int k, AnnBase::index_type* neighbors, float* distances) const -{ - static_assert(sizeof(size_t) == sizeof(faiss::idx_t), - "sizes of size_t and faiss::idx_t are different"); - - thread_pool_->submit( - [&](int i) { - // Use thread pool for batch size = 1. FAISS multi-threads internally for batch size > 1. 
- index_->search(batch_size, queries, k, distances, reinterpret_cast(neighbors)); - }, - 1); -} - -template -template -void FaissCpu::save_(const std::string& file) const -{ - faiss::write_index(index_.get(), file.c_str()); -} - -template -template -void FaissCpu::load_(const std::string& file) -{ - index_ = std::shared_ptr(dynamic_cast(faiss::read_index(file.c_str()))); -} - -template -class FaissCpuIVFFlat : public FaissCpu { - public: - using typename FaissCpu::BuildParam; - - FaissCpuIVFFlat(Metric metric, int dim, const BuildParam& param) : FaissCpu(metric, dim, param) - { - this->init_quantizer(dim); - this->index_ = std::make_shared( - this->quantizer_.get(), dim, param.nlist, this->metric_type_); - } - - void save(const std::string& file) const override - { - this->template save_(file); - } - void load(const std::string& file) override { this->template load_(file); } - - std::unique_ptr> copy() - { - return std::make_unique>(*this); // use copy constructor - } -}; - -template -class FaissCpuIVFPQ : public FaissCpu { - public: - struct BuildParam : public FaissCpu::BuildParam { - int M; - int bitsPerCode; - bool use_precomputed_table; - }; - - FaissCpuIVFPQ(Metric metric, int dim, const BuildParam& param) : FaissCpu(metric, dim, param) - { - this->init_quantizer(dim); - this->index_ = std::make_shared( - this->quantizer_.get(), dim, param.nlist, param.M, param.bitsPerCode, this->metric_type_); - } - - void save(const std::string& file) const override - { - this->template save_(file); - } - void load(const std::string& file) override { this->template load_(file); } - - std::unique_ptr> copy() - { - return std::make_unique>(*this); // use copy constructor - } -}; - -// TODO: Enable this in cmake -// ref: https://github.com/rapidsai/raft/issues/1876 -template -class FaissCpuIVFSQ : public FaissCpu { - public: - struct BuildParam : public FaissCpu::BuildParam { - std::string quantizer_type; - }; - - FaissCpuIVFSQ(Metric metric, int dim, const BuildParam& param) 
: FaissCpu(metric, dim, param) - { - faiss::ScalarQuantizer::QuantizerType qtype; - if (param.quantizer_type == "fp16") { - qtype = faiss::ScalarQuantizer::QT_fp16; - } else if (param.quantizer_type == "int8") { - qtype = faiss::ScalarQuantizer::QT_8bit; - } else { - throw std::runtime_error("FaissCpuIVFSQ supports only fp16 and int8 but got " + - param.quantizer_type); - } - - this->init_quantizer(dim); - this->index_ = std::make_shared( - this->quantizer_.get(), dim, param.nlist, qtype, this->metric_type_, true); - } - - void save(const std::string& file) const override - { - this->template save_(file); - } - void load(const std::string& file) override - { - this->template load_(file); - } - - std::unique_ptr> copy() - { - return std::make_unique>(*this); // use copy constructor - } -}; - -template -class FaissCpuFlat : public FaissCpu { - public: - FaissCpuFlat(Metric metric, int dim) - : FaissCpu(metric, dim, typename FaissCpu::BuildParam{}) - { - this->index_ = std::make_shared(dim, this->metric_type_); - } - - // class FaissCpu is more like a IVF class, so need special treating here - void set_search_param(const typename ANN::AnnSearchParam& param) override - { - auto search_param = dynamic_cast::SearchParam&>(param); - if (!this->thread_pool_ || this->num_threads_ != search_param.num_threads) { - this->num_threads_ = search_param.num_threads; - this->thread_pool_ = std::make_shared(this->num_threads_); - } - }; - - void save(const std::string& file) const override - { - this->template save_(file); - } - void load(const std::string& file) override { this->template load_(file); } - - std::unique_ptr> copy() - { - return std::make_unique>(*this); // use copy constructor - } -}; - -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu b/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu deleted file mode 100644 index b47c497e3d..0000000000 --- a/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu +++ /dev/null @@ -1,192 +0,0 @@ -/* - 
* Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../common/ann_types.hpp" - -#undef WARP_SIZE -#include "faiss_gpu_wrapper.h" - -#define JSON_DIAGNOSTICS 1 -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace raft::bench::ann { - -template -void parse_base_build_param(const nlohmann::json& conf, - typename raft::bench::ann::FaissGpu::BuildParam& param) -{ - param.nlist = conf.at("nlist"); - if (conf.contains("ratio")) { param.ratio = conf.at("ratio"); } -} - -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::FaissGpuIVFFlat::BuildParam& param) -{ - parse_base_build_param(conf, param); - if (conf.contains("use_raft")) { - param.use_raft = conf.at("use_raft"); - } else { - param.use_raft = false; - } -} - -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::FaissGpuIVFPQ::BuildParam& param) -{ - parse_base_build_param(conf, param); - param.M = conf.at("M"); - if (conf.contains("usePrecomputed")) { - param.usePrecomputed = conf.at("usePrecomputed"); - } else { - param.usePrecomputed = false; - } - if (conf.contains("useFloat16")) { - param.useFloat16 = conf.at("useFloat16"); - } else { - param.useFloat16 = false; - } - if (conf.contains("use_raft")) { - param.use_raft = conf.at("use_raft"); - } else { - param.use_raft = false; - } - if (conf.contains("bitsPerCode")) { - 
param.bitsPerCode = conf.at("bitsPerCode"); - } else { - param.bitsPerCode = 8; - } -} - -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::FaissGpuIVFSQ::BuildParam& param) -{ - parse_base_build_param(conf, param); - param.quantizer_type = conf.at("quantizer_type"); -} - -template -void parse_search_param(const nlohmann::json& conf, - typename raft::bench::ann::FaissGpu::SearchParam& param) -{ - param.nprobe = conf.at("nprobe"); - if (conf.contains("refine_ratio")) { param.refine_ratio = conf.at("refine_ratio"); } -} - -template class Algo> -std::unique_ptr> make_algo(raft::bench::ann::Metric metric, - int dim, - const nlohmann::json& conf) -{ - typename Algo::BuildParam param; - parse_build_param(conf, param); - return std::make_unique>(metric, dim, param); -} - -template class Algo> -std::unique_ptr> make_algo(raft::bench::ann::Metric metric, - int dim, - const nlohmann::json& conf, - const std::vector& dev_list) -{ - typename Algo::BuildParam param; - parse_build_param(conf, param); - - (void)dev_list; - return std::make_unique>(metric, dim, param); -} - -template -std::unique_ptr> create_algo(const std::string& algo, - const std::string& distance, - int dim, - const nlohmann::json& conf, - const std::vector& dev_list) -{ - // stop compiler warning; not all algorithms support multi-GPU so it may not be used - (void)dev_list; - - std::unique_ptr> ann; - - if constexpr (std::is_same_v) { - raft::bench::ann::Metric metric = parse_metric(distance); - if (algo == "faiss_gpu_ivf_flat") { - ann = make_algo(metric, dim, conf, dev_list); - } else if (algo == "faiss_gpu_ivf_pq") { - ann = make_algo(metric, dim, conf); - } else if (algo == "faiss_gpu_ivf_sq") { - ann = make_algo(metric, dim, conf); - } else if (algo == "faiss_gpu_flat") { - ann = std::make_unique>(metric, dim); - } - } - - if constexpr (std::is_same_v) {} - - if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } - - return ann; -} - -template 
-std::unique_ptr::AnnSearchParam> create_search_param( - const std::string& algo, const nlohmann::json& conf) -{ - if (algo == "faiss_gpu_ivf_flat" || algo == "faiss_gpu_ivf_pq" || algo == "faiss_gpu_ivf_sq") { - auto param = std::make_unique::SearchParam>(); - parse_search_param(conf, *param); - return param; - } else if (algo == "faiss_gpu_flat") { - auto param = std::make_unique::SearchParam>(); - return param; - } - // else - throw std::runtime_error("invalid algo: '" + algo + "'"); -} - -} // namespace raft::bench::ann - -REGISTER_ALGO_INSTANCE(float); -REGISTER_ALGO_INSTANCE(std::int8_t); -REGISTER_ALGO_INSTANCE(std::uint8_t); - -#ifdef ANN_BENCH_BUILD_MAIN -#include "../common/benchmark.hpp" -int main(int argc, char** argv) -{ - rmm::mr::cuda_memory_resource cuda_mr; - // Construct a resource that uses a coalescing best-fit pool allocator - // and is initially sized to half of free device memory. - rmm::mr::pool_memory_resource pool_mr{ - &cuda_mr, rmm::percent_of_free_device_memory(50)}; - // Updates the current device resource pointer to `pool_mr` - auto old_mr = rmm::mr::set_current_device_resource(&pool_mr); - auto ret = raft::bench::ann::run_main(argc, argv); - // Restores the current device resource pointer to its previous value - rmm::mr::set_current_device_resource(old_mr); - return ret; -} -#endif diff --git a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h deleted file mode 100644 index 6955201c5d..0000000000 --- a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h +++ /dev/null @@ -1,515 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef FAISS_WRAPPER_H_ -#define FAISS_WRAPPER_H_ - -#include "../common/ann_types.hpp" -#include "../raft/raft_ann_bench_utils.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -namespace { - -faiss::MetricType parse_metric_faiss(raft::bench::ann::Metric metric) -{ - if (metric == raft::bench::ann::Metric::kInnerProduct) { - return faiss::METRIC_INNER_PRODUCT; - } else if (metric == raft::bench::ann::Metric::kEuclidean) { - return faiss::METRIC_L2; - } else { - throw std::runtime_error("faiss supports only metric type of inner product and L2"); - } -} - -// note BLAS library can still use multi-threading, and -// setting environment variable like OPENBLAS_NUM_THREADS can control it -class OmpSingleThreadScope { - public: - OmpSingleThreadScope() - { - max_threads_ = omp_get_max_threads(); - omp_set_num_threads(1); - } - ~OmpSingleThreadScope() - { - // the best we can do - omp_set_num_threads(max_threads_); - } - - private: - int max_threads_; -}; - -} // namespace - -namespace raft::bench::ann { - -template -class FaissGpu : public ANN, public AnnGPU { - public: - using typename ANN::AnnSearchParam; - struct SearchParam : public AnnSearchParam { - int nprobe; - float refine_ratio = 1.0; - auto needs_dataset() const -> bool override { return refine_ratio > 1.0f; } - }; - 
- struct BuildParam { - int nlist = 1; - int ratio = 2; - }; - - FaissGpu(Metric metric, int dim, const BuildParam& param) - : ANN(metric, dim), - gpu_resource_{std::make_shared()}, - metric_type_(parse_metric_faiss(metric)), - nlist_{param.nlist}, - training_sample_fraction_{1.0 / double(param.ratio)} - { - static_assert(std::is_same_v, "faiss support only float type"); - RAFT_CUDA_TRY(cudaGetDevice(&device_)); - } - - void build(const T* dataset, size_t nrow) final; - - virtual void set_search_param(const FaissGpu::AnnSearchParam& param) {} - - void set_search_dataset(const T* dataset, size_t nrow) override { dataset_ = dataset; } - - // TODO: if the number of results is less than k, the remaining elements of 'neighbors' - // will be filled with (size_t)-1 - void search(const T* queries, - int batch_size, - int k, - AnnBase::index_type* neighbors, - float* distances) const final; - - [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override - { - return gpu_resource_->getDefaultStream(device_); - } - - AlgoProperty get_preference() const override - { - AlgoProperty property; - // to enable building big dataset which is larger than GPU memory - property.dataset_memory_type = MemoryType::Host; - property.query_memory_type = MemoryType::Device; - return property; - } - - protected: - template - void save_(const std::string& file) const; - - template - void load_(const std::string& file); - - /** [NOTE Multithreading] - * - * `gpu_resource_` is a shared resource: - * 1. It uses a shared_ptr under the hood, so the copies of it refer to the same - * resource implementation instance - * 2. GpuIndex is probably keeping a reference to it, as it's passed to the constructor - * - * To avoid copying the index (database) in each thread, we make both the index and - * the gpu_resource shared. - * This means faiss GPU streams are possibly shared among the CPU threads; - * the throughput search mode may be inaccurate. 
- * - * WARNING: we haven't investigated whether faiss::gpu::GpuIndex or - * faiss::gpu::StandardGpuResources are thread-safe. - * - */ - mutable std::shared_ptr gpu_resource_; - std::shared_ptr index_; - std::shared_ptr index_refine_{nullptr}; - faiss::MetricType metric_type_; - int nlist_; - int device_; - double training_sample_fraction_; - std::shared_ptr search_params_; - std::shared_ptr refine_search_params_{nullptr}; - const T* dataset_; - float refine_ratio_ = 1.0; - Objective metric_objective_; -}; - -template -void FaissGpu::build(const T* dataset, size_t nrow) -{ - OmpSingleThreadScope omp_single_thread; - auto index_ivf = dynamic_cast(index_.get()); - if (index_ivf != nullptr) { - // set the min/max training size for clustering to use the whole provided training set. - double trainset_size = training_sample_fraction_ * static_cast(nrow); - double points_per_centroid = trainset_size / static_cast(nlist_); - int max_ppc = std::ceil(points_per_centroid); - int min_ppc = std::floor(points_per_centroid); - if (min_ppc < index_ivf->cp.min_points_per_centroid) { - RAFT_LOG_WARN( - "The suggested training set size %zu (data size %zu, training sample ratio %f) yields %d " - "points per cluster (n_lists = %d). 
This is smaller than the FAISS default " - "min_points_per_centroid = %d.", - static_cast(trainset_size), - nrow, - training_sample_fraction_, - min_ppc, - nlist_, - index_ivf->cp.min_points_per_centroid); - } - index_ivf->cp.max_points_per_centroid = max_ppc; - index_ivf->cp.min_points_per_centroid = min_ppc; - } - index_->train(nrow, dataset); // faiss::gpu::GpuIndexFlat::train() will do nothing - assert(index_->is_trained); - index_->add(nrow, dataset); -} - -template -void FaissGpu::search( - const T* queries, int batch_size, int k, AnnBase::index_type* neighbors, float* distances) const -{ - ASSERT(Objective::LATENCY, "l2Knn: rowMajorIndex and rowMajorQuery should have same layout"); - using IdxT = faiss::idx_t; - static_assert(sizeof(size_t) == sizeof(faiss::idx_t), - "sizes of size_t and faiss::idx_t are different"); - - if (refine_ratio_ > 1.0) { - if (raft::get_device_for_address(queries) >= 0) { - uint32_t k0 = static_cast(refine_ratio_ * k); - auto distances_tmp = raft::make_device_matrix( - gpu_resource_->getRaftHandle(device_), batch_size, k0); - auto candidates = - raft::make_device_matrix(gpu_resource_->getRaftHandle(device_), batch_size, k0); - index_->search(batch_size, - queries, - k0, - distances_tmp.data_handle(), - candidates.data_handle(), - this->search_params_.get()); - - auto queries_host = raft::make_host_matrix(batch_size, index_->d); - auto candidates_host = raft::make_host_matrix(batch_size, k0); - auto neighbors_host = raft::make_host_matrix(batch_size, k); - auto distances_host = raft::make_host_matrix(batch_size, k); - auto dataset_v = raft::make_host_matrix_view( - this->dataset_, index_->ntotal, index_->d); - - raft::device_resources handle_ = gpu_resource_->getRaftHandle(device_); - - raft::copy(queries_host.data_handle(), queries, queries_host.size(), handle_.get_stream()); - raft::copy(candidates_host.data_handle(), - candidates.data_handle(), - candidates_host.size(), - handle_.get_stream()); - - // wait for the queries to copy 
to host in 'stream` - handle_.sync_stream(); - - raft::runtime::neighbors::refine(handle_, - dataset_v, - queries_host.view(), - candidates_host.view(), - neighbors_host.view(), - distances_host.view(), - parse_metric_type(this->metric_)); - - raft::copy(neighbors, - (size_t*)neighbors_host.data_handle(), - neighbors_host.size(), - handle_.get_stream()); - raft::copy( - distances, distances_host.data_handle(), distances_host.size(), handle_.get_stream()); - } else { - index_refine_->search(batch_size, - queries, - k, - distances, - reinterpret_cast(neighbors), - this->refine_search_params_.get()); - } - } else { - index_->search(batch_size, - queries, - k, - distances, - reinterpret_cast(neighbors), - this->search_params_.get()); - } -} - -template -template -void FaissGpu::save_(const std::string& file) const -{ - OmpSingleThreadScope omp_single_thread; - - auto cpu_index = std::make_unique(); - dynamic_cast(index_.get())->copyTo(cpu_index.get()); - faiss::write_index(cpu_index.get(), file.c_str()); -} - -template -template -void FaissGpu::load_(const std::string& file) -{ - OmpSingleThreadScope omp_single_thread; - - std::unique_ptr cpu_index(dynamic_cast(faiss::read_index(file.c_str()))); - assert(cpu_index); - - try { - dynamic_cast(index_.get())->copyFrom(cpu_index.get()); - - } catch (const std::exception& e) { - std::cout << "Error loading index file: " << std::string(e.what()) << std::endl; - } -} - -template -class FaissGpuIVFFlat : public FaissGpu { - public: - struct BuildParam : public FaissGpu::BuildParam { - bool use_raft; - }; - - FaissGpuIVFFlat(Metric metric, int dim, const BuildParam& param) : FaissGpu(metric, dim, param) - { - faiss::gpu::GpuIndexIVFFlatConfig config; - config.device = this->device_; - config.use_raft = param.use_raft; - this->index_ = std::make_shared( - this->gpu_resource_.get(), dim, param.nlist, this->metric_type_, config); - } - - void set_search_param(const typename FaissGpu::AnnSearchParam& param) override - { - auto 
search_param = dynamic_cast::SearchParam&>(param); - int nprobe = search_param.nprobe; - assert(nprobe <= nlist_); - - faiss::IVFSearchParameters faiss_search_params; - faiss_search_params.nprobe = nprobe; - this->search_params_ = std::make_shared(faiss_search_params); - this->refine_ratio_ = search_param.refine_ratio; - } - - void save(const std::string& file) const override - { - this->template save_(file); - } - void load(const std::string& file) override - { - this->template load_(file); - } - std::unique_ptr> copy() override { return std::make_unique>(*this); }; -}; - -template -class FaissGpuIVFPQ : public FaissGpu { - public: - struct BuildParam : public FaissGpu::BuildParam { - int M; - bool useFloat16; - bool usePrecomputed; - bool use_raft; - int bitsPerCode; - }; - - FaissGpuIVFPQ(Metric metric, int dim, const BuildParam& param) : FaissGpu(metric, dim, param) - { - faiss::gpu::GpuIndexIVFPQConfig config; - config.useFloat16LookupTables = param.useFloat16; - config.usePrecomputedTables = param.usePrecomputed; - config.use_raft = param.use_raft; - config.interleavedLayout = param.use_raft; - config.device = this->device_; - - this->index_ = std::make_shared(this->gpu_resource_.get(), - dim, - param.nlist, - param.M, - param.bitsPerCode, - this->metric_type_, - config); - } - - void set_search_param(const typename FaissGpu::AnnSearchParam& param) override - { - auto search_param = dynamic_cast::SearchParam&>(param); - int nprobe = search_param.nprobe; - assert(nprobe <= nlist_); - this->refine_ratio_ = search_param.refine_ratio; - faiss::IVFPQSearchParameters faiss_search_params; - faiss_search_params.nprobe = nprobe; - - this->search_params_ = std::make_shared(faiss_search_params); - - if (search_param.refine_ratio > 1.0) { - this->index_refine_ = - std::make_shared(this->index_.get(), this->dataset_); - this->index_refine_.get()->k_factor = search_param.refine_ratio; - faiss::IndexRefineSearchParameters faiss_refine_search_params; - 
faiss_refine_search_params.k_factor = this->index_refine_.get()->k_factor; - faiss_refine_search_params.base_index_params = this->search_params_.get(); - this->refine_search_params_ = - std::make_unique(faiss_refine_search_params); - } - } - - void save(const std::string& file) const override - { - this->template save_(file); - } - void load(const std::string& file) override - { - this->template load_(file); - } - std::unique_ptr> copy() override { return std::make_unique>(*this); }; -}; - -// TODO: Enable this in cmake -// ref: https://github.com/rapidsai/raft/issues/1876 -template -class FaissGpuIVFSQ : public FaissGpu { - public: - struct BuildParam : public FaissGpu::BuildParam { - std::string quantizer_type; - }; - - FaissGpuIVFSQ(Metric metric, int dim, const BuildParam& param) : FaissGpu(metric, dim, param) - { - faiss::ScalarQuantizer::QuantizerType qtype; - if (param.quantizer_type == "fp16") { - qtype = faiss::ScalarQuantizer::QT_fp16; - } else if (param.quantizer_type == "int8") { - qtype = faiss::ScalarQuantizer::QT_8bit; - } else { - throw std::runtime_error("FaissGpuIVFSQ supports only fp16 and int8 but got " + - param.quantizer_type); - } - - faiss::gpu::GpuIndexIVFScalarQuantizerConfig config; - config.device = this->device_; - this->index_ = std::make_shared( - this->gpu_resource_.get(), dim, param.nlist, qtype, this->metric_type_, true, config); - } - - void set_search_param(const typename FaissGpu::AnnSearchParam& param) override - { - auto search_param = dynamic_cast::SearchParam&>(param); - int nprobe = search_param.nprobe; - assert(nprobe <= nlist_); - - faiss::IVFSearchParameters faiss_search_params; - faiss_search_params.nprobe = nprobe; - - this->search_params_ = std::make_shared(faiss_search_params); - this->refine_ratio_ = search_param.refine_ratio; - if (search_param.refine_ratio > 1.0) { - this->index_refine_ = - std::make_shared(this->index_.get(), this->dataset_); - this->index_refine_.get()->k_factor = search_param.refine_ratio; - 
faiss::IndexRefineSearchParameters faiss_refine_search_params; - faiss_refine_search_params.k_factor = this->index_refine_.get()->k_factor; - faiss_refine_search_params.base_index_params = this->search_params_.get(); - this->refine_search_params_ = - std::make_unique(faiss_refine_search_params); - } - } - - void save(const std::string& file) const override - { - this->template save_( - file); - } - void load(const std::string& file) override - { - this->template load_( - file); - } - std::unique_ptr> copy() override { return std::make_unique>(*this); }; -}; - -template -class FaissGpuFlat : public FaissGpu { - public: - FaissGpuFlat(Metric metric, int dim) - : FaissGpu(metric, dim, typename FaissGpu::BuildParam{}) - { - faiss::gpu::GpuIndexFlatConfig config; - config.device = this->device_; - this->index_ = std::make_shared( - this->gpu_resource_.get(), dim, this->metric_type_, config); - } - void set_search_param(const typename FaissGpu::AnnSearchParam& param) override - { - auto search_param = dynamic_cast::SearchParam&>(param); - int nprobe = search_param.nprobe; - assert(nprobe <= nlist_); - - this->search_params_ = std::make_shared(); - } - - void save(const std::string& file) const override - { - this->template save_(file); - } - void load(const std::string& file) override - { - this->template load_(file); - } - std::unique_ptr> copy() override { return std::make_unique>(*this); }; -}; - -} // namespace raft::bench::ann - -#endif diff --git a/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu b/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu deleted file mode 100644 index 48d41388d4..0000000000 --- a/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../common/ann_types.hpp" -#include "ggnn_wrapper.cuh" - -#define JSON_DIAGNOSTICS 1 -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace raft::bench::ann { - -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::Ggnn::BuildParam& param) -{ - param.k = conf.at("k"); - - if (conf.contains("k_build")) { param.k_build = conf.at("k_build"); } - if (conf.contains("segment_size")) { param.segment_size = conf.at("segment_size"); } - if (conf.contains("num_layers")) { param.num_layers = conf.at("num_layers"); } - if (conf.contains("tau")) { param.tau = conf.at("tau"); } - if (conf.contains("refine_iterations")) { - param.refine_iterations = conf.at("refine_iterations"); - } -} - -template -void parse_search_param(const nlohmann::json& conf, - typename raft::bench::ann::Ggnn::SearchParam& param) -{ - param.tau = conf.at("tau"); - - if (conf.contains("block_dim")) { param.block_dim = conf.at("block_dim"); } - if (conf.contains("max_iterations")) { param.max_iterations = conf.at("max_iterations"); } - if (conf.contains("cache_size")) { param.cache_size = conf.at("cache_size"); } - if (conf.contains("sorted_size")) { param.sorted_size = conf.at("sorted_size"); } -} - -template class Algo> -std::unique_ptr> make_algo(raft::bench::ann::Metric metric, - int dim, - const nlohmann::json& conf) -{ - typename Algo::BuildParam param; - parse_build_param(conf, param); - return std::make_unique>(metric, dim, param); -} - -template class Algo> -std::unique_ptr> 
make_algo(raft::bench::ann::Metric metric, - int dim, - const nlohmann::json& conf, - const std::vector& dev_list) -{ - typename Algo::BuildParam param; - parse_build_param(conf, param); - - (void)dev_list; - return std::make_unique>(metric, dim, param); -} - -template -std::unique_ptr> create_algo(const std::string& algo, - const std::string& distance, - int dim, - const nlohmann::json& conf, - const std::vector& dev_list) -{ - // stop compiler warning; not all algorithms support multi-GPU so it may not be used - (void)dev_list; - - raft::bench::ann::Metric metric = parse_metric(distance); - std::unique_ptr> ann; - - if constexpr (std::is_same_v || std::is_same_v || - std::is_same_v) { - if (algo == "ggnn") { ann = make_algo(metric, dim, conf); } - } - if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } - - return ann; -} - -template -std::unique_ptr::AnnSearchParam> create_search_param( - const std::string& algo, const nlohmann::json& conf) -{ - if constexpr (std::is_same_v || std::is_same_v || - std::is_same_v) { - if (algo == "ggnn") { - auto param = std::make_unique::SearchParam>(); - parse_search_param(conf, *param); - return param; - } - } - // else - throw std::runtime_error("invalid algo: '" + algo + "'"); -} - -} // namespace raft::bench::ann - -REGISTER_ALGO_INSTANCE(float); -REGISTER_ALGO_INSTANCE(std::int8_t); -REGISTER_ALGO_INSTANCE(std::uint8_t); - -#ifdef ANN_BENCH_BUILD_MAIN -#include "../common/benchmark.hpp" -int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } -#endif diff --git a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh deleted file mode 100644 index 59cf3df806..0000000000 --- a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh +++ /dev/null @@ -1,322 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "../common/ann_types.hpp" -#include "../common/util.hpp" - -#include - -#include - -#include -#include - -namespace raft::bench::ann { - -template -class GgnnImpl; - -template -class Ggnn : public ANN, public AnnGPU { - public: - struct BuildParam { - int k_build{24}; // KBuild - int segment_size{32}; // S - int num_layers{4}; // L - float tau{0.5}; - int refine_iterations{2}; - int k; // GGNN requires to know k during building - }; - - using typename ANN::AnnSearchParam; - struct SearchParam : public AnnSearchParam { - float tau; - int block_dim{32}; - int max_iterations{400}; - int cache_size{512}; - int sorted_size{256}; - auto needs_dataset() const -> bool override { return true; } - }; - - Ggnn(Metric metric, int dim, const BuildParam& param); - - void build(const T* dataset, size_t nrow) override { impl_->build(dataset, nrow); } - - void set_search_param(const AnnSearchParam& param) override { impl_->set_search_param(param); } - void search(const T* queries, - int batch_size, - int k, - AnnBase::index_type* neighbors, - float* distances) const override - { - impl_->search(queries, batch_size, k, neighbors, distances); - } - [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override - { - return dynamic_cast(impl_.get())->get_sync_stream(); - } - - void save(const std::string& file) const override { impl_->save(file); } - void load(const std::string& file) override { impl_->load(file); } - std::unique_ptr> copy() override { return std::make_unique>(*this); }; - - AlgoProperty get_preference() 
const override { return impl_->get_preference(); } - - void set_search_dataset(const T* dataset, size_t nrow) override - { - impl_->set_search_dataset(dataset, nrow); - }; - - private: - std::shared_ptr> impl_; -}; - -template -Ggnn::Ggnn(Metric metric, int dim, const BuildParam& param) : ANN(metric, dim) -{ - // ggnn/src/sift1m.cu - if (metric == Metric::kEuclidean && dim == 128 && param.k_build == 24 && param.k == 10 && - param.segment_size == 32) { - impl_ = std::make_shared>(metric, dim, param); - } - // ggnn/src/deep1b_multi_gpu.cu, and adapt it deep1B - else if (metric == Metric::kEuclidean && dim == 96 && param.k_build == 24 && param.k == 10 && - param.segment_size == 32) { - impl_ = std::make_shared>(metric, dim, param); - } else if (metric == Metric::kInnerProduct && dim == 96 && param.k_build == 24 && param.k == 10 && - param.segment_size == 32) { - impl_ = std::make_shared>(metric, dim, param); - } else if (metric == Metric::kInnerProduct && dim == 96 && param.k_build == 96 && param.k == 10 && - param.segment_size == 64) { - impl_ = std::make_shared>(metric, dim, param); - } - // ggnn/src/glove200.cu, adapt it to glove100 - else if (metric == Metric::kInnerProduct && dim == 100 && param.k_build == 96 && param.k == 10 && - param.segment_size == 64) { - impl_ = std::make_shared>(metric, dim, param); - } else { - throw std::runtime_error( - "ggnn: not supported combination of metric, dim and build param; " - "see Ggnn's constructor in ggnn_wrapper.cuh for available combinations"); - } -} - -template -class GgnnImpl : public ANN, public AnnGPU { - public: - using typename ANN::AnnSearchParam; - - GgnnImpl(Metric metric, int dim, const typename Ggnn::BuildParam& param); - - void build(const T* dataset, size_t nrow) override; - - void set_search_param(const AnnSearchParam& param) override; - void search(const T* queries, - int batch_size, - int k, - AnnBase::index_type* neighbors, - float* distances) const override; - [[nodiscard]] auto get_sync_stream() const 
noexcept -> cudaStream_t override { return stream_; } - - void save(const std::string& file) const override; - void load(const std::string& file) override; - std::unique_ptr> copy() override - { - auto r = std::make_unique>(*this); - // set the thread-local stream to the copied handle. - r->stream_ = raft::bench::ann::get_stream_from_global_pool(); - return r; - }; - - AlgoProperty get_preference() const override - { - AlgoProperty property; - property.dataset_memory_type = MemoryType::Device; - property.query_memory_type = MemoryType::Device; - return property; - } - - void set_search_dataset(const T* dataset, size_t nrow) override; - - private: - using ANN::metric_; - using ANN::dim_; - - using GGNNGPUInstance = GGNNGPUInstance; - std::shared_ptr ggnn_; - typename Ggnn::BuildParam build_param_; - typename Ggnn::SearchParam search_param_; - cudaStream_t stream_; - const T* base_dataset = nullptr; - size_t base_n_rows = 0; - std::optional graph_file = std::nullopt; - - void load_impl() - { - if (base_dataset == nullptr) { return; } - if (base_n_rows == 0) { return; } - int device; - RAFT_CUDA_TRY(cudaGetDevice(&device)); - ggnn_ = std::make_shared( - device, base_n_rows, build_param_.num_layers, true, build_param_.tau); - ggnn_->set_base_data(base_dataset); - ggnn_->set_stream(get_sync_stream()); - if (graph_file.has_value()) { - auto& ggnn_host = ggnn_->ggnn_cpu_buffers.at(0); - auto& ggnn_device = ggnn_->ggnn_shards.at(0); - ggnn_->set_stream(get_sync_stream()); - - ggnn_host.load(graph_file.value()); - ggnn_host.uploadAsync(ggnn_device); - RAFT_CUDA_TRY(cudaStreamSynchronize(ggnn_device.stream)); - } - } -}; - -template -GgnnImpl::GgnnImpl(Metric metric, - int dim, - const typename Ggnn::BuildParam& param) - : ANN(metric, dim), - build_param_(param), - stream_(raft::bench::ann::get_stream_from_global_pool()) -{ - if (metric_ == Metric::kInnerProduct) { - if (measure != Cosine) { throw std::runtime_error("mis-matched metric"); } - } else if (metric_ == 
Metric::kEuclidean) { - if (measure != Euclidean) { throw std::runtime_error("mis-matched metric"); } - } else { - throw std::runtime_error( - "ggnn supports only metric type of InnerProduct, Cosine and Euclidean"); - } - - if (dim != D) { throw std::runtime_error("mis-matched dim"); } -} - -template -void GgnnImpl::build(const T* dataset, size_t nrow) -{ - base_dataset = dataset; - base_n_rows = nrow; - graph_file = std::nullopt; - load_impl(); - ggnn_->build(0); - for (int i = 0; i < build_param_.refine_iterations; ++i) { - ggnn_->refine(); - } -} - -template -void GgnnImpl::set_search_dataset(const T* dataset, size_t nrow) -{ - if (base_dataset != dataset || base_n_rows != nrow) { - base_dataset = dataset; - base_n_rows = nrow; - load_impl(); - } -} - -template -void GgnnImpl::set_search_param(const AnnSearchParam& param) -{ - search_param_ = dynamic_cast::SearchParam&>(param); -} - -template -void GgnnImpl::search( - const T* queries, int batch_size, int k, AnnBase::index_type* neighbors, float* distances) const -{ - static_assert(sizeof(size_t) == sizeof(int64_t), "sizes of size_t and GGNN's KeyT are different"); - if (k != KQuery) { - throw std::runtime_error( - "k = " + std::to_string(k) + - ", but this GGNN instance only supports k = " + std::to_string(KQuery)); - } - - ggnn_->set_stream(get_sync_stream()); - RAFT_CUDA_TRY(cudaMemcpyToSymbol(c_tau_query, &search_param_.tau, sizeof(float))); - - const int block_dim = search_param_.block_dim; - const int max_iterations = search_param_.max_iterations; - const int cache_size = search_param_.cache_size; - const int sorted_size = search_param_.sorted_size; - // default value - if (block_dim == 32 && max_iterations == 400 && cache_size == 512 && sorted_size == 256) { - ggnn_->template queryLayer<32, 400, 512, 256, false>( - queries, batch_size, reinterpret_cast(neighbors), distances); - } - // ggnn/src/sift1m.cu - else if (block_dim == 32 && max_iterations == 200 && cache_size == 256 && sorted_size == 64) { - 
ggnn_->template queryLayer<32, 200, 256, 64, false>( - queries, batch_size, reinterpret_cast(neighbors), distances); - } - // ggnn/src/sift1m.cu - else if (block_dim == 32 && max_iterations == 400 && cache_size == 448 && sorted_size == 64) { - ggnn_->template queryLayer<32, 400, 448, 64, false>( - queries, batch_size, reinterpret_cast(neighbors), distances); - } - // ggnn/src/glove200.cu - else if (block_dim == 128 && max_iterations == 2000 && cache_size == 2048 && sorted_size == 32) { - ggnn_->template queryLayer<128, 2000, 2048, 32, false>( - queries, batch_size, reinterpret_cast(neighbors), distances); - } - // for glove100 - else if (block_dim == 64 && max_iterations == 400 && cache_size == 512 && sorted_size == 32) { - ggnn_->template queryLayer<64, 400, 512, 32, false>( - queries, batch_size, reinterpret_cast(neighbors), distances); - } else if (block_dim == 128 && max_iterations == 2000 && cache_size == 1024 && - sorted_size == 32) { - ggnn_->template queryLayer<128, 2000, 1024, 32, false>( - queries, batch_size, reinterpret_cast(neighbors), distances); - } else { - throw std::runtime_error("ggnn: not supported search param"); - } -} - -template -void GgnnImpl::save(const std::string& file) const -{ - auto& ggnn_host = ggnn_->ggnn_cpu_buffers.at(0); - auto& ggnn_device = ggnn_->ggnn_shards.at(0); - ggnn_->set_stream(get_sync_stream()); - - ggnn_host.downloadAsync(ggnn_device); - RAFT_CUDA_TRY(cudaStreamSynchronize(ggnn_device.stream)); - ggnn_host.store(file); -} - -template -void GgnnImpl::load(const std::string& file) -{ - if (!graph_file.has_value() || graph_file.value() != file) { - graph_file = file; - load_impl(); - } -} - -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp deleted file mode 100644 index df82c68830..0000000000 --- a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA 
CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../common/ann_types.hpp" -#include "hnswlib_wrapper.h" - -#define JSON_DIAGNOSTICS 1 -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace raft::bench::ann { - -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::HnswLib::BuildParam& param) -{ - param.ef_construction = conf.at("efConstruction"); - param.M = conf.at("M"); - if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); } -} - -template -void parse_search_param(const nlohmann::json& conf, - typename raft::bench::ann::HnswLib::SearchParam& param) -{ - param.ef = conf.at("ef"); - if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); } -} - -template class Algo> -std::unique_ptr> make_algo(raft::bench::ann::Metric metric, - int dim, - const nlohmann::json& conf) -{ - typename Algo::BuildParam param; - parse_build_param(conf, param); - return std::make_unique>(metric, dim, param); -} - -template class Algo> -std::unique_ptr> make_algo(raft::bench::ann::Metric metric, - int dim, - const nlohmann::json& conf, - const std::vector& dev_list) -{ - typename Algo::BuildParam param; - parse_build_param(conf, param); - - (void)dev_list; - return std::make_unique>(metric, dim, param); -} - -template -std::unique_ptr> create_algo(const std::string& algo, - const std::string& distance, - int dim, - const 
nlohmann::json& conf, - const std::vector& dev_list) -{ - // stop compiler warning; not all algorithms support multi-GPU so it may not be used - (void)dev_list; - - raft::bench::ann::Metric metric = parse_metric(distance); - std::unique_ptr> ann; - - if constexpr (std::is_same_v) { - if (algo == "hnswlib") { ann = make_algo(metric, dim, conf); } - } - - if constexpr (std::is_same_v) { - if (algo == "hnswlib") { ann = make_algo(metric, dim, conf); } - } - - if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } - return ann; -} - -template -std::unique_ptr::AnnSearchParam> create_search_param( - const std::string& algo, const nlohmann::json& conf) -{ - if (algo == "hnswlib") { - auto param = std::make_unique::SearchParam>(); - parse_search_param(conf, *param); - return param; - } - // else - throw std::runtime_error("invalid algo: '" + algo + "'"); -} - -}; // namespace raft::bench::ann - -REGISTER_ALGO_INSTANCE(float); -REGISTER_ALGO_INSTANCE(std::int8_t); -REGISTER_ALGO_INSTANCE(std::uint8_t); - -#ifdef ANN_BENCH_BUILD_MAIN -#include "../common/benchmark.hpp" -int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } -#endif diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h deleted file mode 100644 index 5743632bf4..0000000000 --- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "../common/ann_types.hpp" -#include "../common/thread_pool.hpp" - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace raft::bench::ann { - -template -struct hnsw_dist_t { - using type = void; -}; - -template <> -struct hnsw_dist_t { - using type = float; -}; - -template <> -struct hnsw_dist_t { - using type = int; -}; - -template <> -struct hnsw_dist_t { - using type = int; -}; - -template -class HnswLib : public ANN { - public: - // https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md - struct BuildParam { - int M; - int ef_construction; - int num_threads = omp_get_num_procs(); - }; - - using typename ANN::AnnSearchParam; - struct SearchParam : public AnnSearchParam { - int ef; - int num_threads = 1; - }; - - HnswLib(Metric metric, int dim, const BuildParam& param); - - void build(const T* dataset, size_t nrow) override; - - void set_search_param(const AnnSearchParam& param) override; - void search(const T* query, - int batch_size, - int k, - AnnBase::index_type* indices, - float* distances) const override; - - void save(const std::string& path_to_index) const override; - void load(const std::string& path_to_index) override; - std::unique_ptr> copy() override { return std::make_unique>(*this); }; - - AlgoProperty get_preference() const override - { - AlgoProperty property; - property.dataset_memory_type = MemoryType::Host; - property.query_memory_type = MemoryType::Host; - return property; - } - - void set_base_layer_only() { appr_alg_->base_layer_only = true; } - - private: - void get_search_knn_results_(const T* query, - int k, - AnnBase::index_type* indices, - float* distances) const; - - std::shared_ptr::type>> appr_alg_; - std::shared_ptr::type>> space_; - - using ANN::metric_; - using 
ANN::dim_; - int ef_construction_; - int m_; - int num_threads_; - std::shared_ptr thread_pool_; - Objective metric_objective_; -}; - -template -HnswLib::HnswLib(Metric metric, int dim, const BuildParam& param) : ANN(metric, dim) -{ - assert(dim_ > 0); - static_assert(std::is_same_v || std::is_same_v); - if constexpr (std::is_same_v) { - if (metric_ != Metric::kEuclidean) { - throw std::runtime_error("hnswlib only supports Euclidean distance"); - } - } - - ef_construction_ = param.ef_construction; - m_ = param.M; - num_threads_ = param.num_threads; -} - -template -void HnswLib::build(const T* dataset, size_t nrow) -{ - if constexpr (std::is_same_v) { - if (metric_ == Metric::kInnerProduct) { - space_ = std::make_shared(dim_); - } else { - space_ = std::make_shared(dim_); - } - } else if constexpr (std::is_same_v) { - space_ = std::make_shared>(dim_); - } - - appr_alg_ = std::make_shared::type>>( - space_.get(), nrow, m_, ef_construction_); - - thread_pool_ = std::make_shared(num_threads_); - const size_t items_per_thread = nrow / (num_threads_ + 1); - - thread_pool_->submit( - [&](size_t i) { - if (i < items_per_thread && i % 10000 == 0) { - char buf[20]; - std::time_t now = std::time(nullptr); - std::strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", std::localtime(&now)); - printf("%s building %zu / %zu\n", buf, i, items_per_thread); - fflush(stdout); - } - - appr_alg_->addPoint(dataset + i * dim_, i); - }, - nrow); -} - -template -void HnswLib::set_search_param(const AnnSearchParam& param_) -{ - auto param = dynamic_cast(param_); - appr_alg_->ef_ = param.ef; - metric_objective_ = param.metric_objective; - num_threads_ = param.num_threads; - - // Create a pool if multiple query threads have been set and the pool hasn't been created already - bool create_pool = (metric_objective_ == Objective::LATENCY && num_threads_ > 1 && !thread_pool_); - if (create_pool) { thread_pool_ = std::make_shared(num_threads_); } -} - -template -void HnswLib::search( - const T* query, int 
batch_size, int k, AnnBase::index_type* indices, float* distances) const -{ - auto f = [&](int i) { - // hnsw can only handle a single vector at a time. - get_search_knn_results_(query + i * dim_, k, indices + i * k, distances + i * k); - }; - if (metric_objective_ == Objective::LATENCY && num_threads_ > 1) { - thread_pool_->submit(f, batch_size); - } else { - for (int i = 0; i < batch_size; i++) { - f(i); - } - } -} - -template -void HnswLib::save(const std::string& path_to_index) const -{ - appr_alg_->saveIndex(std::string(path_to_index)); -} - -template -void HnswLib::load(const std::string& path_to_index) -{ - if constexpr (std::is_same_v) { - if (metric_ == Metric::kInnerProduct) { - space_ = std::make_shared(dim_); - } else { - space_ = std::make_shared(dim_); - } - } else if constexpr (std::is_same_v) { - space_ = std::make_shared>(dim_); - } - - appr_alg_ = std::make_shared::type>>( - space_.get(), path_to_index); -} - -template -void HnswLib::get_search_knn_results_(const T* query, - int k, - AnnBase::index_type* indices, - float* distances) const -{ - auto result = appr_alg_->searchKnn(query, k); - assert(result.size() >= static_cast(k)); - - for (int i = k - 1; i >= 0; --i) { - indices[i] = result.top().second; - distances[i] = result.top().first; - result.pop(); - } -} - -}; // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h deleted file mode 100644 index 48bf1d70d8..0000000000 --- a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h +++ /dev/null @@ -1,275 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#define JSON_DIAGNOSTICS 1 -#include - -#undef WARP_SIZE -#ifdef RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE -#include "raft_wrapper.h" -#endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT -#include "raft_ivf_flat_wrapper.h" -extern template class raft::bench::ann::RaftIvfFlatGpu; -extern template class raft::bench::ann::RaftIvfFlatGpu; -extern template class raft::bench::ann::RaftIvfFlatGpu; -#endif -#if defined(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || \ - defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) -#include "raft_ivf_pq_wrapper.h" -#endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_PQ -extern template class raft::bench::ann::RaftIvfPQ; -extern template class raft::bench::ann::RaftIvfPQ; -extern template class raft::bench::ann::RaftIvfPQ; -#endif -#if defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) -#include "raft_cagra_wrapper.h" -#endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA -extern template class raft::bench::ann::RaftCagra; -extern template class raft::bench::ann::RaftCagra; -extern template class raft::bench::ann::RaftCagra; -extern template class raft::bench::ann::RaftCagra; -#endif - -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::RaftIvfFlatGpu::BuildParam& param) -{ - param.n_lists = conf.at("nlist"); - if (conf.contains("niter")) { param.kmeans_n_iters = conf.at("niter"); } - if (conf.contains("ratio")) { param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio"); } 
-} - -template -void parse_search_param(const nlohmann::json& conf, - typename raft::bench::ann::RaftIvfFlatGpu::SearchParam& param) -{ - param.ivf_flat_params.n_probes = conf.at("nprobe"); -} -#endif - -#if defined(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || \ - defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::RaftIvfPQ::BuildParam& param) -{ - if (conf.contains("nlist")) { param.n_lists = conf.at("nlist"); } - if (conf.contains("niter")) { param.kmeans_n_iters = conf.at("niter"); } - if (conf.contains("ratio")) { param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio"); } - if (conf.contains("pq_bits")) { param.pq_bits = conf.at("pq_bits"); } - if (conf.contains("pq_dim")) { param.pq_dim = conf.at("pq_dim"); } - if (conf.contains("codebook_kind")) { - std::string kind = conf.at("codebook_kind"); - if (kind == "cluster") { - param.codebook_kind = raft::neighbors::ivf_pq::codebook_gen::PER_CLUSTER; - } else if (kind == "subspace") { - param.codebook_kind = raft::neighbors::ivf_pq::codebook_gen::PER_SUBSPACE; - } else { - throw std::runtime_error("codebook_kind: '" + kind + - "', should be either 'cluster' or 'subspace'"); - } - } -} - -template -void parse_search_param(const nlohmann::json& conf, - typename raft::bench::ann::RaftIvfPQ::SearchParam& param) -{ - if (conf.contains("nprobe")) { param.pq_param.n_probes = conf.at("nprobe"); } - if (conf.contains("internalDistanceDtype")) { - std::string type = conf.at("internalDistanceDtype"); - if (type == "float") { - param.pq_param.internal_distance_dtype = CUDA_R_32F; - } else if (type == "half") { - param.pq_param.internal_distance_dtype = CUDA_R_16F; - } else { - throw std::runtime_error("internalDistanceDtype: '" + type + - "', should be either 'float' or 'half'"); - } - } else { - // set half as default type - param.pq_param.internal_distance_dtype = CUDA_R_16F; - } - - if 
(conf.contains("smemLutDtype")) { - std::string type = conf.at("smemLutDtype"); - if (type == "float") { - param.pq_param.lut_dtype = CUDA_R_32F; - } else if (type == "half") { - param.pq_param.lut_dtype = CUDA_R_16F; - } else if (type == "fp8") { - param.pq_param.lut_dtype = CUDA_R_8U; - } else { - throw std::runtime_error("smemLutDtype: '" + type + - "', should be either 'float', 'half' or 'fp8'"); - } - } else { - // set half as default - param.pq_param.lut_dtype = CUDA_R_16F; - } - if (conf.contains("refine_ratio")) { - param.refine_ratio = conf.at("refine_ratio"); - if (param.refine_ratio < 1.0f) { throw std::runtime_error("refine_ratio should be >= 1.0"); } - } -} -#endif - -#if defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB) -template -void parse_build_param(const nlohmann::json& conf, - raft::neighbors::experimental::nn_descent::index_params& param) -{ - if (conf.contains("graph_degree")) { param.graph_degree = conf.at("graph_degree"); } - if (conf.contains("intermediate_graph_degree")) { - param.intermediate_graph_degree = conf.at("intermediate_graph_degree"); - } - // we allow niter shorthand for max_iterations - if (conf.contains("niter")) { param.max_iterations = conf.at("niter"); } - if (conf.contains("max_iterations")) { param.max_iterations = conf.at("max_iterations"); } - if (conf.contains("termination_threshold")) { - param.termination_threshold = conf.at("termination_threshold"); - } -} - -inline void parse_build_param(const nlohmann::json& conf, raft::neighbors::vpq_params& param) -{ - if (conf.contains("pq_bits")) { param.pq_bits = conf.at("pq_bits"); } - if (conf.contains("pq_dim")) { param.pq_dim = conf.at("pq_dim"); } - if (conf.contains("vq_n_centers")) { param.vq_n_centers = conf.at("vq_n_centers"); } - if (conf.contains("kmeans_n_iters")) { param.kmeans_n_iters = conf.at("kmeans_n_iters"); } - if (conf.contains("vq_kmeans_trainset_fraction")) { - param.vq_kmeans_trainset_fraction = 
conf.at("vq_kmeans_trainset_fraction"); - } - if (conf.contains("pq_kmeans_trainset_fraction")) { - param.pq_kmeans_trainset_fraction = conf.at("pq_kmeans_trainset_fraction"); - } -} - -nlohmann::json collect_conf_with_prefix(const nlohmann::json& conf, - const std::string& prefix, - bool remove_prefix = true) -{ - nlohmann::json out; - for (auto& i : conf.items()) { - if (i.key().compare(0, prefix.size(), prefix) == 0) { - auto new_key = remove_prefix ? i.key().substr(prefix.size()) : i.key(); - out[new_key] = i.value(); - } - } - return out; -} - -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::RaftCagra::BuildParam& param) -{ - if (conf.contains("graph_degree")) { - param.cagra_params.graph_degree = conf.at("graph_degree"); - param.cagra_params.intermediate_graph_degree = param.cagra_params.graph_degree * 2; - } - if (conf.contains("intermediate_graph_degree")) { - param.cagra_params.intermediate_graph_degree = conf.at("intermediate_graph_degree"); - } - if (conf.contains("graph_build_algo")) { - if (conf.at("graph_build_algo") == "IVF_PQ") { - param.cagra_params.build_algo = raft::neighbors::cagra::graph_build_algo::IVF_PQ; - } else if (conf.at("graph_build_algo") == "NN_DESCENT") { - param.cagra_params.build_algo = raft::neighbors::cagra::graph_build_algo::NN_DESCENT; - } - } - nlohmann::json ivf_pq_build_conf = collect_conf_with_prefix(conf, "ivf_pq_build_"); - if (!ivf_pq_build_conf.empty()) { - raft::neighbors::ivf_pq::index_params bparam; - parse_build_param(ivf_pq_build_conf, bparam); - param.ivf_pq_build_params = bparam; - } - nlohmann::json ivf_pq_search_conf = collect_conf_with_prefix(conf, "ivf_pq_search_"); - if (!ivf_pq_search_conf.empty()) { - typename raft::bench::ann::RaftIvfPQ::SearchParam sparam; - parse_search_param(ivf_pq_search_conf, sparam); - param.ivf_pq_search_params = sparam.pq_param; - param.ivf_pq_refine_rate = sparam.refine_ratio; - } - nlohmann::json nn_descent_conf = 
collect_conf_with_prefix(conf, "nn_descent_"); - if (!nn_descent_conf.empty()) { - raft::neighbors::experimental::nn_descent::index_params nn_param; - nn_param.intermediate_graph_degree = 1.5 * param.cagra_params.intermediate_graph_degree; - parse_build_param(nn_descent_conf, nn_param); - if (nn_param.graph_degree != param.cagra_params.intermediate_graph_degree) { - nn_param.graph_degree = param.cagra_params.intermediate_graph_degree; - } - param.nn_descent_params = nn_param; - } - nlohmann::json comp_search_conf = collect_conf_with_prefix(conf, "compression_"); - if (!comp_search_conf.empty()) { - raft::neighbors::vpq_params vpq_pams; - parse_build_param(comp_search_conf, vpq_pams); - param.cagra_params.compression.emplace(vpq_pams); - } -} - -raft::bench::ann::AllocatorType parse_allocator(std::string mem_type) -{ - if (mem_type == "device") { - return raft::bench::ann::AllocatorType::Device; - } else if (mem_type == "host_pinned") { - return raft::bench::ann::AllocatorType::HostPinned; - } else if (mem_type == "host_huge_page") { - return raft::bench::ann::AllocatorType::HostHugePage; - } - THROW( - "Invalid value for memory type %s, must be one of [\"device\", \"host_pinned\", " - "\"host_huge_page\"", - mem_type.c_str()); -} - -template -void parse_search_param(const nlohmann::json& conf, - typename raft::bench::ann::RaftCagra::SearchParam& param) -{ - if (conf.contains("itopk")) { param.p.itopk_size = conf.at("itopk"); } - if (conf.contains("search_width")) { param.p.search_width = conf.at("search_width"); } - if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); } - if (conf.contains("algo")) { - if (conf.at("algo") == "single_cta") { - param.p.algo = raft::neighbors::experimental::cagra::search_algo::SINGLE_CTA; - } else if (conf.at("algo") == "multi_cta") { - param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_CTA; - } else if (conf.at("algo") == "multi_kernel") { - param.p.algo = 
raft::neighbors::experimental::cagra::search_algo::MULTI_KERNEL; - } else if (conf.at("algo") == "auto") { - param.p.algo = raft::neighbors::experimental::cagra::search_algo::AUTO; - } else { - std::string tmp = conf.at("algo"); - THROW("Invalid value for algo: %s", tmp.c_str()); - } - } - if (conf.contains("graph_memory_type")) { - param.graph_mem = parse_allocator(conf.at("graph_memory_type")); - } - if (conf.contains("internal_dataset_memory_type")) { - param.dataset_mem = parse_allocator(conf.at("internal_dataset_memory_type")); - } - // Same ratio as in IVF-PQ - param.refine_ratio = conf.value("refine_ratio", 1.0f); -} -#endif diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_utils.h b/cpp/bench/ann/src/raft/raft_ann_bench_utils.h deleted file mode 100644 index 9b086fdb23..0000000000 --- a/cpp/bench/ann/src/raft/raft_ann_bench_utils.h +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -#include "../common/util.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace raft::bench::ann { - -inline raft::distance::DistanceType parse_metric_type(raft::bench::ann::Metric metric) -{ - if (metric == raft::bench::ann::Metric::kInnerProduct) { - return raft::distance::DistanceType::InnerProduct; - } else if (metric == raft::bench::ann::Metric::kEuclidean) { - // Even for L2 expanded RAFT IVF Flat uses unexpanded formula - return raft::distance::DistanceType::L2Expanded; - } else { - throw std::runtime_error("raft supports only metric type of inner product and L2"); - } -} - -/** Report a more verbose error with a backtrace when OOM occurs on RMM side. */ -inline auto rmm_oom_callback(std::size_t bytes, void*) -> bool -{ - auto cuda_status = cudaGetLastError(); - size_t free = 0; - size_t total = 0; - RAFT_CUDA_TRY_NO_THROW(cudaMemGetInfo(&free, &total)); - RAFT_FAIL( - "Failed to allocate %zu bytes using RMM memory resource. " - "NB: latest cuda status = %s, free memory = %zu, total memory = %zu.", - bytes, - cudaGetErrorName(cuda_status), - free, - total); -} - -/** - * This container keeps the part of raft state that should be shared among multiple copies of raft - * handles (in different CPU threads). - * An example of this is an RMM memory resource: if we had an RMM memory pool per thread, we'd - * quickly run out of memory. 
- */ -class shared_raft_resources { - public: - using pool_mr_type = rmm::mr::pool_memory_resource; - using mr_type = rmm::mr::failure_callback_resource_adaptor; - using large_mr_type = rmm::mr::managed_memory_resource; - - shared_raft_resources() - try : orig_resource_{rmm::mr::get_current_device_resource()}, - pool_resource_(orig_resource_, 1024 * 1024 * 1024ull), - resource_(&pool_resource_, rmm_oom_callback, nullptr), large_mr_() { - rmm::mr::set_current_device_resource(&resource_); - } catch (const std::exception& e) { - auto cuda_status = cudaGetLastError(); - size_t free = 0; - size_t total = 0; - RAFT_CUDA_TRY_NO_THROW(cudaMemGetInfo(&free, &total)); - RAFT_FAIL( - "Failed to initialize shared raft resources (NB: latest cuda status = %s, free memory = %zu, " - "total memory = %zu): %s", - cudaGetErrorName(cuda_status), - free, - total, - e.what()); - } - - shared_raft_resources(shared_raft_resources&&) = delete; - shared_raft_resources& operator=(shared_raft_resources&&) = delete; - shared_raft_resources(const shared_raft_resources& res) = delete; - shared_raft_resources& operator=(const shared_raft_resources& other) = delete; - - ~shared_raft_resources() noexcept { rmm::mr::set_current_device_resource(orig_resource_); } - - auto get_large_memory_resource() noexcept - { - return static_cast(&large_mr_); - } - - private: - rmm::mr::device_memory_resource* orig_resource_; - pool_mr_type pool_resource_; - mr_type resource_; - large_mr_type large_mr_; -}; - -/** - * This struct is used by multiple raft benchmark wrappers. It serves as a thread-safe keeper of - * shared and private GPU resources (see below). - * - * - Accessing the same `configured_raft_resources` from concurrent threads is not safe. - * - Accessing the copies of `configured_raft_resources` from concurrent threads is safe. - * - There must be at most one "original" `configured_raft_resources` at any time, but as many - * copies of it as needed (modifies the program static state). 
- */ -class configured_raft_resources { - public: - /** - * This constructor has the shared state passed unmodified but creates the local state anew. - * It's used by the copy constructor. - */ - explicit configured_raft_resources(const std::shared_ptr& shared_res) - : shared_res_{shared_res}, - res_{std::make_unique( - rmm::cuda_stream_view(get_stream_from_global_pool()))} - { - // set the large workspace resource to the raft handle, but without the deleter - // (this resource is managed by the shared_res). - raft::resource::set_large_workspace_resource( - *res_, - std::shared_ptr(shared_res_->get_large_memory_resource(), - raft::void_op{})); - } - - /** Default constructor creates all resources anew. */ - configured_raft_resources() : configured_raft_resources{std::make_shared()} - { - } - - configured_raft_resources(configured_raft_resources&&); - configured_raft_resources& operator=(configured_raft_resources&&); - ~configured_raft_resources() = default; - configured_raft_resources(const configured_raft_resources& res) - : configured_raft_resources{res.shared_res_} - { - } - configured_raft_resources& operator=(const configured_raft_resources& other) - { - this->shared_res_ = other.shared_res_; - return *this; - } - - operator raft::resources&() noexcept { return *res_; } - operator const raft::resources&() const noexcept { return *res_; } - - /** Get the main stream */ - [[nodiscard]] auto get_sync_stream() const noexcept { return resource::get_cuda_stream(*res_); } - - private: - /** The resources shared among multiple raft handles / threads. */ - std::shared_ptr shared_res_; - /** - * Until we make the use of copies of raft::resources thread-safe, each benchmark wrapper must - * have its own copy of it. 
- */ - std::unique_ptr res_ = std::make_unique(); -}; - -inline configured_raft_resources::configured_raft_resources(configured_raft_resources&&) = default; -inline configured_raft_resources& configured_raft_resources::operator=( - configured_raft_resources&&) = default; - -/** A helper to refine the neighbors when the data is on device or on host. */ -template -void refine_helper(const raft::resources& res, - DatasetT dataset, - QueriesT queries, - CandidatesT candidates, - int k, - AnnBase::index_type* neighbors, - float* distances, - raft::distance::DistanceType metric) -{ - using data_type = typename DatasetT::value_type; - using index_type = AnnBase::index_type; - using extents_type = index_type; // device-side refine requires this - - static_assert(std::is_same_v); - static_assert(std::is_same_v); - static_assert(std::is_same_v); - - extents_type batch_size = queries.extent(0); - extents_type dim = queries.extent(1); - extents_type k0 = candidates.extent(1); - - if (raft::get_device_for_address(dataset.data_handle()) >= 0) { - auto dataset_device = raft::make_device_matrix_view( - dataset.data_handle(), dataset.extent(0), dataset.extent(1)); - auto queries_device = raft::make_device_matrix_view( - queries.data_handle(), batch_size, dim); - auto candidates_device = raft::make_device_matrix_view( - candidates.data_handle(), batch_size, k0); - auto neighbors_device = - raft::make_device_matrix_view(neighbors, batch_size, k); - auto distances_device = - raft::make_device_matrix_view(distances, batch_size, k); - - raft::neighbors::refine(res, - dataset_device, - queries_device, - candidates_device, - neighbors_device, - distances_device, - metric); - } else { - auto dataset_host = raft::make_host_matrix_view( - dataset.data_handle(), dataset.extent(0), dataset.extent(1)); - auto queries_host = raft::make_host_matrix(batch_size, dim); - auto candidates_host = raft::make_host_matrix(batch_size, k0); - auto neighbors_host = raft::make_host_matrix(batch_size, k); - 
auto distances_host = raft::make_host_matrix(batch_size, k); - - auto stream = resource::get_cuda_stream(res); - raft::copy(queries_host.data_handle(), queries.data_handle(), queries_host.size(), stream); - raft::copy( - candidates_host.data_handle(), candidates.data_handle(), candidates_host.size(), stream); - - raft::resource::sync_stream(res); // wait for the queries and candidates - raft::neighbors::refine(res, - dataset_host, - queries_host.view(), - candidates_host.view(), - neighbors_host.view(), - distances_host.view(), - metric); - - raft::copy(neighbors, neighbors_host.data_handle(), neighbors_host.size(), stream); - raft::copy(distances, distances_host.data_handle(), distances_host.size(), stream); - } -} - -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu deleted file mode 100644 index 8bb4d9423c..0000000000 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../common/ann_types.hpp" -#include "raft_ann_bench_param_parser.h" - -#include - -#include - -#define JSON_DIAGNOSTICS 1 -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace raft::bench::ann { - -template -std::unique_ptr> create_algo(const std::string& algo, - const std::string& distance, - int dim, - const nlohmann::json& conf, - const std::vector& dev_list) -{ - // stop compiler warning; not all algorithms support multi-GPU so it may not be used - (void)dev_list; - - [[maybe_unused]] raft::bench::ann::Metric metric = parse_metric(distance); - std::unique_ptr> ann; - - if constexpr (std::is_same_v) { -#ifdef RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE - if (algo == "raft_brute_force") { - ann = std::make_unique>(metric, dim); - } -#endif - } - - if constexpr (std::is_same_v) {} - -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT - if constexpr (std::is_same_v || std::is_same_v || - std::is_same_v) { - if (algo == "raft_ivf_flat") { - typename raft::bench::ann::RaftIvfFlatGpu::BuildParam param; - parse_build_param(conf, param); - ann = std::make_unique>(metric, dim, param); - } - } -#endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_PQ - if (algo == "raft_ivf_pq") { - typename raft::bench::ann::RaftIvfPQ::BuildParam param; - parse_build_param(conf, param); - ann = std::make_unique>(metric, dim, param); - } -#endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA - if (algo == "raft_cagra") { - typename raft::bench::ann::RaftCagra::BuildParam param; - parse_build_param(conf, param); - ann = std::make_unique>(metric, dim, param); - } -#endif - - if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } - - return ann; -} - -template -std::unique_ptr::AnnSearchParam> create_search_param( - const std::string& algo, const nlohmann::json& conf) -{ -#ifdef RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE - if (algo == "raft_brute_force") { - auto param = std::make_unique::AnnSearchParam>(); - return param; - } -#endif -#ifdef 
RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT - if constexpr (std::is_same_v || std::is_same_v || - std::is_same_v) { - if (algo == "raft_ivf_flat") { - auto param = - std::make_unique::SearchParam>(); - parse_search_param(conf, *param); - return param; - } - } -#endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_PQ - if (algo == "raft_ivf_pq") { - auto param = std::make_unique::SearchParam>(); - parse_search_param(conf, *param); - return param; - } -#endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA - if (algo == "raft_cagra") { - auto param = std::make_unique::SearchParam>(); - parse_search_param(conf, *param); - return param; - } -#endif - - // else - throw std::runtime_error("invalid algo: '" + algo + "'"); -} - -}; // namespace raft::bench::ann - -REGISTER_ALGO_INSTANCE(float); -REGISTER_ALGO_INSTANCE(half); -REGISTER_ALGO_INSTANCE(std::int8_t); -REGISTER_ALGO_INSTANCE(std::uint8_t); - -#ifdef ANN_BENCH_BUILD_MAIN -#include "../common/benchmark.hpp" -int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } -#endif diff --git a/cpp/bench/ann/src/raft/raft_cagra_float.cu b/cpp/bench/ann/src/raft/raft_cagra_float.cu deleted file mode 100644 index 058f5bf34a..0000000000 --- a/cpp/bench/ann/src/raft/raft_cagra_float.cu +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "raft_cagra_wrapper.h" - -namespace raft::bench::ann { -template class RaftCagra; -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_cagra_half.cu b/cpp/bench/ann/src/raft/raft_cagra_half.cu deleted file mode 100644 index a015819ec5..0000000000 --- a/cpp/bench/ann/src/raft/raft_cagra_half.cu +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "raft_cagra_wrapper.h" - -namespace raft::bench::ann { -template class RaftCagra; -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu b/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu deleted file mode 100644 index d9ef1d74a3..0000000000 --- a/cpp/bench/ann/src/raft/raft_cagra_hnswlib.cu +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "../common/ann_types.hpp" -#include "raft_ann_bench_param_parser.h" -#include "raft_cagra_hnswlib_wrapper.h" - -#include -#include -#include - -#define JSON_DIAGNOSTICS 1 -#include - -namespace raft::bench::ann { - -template -void parse_search_param(const nlohmann::json& conf, - typename raft::bench::ann::RaftCagraHnswlib::SearchParam& param) -{ - param.ef = conf.at("ef"); - if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); } -} - -template -std::unique_ptr> create_algo(const std::string& algo, - const std::string& distance, - int dim, - const nlohmann::json& conf, - const std::vector& dev_list) -{ - // stop compiler warning; not all algorithms support multi-GPU so it may not be used - (void)dev_list; - - [[maybe_unused]] raft::bench::ann::Metric metric = parse_metric(distance); - std::unique_ptr> ann; - - if constexpr (std::is_same_v or std::is_same_v) { - if (algo == "raft_cagra_hnswlib") { - typename raft::bench::ann::RaftCagraHnswlib::BuildParam param; - parse_build_param(conf, param); - ann = std::make_unique>(metric, dim, param); - } - } - - if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } - - return ann; -} - -template -std::unique_ptr::AnnSearchParam> create_search_param( - const std::string& algo, const nlohmann::json& conf) -{ - if (algo == "raft_cagra_hnswlib") { - auto param = - std::make_unique::SearchParam>(); - parse_search_param(conf, *param); - return param; - } - - throw std::runtime_error("invalid algo: '" + algo + "'"); -} - -} // namespace raft::bench::ann - -REGISTER_ALGO_INSTANCE(float); -REGISTER_ALGO_INSTANCE(std::int8_t); -REGISTER_ALGO_INSTANCE(std::uint8_t); - -#ifdef ANN_BENCH_BUILD_MAIN -#include "../common/benchmark.hpp" -int main(int argc, char** argv) -{ - rmm::mr::cuda_memory_resource cuda_mr; - // Construct a resource that uses a coalescing best-fit pool allocator - // and is initially sized to half of free device memory. 
- rmm::mr::pool_memory_resource pool_mr{ - &cuda_mr, rmm::percent_of_free_device_memory(50)}; - // Updates the current device resource pointer to `pool_mr` - auto old_mr = rmm::mr::set_current_device_resource(&pool_mr); - auto ret = raft::bench::ann::run_main(argc, argv); - // Restores the current device resource pointer to its previous value - rmm::mr::set_current_device_resource(old_mr); - return ret; -} -#endif diff --git a/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h deleted file mode 100644 index 1d2a1076ab..0000000000 --- a/cpp/bench/ann/src/raft/raft_cagra_hnswlib_wrapper.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -#include "../hnswlib/hnswlib_wrapper.h" -#include "raft_cagra_wrapper.h" - -#include - -namespace raft::bench::ann { - -template -class RaftCagraHnswlib : public ANN, public AnnGPU { - public: - using typename ANN::AnnSearchParam; - using BuildParam = typename RaftCagra::BuildParam; - using SearchParam = typename HnswLib::SearchParam; - - RaftCagraHnswlib(Metric metric, int dim, const BuildParam& param, int concurrent_searches = 1) - : ANN(metric, dim), - cagra_build_{metric, dim, param, concurrent_searches, true}, - // HnswLib param values don't matter since we don't build with HnswLib - hnswlib_search_{metric, dim, typename HnswLib::BuildParam{50, 100}} - { - } - - void build(const T* dataset, size_t nrow) final; - - void set_search_param(const AnnSearchParam& param) override; - - void search(const T* queries, - int batch_size, - int k, - AnnBase::index_type* neighbors, - float* distances) const override; - - [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override - { - return cagra_build_.get_sync_stream(); - } - - // to enable dataset access from GPU memory - AlgoProperty get_preference() const override - { - AlgoProperty property; - property.dataset_memory_type = MemoryType::HostMmap; - property.query_memory_type = MemoryType::Host; - return property; - } - - void save(const std::string& file) const override; - void load(const std::string&) override; - std::unique_ptr> copy() override - { - return std::make_unique>(*this); - } - - private: - RaftCagra cagra_build_; - HnswLib hnswlib_search_; -}; - -template -void RaftCagraHnswlib::build(const T* dataset, size_t nrow) -{ - cagra_build_.build(dataset, nrow); -} - -template -void RaftCagraHnswlib::set_search_param(const AnnSearchParam& param_) -{ - hnswlib_search_.set_search_param(param_); -} - -template -void RaftCagraHnswlib::save(const std::string& file) const -{ - cagra_build_.save_to_hnswlib(file); -} - -template -void RaftCagraHnswlib::load(const std::string& file) 
-{ - hnswlib_search_.load(file); - hnswlib_search_.set_base_layer_only(); -} - -template -void RaftCagraHnswlib::search( - const T* queries, int batch_size, int k, AnnBase::index_type* neighbors, float* distances) const -{ - hnswlib_search_.search(queries, batch_size, k, neighbors, distances); -} - -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_cagra_int8_t.cu b/cpp/bench/ann/src/raft/raft_cagra_int8_t.cu deleted file mode 100644 index be3b83ee60..0000000000 --- a/cpp/bench/ann/src/raft/raft_cagra_int8_t.cu +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "raft_cagra_wrapper.h" - -namespace raft::bench::ann { -template class RaftCagra; -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_cagra_uint8_t.cu b/cpp/bench/ann/src/raft/raft_cagra_uint8_t.cu deleted file mode 100644 index c9679e404d..0000000000 --- a/cpp/bench/ann/src/raft/raft_cagra_uint8_t.cu +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "raft_cagra_wrapper.h" - -namespace raft::bench::ann { -template class RaftCagra; -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h deleted file mode 100644 index b03f875a8e..0000000000 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ /dev/null @@ -1,339 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -#include "../common/ann_types.hpp" -#include "../common/cuda_huge_page_resource.hpp" -#include "../common/cuda_pinned_resource.hpp" -#include "raft_ann_bench_utils.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace raft::bench::ann { - -enum class AllocatorType { HostPinned, HostHugePage, Device }; -template -class RaftCagra : public ANN, public AnnGPU { - public: - using typename ANN::AnnSearchParam; - - struct SearchParam : public AnnSearchParam { - raft::neighbors::experimental::cagra::search_params p; - float refine_ratio; - AllocatorType graph_mem = AllocatorType::Device; - AllocatorType dataset_mem = AllocatorType::Device; - auto needs_dataset() const -> bool override { return true; } - }; - - struct BuildParam { - raft::neighbors::cagra::index_params cagra_params; - std::optional nn_descent_params = - std::nullopt; - std::optional ivf_pq_refine_rate = std::nullopt; - std::optional ivf_pq_build_params = std::nullopt; - std::optional ivf_pq_search_params = std::nullopt; - }; - - RaftCagra(Metric metric, - int dim, - const BuildParam& param, - int concurrent_searches = 1, - bool shall_include_dataset = false) - : ANN(metric, dim), - index_params_(param), - dimension_(dim), - need_dataset_update_(true), - shall_include_dataset_(shall_include_dataset), - dataset_(std::make_shared>( - std::move(make_device_matrix(handle_, 0, 0)))), - graph_(std::make_shared>( - std::move(make_device_matrix(handle_, 0, 0)))), - input_dataset_v_( - std::make_shared>(nullptr, 0, 0)), - graph_mem_(AllocatorType::Device), - dataset_mem_(AllocatorType::Device) - { - index_params_.cagra_params.metric = parse_metric_type(metric); - index_params_.ivf_pq_build_params->metric = parse_metric_type(metric); - } - - void build(const T* dataset, size_t nrow) final; - 
- void set_search_param(const AnnSearchParam& param) override; - - void set_search_dataset(const T* dataset, size_t nrow) override; - - void search(const T* queries, - int batch_size, - int k, - AnnBase::index_type* neighbors, - float* distances) const override; - void search_base(const T* queries, - int batch_size, - int k, - AnnBase::index_type* neighbors, - float* distances) const; - - [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override - { - return handle_.get_sync_stream(); - } - - // to enable dataset access from GPU memory - AlgoProperty get_preference() const override - { - AlgoProperty property; - property.dataset_memory_type = MemoryType::HostMmap; - property.query_memory_type = MemoryType::Device; - return property; - } - void save(const std::string& file) const override; - void load(const std::string&) override; - void save_to_hnswlib(const std::string& file) const; - std::unique_ptr> copy() override; - - private: - // handle_ must go first to make sure it dies last and all memory allocated in pool - configured_raft_resources handle_{}; - raft::mr::cuda_pinned_resource mr_pinned_; - raft::mr::cuda_huge_page_resource mr_huge_page_; - AllocatorType graph_mem_; - AllocatorType dataset_mem_; - float refine_ratio_; - BuildParam index_params_; - bool need_dataset_update_; - bool shall_include_dataset_; - raft::neighbors::cagra::search_params search_params_; - std::shared_ptr> index_; - int dimension_; - std::shared_ptr> graph_; - std::shared_ptr> dataset_; - std::shared_ptr> input_dataset_v_; - - inline rmm::device_async_resource_ref get_mr(AllocatorType mem_type) - { - switch (mem_type) { - case (AllocatorType::HostPinned): return &mr_pinned_; - case (AllocatorType::HostHugePage): return &mr_huge_page_; - default: return rmm::mr::get_current_device_resource(); - } - } -}; - -template -void RaftCagra::build(const T* dataset, size_t nrow) -{ - auto dataset_view = - raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_); - - auto& 
params = index_params_.cagra_params; - - // Do include the compressed dataset for the CAGRA-Q - bool include_dataset = params.compression.has_value() || shall_include_dataset_; - - index_ = std::make_shared>( - std::move(raft::neighbors::cagra::detail::build(handle_, - params, - dataset_view, - index_params_.nn_descent_params, - index_params_.ivf_pq_refine_rate, - index_params_.ivf_pq_build_params, - index_params_.ivf_pq_search_params, - include_dataset))); -} - -inline std::string allocator_to_string(AllocatorType mem_type) -{ - if (mem_type == AllocatorType::Device) { - return "device"; - } else if (mem_type == AllocatorType::HostPinned) { - return "host_pinned"; - } else if (mem_type == AllocatorType::HostHugePage) { - return "host_huge_page"; - } - return ""; -} - -template -void RaftCagra::set_search_param(const AnnSearchParam& param) -{ - auto search_param = dynamic_cast(param); - search_params_ = search_param.p; - refine_ratio_ = search_param.refine_ratio; - if (search_param.graph_mem != graph_mem_) { - // Move graph to correct memory space - graph_mem_ = search_param.graph_mem; - RAFT_LOG_DEBUG("moving graph to new memory space: %s", allocator_to_string(graph_mem_).c_str()); - // We create a new graph and copy to it from existing graph - auto mr = get_mr(graph_mem_); - auto new_graph = make_device_mdarray( - handle_, mr, make_extents(index_->graph().extent(0), index_->graph_degree())); - - raft::copy(new_graph.data_handle(), - index_->graph().data_handle(), - index_->graph().size(), - resource::get_cuda_stream(handle_)); - - index_->update_graph(handle_, make_const_mdspan(new_graph.view())); - // update_graph() only stores a view in the index. We need to keep the graph object alive. 
- *graph_ = std::move(new_graph); - } - - if (search_param.dataset_mem != dataset_mem_ || need_dataset_update_) { - dataset_mem_ = search_param.dataset_mem; - - // First free up existing memory - *dataset_ = make_device_matrix(handle_, 0, 0); - index_->update_dataset(handle_, make_const_mdspan(dataset_->view())); - - // Allocate space using the correct memory resource. - RAFT_LOG_DEBUG("moving dataset to new memory space: %s", - allocator_to_string(dataset_mem_).c_str()); - - auto mr = get_mr(dataset_mem_); - raft::neighbors::cagra::detail::copy_with_padding(handle_, *dataset_, *input_dataset_v_, mr); - - auto dataset_view = raft::make_device_strided_matrix_view( - dataset_->data_handle(), dataset_->extent(0), this->dim_, dataset_->extent(1)); - index_->update_dataset(handle_, dataset_view); - - need_dataset_update_ = false; - } -} - -template -void RaftCagra::set_search_dataset(const T* dataset, size_t nrow) -{ - using ds_idx_type = decltype(index_->data().n_rows()); - bool is_vpq = - dynamic_cast*>(&index_->data()) || - dynamic_cast*>(&index_->data()); - // It can happen that we are re-using a previous algo object which already has - // the dataset set. Check if we need update. - if (static_cast(input_dataset_v_->extent(0)) != nrow || - input_dataset_v_->data_handle() != dataset) { - *input_dataset_v_ = make_device_matrix_view(dataset, nrow, this->dim_); - need_dataset_update_ = !is_vpq; // ignore update if this is a VPQ dataset. 
- } -} - -template -void RaftCagra::save(const std::string& file) const -{ - raft::neighbors::cagra::serialize(handle_, file, *index_); -} - -template -void RaftCagra::save_to_hnswlib(const std::string& file) const -{ - raft::neighbors::cagra::serialize_to_hnswlib(handle_, file, *index_); -} - -template -void RaftCagra::load(const std::string& file) -{ - index_ = std::make_shared>( - std::move(raft::neighbors::cagra::deserialize(handle_, file))); -} - -template -std::unique_ptr> RaftCagra::copy() -{ - return std::make_unique>(*this); // use copy constructor -} - -template -void RaftCagra::search_base( - const T* queries, int batch_size, int k, AnnBase::index_type* neighbors, float* distances) const -{ - static_assert(std::is_integral_v); - static_assert(std::is_integral_v); - - IdxT* neighbors_IdxT; - std::optional> neighbors_storage{std::nullopt}; - if constexpr (sizeof(IdxT) == sizeof(AnnBase::index_type)) { - neighbors_IdxT = reinterpret_cast(neighbors); - } else { - neighbors_storage.emplace(batch_size * k, resource::get_cuda_stream(handle_)); - neighbors_IdxT = neighbors_storage->data(); - } - - auto queries_view = - raft::make_device_matrix_view(queries, batch_size, dimension_); - auto neighbors_view = raft::make_device_matrix_view(neighbors_IdxT, batch_size, k); - auto distances_view = raft::make_device_matrix_view(distances, batch_size, k); - - raft::neighbors::cagra::search( - handle_, search_params_, *index_, queries_view, neighbors_view, distances_view); - - if constexpr (sizeof(IdxT) != sizeof(AnnBase::index_type)) { - raft::linalg::unaryOp(neighbors, - neighbors_IdxT, - batch_size * k, - raft::cast_op(), - raft::resource::get_cuda_stream(handle_)); - } -} - -template -void RaftCagra::search( - const T* queries, int batch_size, int k, AnnBase::index_type* neighbors, float* distances) const -{ - auto k0 = static_cast(refine_ratio_ * k); - const bool disable_refinement = k0 <= static_cast(k); - const raft::resources& res = handle_; - - if 
(disable_refinement) { - search_base(queries, batch_size, k, neighbors, distances); - } else { - auto queries_v = - raft::make_device_matrix_view(queries, batch_size, dimension_); - auto candidate_ixs = - raft::make_device_matrix(res, batch_size, k0); - auto candidate_dists = - raft::make_device_matrix(res, batch_size, k0); - search_base( - queries, batch_size, k0, candidate_ixs.data_handle(), candidate_dists.data_handle()); - refine_helper( - res, *input_dataset_v_, queries_v, candidate_ixs, k, neighbors, distances, index_->metric()); - } -} -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_ivf_flat.cu b/cpp/bench/ann/src/raft/raft_ivf_flat.cu deleted file mode 100644 index bcd23723a4..0000000000 --- a/cpp/bench/ann/src/raft/raft_ivf_flat.cu +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "raft_ivf_flat_wrapper.h" - -namespace raft::bench::ann { -template class RaftIvfFlatGpu; -template class RaftIvfFlatGpu; -template class RaftIvfFlatGpu; -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h deleted file mode 100644 index 83a3a63aba..0000000000 --- a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "../common/ann_types.hpp" -#include "raft_ann_bench_utils.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace raft::bench::ann { - -template -class RaftIvfFlatGpu : public ANN, public AnnGPU { - public: - using typename ANN::AnnSearchParam; - - struct SearchParam : public AnnSearchParam { - raft::neighbors::ivf_flat::search_params ivf_flat_params; - }; - - using BuildParam = raft::neighbors::ivf_flat::index_params; - - RaftIvfFlatGpu(Metric metric, int dim, const BuildParam& param) - : ANN(metric, dim), index_params_(param), dimension_(dim) - { - index_params_.metric = parse_metric_type(metric); - index_params_.conservative_memory_allocation = true; - RAFT_CUDA_TRY(cudaGetDevice(&device_)); - } - - void build(const T* dataset, size_t nrow) final; - - void set_search_param(const AnnSearchParam& param) override; - - void search(const T* queries, - int batch_size, - int k, - AnnBase::index_type* neighbors, - float* distances) const override; - - [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override - { - return handle_.get_sync_stream(); - } - - // to enable dataset access from GPU memory - AlgoProperty get_preference() const override - { - AlgoProperty property; - property.dataset_memory_type = MemoryType::HostMmap; - 
property.query_memory_type = MemoryType::Device; - return property; - } - void save(const std::string& file) const override; - void load(const std::string&) override; - std::unique_ptr> copy() override; - - private: - // handle_ must go first to make sure it dies last and all memory allocated in pool - configured_raft_resources handle_{}; - BuildParam index_params_; - raft::neighbors::ivf_flat::search_params search_params_; - std::shared_ptr> index_; - int device_; - int dimension_; -}; - -template -void RaftIvfFlatGpu::build(const T* dataset, size_t nrow) -{ - index_ = std::make_shared>(std::move( - raft::neighbors::ivf_flat::build(handle_, index_params_, dataset, IdxT(nrow), dimension_))); -} - -template -void RaftIvfFlatGpu::set_search_param(const AnnSearchParam& param) -{ - auto search_param = dynamic_cast(param); - search_params_ = search_param.ivf_flat_params; - assert(search_params_.n_probes <= index_params_.n_lists); -} - -template -void RaftIvfFlatGpu::save(const std::string& file) const -{ - raft::neighbors::ivf_flat::serialize(handle_, file, *index_); - return; -} - -template -void RaftIvfFlatGpu::load(const std::string& file) -{ - index_ = std::make_shared>( - std::move(raft::neighbors::ivf_flat::deserialize(handle_, file))); - return; -} - -template -std::unique_ptr> RaftIvfFlatGpu::copy() -{ - return std::make_unique>(*this); // use copy constructor -} - -template -void RaftIvfFlatGpu::search( - const T* queries, int batch_size, int k, AnnBase::index_type* neighbors, float* distances) const -{ - static_assert(std::is_integral_v); - static_assert(std::is_integral_v); - - IdxT* neighbors_IdxT; - std::optional> neighbors_storage{std::nullopt}; - if constexpr (sizeof(IdxT) == sizeof(AnnBase::index_type)) { - neighbors_IdxT = reinterpret_cast(neighbors); - } else { - neighbors_storage.emplace(batch_size * k, resource::get_cuda_stream(handle_)); - neighbors_IdxT = neighbors_storage->data(); - } - raft::neighbors::ivf_flat::search(handle_, - search_params_, 
- *index_, - queries, - batch_size, - k, - neighbors_IdxT, - distances, - resource::get_workspace_resource(handle_)); - if constexpr (sizeof(IdxT) != sizeof(AnnBase::index_type)) { - raft::linalg::unaryOp(neighbors, - neighbors_IdxT, - batch_size * k, - raft::cast_op(), - raft::resource::get_cuda_stream(handle_)); - } -} -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_ivf_pq.cu b/cpp/bench/ann/src/raft/raft_ivf_pq.cu deleted file mode 100644 index d4f68c1c7d..0000000000 --- a/cpp/bench/ann/src/raft/raft_ivf_pq.cu +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "raft_ivf_pq_wrapper.h" - -namespace raft::bench::ann { -template class RaftIvfPQ; -template class RaftIvfPQ; -template class RaftIvfPQ; -template class RaftIvfPQ; -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h deleted file mode 100644 index 7201467969..0000000000 --- a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "../common/ann_types.hpp" -#include "raft_ann_bench_utils.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace raft::bench::ann { - -template -class RaftIvfPQ : public ANN, public AnnGPU { - public: - using typename ANN::AnnSearchParam; - using ANN::dim_; - - struct SearchParam : public AnnSearchParam { - raft::neighbors::ivf_pq::search_params pq_param; - float refine_ratio = 1.0f; - auto needs_dataset() const -> bool override { return refine_ratio > 1.0f; } - }; - - using BuildParam = raft::neighbors::ivf_pq::index_params; - - RaftIvfPQ(Metric metric, int dim, const BuildParam& param) - : ANN(metric, dim), index_params_(param), dimension_(dim) - { - index_params_.metric = parse_metric_type(metric); - } - - void build(const T* dataset, size_t nrow) final; - - void set_search_param(const AnnSearchParam& param) override; - void set_search_dataset(const T* dataset, size_t nrow) override; - - void search(const T* queries, - int batch_size, - int k, - AnnBase::index_type* neighbors, - float* distances) const override; - void search_base(const T* queries, - int batch_size, - int k, - AnnBase::index_type* neighbors, - float* distances) const; - - [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override - { - return handle_.get_sync_stream(); - } - - // to enable dataset access from GPU memory - AlgoProperty get_preference() const override - { - AlgoProperty property; - property.dataset_memory_type = 
MemoryType::Host; - property.query_memory_type = MemoryType::Device; - return property; - } - void save(const std::string& file) const override; - void load(const std::string&) override; - std::unique_ptr> copy() override; - - private: - // handle_ must go first to make sure it dies last and all memory allocated in pool - configured_raft_resources handle_{}; - BuildParam index_params_; - raft::neighbors::ivf_pq::search_params search_params_; - std::shared_ptr> index_; - int dimension_; - float refine_ratio_ = 1.0; - raft::device_matrix_view dataset_; -}; - -template -void RaftIvfPQ::save(const std::string& file) const -{ - raft::neighbors::ivf_pq::serialize(handle_, file, *index_); -} - -template -void RaftIvfPQ::load(const std::string& file) -{ - index_ = std::make_shared>( - std::move(raft::neighbors::ivf_pq::deserialize(handle_, file))); -} - -template -void RaftIvfPQ::build(const T* dataset, size_t nrow) -{ - auto dataset_v = raft::make_device_matrix_view(dataset, IdxT(nrow), dim_); - std::make_shared>( - std::move(raft::neighbors::ivf_pq::build(handle_, index_params_, dataset_v))) - .swap(index_); -} - -template -std::unique_ptr> RaftIvfPQ::copy() -{ - return std::make_unique>(*this); // use copy constructor -} - -template -void RaftIvfPQ::set_search_param(const AnnSearchParam& param) -{ - auto search_param = dynamic_cast(param); - search_params_ = search_param.pq_param; - refine_ratio_ = search_param.refine_ratio; - assert(search_params_.n_probes <= index_params_.n_lists); -} - -template -void RaftIvfPQ::set_search_dataset(const T* dataset, size_t nrow) -{ - dataset_ = raft::make_device_matrix_view(dataset, nrow, index_->dim()); -} - -template -void RaftIvfPQ::search_base( - const T* queries, int batch_size, int k, AnnBase::index_type* neighbors, float* distances) const -{ - static_assert(std::is_integral_v); - static_assert(std::is_integral_v); - - IdxT* neighbors_IdxT; - std::optional> neighbors_storage{std::nullopt}; - if constexpr (sizeof(IdxT) == 
sizeof(AnnBase::index_type)) { - neighbors_IdxT = reinterpret_cast(neighbors); - } else { - neighbors_storage.emplace(batch_size * k, resource::get_cuda_stream(handle_)); - neighbors_IdxT = neighbors_storage->data(); - } - - auto queries_view = - raft::make_device_matrix_view(queries, batch_size, dimension_); - auto neighbors_view = - raft::make_device_matrix_view(neighbors_IdxT, batch_size, k); - auto distances_view = raft::make_device_matrix_view(distances, batch_size, k); - - raft::neighbors::ivf_pq::search( - handle_, search_params_, *index_, queries_view, neighbors_view, distances_view); - - if constexpr (sizeof(IdxT) != sizeof(AnnBase::index_type)) { - raft::linalg::unaryOp(neighbors, - neighbors_IdxT, - batch_size * k, - raft::cast_op(), - raft::resource::get_cuda_stream(handle_)); - } -} - -template -void RaftIvfPQ::search( - const T* queries, int batch_size, int k, AnnBase::index_type* neighbors, float* distances) const -{ - auto k0 = static_cast(refine_ratio_ * k); - const bool disable_refinement = k0 <= static_cast(k); - const raft::resources& res = handle_; - - if (disable_refinement) { - search_base(queries, batch_size, k, neighbors, distances); - } else { - auto queries_v = - raft::make_device_matrix_view(queries, batch_size, dimension_); - auto candidate_ixs = - raft::make_device_matrix(res, batch_size, k0); - auto candidate_dists = - raft::make_device_matrix(res, batch_size, k0); - search_base( - queries, batch_size, k0, candidate_ixs.data_handle(), candidate_dists.data_handle()); - refine_helper( - res, dataset_, queries_v, candidate_ixs, k, neighbors, distances, index_->metric()); - } -} -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_wrapper.h b/cpp/bench/ann/src/raft/raft_wrapper.h deleted file mode 100644 index 2c996058b2..0000000000 --- a/cpp/bench/ann/src/raft/raft_wrapper.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "../common/ann_types.hpp" -#include "raft_ann_bench_utils.h" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -namespace raft_temp { - -inline raft::distance::DistanceType parse_metric_type(raft::bench::ann::Metric metric) -{ - switch (metric) { - case raft::bench::ann::Metric::kInnerProduct: return raft::distance::DistanceType::InnerProduct; - case raft::bench::ann::Metric::kEuclidean: return raft::distance::DistanceType::L2Expanded; - default: throw std::runtime_error("raft supports only metric type of inner product and L2"); - } -} -} // namespace raft_temp - -namespace raft::bench::ann { - -// brute force KNN - RAFT -template -class RaftGpu : public ANN, public AnnGPU { - public: - using typename ANN::AnnSearchParam; - - RaftGpu(Metric metric, int dim); - - void build(const T*, size_t) final; - - void set_search_param(const AnnSearchParam& param) override; - - void search(const T* queries, - int batch_size, - int k, - AnnBase::index_type* neighbors, - float* distances) const final; - - // to enable dataset access from GPU memory - AlgoProperty get_preference() const override - { - AlgoProperty property; - property.dataset_memory_type = MemoryType::Device; - property.query_memory_type = MemoryType::Device; - return property; - } - [[nodiscard]] auto get_sync_stream() const noexcept -> cudaStream_t override - { - return 
handle_.get_sync_stream(); - } - void set_search_dataset(const T* dataset, size_t nrow) override; - void save(const std::string& file) const override; - void load(const std::string&) override; - std::unique_ptr> copy() override; - - protected: - // handle_ must go first to make sure it dies last and all memory allocated in pool - configured_raft_resources handle_{}; - std::shared_ptr> index_; - raft::distance::DistanceType metric_type_; - int device_; - const T* dataset_; - size_t nrow_; -}; - -template -RaftGpu::RaftGpu(Metric metric, int dim) - : ANN(metric, dim), metric_type_(raft_temp::parse_metric_type(metric)) -{ - static_assert(std::is_same_v || std::is_same_v, - "raft bfknn only supports float/double"); - RAFT_CUDA_TRY(cudaGetDevice(&device_)); -} - -template -void RaftGpu::build(const T* dataset, size_t nrow) -{ - auto dataset_view = raft::make_host_matrix_view(dataset, nrow, this->dim_); - index_ = std::make_shared>( - std::move(raft::neighbors::brute_force::build(handle_, dataset_view))); -} - -template -void RaftGpu::set_search_param(const AnnSearchParam&) -{ - // Nothing to set here as it is brute force implementation -} - -template -void RaftGpu::set_search_dataset(const T* dataset, size_t nrow) -{ - dataset_ = dataset; - nrow_ = nrow; -} - -template -void RaftGpu::save(const std::string& file) const -{ - raft::neighbors::brute_force::serialize(handle_, file, *index_); -} - -template -void RaftGpu::load(const std::string& file) -{ - index_ = std::make_shared>( - std::move(raft::neighbors::brute_force::deserialize(handle_, file))); -} - -template -void RaftGpu::search( - const T* queries, int batch_size, int k, AnnBase::index_type* neighbors, float* distances) const -{ - auto queries_view = - raft::make_device_matrix_view(queries, batch_size, this->dim_); - - auto neighbors_view = - raft::make_device_matrix_view(neighbors, batch_size, k); - auto distances_view = raft::make_device_matrix_view(distances, batch_size, k); - - 
raft::neighbors::brute_force::search( - handle_, *index_, queries_view, neighbors_view, distances_view); -} - -template -std::unique_ptr> RaftGpu::copy() -{ - return std::make_unique>(*this); // use copy constructor -} - -} // namespace raft::bench::ann diff --git a/cpp/cmake/config.json b/cpp/cmake/config.json index f7cc50e513..3c568d9766 100644 --- a/cpp/cmake/config.json +++ b/cpp/cmake/config.json @@ -9,7 +9,7 @@ "VERSION": "?", "GIT_SHALLOW": "?", "OPTIONS": "*", - "FIND_PACKAGE_ARGUMENTS": "*" + "FIND_PACKAGE_ARGUMENTS": "*" } }, "ConfigureTest": { diff --git a/cpp/cmake/modules/FindAVX.cmake b/cpp/cmake/modules/FindAVX.cmake deleted file mode 100644 index 7f3b2dfc76..0000000000 --- a/cpp/cmake/modules/FindAVX.cmake +++ /dev/null @@ -1,110 +0,0 @@ -# ============================================================================= -# Copyright (c) 2016- Facebook, Inc (Adam Paszke) -# Copyright (c) 2014- Facebook, Inc (Soumith Chintala) -# Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) -# Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) -# Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) -# Copyright (c) 2011-2013 NYU (Clement Farabet) -# Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) -# Copyright (c) 2006 Idiap Research Institute (Samy Bengio) -# Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) -# -# Note: This file was copied from PyTorch and modified for use in the RAFT library. -# Refer to thirdparty/LICENSES/LICENSE.pytorch for license and additional -# copyright information. 
-# ============================================================================= - -INCLUDE(CheckCXXSourceRuns) - -SET(AVX_CODE - " - #include - - int main() - { - __m256 a; - a = _mm256_set1_ps(0); - return 0; - } -" -) - -SET(AVX512_CODE - " - #include - - int main() - { - __m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0); - __m512i b = a; - __mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ); - return 0; - } -" -) - -SET(AVX2_CODE - " - #include - - int main() - { - __m256i a = {0}; - a = _mm256_abs_epi16(a); - __m256i x; - _mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code - return 0; - } -" -) - -MACRO(CHECK_SSE lang type flags) - SET(__FLAG_I 1) - SET(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS}) - FOREACH(__FLAG ${flags}) - IF(NOT ${lang}_${type}_FOUND) - SET(CMAKE_REQUIRED_FLAGS ${__FLAG}) - CHECK_CXX_SOURCE_RUNS("${${type}_CODE}" ${lang}_HAS_${type}_${__FLAG_I}) - IF(${lang}_HAS_${type}_${__FLAG_I}) - SET(${lang}_${type}_FOUND - TRUE - CACHE BOOL "${lang} ${type} support" - ) - SET(${lang}_${type}_FLAGS - "${__FLAG}" - CACHE STRING "${lang} ${type} flags" - ) - ENDIF() - MATH(EXPR __FLAG_I "${__FLAG_I}+1") - ENDIF() - ENDFOREACH() - SET(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE}) - - IF(NOT ${lang}_${type}_FOUND) - SET(${lang}_${type}_FOUND - FALSE - CACHE BOOL "${lang} ${type} support" - ) - SET(${lang}_${type}_FLAGS - "" - CACHE STRING "${lang} ${type} flags" - ) - ENDIF() - - MARK_AS_ADVANCED(${lang}_${type}_FOUND ${lang}_${type}_FLAGS) - -ENDMACRO() - -# CHECK_SSE(C "AVX" " ;-mavx;/arch:AVX") CHECK_SSE(C "AVX2" " ;-mavx2 -mfma;/arch:AVX2") CHECK_SSE(C -# "AVX512" " ;-mavx512f -mavx512dq -mavx512vl -mavx512bw -mfma;/arch:AVX512") -# -CHECK_SSE(CXX "AVX" " ;-mavx;/arch:AVX") -CHECK_SSE(CXX "AVX2" " ;-mavx2 -mfma;/arch:AVX2") 
-CHECK_SSE(CXX "AVX512" " ;-mavx512f -mavx512dq -mavx512vl -mavx512bw -mfma;/arch:AVX512") diff --git a/cpp/cmake/patches/faiss_override.json b/cpp/cmake/patches/faiss_override.json deleted file mode 100644 index 5d18c77fec..0000000000 --- a/cpp/cmake/patches/faiss_override.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "packages" : { - "faiss" : { - "version": "1.9.0", - "git_url": "https://github.com/facebookresearch/faiss.git", - "git_tag": "v1.9.0" - } - } -} diff --git a/cpp/cmake/patches/ggnn.diff b/cpp/cmake/patches/ggnn.diff deleted file mode 100644 index fc45298803..0000000000 --- a/cpp/cmake/patches/ggnn.diff +++ /dev/null @@ -1,230 +0,0 @@ ---- a/include/ggnn/cache/cuda_simple_knn_sym_cache.cuh -+++ b/include/ggnn/cache/cuda_simple_knn_sym_cache.cuh -@@ -62,7 +62,7 @@ struct SimpleKNNSymCache { - const ValueT dist_half) - : dist_query(dist_query), dist_half(dist_half) {} - -- __device__ __forceinline__ DistQueryAndHalf() {} -+ DistQueryAndHalf() = default; - }; - - struct DistanceAndNorm { -@@ -98,8 +98,7 @@ struct SimpleKNNSymCache { - KeyT cache; - DistQueryAndHalf dist; - bool flag; -- -- __device__ __forceinline__ SyncTempStorage() {} -+ SyncTempStorage() = default; - }; - - public: -diff --git a/include/ggnn/cuda_knn_ggnn_gpu_instance.cuh b/include/ggnn/cuda_knn_ggnn_gpu_instance.cuh -index 8cbaf0d..6eb72ac 100644 ---- a/include/ggnn/cuda_knn_ggnn_gpu_instance.cuh -+++ b/include/ggnn/cuda_knn_ggnn_gpu_instance.cuh -@@ -41,7 +41,6 @@ limitations under the License. 
- #include "ggnn/sym/cuda_knn_sym_query_layer.cuh" - #include "ggnn/utils/cuda_knn_utils.cuh" - #include "ggnn/utils/cuda_knn_constants.cuh" --#include "ggnn/utils/cuda_knn_dataset.cuh" - - template - __global__ void divide(ValueT* res, ValueT* input, ValueT N) { -@@ -98,9 +97,7 @@ struct GGNNGPUInstance { - typedef GGNNGraphDevice GGNNGraphDevice; - typedef GGNNGraphHost GGNNGraphHost; - -- const Dataset* dataset; - GGNNGraphBuffer* ggnn_buffer {nullptr}; -- GGNNQuery ggnn_query; - - // Graph Shards resident on the GPU - std::vector ggnn_shards; -@@ -117,13 +114,12 @@ struct GGNNGPUInstance { - // number of shards that need to be processed by this instance - const int num_parts; - -- GGNNGPUInstance(const int gpu_id, const Dataset* dataset, -+ GGNNGPUInstance(const int gpu_id, - const int N_shard, const int L, - const bool enable_construction, const float tau_build, - const int num_parts=1, const int num_cpu_buffers=1) : - N_shard{N_shard}, L{L}, tau_build{tau_build}, -- dataset{dataset}, gpu_id{gpu_id}, -- ggnn_query{dataset->N_query, D, KQuery, num_parts}, -+ gpu_id{gpu_id}, - num_parts{num_parts} - { - CHECK_LE(L, MAX_LAYER); -@@ -135,7 +131,6 @@ struct GGNNGPUInstance { - CHECK_EQ(current_gpu_id, gpu_id) << "cudaSetDevice() needs to be called in advance!"; - } - -- ggnn_query.loadQueriesAsync(dataset->h_query, 0); - - computeGraphParameters(); - -@@ -186,7 +181,7 @@ struct GGNNGPUInstance { - } - - GGNNGPUInstance(const GGNNGPUInstance& other) -- : dataset{nullptr}, ggnn_query{0, D, KQuery}, -+ : - gpu_id{0}, N_shard{0}, num_parts{0} { - // this exists to allow using vector::emplace_back - // when it triggers a reallocation, this code will be called. 
-@@ -305,6 +300,7 @@ struct GGNNGPUInstance { - - // io - -+ /* - void waitForDiskIO(const int shard_id) { - auto& cpu_buffer = ggnn_cpu_buffers[shard_id%ggnn_cpu_buffers.size()]; - if (cpu_buffer.disk_io_thread.joinable()) -@@ -468,11 +464,12 @@ struct GGNNGPUInstance { - CHECK_CUDA(cudaDeviceSynchronize()); - CHECK_CUDA(cudaPeekAtLastError()); - } -+ */ - - // graph operations - - template -- void queryLayer(const int shard_id = 0) const { -+ void queryLayer(const BaseT* d_query, int batch_size, KeyT* d_query_result_ids, ValueT* d_query_result_dists, const int shard_id = 0) const { - CHECK_CUDA(cudaSetDevice(gpu_id)); - const auto& shard = ggnn_shards.at(shard_id%ggnn_shards.size()); - -@@ -482,21 +479,21 @@ struct GGNNGPUInstance { - - int* m_dist_statistics = nullptr; - if (DIST_STATS) -- cudaMallocManaged(&m_dist_statistics, dataset->N_query * sizeof(int)); -+ cudaMallocManaged(&m_dist_statistics, batch_size * sizeof(int)); - - QueryKernel query_kernel; - query_kernel.d_base = shard.d_base; -- query_kernel.d_query = ggnn_query.d_query; -+ query_kernel.d_query = d_query; - - query_kernel.d_graph = shard.d_graph; -- query_kernel.d_query_results = ggnn_query.d_query_result_ids; -- query_kernel.d_query_results_dists = ggnn_query.d_query_result_dists; -+ query_kernel.d_query_results = d_query_result_ids; -+ query_kernel.d_query_results_dists = d_query_result_dists; - - query_kernel.d_translation = shard.d_translation; - - query_kernel.d_nn1_stats = shard.d_nn1_stats; - -- query_kernel.N = dataset->N_query; -+ query_kernel.N = batch_size; - query_kernel.N_offset = 0; - - query_kernel.d_dist_stats = m_dist_statistics; -@@ -771,6 +768,16 @@ struct GGNNGPUInstance { - sym(layer, shard_id); - } - } -+ -+ void set_stream(cudaStream_t stream) { -+ assert(ggnn_shards.size() == 1); -+ ggnn_shards.at(0).stream = stream; -+ } -+ -+ void set_base_data(const BaseT* dataset) { -+ assert(ggnn_shards.size() == 1); -+ ggnn_shards.at(0).d_base = dataset; -+ } - }; - - #endif // 
INCLUDE_GGNN_CUDA_KNN_GGNN_GPU_INSTANCE_CUH_ -diff --git a/include/ggnn/graph/cuda_knn_ggnn_graph_device.cuh b/include/ggnn/graph/cuda_knn_ggnn_graph_device.cuh -index c94a8f1..781226d 100644 ---- a/include/ggnn/graph/cuda_knn_ggnn_graph_device.cuh -+++ b/include/ggnn/graph/cuda_knn_ggnn_graph_device.cuh -@@ -50,7 +50,7 @@ struct GGNNGraphDevice { - ValueT* d_nn1_stats; - - /// base data pointer for the shard. -- BaseT* d_base; -+ const BaseT* d_base; - - /// combined memory pool - char* d_memory; -@@ -69,7 +69,9 @@ struct GGNNGraphDevice { - const size_t selection_translation_size = align8(ST_all * sizeof(KeyT)); - const size_t nn1_stats_size = align8(2 * sizeof(ValueT)); - total_graph_size = graph_size + 2 * selection_translation_size + nn1_stats_size; -- base_size = align8(static_cast(N) * D * sizeof(BaseT)); -+ // base_size = align8(static_cast(N) * D * sizeof(BaseT)); -+ (void) N; -+ (void) D; - - const size_t total_size = base_size+total_graph_size; - -@@ -86,8 +88,7 @@ struct GGNNGraphDevice { - CHECK_CUDA(cudaMalloc(&d_memory, total_size)); - - size_t pos = 0; -- d_base = reinterpret_cast(d_memory+pos); -- pos += base_size; -+ d_base = nullptr; - d_graph = reinterpret_cast(d_memory+pos); - pos += graph_size; - d_translation = reinterpret_cast(d_memory+pos); -@@ -99,14 +100,14 @@ struct GGNNGraphDevice { - - CHECK_EQ(pos, total_size); - -- CHECK_CUDA(cudaStreamCreate(&stream)); -+ // CHECK_CUDA(cudaStreamCreate(&stream)); - - CHECK_CUDA(cudaPeekAtLastError()); - CHECK_CUDA(cudaDeviceSynchronize()); - CHECK_CUDA(cudaPeekAtLastError()); - } - -- GGNNGraphDevice(const GGNNGraphDevice& other) { -+ GGNNGraphDevice(const GGNNGraphDevice&) { - // this exists to allow using vector::emplace_back - // when it triggers a reallocation, this code will be called. - // always make sure that enough memory is reserved ahead of time. 
-@@ -116,7 +117,7 @@ struct GGNNGraphDevice { - ~GGNNGraphDevice() { - cudaFree(d_memory); - -- CHECK_CUDA(cudaStreamDestroy(stream)); -+ // CHECK_CUDA(cudaStreamDestroy(stream)); - } - }; - -diff --git a/include/ggnn/graph/cuda_knn_ggnn_graph_host.cuh b/include/ggnn/graph/cuda_knn_ggnn_graph_host.cuh -index 2055f9e..ef5843a 100644 ---- a/include/ggnn/graph/cuda_knn_ggnn_graph_host.cuh -+++ b/include/ggnn/graph/cuda_knn_ggnn_graph_host.cuh -@@ -92,7 +92,7 @@ struct GGNNGraphHost { - CHECK_CUDA(cudaPeekAtLastError()); - } - -- GGNNGraphHost(const GGNNGraphHost& other) { -+ GGNNGraphHost(const GGNNGraphHost&) { - // this exists to allow using vector::emplace_back - // when it triggers a reallocation, this code will be called. - // always make sure that enough memory is reserved ahead of time. -diff --git a/include/ggnn/select/cuda_knn_wrs_select_layer.cuh b/include/ggnn/select/cuda_knn_wrs_select_layer.cuh -index 49d76a1..eef69e6 100644 ---- a/include/ggnn/select/cuda_knn_wrs_select_layer.cuh -+++ b/include/ggnn/select/cuda_knn_wrs_select_layer.cuh -@@ -22,7 +22,6 @@ limitations under the License. 
- #include - #include - --#include - #include - - #include "ggnn/utils/cuda_knn_constants.cuh" --- -2.43.0 - diff --git a/cpp/cmake/patches/ggnn_override.json b/cpp/cmake/patches/ggnn_override.json deleted file mode 100644 index 768fae8b0c..0000000000 --- a/cpp/cmake/patches/ggnn_override.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "packages" : { - "ggnn" : { - "version": "0.5", - "git_url": "https://github.com/cgtuebingen/ggnn.git", - "git_tag": "release_${version}", - "patches" : [ - { - "file" : "${current_json_dir}/ggnn.diff", - "issue" : "Correct compilation issues", - "fixed_in" : "" - } - ] - } - } -} diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake deleted file mode 100644 index 706b0c2f11..0000000000 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ /dev/null @@ -1,119 +0,0 @@ -#============================================================================= -# Copyright (c) 2021-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#============================================================================= - -function(find_and_configure_faiss) - set(oneValueArgs VERSION REPOSITORY PINNED_TAG BUILD_STATIC_LIBS EXCLUDE_FROM_ALL ENABLE_GPU) - cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN} ) - - rapids_find_generate_module(faiss - HEADER_NAMES faiss/IndexFlat.h - LIBRARY_NAMES faiss - ) - - set(patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../patches") - rapids_cpm_package_override("${patch_dir}/faiss_override.json") - - include("${rapids-cmake-dir}/cpm/detail/package_details.cmake") - rapids_cpm_package_details(faiss version repository tag shallow exclude) - - include("${rapids-cmake-dir}/cpm/detail/generate_patch_command.cmake") - rapids_cpm_generate_patch_command(faiss ${version} patch_command) - - set(BUILD_SHARED_LIBS ON) - if (PKG_BUILD_STATIC_LIBS) - set(BUILD_SHARED_LIBS OFF) - set(CPM_DOWNLOAD_faiss ON) - endif() - - include(cmake/modules/FindAVX) - # Link against AVX CPU lib if it exists - set(RAFT_FAISS_OPT_LEVEL "generic") - if(CXX_AVX2_FOUND) - set(RAFT_FAISS_OPT_LEVEL "avx2") - endif() - - rapids_cpm_find(faiss ${version} - GLOBAL_TARGETS faiss faiss_avx2 faiss_gpu faiss::faiss faiss::faiss_avx2 - CPM_ARGS - GIT_REPOSITORY ${repository} - GIT_TAG ${tag} - GIT_SHALLOW ${shallow} ${patch_command} - EXCLUDE_FROM_ALL ${exclude} - OPTIONS - "FAISS_ENABLE_GPU ${PKG_ENABLE_GPU}" - "FAISS_ENABLE_RAFT ${PKG_ENABLE_GPU}" - "FAISS_ENABLE_PYTHON OFF" - "FAISS_OPT_LEVEL ${RAFT_FAISS_OPT_LEVEL}" - "FAISS_USE_CUDA_TOOLKIT_STATIC ${CUDA_STATIC_RUNTIME}" - "BUILD_TESTING OFF" - "CMAKE_MESSAGE_LOG_LEVEL VERBOSE" - ) - - include("${rapids-cmake-dir}/cpm/detail/display_patch_status.cmake") - rapids_cpm_display_patch_status(hnswlib) - - if(TARGET faiss AND NOT TARGET faiss::faiss) - add_library(faiss::faiss ALIAS faiss) - # We need to ensure that faiss has all the conda information. 
So we use this approach so that - # faiss will have the conda includes/link dirs - target_link_libraries(faiss PRIVATE $) - endif() - if(TARGET faiss_avx2 AND NOT TARGET faiss::faiss_avx2) - add_library(faiss::faiss_avx2 ALIAS faiss_avx2) - # We need to ensure that faiss has all the conda information. So we use this approach so that - # faiss will have the conda includes/link dirs - target_link_libraries(faiss_avx2 PRIVATE $) - endif() - if(TARGET faiss_gpu AND NOT TARGET faiss::faiss_gpu) - add_library(faiss::faiss_gpu ALIAS faiss_gpu) - # We need to ensure that faiss has all the conda information. So we use this approach so that - # faiss will have the conda includes/link dirs - target_link_libraries(faiss_gpu PRIVATE $) - endif() - - if(faiss_ADDED) - rapids_export(BUILD faiss - EXPORT_SET faiss-targets - GLOBAL_TARGETS ${RAFT_FAISS_EXPORT_GLOBAL_TARGETS} - NAMESPACE faiss::) - endif() - - # Need to tell CMake to rescan the link group of faiss::faiss_gpu and faiss - # so that we get proper link order when they are static - # - # We don't look at the existence of `faiss_avx2` as it will always exist - # even when CXX_AVX2_FOUND is false. In addition for arm builds the - # faiss_avx2 is marked as `EXCLUDE_FROM_ALL` so we don't want to add - # a dependency to it. Adding a dependency will cause it to compile, - # and fail due to invalid compiler flags. 
- if(PKG_ENABLE_GPU AND PKG_BUILD_STATIC_LIBS AND CXX_AVX2_FOUND) - set(RAFT_FAISS_TARGETS "$,faiss::faiss_avx2>" PARENT_SCOPE) - elseif(PKG_ENABLE_GPU AND PKG_BUILD_STATIC_LIBS) - set(RAFT_FAISS_TARGETS "$,faiss::faiss>" PARENT_SCOPE) - elseif(CXX_AVX2_FOUND) - set(RAFT_FAISS_TARGETS faiss::faiss_avx2 PARENT_SCOPE) - else() - set(RAFT_FAISS_TARGETS faiss::faiss PARENT_SCOPE) - endif() - -endfunction() - - -find_and_configure_faiss( - BUILD_STATIC_LIBS ${RAFT_USE_FAISS_STATIC} - ENABLE_GPU ${RAFT_FAISS_ENABLE_GPU} -) \ No newline at end of file diff --git a/cpp/cmake/thirdparty/get_fmt.cmake b/cpp/cmake/thirdparty/get_fmt.cmake deleted file mode 100644 index c06f8a78bb..0000000000 --- a/cpp/cmake/thirdparty/get_fmt.cmake +++ /dev/null @@ -1,22 +0,0 @@ -# ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. 
-# ============================================================================= - -# Use CPM to find or clone fmt -function(find_and_configure_fmt) - - include(${rapids-cmake-dir}/cpm/fmt.cmake) - rapids_cpm_fmt(INSTALL_EXPORT_SET rmm-exports BUILD_EXPORT_SET rmm-exports) -endfunction() - -find_and_configure_fmt() \ No newline at end of file diff --git a/cpp/cmake/thirdparty/get_ggnn.cmake b/cpp/cmake/thirdparty/get_ggnn.cmake deleted file mode 100644 index d8af4971a7..0000000000 --- a/cpp/cmake/thirdparty/get_ggnn.cmake +++ /dev/null @@ -1,50 +0,0 @@ -#============================================================================= -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#============================================================================= - -function(find_and_configure_ggnn) - - include(${rapids-cmake-dir}/cpm/package_override.cmake) - set(patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../patches") - rapids_cpm_package_override("${patch_dir}/ggnn_override.json") - - include("${rapids-cmake-dir}/cpm/detail/package_details.cmake") - rapids_cpm_package_details(ggnn version repository tag shallow exclude) - - include("${rapids-cmake-dir}/cpm/detail/generate_patch_command.cmake") - rapids_cpm_generate_patch_command(ggnn ${version} patch_command) - - rapids_cpm_find( - ggnn ${version} - GLOBAL_TARGETS ggnn::ggnn - CPM_ARGS - GIT_REPOSITORY ${repository} - GIT_TAG ${tag} - GIT_SHALLOW ${shallow} ${patch_command} - EXCLUDE_FROM_ALL ${exclude} - DOWNLOAD_ONLY ON - ) - - include("${rapids-cmake-dir}/cpm/detail/display_patch_status.cmake") - rapids_cpm_display_patch_status(ggnn) - - if(NOT TARGET ggnn::ggnn) - add_library(ggnn INTERFACE) - target_include_directories(ggnn INTERFACE "$") - add_library(ggnn::ggnn ALIAS ggnn) - endif() - -endfunction() -find_and_configure_ggnn() diff --git a/cpp/cmake/thirdparty/get_glog.cmake b/cpp/cmake/thirdparty/get_glog.cmake deleted file mode 100644 index 35a9170f99..0000000000 --- a/cpp/cmake/thirdparty/get_glog.cmake +++ /dev/null @@ -1,48 +0,0 @@ -#============================================================================= -# Copyright (c) 2021-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -#============================================================================= - -function(find_and_configure_glog) - set(oneValueArgs VERSION FORK PINNED_TAG EXCLUDE_FROM_ALL) - cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN} ) - - rapids_cpm_find(glog ${PKG_VERSION} - GLOBAL_TARGETS glog::glog - BUILD_EXPORT_SET raft-exports - INSTALL_EXPORT_SET raft-exports - CPM_ARGS - GIT_REPOSITORY https://github.com/${PKG_FORK}/glog.git - GIT_TAG ${PKG_PINNED_TAG} - EXCLUDE_FROM_ALL ${PKG_EXCLUDE_FROM_ALL} - ) - - if(glog_ADDED) - message(VERBOSE "RAFT: Using glog located in ${glog_SOURCE_DIR}") - else() - message(VERBOSE "RAFT: Using glog located in ${glog_DIR}") - endif() - - -endfunction() - -# Change pinned tag here to test a commit in CI -# To use a different RAFT locally, set the CMake variable -# CPM_glog_SOURCE=/path/to/local/glog -find_and_configure_glog(VERSION 0.6.0 - FORK google - PINNED_TAG v0.6.0 - EXCLUDE_FROM_ALL ON - ) diff --git a/cpp/cmake/thirdparty/get_nlohmann_json.cmake b/cpp/cmake/thirdparty/get_nlohmann_json.cmake deleted file mode 100644 index 5de98a47ce..0000000000 --- a/cpp/cmake/thirdparty/get_nlohmann_json.cmake +++ /dev/null @@ -1,39 +0,0 @@ -#============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -#============================================================================= - -function(find_and_configure_nlohmann_json) - set(oneValueArgs VERSION FORK PINNED_TAG EXCLUDE_FROM_ALL) - cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN} ) - - rapids_cpm_find(nlohmann_json ${PKG_VERSION} - GLOBAL_TARGETS nlohmann_json::nlohmann_json - BUILD_EXPORT_SET raft-bench-ann-exports - INSTALL_EXPORT_SET raft-bench-ann-exports - CPM_ARGS - GIT_REPOSITORY https://github.com/${PKG_FORK}/json.git - GIT_TAG ${PKG_PINNED_TAG} - EXCLUDE_FROM_ALL ${PKG_EXCLUDE_FROM_ALL}) - -endfunction() - -# Change pinned tag here to test a commit in CI -# To use a different RAFT locally, set the CMake variable -# CPM_raft_SOURCE=/path/to/local/raft -find_and_configure_nlohmann_json(VERSION 3.11.2 - FORK nlohmann - PINNED_TAG v3.11.2 - EXCLUDE_FROM_ALL YES) diff --git a/cpp/template/cmake/thirdparty/get_raft.cmake b/cpp/template/cmake/thirdparty/get_raft.cmake index 07b0897be0..4474fd2875 100644 --- a/cpp/template/cmake/thirdparty/get_raft.cmake +++ b/cpp/template/cmake/thirdparty/get_raft.cmake @@ -51,7 +51,6 @@ function(find_and_configure_raft) OPTIONS "BUILD_TESTS OFF" "BUILD_PRIMS_BENCH OFF" - "BUILD_ANN_BENCH OFF" "RAFT_COMPILE_LIBRARY ${PKG_COMPILE_LIBRARY}" ) endfunction() diff --git a/dependencies.yaml b/dependencies.yaml index 7766481c99..1772c5d539 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -22,19 +22,6 @@ files: - run_pylibraft - test_python_common - test_pylibraft - bench_ann: - output: conda - matrix: - cuda: ["11.8", "12.0"] - arch: [x86_64, aarch64] - includes: - - rapids_build - - cuda - - cuda_version - - develop - - nn_bench - - nn_bench_python - - rapids_build_skbuild test_cpp: output: none includes: @@ -127,20 +114,6 @@ files: key: test includes: - test_python_common - py_build_raft_ann_bench: - output: pyproject - 
pyproject_dir: python/raft-ann-bench - extras: - table: build-system - includes: - - rapids_build_setuptools - py_run_raft_ann_bench: - output: pyproject - pyproject_dir: python/raft-ann-bench - extras: - table: project - includes: - - nn_bench_python channels: - rapidsai - rapidsai-nightly @@ -255,25 +228,6 @@ dependencies: packages: - clang==16.0.6 - clang-tools=16.0.6 - nn_bench: - common: - - output_types: [conda, pyproject, requirements] - packages: - - hnswlib=0.7.0 - - nlohmann_json>=3.11.2 - - glog>=0.6.0 - - h5py>=3.8.0 - - benchmark>=1.8.2 - - openblas - - *rmm_unsuffixed - nn_bench_python: - common: - - output_types: [conda] - packages: - - matplotlib - - pandas - - pyyaml - - pandas cuda_version: specific: - output_types: conda diff --git a/docs/source/ann_benchmarks_build.md b/docs/source/ann_benchmarks_build.md deleted file mode 100644 index 56af8e555c..0000000000 --- a/docs/source/ann_benchmarks_build.md +++ /dev/null @@ -1,51 +0,0 @@ -### Dependencies - -CUDA 11 and a GPU with Pascal architecture or later are required to run the benchmarks. - -Please refer to the [installation docs](https://docs.rapids.ai/api/raft/stable/build.html#cuda-gpu-requirements) for the base requirements to build RAFT. - -In addition to the base requirements for building RAFT, additional dependencies needed to build the ANN benchmarks include: -1. FAISS GPU >= 1.7.1 -2. Google Logging (GLog) -3. H5Py -4. HNSWLib -5. nlohmann_json -6. GGNN - -[rapids-cmake](https://github.com/rapidsai/rapids-cmake) is used to build the ANN benchmarks so the code for dependencies not already supplied in the CUDA toolkit will be downloaded and built automatically. - -The easiest (and most reproducible) way to install the dependencies needed to build the ANN benchmarks is to use the conda environment file located in the `conda/environments` directory of the RAFT repository. 
The following command will use `mamba` (which is preferred over `conda`) to build and activate a new environment for compiling the benchmarks: - -```bash -mamba env create --name raft_ann_benchmarks -f conda/environments/bench_ann_cuda-118_arch-x86_64.yaml -conda activate raft_ann_benchmarks -``` - -The above conda environment will also reduce the compile times as dependencies like FAISS will already be installed and not need to be compiled with `rapids-cmake`. - -### Compiling the Benchmarks - -After the needed dependencies are satisfied, the easiest way to compile ANN benchmarks is through the `build.sh` script in the root of the RAFT source code repository. The following will build the executables for all the support algorithms: -```bash -./build.sh bench-ann -``` - -You can limit the algorithms that are built by providing a semicolon-delimited list of executable names (each algorithm is suffixed with `_ANN_BENCH`): -```bash -./build.sh bench-ann -n --limit-bench-ann=HNSWLIB_ANN_BENCH;RAFT_IVF_PQ_ANN_BENCH -``` - -Available targets to use with `--limit-bench-ann` are: -- FAISS_GPU_IVF_FLAT_ANN_BENCH -- FAISS_GPU_IVF_PQ_ANN_BENCH -- FAISS_CPU_IVF_FLAT_ANN_BENCH -- FAISS_CPU_IVF_PQ_ANN_BENCH -- FAISS_GPU_FLAT_ANN_BENCH -- FAISS_CPU_FLAT_ANN_BENCH -- GGNN_ANN_BENCH -- HNSWLIB_ANN_BENCH -- RAFT_CAGRA_ANN_BENCH -- RAFT_IVF_PQ_ANN_BENCH -- RAFT_IVF_FLAT_ANN_BENCH - -By default, the `*_ANN_BENCH` executables program infer the dataset's datatype from the filename's extension. For example, an extension of `fbin` uses a `float` datatype, `f16bin` uses a `float16` datatype, extension of `i8bin` uses `int8_t` datatype, and `u8bin` uses `uint8_t` type. Currently, only `float`, `float16`, int8_t`, and `unit8_t` are supported. 
\ No newline at end of file diff --git a/docs/source/ann_benchmarks_dataset.md b/docs/source/ann_benchmarks_dataset.md deleted file mode 100644 index 26c1559504..0000000000 --- a/docs/source/ann_benchmarks_dataset.md +++ /dev/null @@ -1,63 +0,0 @@ -# ANN Benchmarks Datasets - -A dataset usually has 4 binary files containing database vectors, query vectors, ground truth neighbors and their corresponding distances. For example, Glove-100 dataset has files `base.fbin` (database vectors), `query.fbin` (query vectors), `groundtruth.neighbors.ibin` (ground truth neighbors), and `groundtruth.distances.fbin` (ground truth distances). The first two files are for index building and searching, while the other two are associated with a particular distance and are used for evaluation. - -The file suffixes `.fbin`, `.f16bin`, `.ibin`, `.u8bin`, and `.i8bin` denote that the data type of vectors stored in the file are `float32`, `float16`(a.k.a `half`), `int`, `uint8`, and `int8`, respectively. -These binary files are little-endian and the format is: the first 8 bytes are `num_vectors` (`uint32_t`) and `num_dimensions` (`uint32_t`), and the following `num_vectors * num_dimensions * sizeof(type)` bytes are vectors stored in row-major order. - -Some implementation can take `float16` database and query vectors as inputs and will have better performance. Use `script/fbin_to_f16bin.py` to transform dataset from `float32` to `float16` type. - -Commonly used datasets can be downloaded from two websites: -1. Million-scale datasets can be found at the [Data sets](https://github.com/erikbern/ann-benchmarks#data-sets) section of [`ann-benchmarks`](https://github.com/erikbern/ann-benchmarks). - - However, these datasets are in HDF5 format. Use `cpp/bench/ann/scripts/hdf5_to_fbin.py` to transform the format. 
A few Python packages are required to run it: - ```bash - pip3 install numpy h5py - ``` - The usage of this script is: - ```bash - $ cpp/bench/ann/scripts/hdf5_to_fbin.py - usage: scripts/hdf5_to_fbin.py [-n] .hdf5 - -n: normalize base/query set - outputs: .base.fbin - .query.fbin - .groundtruth.neighbors.ibin - .groundtruth.distances.fbin - ``` - So for an input `.hdf5` file, four output binary files will be produced. See previous section for an example of prepossessing GloVe dataset. - - Most datasets provided by `ann-benchmarks` use `Angular` or `Euclidean` distance. `Angular` denotes cosine distance. However, computing cosine distance reduces to computing inner product by normalizing vectors beforehand. In practice, we can always do the normalization to decrease computation cost, so it's better to measure the performance of inner product rather than cosine distance. The `-n` option of `hdf5_to_fbin.py` can be used to normalize the dataset. - -2. Billion-scale datasets can be found at [`big-ann-benchmarks`](http://big-ann-benchmarks.com). The ground truth file contains both neighbors and distances, thus should be split. A script is provided for this: - ```bash - $ cpp/bench/ann/scripts/split_groundtruth.pl - usage: script/split_groundtruth.pl input output_prefix - ``` - Take Deep-1B dataset as an example: - ```bash - pushd - cd cpp/bench/ann - mkdir -p data/deep-1B && cd data/deep-1B - # download manually "Ground Truth" file of "Yandex DEEP" - # suppose the file name is deep_new_groundtruth.public.10K.bin - ../../scripts/split_groundtruth.pl deep_new_groundtruth.public.10K.bin groundtruth - # two files 'groundtruth.neighbors.ibin' and 'groundtruth.distances.fbin' should be produced - popd - ``` - Besides ground truth files for the whole billion-scale datasets, this site also provides ground truth files for the first 10M or 100M vectors of the base sets. This mean we can use these billion-scale datasets as million-scale datasets. 
To facilitate this, an optional parameter `subset_size` for dataset can be used. See the next step for further explanation. - -## Generate ground truth - -If you have a dataset, but no corresponding ground truth file, then you can generate ground trunth using the `generate_groundtruth` utility. Example usage: - -```bash -# With existing query file -python -m raft_ann_bench.generate_groundtruth --dataset /dataset/base.fbin --output=groundtruth_dir --queries=/dataset/query.public.10K.fbin - -# With randomly generated queries -python -m raft_ann_bench.generate_groundtruth --dataset /dataset/base.fbin --output=groundtruth_dir --queries=random --n_queries=10000 - -# Using only a subset of the dataset. Define queries by randomly -# selecting vectors from the (subset of the) dataset. -python -m raft_ann_bench.generate_groundtruth --dataset /dataset/base.fbin --nrows=2000000 --output=groundtruth_dir --queries=random-choice --n_queries=10000 -``` \ No newline at end of file diff --git a/docs/source/ann_benchmarks_low_level.md b/docs/source/ann_benchmarks_low_level.md deleted file mode 100644 index 7ba13dec8d..0000000000 --- a/docs/source/ann_benchmarks_low_level.md +++ /dev/null @@ -1,219 +0,0 @@ -### Low-level Scripts and Executables -#### End-to-end Example -An end-to-end example (run from the RAFT source code root directory): -```bash -# (0) get raft sources -git clone https://github.com/rapidsai/raft.git -cd raft - -# (1) prepare a dataset -export PYTHONPATH=python/raft-ann-bench/src:$PYTHONPATH -python -m raft_ann_bench.get_dataset --dataset glove-100-angular --normalize - -# option --normalize is used here to normalize vectors so cosine distance is converted -# to inner product; don't use -n for l2 distance - -# (2) build index -$CONDA_PREFIX/bin/ann/RAFT_IVF_FLAT_ANN_BENCH \ - --data_prefix=datasets \ - --build \ - --benchmark_filter="raft_ivf_flat\..*" \ - python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-100-inner.json - -# (3) search 
-$CONDA_PREFIX/bin/ann/RAFT_IVF_FLAT_ANN_BENCH\ - --data_prefix=datasets \ - --benchmark_min_time=2s \ - --benchmark_out=ivf_flat_search.csv \ - --benchmark_out_format=csv \ - --benchmark_counters_tabular \ - --search \ - --benchmark_filter="raft_ivf_flat\..*" \ - python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-100-inner.json - - -# optional step: plot QPS-Recall figure using data in ivf_flat_search.csv with your favorite tool -``` - -##### Step 1: Prepare Dataset -Note: the preferred way to download and process smaller (million scale) datasets is to use the `get_dataset` script as demonstrated in the example above. - -A dataset usually has 4 binary files containing database vectors, query vectors, ground truth neighbors and their corresponding distances. For example, Glove-100 dataset has files `base.fbin` (database vectors), `query.fbin` (query vectors), `groundtruth.neighbors.ibin` (ground truth neighbors), and `groundtruth.distances.fbin` (ground truth distances). The first two files are for index building and searching, while the other two are associated with a particular distance and are used for evaluation. - -The file suffixes `.fbin`, `.f16bin`, `.ibin`, `.u8bin`, and `.i8bin` denote that the data type of vectors stored in the file are `float32`, `float16`(a.k.a `half`), `int`, `uint8`, and `int8`, respectively. -These binary files are little-endian and the format is: the first 8 bytes are `num_vectors` (`uint32_t`) and `num_dimensions` (`uint32_t`), and the following `num_vectors * num_dimensions * sizeof(type)` bytes are vectors stored in row-major order. - -Some implementation can take `float16` database and query vectors as inputs and will have better performance. Use `python/raft-ann-bench/src/raft_ann_bench/get_dataset/fbin_to_f16bin.py` to transform dataset from `float32` to `float16` type. - -Commonly used datasets can be downloaded from two websites: -1. 
Million-scale datasets can be found at the [Data sets](https://github.com/erikbern/ann-benchmarks#data-sets) section of [`ann-benchmarks`](https://github.com/erikbern/ann-benchmarks). - - However, these datasets are in HDF5 format. Use `python/raft-ann-bench/src/raft_ann_bench/get_dataset/fbin_to_f16bin.py/hdf5_to_fbin.py` to transform the format. A few Python packages are required to run it: - ```bash - pip3 install numpy h5py - ``` - The usage of this script is: - ```bash - $ cpp/bench/ann/scripts/hdf5_to_fbin.py - usage: scripts/hdf5_to_fbin.py [-n] .hdf5 - -n: normalize base/query set - outputs: .base.fbin - .query.fbin - .groundtruth.neighbors.ibin - .groundtruth.distances.fbin - ``` - So for an input `.hdf5` file, four output binary files will be produced. See previous section for an example of prepossessing GloVe dataset. - - Most datasets provided by `ann-benchmarks` use `Angular` or `Euclidean` distance. `Angular` denotes cosine distance. However, computing cosine distance reduces to computing inner product by normalizing vectors beforehand. In practice, we can always do the normalization to decrease computation cost, so it's better to measure the performance of inner product rather than cosine distance. The `-n` option of `hdf5_to_fbin.py` can be used to normalize the dataset. - -2. Billion-scale datasets can be found at [`big-ann-benchmarks`](http://big-ann-benchmarks.com). The ground truth file contains both neighbors and distances, thus should be split. 
A script is provided for this: - ```bash - $ python/raft-ann-bench/src/raft_ann_bench/split_groundtruth/split_groundtruth.pl - usage: split_groundtruth.pl input output_prefix - ``` - Take Deep-1B dataset as an example: - ```bash - pushd - cd cpp/bench/ann - mkdir -p data/deep-1B && cd data/deep-1B - # download manually "Ground Truth" file of "Yandex DEEP" - # suppose the file name is deep_new_groundtruth.public.10K.bin - /path/to/raft/python/raft-ann-bench/src/raft_ann_bench/split_groundtruth/split_groundtruth.pl deep_new_groundtruth.public.10K.bin groundtruth - # two files 'groundtruth.neighbors.ibin' and 'groundtruth.distances.fbin' should be produced - popd - ``` - Besides ground truth files for the whole billion-scale datasets, this site also provides ground truth files for the first 10M or 100M vectors of the base sets. This mean we can use these billion-scale datasets as million-scale datasets. To facilitate this, an optional parameter `subset_size` for dataset can be used. See the next step for further explanation. - - -##### Step 2: Build Index -An index is a data structure to facilitate searching. Different algorithms may use different data structures for their index. We can use `RAFT_IVF_FLAT_ANN_BENCH --build` to build an index and save it to disk. - -To run a benchmark executable, like `RAFT_IVF_FLAT_ANN_BENCH`, a JSON configuration file is required. Refer to [`cpp/bench/ann/conf/glove-100-inner.json`](../../cpp/cpp/bench/ann/conf/glove-100-inner.json) as an example. Configuration file has 3 sections: -* `dataset` section specifies the name and files of a dataset, and also the distance in use. Since the `*_ANN_BENCH` programs are for index building and searching, only `base_file` for database vectors and `query_file` for query vectors are needed. Ground truth files are for evaluation thus not needed. - - To use only a subset of the base dataset, an optional parameter `subset_size` can be specified. 
It means using only the first `subset_size` vectors of `base_file` as the base dataset. -* `search_basic_param` section specifies basic parameters for searching: - - `k` is the "k" in "k-nn", that is, the number of neighbors (or results) we want from the searching. -* `index` section specifies an array of configurations for index building and searching: - - `build_param` and `search_params` are parameters for building and searching, respectively. `search_params` is an array since we will search with different parameters to get different recall values. - - `file` is the file name of index. Building will save built index to this file, while searching will load this file. - - if `refine_ratio` is specified, refinement, as a post-processing step of search, will be done. It's for algorithms that compress vectors. For example, if `"refine_ratio" : 2` is set, 2`k` results are first computed, then exact distances of them are computed using original uncompressed vectors, and finally top `k` results among them are kept. - - -The usage of `*_ANN_BENCH` can be found by running `*_ANN_BENCH --help` on one of the executables: -```bash -$ ./cpp/build/*_ANN_BENCH --help -benchmark [--benchmark_list_tests={true|false}] - [--benchmark_filter=] - [--benchmark_min_time=`x` OR `s` ] - [--benchmark_min_warmup_time=] - [--benchmark_repetitions=] - [--benchmark_enable_random_interleaving={true|false}] - [--benchmark_report_aggregates_only={true|false}] - [--benchmark_display_aggregates_only={true|false}] - [--benchmark_format=] - [--benchmark_out=] - [--benchmark_out_format=] - [--benchmark_color={auto|true|false}] - [--benchmark_counters_tabular={true|false}] - [--benchmark_context==,...] 
- [--benchmark_time_unit={ns|us|ms|s}] - [--v=] - [--build|--search] - [--overwrite] - [--data_prefix=] - .json - -Note the non-standard benchmark parameters: - --build: build mode, will build index - --search: search mode, will search using the built index - one and only one of --build and --search should be specified - --overwrite: force overwriting existing index files - --data_prefix=: prepend to dataset file paths specified in the .json. - --override_kv=: override a build/search key one or more times multiplying the number of configurations; you can use this parameter multiple times to get the Cartesian product of benchmark configs. -``` -* `--build`: build index. -* `--search`: do the searching with built index. -* `--overwrite`: by default, the building mode skips building an index if it find out it already exists. This is useful when adding more configurations to the config; only new indices are build without the need to specify an elaborate filtering regex. By supplying `overwrite` flag, you disable this behavior; all indices are build regardless whether they are already stored on disk. -* `--data_prefix`: prepend an arbitrary path to the data file paths. By default, it is equal to `data`. Note, this does not apply to index file paths. -* `--override_kv`: override a build/search key one or more times multiplying the number of configurations. - -In addition to these ANN-specific flags, you can use all of the standard google benchmark flags. Some of the useful flags: -* `--benchmark_filter`: specify subset of benchmarks to run -* `--benchmark_out`, `--benchmark_out_format`: store the output to a file -* `--benchmark_list_tests`: check the available configurations -* `--benchmark_min_time`: specify the minimum duration or number of iterations per case to improve accuracy of the benchmarks. 
- -Refer to the google benchmark [user guide](https://github.com/google/benchmark/blob/main/docs/user_guide.md#command-line) for more information about the command-line usage. - -##### Step 3: Searching -Use the `--search` flag on any of the `*_ANN_BENCH` executables. Other options are the same as in step 2. - -## Adding a new ANN algorithm -Implementation of a new algorithm should be a class that inherits `class ANN` (defined in `cpp/bench/ann/src/ann.h`) and implements all the pure virtual functions. - -In addition, it should define two `struct`s for building and searching parameters. The searching parameter class should inherit `struct ANN::AnnSearchParam`. Take `class HnswLib` as an example, its definition is: -```c++ -template -class HnswLib : public ANN { -public: - struct BuildParam { - int M; - int ef_construction; - int num_threads; - }; - - using typename ANN::AnnSearchParam; - struct SearchParam : public AnnSearchParam { - int ef; - int num_threads; - }; - - // ... -}; -``` - -The benchmark program uses JSON configuration file. To add the new algorithm to the benchmark, need be able to specify `build_param`, whose value is a JSON object, and `search_params`, whose value is an array of JSON objects, for this algorithm in configuration file. Still take the configuration for `HnswLib` as an example: -```json -{ - "name" : "...", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "/path/to/file", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1} - ] -}, -``` - -How to interpret these JSON objects is totally left to the implementation and should be specified in `cpp/bench/ann/src/factory.cuh`: -1. 
First, add two functions for parsing JSON object to `struct BuildParam` and `struct SearchParam`, respectively: - ```c++ - template - void parse_build_param(const nlohmann::json& conf, - typename cuann::HnswLib::BuildParam& param) { - param.ef_construction = conf.at("efConstruction"); - param.M = conf.at("M"); - if (conf.contains("numThreads")) { - param.num_threads = conf.at("numThreads"); - } - } - - template - void parse_search_param(const nlohmann::json& conf, - typename cuann::HnswLib::SearchParam& param) { - param.ef = conf.at("ef"); - if (conf.contains("numThreads")) { - param.num_threads = conf.at("numThreads"); - } - } - ``` - -2. Next, add corresponding `if` case to functions `create_algo()` and `create_search_param()` by calling parsing functions. The string literal in `if` condition statement must be the same as the value of `algo` in configuration file. For example, - ```c++ - // JSON configuration file contains a line like: "algo" : "hnswlib" - if (algo == "hnswlib") { - // ... - } - ``` diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md deleted file mode 100644 index afb4ed18ea..0000000000 --- a/docs/source/ann_benchmarks_param_tuning.md +++ /dev/null @@ -1,178 +0,0 @@ -# ANN Benchmarks Parameter Tuning Guide - -This guide outlines the various parameter settings that can be specified in [RAFT ANN Benchmark](raft_ann_benchmarks.md) json configuration files and explains the impact they have on corresponding algorithms to help inform their settings for benchmarking across desired levels of recall. - - -## RAFT Indexes - -### `raft_brute_force` - -Use RAFT brute-force index for exact search. Brute-force has no further build or search parameters. - -### `raft_ivf_flat` - -IVF-flat uses an inverted-file index, which partitions the vectors into a series of clusters, or lists, storing them in an interleaved format which is optimized for fast distance computation. 
The searching of an IVF-flat index reduces the total vectors in the index to those within some user-specified nearest clusters called probes. - -IVF-flat is a simple algorithm which won't save any space, but it provides competitive search times even at higher levels of recall. - -| Parameter | Type | Required | Data Type | Default | Description | -|----------------------|------------------|----------|----------------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `nlist` | `build` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | -| `niter` | `build` | N | Positive Integer >0 | 20 | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | -| `ratio` | `build` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. | -| `dataset_memory_type` | `build` | N | ["device", "host", "mmap"] | "mmap" | What memory type should the dataset reside? | -| `query_memory_type` | `search` | N | ["device", "host", "mmap"] | "device | What memory type should the queries reside? | -| `nprobe` | `search` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | - - -### `raft_ivf_pq` - -IVF-pq is an inverted-file index, which partitions the vectors into a series of clusters, or lists, in a similar way to IVF-flat above. The difference is that IVF-PQ uses product quantization to also compress the vectors, giving the index a smaller memory footprint. 
Unfortunately, higher levels of compression can also shrink recall, which a refinement step can improve when the original vectors are still available. - -| Parameter | Type | Required | Data Type | Default | Description | -|------------------------|----------------|---|----------------------------------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `nlist` | `build` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | -| `niter` | `build` | N | Positive Integer >0 | 20 | Number of k-means iterations to use when training the clusters. | -| `ratio` | `build` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. | -| `pq_dim` | `build` | N | Positive Integer. Multiple of 8. | 0 | Dimensionality of the vector after product quantization. When 0, a heuristic is used to select this value. `pq_dim` * `pq_bits` must be a multiple of 8. | -| `pq_bits` | `build` | N | Positive Integer. [4-8] | 8 | Bit length of the vector element after quantization. | -| `codebook_kind` | `build` | N | ["cluster", "subspace"] | "subspace" | Type of codebook. See the [API docs](https://docs.rapids.ai/api/raft/nightly/cpp_api/neighbors_ivf_pq/#_CPPv412codebook_gen) for more detail | -| `dataset_memory_type` | `build` | N | ["device", "host", "mmap"] | "host" | What memory type should the dataset reside? | -| `query_memory_type` | `search` | N | ["device", "host", "mmap"] | "device | What memory type should the queries reside? | -| `nprobe` | `search` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. 
| -| `internalDistanceDtype` | `search` | N | [`float`, `half`] | `half` | The precision to use for the distance computations. Lower precision can increase performance at the cost of accuracy. | -| `smemLutDtype` | `search` | N | [`float`, `half`, `fp8`] | `half` | The precision to use for the lookup table in shared memory. Lower precision can increase performance at the cost of accuracy. | -| `refine_ratio` | `search` | N| Positive Number >=1 | 1 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. | - - -### `raft_cagra` -CAGRA uses a graph-based index, which creates an intermediate, approximate kNN graph using IVF-PQ and then further refining and optimizing to create a final kNN graph. This kNN graph is used by CAGRA as an index for search. - -| Parameter | Type | Required | Data Type | Default | Description | -|-----------------------------|----------------|----------|----------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `graph_degree` | `build` | N | Positive Integer >0 | 64 | Degree of the final kNN graph index. | -| `intermediate_graph_degree` | `build` | N | Positive Integer >0 | 128 | Degree of the intermediate kNN graph. | -| `graph_build_algo` | `build` | N | ["IVF_PQ", "NN_DESCENT"] | "IVF_PQ" | Algorithm to use for search | -| `dataset_memory_type` | `build` | N | ["device", "host", "mmap"] | "mmap" | What memory type should the dataset reside while constructing the index? | -| `query_memory_type` | `search` | N | ["device", "host", "mmap"] | "device | What memory type should the queries reside? | -| `itopk` | `search_wdith` | N | Positive Integer >0 | 64 | Number of intermediate search results retained during the search. 
Higher values improve search accuracy at the cost of speed. | -| `search_width` | `search` | N | Positive Integer >0 | 1 | Number of graph nodes to select as the starting point for the search in each iteration. | -| `max_iterations` | `search` | N | Integer >=0 | 0 | Upper limit of search iterations. Auto select when 0. | -| `algo` | `search` | N | string | "auto" | Algorithm to use for search. Possible values: {"auto", "single_cta", "multi_cta", "multi_kernel"} | -| `graph_memory_type` | `search` | N | string | "device" | Memory type to store gaph. Must be one of {"device", "host_pinned", "host_huge_page"}. | -| `internal_dataset_memory_type` | `search` | N | string | "device" | Memory type to store dataset in the index. Must be one of {"device", "host_pinned", "host_huge_page"}. | - -The `graph_memory_type` or `internal_dataset_memory_type` options can be useful for large datasets that do not fit the device memory. Setting `internal_dataset_memory_type` other than `device` has negative impact on search speed. Using `host_huge_page` option is only supported on systems with Heterogeneous Memory Management or on platforms that natively support GPU access to system allocated memory, for example Grace Hopper. - -To fine tune CAGRA index building we can customize IVF-PQ index builder options using the following settings. These take effect only if `graph_build_algo == "IVF_PQ"`. It is recommended to experiment using a separate IVF-PQ index to find the config that gives the largest QPS for large batch. Recall does not need to be very high, since CAGRA further optimizes the kNN neighbor graph. Some of the default values are derived from the dataset size which is assumed to be [n_vecs, dim]. 
- -| Parameter | Type | Required | Data Type | Default | Description | -|------------------------|----------------|---|----------------------------------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `ivf_pq_build_nlist` | `build` | N | Positive Integer >0 | n_vecs / 2500 | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | -| `ivf_pq_build_niter` | `build` | N | Positive Integer >0 | 25 | Number of k-means iterations to use when training the clusters. | -| `ivf_pq_build_ratio` | `build` | N | Positive Integer >0 | 10 | `1/ratio` is the number of training points which should be used to train the clusters. | -| `ivf_pq_build_pq_dim` | `build` | N | Positive Integer. Multiple of 8. | dim/2 rounded up to 8 | Dimensionality of the vector after product quantization. When 0, a heuristic is used to select this value. `pq_dim` * `pq_bits` must be a multiple of 8. | -| `ivf_pq_build_pq_bits` | `build` | N | Positive Integer. [4-8] | 8 | Bit length of the vector element after quantization. | -| `ivf_pq_build_codebook_kind` | `build` | N | ["cluster", "subspace"] | "subspace" | Type of codebook. See the [API docs](https://docs.rapids.ai/api/raft/nightly/cpp_api/neighbors_ivf_pq/#_CPPv412codebook_gen) for more detail | -| `ivf_pq_search_nprobe` | `build` | N | Positive Integer >0 | min(2*dim, nlist) | The closest number of clusters to search for each query vector. | -| `ivf_pq_search_internalDistanceDtype` | `build` | N | [`float`, `half`] | `fp8` | The precision to use for the distance computations. Lower precision can increase performance at the cost of accuracy. 
| -| `ivf_pq_search_smemLutDtype` | `build` | N | [`float`, `half`, `fp8`] | `half` | The precision to use for the lookup table in shared memory. Lower precision can increase performance at the cost of accuracy. | -| `ivf_pq_search_refine_ratio` | `build` | N| Positive Number >=1 | 2 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. | - -Alternatively, if `graph_build_algo == "NN_DESCENT"`, then we can customize the following parameters - -| Parameter | Type | Required | Data Type | Default | Description | -|-----------------------------|----------------|----------|----------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `nn_descent_niter` | `build` | N | Positive Integer>0 | 20 | Number of NN Descent iterations. | -| `nn_descent_intermediate_graph_degree` | `build` | N | Positive Integer>0 | `intermediate_graph_degree` * 1.5 | Intermadiate graph degree during NN descent iterations | -| `nn_descent_max_iterations` | `build` | N | Positive Integer>0 | 20 | Alias for `nn_descent_niter` | -| `nn_descent_termination_threshold` | `build` | N | Positive float>0 | 0.0001 | Termination threshold for NN descent. | - -### `raft_cagra_hnswlib` -This is a benchmark that enables interoperability between `CAGRA` built `HNSW` search. It uses the `CAGRA` built graph as the base layer of an `hnswlib` index to search queries only within the base layer (this is enabled with a simple patch to `hnswlib`). - -`build` : Same as `build` of [CAGRA](#raft-cagra) - -`search` : Same as `search` of [hnswlib](#hnswlib) - -## FAISS Indexes - -### `faiss_gpu_flat` - -Use FAISS flat index on the GPU, which performs an exact search using brute-force and doesn't have any further build or search parameters. 
- -### `faiss_gpu_ivf_flat` - -IVF-flat uses an inverted-file index, which partitions the vectors into a series of clusters, or lists, storing them in an interleaved format which is optimized for fast distance computation. The searching of an IVF-flat index reduces the total vectors in the index to those within some user-specified nearest clusters called probes. - -IVF-flat is a simple algorithm which won't save any space, but it provides competitive search times even at higher levels of recall. - -| Parameter | Type | Required | Data Type | Default | Description | -|-----------|----------------|----------|---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `nlists` | `build` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | -| `ratio` | `build` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. | -| `nprobe` | `search` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | - -### `faiss_gpu_ivf_pq` - -IVF-pq is an inverted-file index, which partitions the vectors into a series of clusters, or lists, in a similar way to IVF-flat above. The difference is that IVF-PQ uses product quantization to also compress the vectors, giving the index a smaller memory footprint. Unfortunately, higher levels of compression can also shrink recall, which a refinement step can improve when the original vectors are still available. 
- -| Parameter | Type | Required | Data Type | Default | Description | -|------------------|----------------|----------|----------------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `nlist` | `build` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | -| `ratio` | `build` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. | -| `M_ratio` | `build` | Y | Positive Integer Power of 2 [8-64] | | Ratio of numbeer of chunks or subquantizers for each vector. Computed by `dims` / `M_ratio` | -| `usePrecomputed` | `build` | N | Boolean. Default=`false` | `false` | Use pre-computed lookup tables to speed up search at the cost of increased memory usage. | -| `useFloat16` | `build` | N | Boolean. Default=`false` | `false` | Use half-precision floats for clustering step. | -| `nprobe` | `search` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | -| `refine_ratio` | `search` | N| Positive Number >=1 | 1 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. | - -### `faiss_cpu_flat` - -Use FAISS flat index on the CPU, which performs an exact search using brute-force and doesn't have any further build or search parameters. 
- - -| Parameter | Type | Required | Data Type | Default | Description | -|-----------|----------------|----------|---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `numThreads` | `search` | N | Positive Integer >0 | 1 | Number of threads to use for queries. | - -### `faiss_cpu_ivf_flat` - -Use FAISS IVF-Flat index on CPU - -| Parameter | Type | Required | Data Type | Default | Description | -|----------|----------------|----------|---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `nlist` | `build` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | -| `ratio` | `build` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. | -| `nprobe` | `search` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | -| `numThreads` | `search` | N | Positive Integer >0 | 1 | Number of threads to use for queries. | - -### `faiss_cpu_ivf_pq` - -Use FAISS IVF-PQ index on CPU - -| Parameter | Type | Required | Data Type | Default | Description | -|------------------|----------------|----------|------------------------------------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `nlist` | `build` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. 
Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | -| `ratio` | `build` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. | -| `M` | `build` | Y | Positive Integer Power of 2 [8-64] | | Number of chunks or subquantizers for each vector. | -| `usePrecomputed` | `build` | N | Boolean. Default=`false` | `false` | Use pre-computed lookup tables to speed up search at the cost of increased memory usage. | -| `bitsPerCode` | `build` | N | Positive Integer [4-8] | 8 | Number of bits to use for each code. | -| `nprobe` | `search` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | -| `refine_ratio` | `search` | N| Positive Number >=1 | 1 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. | -| `numThreads` | `search` | N | Positive Integer >0 | 1 | Number of threads to use for queries. | - - -## HNSW - -### `hnswlib` - -| Parameter | Type | Required | Data Type | Default | Description | -|------------------|-----------|----------|--------------------------------------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `efConstruction` | `build` | Y | Positive Integer >0 | | Controls index time and accuracy. Bigger values increase the index quality. At some point, increasing this will no longer improve the quality. 
| -| `M` | `build` | Y | Positive Integer often between 2-100 | | Number of bi-directional links create for every new element during construction. Higher values work for higher intrinsic dimensionality and/or high recall, low values can work for datasets with low intrinsic dimensionality and/or low recalls. Also affects the algorithm's memory consumption. | -| `numThreads` | `build` | N | Positive Integer >0 | 1 | Number of threads to use to build the index. | -| `ef` | `search` | Y | Positive Integer >0 | | Size of the dynamic list for the nearest neighbors used for search. Higher value leads to more accurate but slower search. Cannot be lower than `k`. | -| `numThreads` | `search` | N | Positive Integer >0 | 1 | Number of threads to use for queries. | - -Please refer to [HNSW algorithm parameters guide](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md) from `hnswlib` to learn more about these arguments. \ No newline at end of file diff --git a/docs/source/build.md b/docs/source/build.md index b9a1832b02..3d059d5a69 100644 --- a/docs/source/build.md +++ b/docs/source/build.md @@ -1,6 +1,6 @@ # Installation -RAFT currently provides libraries for C++ and Python. The C++ libraries, including the header-only and optional shared library, can be installed with Conda. +RAFT currently provides libraries for C++ and Python. The C++ libraries, including the header-only and optional shared library, can be installed with Conda. Both the C++ and Python APIs require CMake to build from source. @@ -34,8 +34,6 @@ The easiest way to install RAFT is through conda and several packages are provid - `libraft` (optional) C++ shared library containing pre-compiled template instantiations and runtime API. - `pylibraft` (optional) Python library - `raft-dask` (optional) Python library for deployment of multi-node multi-GPU algorithms that use the RAFT `raft::comms` abstraction layer in Dask clusters. 
-- `raft-ann-bench` (optional) Benchmarking tool for easily producing benchmarks that compare RAFT's vector search algorithms against other state-of-the-art implementations. -- `raft-ann-bench-cpu` (optional) Reproducible benchmarking tool similar to above, but doesn't require CUDA to be installed on the machine. Can be used to test in environments with competitive CPUs. Use the following command, depending on your CUDA version, to install all of the RAFT packages with conda (replace `rapidsai` with `rapidsai-nightly` to install more up-to-date but less stable nightly packages). `mamba` is preferred over the `conda` command. ```bash @@ -60,7 +58,7 @@ If installing the C++ APIs Please see [using libraft](https://docs.rapids.ai/api ## Installing Python through Pip -`pylibraft` and `raft-dask` both have packages that can be [installed through pip](https://rapids.ai/pip.html#install). +`pylibraft` and `raft-dask` both have packages that can be [installed through pip](https://rapids.ai/pip.html#install). For CUDA 11 packages: ```bash @@ -74,7 +72,7 @@ pip install pylibraft-cu12 --extra-index-url=https://pypi.nvidia.com pip install raft-dask-cu12 --extra-index-url=https://pypi.nvidia.com ``` -These packages statically build RAFT's pre-compiled instantiations, so the C++ headers and pre-compiled shared library won't be readily available to use in your code. +These packages statically build RAFT's pre-compiled instantiations, so the C++ headers and pre-compiled shared library won't be readily available to use in your code. ## Building C++ and Python from source @@ -124,7 +122,7 @@ The recommended way to build and install RAFT from source is to use the `build.s `build.sh` uses [rapids-cmake](https://github.com/rapidsai/rapids-cmake), which will automatically download any dependencies which are not already installed. 
It's important to note that while all the headers will be installed and available, some parts of the RAFT API depend on libraries like CUTLASS, which will need to be explicitly enabled in `build.sh`. -The following example will download the needed dependencies and install the RAFT headers into `$INSTALL_PREFIX/include/raft`. +The following example will download the needed dependencies and install the RAFT headers into `$INSTALL_PREFIX/include/raft`. ```bash ./build.sh libraft ``` @@ -201,8 +199,6 @@ It can take sometime to compile all of the benchmarks. You can build individual ./build.sh libraft bench-prims -n --limit-bench=NEIGHBORS_PRIMS_BENCH;DISTANCE_PRIMS_BENCH;LINALG_PRIMS_BENCH ``` -In addition to microbenchmarks for individual primitives, RAFT contains a reproducible benchmarking tool for evaluating the performance of RAFT's vector search algorithms against the existing state-of-the-art. Please refer to the [RAFT ANN Benchmarks](https://docs.rapids.ai/api/raft/nightly/raft_ann_benchmarks/) guide for more information on this tool. - ### Python libraries The Python libraries can be built and installed using the `build.sh` script: @@ -242,7 +238,7 @@ The Python packages can also be uninstalled using the `build.sh` script: ### Using CMake directly -When building RAFT from source, the `build.sh` script offers a nice wrapper around the `cmake` commands to ease the burdens of manually configuring the various available cmake options. When more fine-grained control over the CMake configuration is desired, the `cmake` command can be invoked directly as the below example demonstrates. +When building RAFT from source, the `build.sh` script offers a nice wrapper around the `cmake` commands to ease the burdens of manually configuring the various available cmake options. When more fine-grained control over the CMake configuration is desired, the `cmake` command can be invoked directly as the below example demonstrates. 
The `CMAKE_INSTALL_PREFIX` installs RAFT into a specific location. The example below installs RAFT into the current Conda environment: ```bash @@ -259,7 +255,6 @@ RAFT's CMake has the following configurable flags available: |---------------------------------|----------------------| --- |------------------------------------------------------------------------------| | BUILD_TESTS | ON, OFF | ON | Compile Googletests | | BUILD_PRIMS_BENCH | ON, OFF | OFF | Compile benchmarks | -| BUILD_ANN_BENCH | ON, OFF | OFF | Compile end-to-end ANN benchmarks | | CUDA_ENABLE_KERNELINFO | ON, OFF | OFF | Enables `kernelinfo` in nvcc. This is useful for `compute-sanitizer` | | CUDA_ENABLE_LINEINFO | ON, OFF | OFF | Enable the -lineinfo option for nvcc | | CUDA_STATIC_RUNTIME | ON, OFF | OFF | Statically link the CUDA runtime | @@ -267,10 +262,10 @@ RAFT's CMake has the following configurable flags available: | DETECT_CONDA_ENV | ON, OFF | ON | Enable detection of conda environment for dependencies | | raft_FIND_COMPONENTS | compiled distributed | | Configures the optional components as a space-separated list | | RAFT_COMPILE_LIBRARY | ON, OFF | ON if either BUILD_TESTS or BUILD_PRIMS_BENCH is ON; otherwise OFF | Compiles all `libraft` shared libraries (these are required for Googletests) | -| RAFT_ENABLE_CUBLAS_DEPENDENCY | ON, OFF | ON | Link against cublas library in `raft::raft` | -| RAFT_ENABLE_CUSOLVER_DEPENDENCY | ON, OFF | ON | Link against cusolver library in `raft::raft` | -| RAFT_ENABLE_CUSPARSE_DEPENDENCY | ON, OFF | ON | Link against cusparse library in `raft::raft` | -| RAFT_ENABLE_CUSOLVER_DEPENDENCY | ON, OFF | ON | Link against curand library in `raft::raft` | +| RAFT_ENABLE_CUBLAS_DEPENDENCY | ON, OFF | ON | Link against cublas library in `raft::raft` | +| RAFT_ENABLE_CUSOLVER_DEPENDENCY | ON, OFF | ON | Link against cusolver library in `raft::raft` | +| RAFT_ENABLE_CUSPARSE_DEPENDENCY | ON, OFF | ON | Link against cusparse library in `raft::raft` | +| 
RAFT_ENABLE_CURAND_DEPENDENCY | ON, OFF | ON | Link against curand library in `raft::raft` | | RAFT_NVTX | ON, OFF | OFF | Enable NVTX Markers | ### Build documentation @@ -316,4 +311,4 @@ The `raft::raft` CMake target is made available when including RAFT into your CM |-------------|---------------------|----------------------------------------------------------|----------------------------------------| | n/a | `raft::raft` | Full RAFT header library | CUDA toolkit, RMM, NVTX, CCCL, CUTLASS | | compiled | `raft::compiled` | Pre-compiled template instantiations and runtime library | raft::raft | -| distributed | `raft::distributed` | Dependencies for `raft::comms` APIs | raft::raft, UCX, NCCL \ No newline at end of file +| distributed | `raft::distributed` | Dependencies for `raft::comms` APIs | raft::raft, UCX, NCCL diff --git a/docs/source/index.rst b/docs/source/index.rst index bee0e948ff..46ebd1b737 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -63,7 +63,6 @@ While not exhaustive, the following general categories help summarize the accele pylibraft_api.rst using_libraft.md vector_search_tutorial.md - raft_ann_benchmarks.md raft_dask_api.rst using_raft_comms.rst developer_guide.md diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md deleted file mode 100644 index 12a94e45ce..0000000000 --- a/docs/source/raft_ann_benchmarks.md +++ /dev/null @@ -1,597 +0,0 @@ -# RAFT ANN Benchmarks - -This project provides a benchmark program for various ANN search implementations. It's especially suitable for comparing GPU implementations as well as comparing GPU against CPU. - -> [!IMPORTANT] -> The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called [cuVS](https://github.com/rapidsai/cuvs). As a result, `raft-ann-bench` is being migrated to `cuvs-bench` and will be removed from RAFT altogether in the 24.12 (December) release. 
- - -## Table of Contents - -- [Installing the benchmarks](#installing-the-benchmarks) - - [Conda](#conda) - - [Docker](#docker) -- [How to run the benchmarks](#how-to-run-the-benchmarks) - - [Step 1: prepare dataset](#step-1-prepare-dataset) - - [Step 2: build and search index](#step-2-build-and-search-index) - - [Step 3: data export](#step-3-data-export) - - [Step 4: plot results](#step-4-plot-results) -- [Running the benchmarks](#running-the-benchmarks) - - [End to end: small-scale (<1M to 10M)](#end-to-end-small-scale-benchmarks-1m-to-10m) - - [End to end: large-scale (>10M)](#end-to-end-large-scale-benchmarks-10m-vectors) - - [Running with Docker containers](#running-with-docker-containers) - - [Evaluating the results](#evaluating-the-results) -- [Creating and customizing dataset configurations](#creating-and-customizing-dataset-configurations) -- [Adding a new ANN algorithm](#adding-a-new-ann-algorithm) -- [Parameter tuning guide](https://docs.rapids.ai/api/raft/nightly/ann_benchmarks_param_tuning/) -- [Wiki-all RAG/LLM Dataset](https://docs.rapids.ai/api/raft/nightly/wiki_all_dataset/) - -## Installing the benchmarks - -There are two main ways pre-compiled benchmarks are distributed: - -- [Conda](#Conda): For users not using containers but want an easy to install and use Python package. Pip wheels are planned to be added as an alternative for users that cannot use conda and prefer to not use containers. -- [Docker](#Docker): Only needs docker and [NVIDIA docker](https://github.com/NVIDIA/nvidia-docker) to use. Provides a single docker run command for basic dataset benchmarking, as well as all the functionality of the conda solution inside the containers. - -## Conda - -If containers are not an option or not preferred, the easiest way to install the ANN benchmarks is through conda. We provide packages for GPU enabled systems, as well for systems without a GPU. 
We suggest using mamba as it generally leads to a faster install time: - -```bash - -mamba create --name raft_ann_benchmarks -conda activate raft_ann_benchmarks - -# to install GPU package: -mamba install -c rapidsai -c conda-forge -c nvidia raft-ann-bench= cuda-version=11.8* - -# to install CPU package for usage in CPU-only systems: -mamba install -c rapidsai -c conda-forge raft-ann-bench-cpu -``` - -The channel `rapidsai` can easily be substituted `rapidsai-nightly` if nightly benchmarks are desired. The CPU package currently allows to run the HNSW benchmarks. - -Please see the [build instructions](ann_benchmarks_build.md) to build the benchmarks from source. - -## Docker - -We provide images for GPU enabled systems, as well as systems without a GPU. The following images are available: - -- `raft-ann-bench`: Contains GPU and CPU benchmarks, can run all algorithms supported. Will download million-scale datasets as required. Best suited for users that prefer a smaller container size for GPU based systems. Requires the NVIDIA Container Toolkit to run GPU algorithms, can run CPU algorithms without it. -- `raft-ann-bench-datasets`: Contains the GPU and CPU benchmarks with million-scale datasets already included in the container. Best suited for users that want to run multiple million scale datasets already included in the image. -- `raft-ann-bench-cpu`: Contains only CPU benchmarks with minimal size. Best suited for users that want the smallest containers to reproduce benchmarks on systems without a GPU. - -Nightly images are located in [dockerhub](https://hub.docker.com/r/rapidsai/raft-ann-bench/tags), meanwhile release (stable) versions are located in [NGC](https://hub.docker.com/r/rapidsai/raft-ann-bench), starting with release 23.12. 
- -- The following command pulls the nightly container for python version 10, cuda version 12, and RAFT version 23.10: - -```bash -docker pull rapidsai/raft-ann-bench:24.12a-cuda12.0-py3.10 #substitute raft-ann-bench for the exact desired container. -``` - -The CUDA and python versions can be changed for the supported values: - -Supported CUDA versions: 11.2 and 12.0 -Supported Python versions: 3.9 and 3.10. - -You can see the exact versions as well in the dockerhub site: - -- [RAFT ANN Benchmark images](https://hub.docker.com/r/rapidsai/raft-ann-bench/tags) -- [RAFT ANN Benchmark with datasets preloaded images](https://hub.docker.com/r/rapidsai/raft-ann-bench-cpu/tags) -- [RAFT ANN Benchmark CPU only images](https://hub.docker.com/r/rapidsai/raft-ann-bench-datasets/tags) - -**Note:** GPU containers use the CUDA toolkit from inside the container, the only requirement is a driver installed on the host machine that supports that version. So, for example, CUDA 11.8 containers can run in systems with a CUDA 12.x capable driver. Please also note that the Nvidia-Docker runtime from the [Nvidia Container Toolkit](https://github.com/NVIDIA/nvidia-docker) is required to use GPUs inside docker containers. - -[//]: # (- The following command (only available after RAPIDS 23.10 release) pulls the container:) - -[//]: # () -[//]: # (```bash) - -[//]: # (docker pull nvcr.io/nvidia/rapidsai/raft-ann-bench:24.12-cuda11.8-py3.10 #substitute raft-ann-bench for the exact desired container.) - -[//]: # (```) - -## How to run the benchmarks - -We provide a collection of lightweight Python scripts to run the benchmarks. There are 4 general steps to running the benchmarks and visualizing the results. -1. Prepare Dataset -2. Build Index and Search Index -3. Data Export -4. Plot Results - -### Step 1: Prepare Dataset -The script `raft_ann_bench.get_dataset` will download and unpack the dataset in directory -that the user provides. 
As of now, only million-scale datasets are supported by this -script. For more information on [datasets and formats](ann_benchmarks_dataset.md). - -The usage of this script is: -```bash -usage: get_dataset.py [-h] [--name NAME] [--dataset-path DATASET_PATH] [--normalize] - -options: - -h, --help show this help message and exit - --dataset DATASET dataset to download (default: glove-100-angular) - --dataset-path DATASET_PATH - path to download dataset (default: ${RAPIDS_DATASET_ROOT_DIR}) - --normalize normalize cosine distance to inner product (default: False) -``` - -When option `normalize` is provided to the script, any dataset that has cosine distances -will be normalized to inner product. So, for example, the dataset `glove-100-angular` -will be written at location `datasets/glove-100-inner/`. - -### Step 2: Build and Search Index -The script `raft_ann_bench.run` will build and search indices for a given dataset and its -specified configuration. - -The usage of the script `raft_ann_bench.run` is: -```bash -usage: __main__.py [-h] [--subset-size SUBSET_SIZE] [-k COUNT] [-bs BATCH_SIZE] [--dataset-configuration DATASET_CONFIGURATION] [--configuration CONFIGURATION] [--dataset DATASET] - [--dataset-path DATASET_PATH] [--build] [--search] [--algorithms ALGORITHMS] [--groups GROUPS] [--algo-groups ALGO_GROUPS] [-f] [-m SEARCH_MODE] - -options: - -h, --help show this help message and exit - --subset-size SUBSET_SIZE - the number of subset rows of the dataset to build the index (default: None) - -k COUNT, --count COUNT - the number of nearest neighbors to search for (default: 10) - -bs BATCH_SIZE, --batch-size BATCH_SIZE - number of query vectors to use in each query trial (default: 10000) - --dataset-configuration DATASET_CONFIGURATION - path to YAML configuration file for datasets (default: None) - --configuration CONFIGURATION - path to YAML configuration file or directory for algorithms Any run groups found in the specified file/directory will automatically 
override groups of the same name - present in the default configurations, including `base` (default: None) - --dataset DATASET name of dataset (default: glove-100-inner) - --dataset-path DATASET_PATH - path to dataset folder, by default will look in RAPIDS_DATASET_ROOT_DIR if defined, otherwise a datasets subdirectory from the calling directory (default: - os.getcwd()/datasets/) - --build - --search - --algorithms ALGORITHMS - run only comma separated list of named algorithms. If parameters `groups` and `algo-groups are both undefined, then group `base` is run by default (default: None) - --groups GROUPS run only comma separated groups of parameters (default: base) - --algo-groups ALGO_GROUPS - add comma separated . to run. Example usage: "--algo-groups=raft_cagra.large,hnswlib.large" (default: None) - -f, --force re-run algorithms even if their results already exist (default: False) - -m SEARCH_MODE, --search-mode SEARCH_MODE - run search in 'latency' (measure individual batches) or 'throughput' (pipeline batches and measure end-to-end) mode (default: throughput) - -t SEARCH_THREADS, --search-threads SEARCH_THREADS - specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. Example --search-threads=1:4. Power of 2 values between 'min' and 'max' will be used. If only 'min' is - specified, then a single test is run with 'min' threads. By default min=1, max=. (default: None) - -r, --dry-run dry-run mode will convert the yaml config for the specified algorithms and datasets to the json format that's consumed by the lower-level c++ binaries and then print the command to run execute the benchmarks but - will not actually execute the command. 
(default: False) -``` - -`dataset`: name of the dataset to be searched in [datasets.yaml](#yaml-dataset-config) - -`dataset-configuration`: optional filepath to custom dataset YAML config which has an entry for arg `dataset` - -`configuration`: optional filepath to YAML configuration for an algorithm or to directory that contains YAML configurations for several algorithms. [Here's how to configure an algorithm.](#yaml-algo-config) - -`algorithms`: runs all algorithms that it can find in YAML configs found by `configuration`. By default, only `base` group will be run. - -`groups`: run only specific groups of parameters configurations for an algorithm. Groups are defined in YAML configs (see `configuration`), and by default run `base` group - -`algo-groups`: this parameter is helpful to append any specific algorithm+group combination to run the benchmark for in addition to all the arguments from `algorithms` and `groups`. It is of the format `.`, or for example, `raft_cagra.large` - -For every algorithm run by this script, it outputs an index build statistics JSON file in `/result/build/<{algo},{group}.json>` -and an index search statistics JSON file in `/result/search/<{algo},{group},k{k},bs{batch_size}.json>`. NOTE: The filenames will not have ",{group}" if `group = "base"`. - -`dataset-path` : -1. data is read from `/` -2. indices are built in `//index` -3. build/search results are stored in `//result` - -`build` and `search` : if both parameters are not supplied to the script then -it is assumed both are `True`. - -`indices` and `algorithms` : these parameters ensure that the algorithm specified for an index -is available in `algos.yaml` and not disabled, as well as having an associated executable. - -### Step 3: Data Export -The script `raft_ann_bench.data_export` will convert the intermediate JSON outputs produced by `raft_ann_bench.run` to more -easily readable CSV files, which are needed to build charts made by `raft_ann_bench.plot`. 
- -```bash -usage: data_export.py [-h] [--dataset DATASET] [--dataset-path DATASET_PATH] - -options: - -h, --help show this help message and exit - --dataset DATASET dataset to download (default: glove-100-inner) - --dataset-path DATASET_PATH - path to dataset folder (default: ${RAPIDS_DATASET_ROOT_DIR}) -``` -Build statistics CSV file is stored in `/result/build/<{algo},{group}.csv>` -and index search statistics CSV file in `/result/search/<{algo},{group},k{k},bs{batch_size},{suffix}.csv>`, where suffix has three values: -1. `raw`: All search results are exported -2. `throughput`: Pareto frontier of throughput results is exported -3. `latency`: Pareto frontier of latency results is exported - - -### Step 4: Plot Results -The script `raft_ann_bench.plot` will plot results for all algorithms found in index search statistics -CSV files `/result/search/*.csv`. - -The usage of this script is: -```bash -usage: [-h] [--dataset DATASET] [--dataset-path DATASET_PATH] [--output-filepath OUTPUT_FILEPATH] [--algorithms ALGORITHMS] [--groups GROUPS] [--algo-groups ALGO_GROUPS] - [-k COUNT] [-bs BATCH_SIZE] [--build] [--search] [--x-scale X_SCALE] [--y-scale {linear,log,symlog,logit}] [--x-start X_START] [--mode {throughput,latency}] - [--time-unit {s,ms,us}] [--raw] - -options: - -h, --help show this help message and exit - --dataset DATASET dataset to plot (default: glove-100-inner) - --dataset-path DATASET_PATH - path to dataset folder (default: /home/coder/raft/datasets/) - --output-filepath OUTPUT_FILEPATH - directory for PNG to be saved (default: /home/coder/raft) - --algorithms ALGORITHMS - plot only comma separated list of named algorithms. If parameters `groups` and `algo-groups are both undefined, then group `base` is plot by default - (default: None) - --groups GROUPS plot only comma separated groups of parameters (default: base) - --algo-groups ALGO_GROUPS, --algo-groups ALGO_GROUPS - add comma separated . to plot. 
Example usage: "--algo-groups=raft_cagra.large,hnswlib.large" (default: None) - -k COUNT, --count COUNT - the number of nearest neighbors to search for (default: 10) - -bs BATCH_SIZE, --batch-size BATCH_SIZE - number of query vectors to use in each query trial (default: 10000) - --build - --search - --x-scale X_SCALE Scale to use when drawing the X-axis. Typically linear, logit or a2 (default: linear) - --y-scale {linear,log,symlog,logit} - Scale to use when drawing the Y-axis (default: linear) - --x-start X_START Recall values to start the x-axis from (default: 0.8) - --mode {throughput,latency} - search mode whose Pareto frontier is used on the y-axis (default: throughput) - --time-unit {s,ms,us} - time unit to plot when mode is latency (default: ms) - --raw Show raw results (not just Pareto frontier) of mode arg (default: False) -``` -`mode`: plots pareto frontier of `throughput` or `latency` results exported in the previous step - -`algorithms`: plots all algorithms that it can find results for the specified `dataset`. By default, only `base` group will be plotted. - -`groups`: plot only specific groups of parameters configurations for an algorithm. Groups are defined in YAML configs (see `configuration`), and by default run `base` group - -`algo-groups`: this parameter is helpful to append any specific algorithm+group combination to plot results for in addition to all the arguments from `algorithms` and `groups`. It is of the format `.`, or for example, `raft_cagra.large` - -The figure below is the resulting plot of running our benchmarks as of August 2023 for a batch size of 10, on an NVIDIA H100 GPU and an Intel Xeon Platinum 8480CL CPU. It presents the throughput (in Queries-Per-Second) performance for every level of recall. 
- -![Throughput vs recall plot comparing popular ANN algorithms with RAFT's at batch size 10](../../img/raft-vector-search-batch-10.png) - -## Running the benchmarks - -### End to end: small-scale benchmarks (<1M to 10M) - -The steps below demonstrate how to download, install, and run benchmarks on a subset of 10M vectors from the Yandex Deep-1B dataset By default the datasets will be stored and used from the folder indicated by the `RAPIDS_DATASET_ROOT_DIR` environment variable if defined, otherwise a datasets sub-folder from where the script is being called: - -```bash - -# (1) prepare dataset. -python -m raft_ann_bench.get_dataset --dataset deep-image-96-angular --normalize - -# (2) build and search index -python -m raft_ann_bench.run --dataset deep-image-96-inner --algorithms raft_cagra --batch-size 10 -k 10 - -# (3) export data -python -m raft_ann_bench.data_export --dataset deep-image-96-inner - -# (4) plot results -python -m raft_ann_bench.plot --dataset deep-image-96-inner -``` - -Configuration files already exist for the following list of the million-scale datasets. Please refer to [ann-benchmarks datasets](https://github.com/erikbern/ann-benchmarks/#data-sets) for more information, including actual train and sizes. These all work out-of-the-box with the `--dataset` argument. Other million-scale datasets from `ann-benchmarks.com` will work, but will require a json configuration file to be created in `$CONDA_PREFIX/lib/python3.xx/site-packages/raft_ann_bench/run/conf`, or you can specify the `--configuration` option to use a specific file. 
- -| Dataset Name | Train Rows | Columns | Test Rows | Distance | -|-----|------------|----|----------------|------------| -| `deep-image-96-angular` | 10M | 96 | 10K | Angular | -| `fashion-mnist-784-euclidean` | 60K | 784 | 10K | Euclidean | -| `glove-50-angular` | 1.1M | 50 | 10K | Angular | -| `glove-100-angular` | 1.1M | 100 | 10K | Angular | -| `mnist-784-euclidean` | 60K | 784 | 10K | Euclidean | -| `nytimes-256-angular` | 290K | 256 | 10K | Angular | -| `sift-128-euclidean` | 1M | 128 | 10K | Euclidean| - -All of the datasets above contain ground test datasets with 100 neighbors. Thus `k` for these datasets must be less than or equal to 100. - -### End to end: large-scale benchmarks (>10M vectors) - -`raft_ann_bench.get_dataset` cannot be used to download the [billion-scale datasets](ann_benchmarks_dataset.md#billion-scale) -due to their size. You should instead use our billion-scale datasets guide to download and prepare them. -All other python commands mentioned below work as intended once the -billion-scale dataset has been downloaded. -To download billion-scale datasets, visit [big-ann-benchmarks](http://big-ann-benchmarks.com/neurips21.html) - -We also provide a new dataset called `wiki-all` containing 88 million 768-dimensional vectors. This dataset is meant for benchmarking a realistic retrieval-augmented generation (RAG)/LLM embedding size at scale. It also contains 1M and 10M vector subsets for smaller-scale experiments. See our [Wiki-all Dataset Guide](https://docs.rapids.ai/api/raft/nightly/wiki_all_dataset/) for more information and to download the dataset. - -The steps below demonstrate how to download, install, and run benchmarks on a subset of 100M vectors from the Yandex Deep-1B dataset. Please note that datasets of this scale are recommended for GPUs with larger amounts of memory, such as the A100 or H100. 
-```bash - -mkdir -p datasets/deep-1B -# (1) prepare dataset -# download manually "Ground Truth" file of "Yandex DEEP" -# suppose the file name is deep_new_groundtruth.public.10K.bin -python -m raft_ann_bench.split_groundtruth --groundtruth datasets/deep-1B/deep_new_groundtruth.public.10K.bin -# two files 'groundtruth.neighbors.ibin' and 'groundtruth.distances.fbin' should be produced - -# (2) build and search index -python -m raft_ann_bench.run --dataset deep-1B --algorithms raft_cagra --batch-size 10 -k 10 - -# (3) export data -python -m raft_ann_bench.data_export --dataset deep-1B - -# (4) plot results -python -m raft_ann_bench.plot --dataset deep-1B -``` - -The usage of `python -m raft_ann_bench.split_groundtruth` is: -```bash -usage: split_groundtruth.py [-h] --groundtruth GROUNDTRUTH - -options: - -h, --help show this help message and exit - --groundtruth GROUNDTRUTH - Path to billion-scale dataset groundtruth file (default: None) -``` - -### Running with Docker containers - -Two methods are provided for running the benchmarks with the Docker containers. - -#### End-to-end run on GPU - -When no other entrypoint is provided, an end-to-end script will run through all the steps in [Running the benchmarks](#running-the-benchmarks) above. 
- -For GPU-enabled systems, the `DATA_FOLDER` variable should be a local folder where you want datasets stored in `$DATA_FOLDER/datasets` and results in `$DATA_FOLDER/result` (we highly recommend `$DATA_FOLDER` to be a dedicated folder for the datasets and results of the containers): -```bash -export DATA_FOLDER=path/to/store/datasets/and/results -docker run --gpus all --rm -it -u $(id -u) \ - -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench:24.12a-cuda11.8-py3.10 \ - "--dataset deep-image-96-angular" \ - "--normalize" \ - "--algorithms raft_cagra,raft_ivf_pq --batch-size 10 -k 10" \ - "" -``` - -Usage of the above command is as follows: - -| Argument | Description | -|-----------------------------------------------------------|----------------------------------------------------------------------------------------------------| -| `rapidsai/raft-ann-bench:24.12a-cuda11.8-py3.10` | Image to use. Can be either `raft-ann-bench` or `raft-ann-bench-datasets` | -| `"--dataset deep-image-96-angular"` | Dataset name | -| `"--normalize"` | Whether to normalize the dataset | -| `"--algorithms raft_cagra,hnswlib --batch-size 10 -k 10"` | Arguments passed to the `run` script, such as the algorithms to benchmark, the batch size, and `k` | -| `""` | Additional (optional) arguments that will be passed to the `plot` script. | - -***Note about user and file permissions:*** The flag `-u $(id -u)` allows the user inside the container to match the `uid` of the user outside the container, allowing the container to read and write to the mounted volume indicated by the `$DATA_FOLDER` variable. - -#### End-to-end run on CPU - -The container arguments in the above section also be used for the CPU-only container, which can be used on systems that don't have a GPU installed. 
- -***Note:*** the image changes to `raft-ann-bench-cpu` container and the `--gpus all` argument is no longer used: -```bash -export DATA_FOLDER=path/to/store/datasets/and/results -docker run --rm -it -u $(id -u) \ - -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench-cpu:24.12a-py3.10 \ - "--dataset deep-image-96-angular" \ - "--normalize" \ - "--algorithms hnswlib --batch-size 10 -k 10" \ - "" -``` - -#### Manually run the scripts inside the container - -All of the `raft-ann-bench` images contain the Conda packages, so they can be used directly by logging directly into the container itself: - -```bash -export DATA_FOLDER=path/to/store/datasets/and/results -docker run --gpus all --rm -it -u $(id -u) \ - --entrypoint /bin/bash \ - --workdir /data/benchmarks \ - -v $DATA_FOLDER:/data/benchmarks \ - rapidsai/raft-ann-bench:24.12a-cuda11.8-py3.10 -``` - -This will drop you into a command line in the container, with the `raft-ann-bench` python package ready to use, as described in the [Running the benchmarks](#running-the-benchmarks) section above: - -``` -(base) root@00b068fbb862:/data/benchmarks# python -m raft_ann_bench.get_dataset --dataset deep-image-96-angular --normalize -``` - -Additionally, the containers can be run in detached mode without any issue. - - -### Evaluating the results - -The benchmarks capture several different measurements. The table below describes each of the measurements for index build benchmarks: - -| Name | Description | -|------------|--------------------------------------------------------| -| Benchmark | A name that uniquely identifies the benchmark instance | -| Time | Wall-time spent training the index | -| CPU | CPU time spent training the index | -| Iterations | Number of iterations (this is usually 1) | -| GPU | GPU time spent building | -| index_size | Number of vectors used to train index | - - -The table below describes each of the measurements for the index search benchmarks. 
The most important measurements `Latency`, `items_per_second`, `end_to_end`. - -| Name | Description | -|------------|-------------------------------------------------------------------------------------------------------------------------------------------------------| -| Benchmark | A name that uniquely identifies the benchmark instance | -| Time | The wall-clock time of a single iteration (batch) divided by the number of threads. | -| CPU | The average CPU time (user + sys time). This does not include idle time (which can also happen while waiting for GPU sync). | -| Iterations | Total number of batches. This is going to be `total_queries` / `n_queries`. | -| GPU | GPU latency of a single batch (seconds). In throughput mode this is averaged over multiple threads. | -| Latency | Latency of a single batch (seconds), calculated from wall-clock time. In throughput mode this is averaged over multiple threads. | -| Recall | Proportion of correct neighbors to ground truth neighbors. Note this column is only present if groundtruth file is specified in dataset configuration.| -| items_per_second | Total throughput, a.k.a Queries per second (QPS). This is approximately `total_queries` / `end_to_end`. | -| k | Number of neighbors being queried in each iteration | -| end_to_end | Total time taken to run all batches for all iterations | -| n_queries | Total number of query vectors in each batch | -| total_queries | Total number of vectors queries across all iterations ( = `iterations` * `n_queries`) | - -Note the following: -- A slightly different method is used to measure `Time` and `end_to_end`. That is why `end_to_end` = `Time` * `Iterations` holds only approximately. -- The actual table displayed on the screen may differ slightly as the hyper-parameters will also be displayed for each different combination being benchmarked. -- Recall calculation: the number of queries processed per test depends on the number of iterations. 
Because of this, recall can show slight fluctuations if less neighbors are processed then it is available for the benchmark. - -## Creating and customizing dataset configurations - -A single configuration will often define a set of algorithms, with associated index and search parameters, that can be generalize across datasets. We use YAML to define dataset specific and algorithm specific configurations. - -A default `datasets.yaml` is provided by RAFT in `${RAFT_HOME}/python/raft-ann-bench/src/raft_ann_bench/run/conf` with configurations available for several datasets. Here's a simple example entry for the `sift-128-euclidean` dataset: - -```yaml -- name: sift-128-euclidean - base_file: sift-128-euclidean/base.fbin - query_file: sift-128-euclidean/query.fbin - groundtruth_neighbors_file: sift-128-euclidean/groundtruth.neighbors.ibin - dims: 128 - distance: euclidean -``` - -Configuration files for ANN algorithms supported by `raft-ann-bench` are provided in `${RAFT_HOME}/python/raft-ann-bench/src/raft_ann_bench/run/conf`. `raft_cagra` algorithm configuration looks like: -```yaml -name: raft_cagra -groups: - base: - build: - graph_degree: [32, 64] - intermediate_graph_degree: [64, 96] - graph_build_algo: ["NN_DESCENT"] - search: - itopk: [32, 64, 128] - - large: - build: - graph_degree: [32, 64] - search: - itopk: [32, 64, 128] -``` -The default parameters for which the benchmarks are run can be overridden by creating a custom YAML file for algorithms with a `base` group. - -There config above has 2 fields: -1. `name` - define the name of the algorithm for which the parameters are being specified. -2. `groups` - define a run group which has a particular set of parameters. Each group helps create a cross-product of all hyper-parameter fields for `build` and `search`. - -The table below contains all algorithms supported by RAFT. Each unique algorithm will have its own set of `build` and `search` settings. 
The [ANN Algorithm Parameter Tuning Guide](ann_benchmarks_param_tuning.md) contains detailed instructions on choosing build and search parameters for each supported algorithm. - -| Library | Algorithms | -|-----------|---------------------------------------------------------------------------------------| -| FAISS GPU | `faiss_gpu_flat`, `faiss_gpu_ivf_flat`, `faiss_gpu_ivf_pq` | -| FAISS CPU | `faiss_cpu_flat`, `faiss_cpu_ivf_flat`, `faiss_cpu_ivf_pq` | -| GGNN | `ggnn` | -| HNSWlib | `hnswlib` | -| RAFT | `raft_brute_force`, `raft_cagra`, `raft_ivf_flat`, `raft_ivf_pq`, `raft_cagra_hnswlib`| - -## Adding a new ANN algorithm - -### Implementation and Configuration -Implementation of a new algorithm should be a C++ class that inherits `class ANN` (defined in `cpp/bench/ann/src/ann.h`) and implements all the pure virtual functions. - -In addition, it should define two `struct`s for building and searching parameters. The searching parameter class should inherit `struct ANN::AnnSearchParam`. Take `class HnswLib` as an example, its definition is: -```c++ -template -class HnswLib : public ANN { -public: - struct BuildParam { - int M; - int ef_construction; - int num_threads; - }; - - using typename ANN::AnnSearchParam; - struct SearchParam : public AnnSearchParam { - int ef; - int num_threads; - }; - - // ... -}; -``` - -The benchmark program uses JSON format in a configuration file to specify indexes to build, along with the build and search parameters. To add the new algorithm to the benchmark, need be able to specify `build_param`, whose value is a JSON object, and `search_params`, whose value is an array of JSON objects, for this algorithm in configuration file. The `build_param` and `search_param` arguments will vary depending on the algorithm. 
Take the configuration for `HnswLib` as an example: -```json -{ - "name" : "hnswlib.M12.ef500.th32", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "/path/to/file", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - ], - "search_result_file" : "/path/to/file" -}, -``` -How to interpret these JSON objects is totally left to the implementation and should be specified in `cpp/bench/ann/src/factory.cuh`: -1. First, add two functions for parsing JSON object to `struct BuildParam` and `struct SearchParam`, respectively: - ```c++ - template - void parse_build_param(const nlohmann::json& conf, - typename cuann::HnswLib::BuildParam& param) { - param.ef_construction = conf.at("efConstruction"); - param.M = conf.at("M"); - if (conf.contains("numThreads")) { - param.num_threads = conf.at("numThreads"); - } - } - - template - void parse_search_param(const nlohmann::json& conf, - typename cuann::HnswLib::SearchParam& param) { - param.ef = conf.at("ef"); - if (conf.contains("numThreads")) { - param.num_threads = conf.at("numThreads"); - } - } - ``` - -2. Next, add corresponding `if` case to functions `create_algo()` (in `cpp/bench/ann/) and `create_search_param()` by calling parsing functions. The string literal in `if` condition statement must be the same as the value of `algo` in configuration file. For example, - ```c++ - // JSON configuration file contains a line like: "algo" : "hnswlib" - if (algo == "hnswlib") { - // ... 
- } - ``` - - -### Adding a CMake Target -In `raft/cpp/bench/ann/CMakeLists.txt`, we provide a `CMake` function to configure a new Benchmark target with the following signature: -``` -ConfigureAnnBench( - NAME - PATH - INCLUDES - CXXFLAGS - LINKS -) -``` - -To add a target for `HNSWLIB`, we would call the function as: -``` -ConfigureAnnBench( - NAME HNSWLIB PATH bench/ann/src/hnswlib/hnswlib_benchmark.cpp INCLUDES - ${CMAKE_CURRENT_BINARY_DIR}/_deps/hnswlib-src/hnswlib CXXFLAGS "${HNSW_CXX_FLAGS}" -) -``` - -This will create an executable called `HNSWLIB_ANN_BENCH`, which can then be used to run `HNSWLIB` benchmarks. - -Add a new entry to `algos.yaml` to map the name of the algorithm to its binary executable and specify whether the algorithm requires GPU support. -```yaml -raft_ivf_pq: - executable: RAFT_IVF_PQ_ANN_BENCH - requires_gpu: true -``` - -`executable` : specifies the name of the binary that will build/search the index. It is assumed to be -available in `raft/cpp/build/`. -`requires_gpu` : denotes whether an algorithm requires GPU to run. diff --git a/docs/source/vector_search_tutorial.md b/docs/source/vector_search_tutorial.md index d1d5c57700..8f7b2d1bfd 100644 --- a/docs/source/vector_search_tutorial.md +++ b/docs/source/vector_search_tutorial.md @@ -17,7 +17,7 @@ RAFT has several important algorithms for performing vector search on the GPU and this tutorial walks through the primary vector search APIs from start to finish to provide a reference for quick setup and C++ API usage. -This tutorial assumes RAFT has been installed and/or added to your build so that you are able to compile and run RAFT code. If not done already, please follow the [build and install instructions](build.md) and consider taking a look at the [example c++ template project](https://github.com/rapidsai/raft/tree/HEAD/cpp/template) for ready-to-go examples that you can immediately build and start playing with. 
Also take a look at RAFT's library of [reproducible vector search benchmarks](raft_ann_benchmarks.md) to run benchmarks that compare RAFT against other state-of-the-art nearest neighbors algorithms at scale. +This tutorial assumes RAFT has been installed and/or added to your build so that you are able to compile and run RAFT code. If not done already, please follow the [build and install instructions](build.md) and consider taking a look at the [example c++ template project](https://github.com/rapidsai/raft/tree/HEAD/cpp/template) for ready-to-go examples that you can immediately build and start playing with. For more information about the various APIs demonstrated in this tutorial, along with comprehensive usage examples of all the APIs offered by RAFT, please refer to the [RAFT's C++ API Documentation](https://docs.rapids.ai/api/raft/nightly/cpp_api/). @@ -271,7 +271,7 @@ auto removed_indices = raft::make_device_vector(res, n_removed_indices); raft::core::bitset removed_indices_bitset( res, removed_indices.view(), dataset.extent(0)); -// ... Populate the bitset ... +// ... Populate the bitset ... // search K nearest neighbours according to a bitset filter auto neighbors = raft::make_device_matrix(res, n_queries, k); @@ -406,4 +406,4 @@ The below example specifies the total number of bytes that RAFT can use for temp std::shared_ptr managed_resource; raft::device_resource res(managed_resource, std::make_optional(3 * 1024^3)); -``` \ No newline at end of file +``` diff --git a/docs/source/wiki_all_dataset.md b/docs/source/wiki_all_dataset.md deleted file mode 100644 index c001bdc409..0000000000 --- a/docs/source/wiki_all_dataset.md +++ /dev/null @@ -1,47 +0,0 @@ -# Wiki-all Dataset - -The `wiki-all` dataset was created to stress vector search algorithms at scale with both a large number of vectors and dimensions. 
The entire dataset contains 88M vectors with 768 dimensions and is meant for testing the types of vectors one would typically encounter in retrieval augmented generation (RAG) workloads. The full dataset is ~251GB in size, which is intentionally larger than the typical memory of GPUs. The massive scale is intended to promote the use of compression and efficient out-of-core methods for both indexing and search. - -The dataset is composed of English wiki texts from [Kaggle](https://www.kaggle.com/datasets/jjinho/wikipedia-20230701) and multi-lingual wiki texts from [Cohere Wikipedia](https://huggingface.co/datasets/Cohere/wikipedia-22-12). - -Cohere's English Texts are older (2022) and smaller than the Kaggle English Wiki texts (2023) so the English texts have been removed from Cohere completely. The final Wiki texts include English Wiki from Kaggle and the other languages from Cohere. The English texts constitute 50% of the total text size. - -To form the final dataset, the Wiki texts were chunked into 85 million 128-token pieces. For reference, Cohere chunks Wiki texts into 104-token pieces. Finally, the embeddings of each chunk were computed using the [paraphrase-multilingual-mpnet-base-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2) embedding model. The resulting dataset is an embedding matrix of size 88 million by 768. Also included with the dataset is a query file containing 10k query vectors and a groundtruth file to evaluate nearest neighbors algorithms. - -## Getting the dataset - -### Full dataset - -A version of the dataset is made available in the binary format that can be used directly by the [raft-ann-bench](https://docs.rapids.ai/api/raft/nightly/raft_ann_benchmarks/) tool. The full 88M dataset is ~251GB and the download link below contains tarballs that have been split into multiple parts. 
- -The following will download all 10 the parts and untar them to a `wiki_all_88M` directory: -```bash -curl -s https://data.rapids.ai/raft/datasets/wiki_all/wiki_all.tar.{00..9} | tar -xf - -C wiki_all_88M/ -``` - -The above has the unfortunate drawback that if the command should fail for any reason, all the parts need to be re-downloaded. The files can also be downloaded individually and then untarred to the directory. Each file is ~27GB and there are 10 of them. - -```bash -curl -s https://data.rapids.ai/raft/datasets/wiki_all/wiki_all.tar.00 -... -curl -s https://data.rapids.ai/raft/datasets/wiki_all/wiki_all.tar.09 - -cat wiki_all.tar.* | tar -xf - -C wiki_all_88M/ -``` - -### 1M and 10M subsets - -Also available are 1M and 10M subsets of the full dataset which are 2.9GB and 29GB, respectively. These subsets also include query sets of 10k vectors and corresponding groundtruth files. - -```bash -curl -s https://data.rapids.ai/raft/datasets/wiki_all_1M/wiki_all_1M.tar -curl -s https://data.rapids.ai/raft/datasets/wiki_all_10M/wiki_all_10M.tar -``` - -## Using the dataset - -After the dataset is downloaded and extracted to the `wiki_all_88M` directory (or `wiki_all_1M`/`wiki_all_10M` depending on whether the subsets are used), the files can be used in the benchmarking tool. The dataset name is `wiki_all` (or `wiki_all_1M`/`wiki_all_10M`), and the benchmarking tool can be used by specifying the appropriate name `--dataset wiki_all_88M` in the scripts. - -## License info - -The English wiki texts available on Kaggle come with the [CC BY-NCSA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/) license and the Cohere wikipedia data set comes with the [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) license. 
\ No newline at end of file diff --git a/python/pylibraft/CMakeLists.txt b/python/pylibraft/CMakeLists.txt index c286d3debf..3e3cc15221 100644 --- a/python/pylibraft/CMakeLists.txt +++ b/python/pylibraft/CMakeLists.txt @@ -53,7 +53,6 @@ if(NOT raft_FOUND) set(BUILD_TESTS OFF) set(BUILD_PRIMS_BENCH OFF) - set(BUILD_ANN_BENCH OFF) set(RAFT_COMPILE_LIBRARY ON) set(CUDA_STATIC_RUNTIME ON) set(CUDA_STATIC_MATH_LIBRARIES ON) diff --git a/python/raft-ann-bench/LICENSE b/python/raft-ann-bench/LICENSE deleted file mode 120000 index 30cff7403d..0000000000 --- a/python/raft-ann-bench/LICENSE +++ /dev/null @@ -1 +0,0 @@ -../../LICENSE \ No newline at end of file diff --git a/python/raft-ann-bench/pyproject.toml b/python/raft-ann-bench/pyproject.toml deleted file mode 100644 index 0e4fda1f00..0000000000 --- a/python/raft-ann-bench/pyproject.toml +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. - -[build-system] -build-backend = "rapids_build_backend.build" -requires = [ - "rapids-build-backend>=0.3.0,<0.4.0.dev0", - "setuptools", - "wheel", -] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. - -[project] -name = "raft-ann-bench" -dynamic = ["version"] -description = "RAFT ANN benchmarks" -authors = [ - { name = "NVIDIA Corporation" }, -] -license = { text = "Apache 2.0" } -requires-python = ">=3.10" -dependencies = [ -] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
-classifiers = [ - "Intended Audience :: Developers", - "Topic :: Database", - "Topic :: Scientific/Engineering", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", -] - -[project.urls] -Homepage = "https://github.com/rapidsai/raft" - -[tool.setuptools.packages.find] -where = ["src"] - -[tool.setuptools.package-data] -"*" = ["*.*", "VERSION"] - -[tool.isort] -line_length = 79 -multi_line_output = 3 -include_trailing_comma = true -force_grid_wrap = 0 -combine_as_imports = true -order_by_type = true -skip = [ - "thirdparty", - ".eggs", - ".git", - ".hg", - ".mypy_cache", - ".tox", - ".venv", - "_build", - "buck-out", - "build", - "dist", -] - -[tool.setuptools.dynamic] -version = { file = "raft_ann_bench/VERSION" } - -[tool.rapids-build-backend] -build-backend = "setuptools.build_meta" -requires = [] -dependencies-file = "../../dependencies.yaml" -commit-files = ["src/raft_ann_bench/GIT_COMMIT"] -matrix-entry = "cuda_suffixed=true" diff --git a/python/raft-ann-bench/src/raft_ann_bench/VERSION b/python/raft-ann-bench/src/raft_ann_bench/VERSION deleted file mode 120000 index a4e948506b..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/VERSION +++ /dev/null @@ -1 +0,0 @@ -../../../../VERSION \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft_ann_bench/__init__.py b/python/raft-ann-bench/src/raft_ann_bench/__init__.py deleted file mode 100644 index 80a3b3f284..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from ._version import __git_commit__, __version__ diff --git a/python/raft-ann-bench/src/raft_ann_bench/_version.py b/python/raft-ann-bench/src/raft_ann_bench/_version.py deleted file mode 100644 index 0fa0ba80bc..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/_version.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import importlib.resources - -__version__ = ( - importlib.resources.files(__package__) - .joinpath("VERSION") - .read_text() - .strip() -) -try: - __git_commit__ = ( - importlib.resources.files(__package__) - .joinpath("GIT_COMMIT") - .read_text() - .strip() - ) -except FileNotFoundError: - __git_commit__ = "" - -__all__ = ["__version__", "__git_commit__"] diff --git a/python/raft-ann-bench/src/raft_ann_bench/constraints/__init__.py b/python/raft-ann-bench/src/raft_ann_bench/constraints/__init__.py deleted file mode 100644 index e94ee56c92..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/constraints/__init__.py +++ /dev/null @@ -1,77 +0,0 @@ -# -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -DTYPE_SIZES = {"float": 4, "half": 2, "fp8": 1} - - -def raft_cagra_build_constraints(params, dims): - if "graph_degree" in params and "intermediate_graph_degree" in params: - return params["graph_degree"] <= params["intermediate_graph_degree"] - return True - - -def raft_ivf_pq_build_constraints(params, dims): - if "pq_dim" in params: - return params["pq_dim"] <= dims - return True - - -def raft_ivf_pq_search_constraints(params, build_params, k, batch_size): - ret = True - if "internalDistanceDtype" in params and "smemLutDtype" in params: - ret = ( - DTYPE_SIZES[params["smemLutDtype"]] - <= DTYPE_SIZES[params["internalDistanceDtype"]] - ) - - if "nlist" in build_params and "nprobe" in params: - ret = ret and build_params["nlist"] >= params["nprobe"] - return ret - - -def raft_cagra_search_constraints(params, build_params, k, batch_size): - ret = True - if "itopk" in params: - ret = ret and params["itopk"] >= k - return ret - - -def hnswlib_search_constraints(params, build_params, k, batch_size): - if "ef" in params: - return params["ef"] >= k - - -def faiss_gpu_ivf_pq_build_constraints(params, dims): - ret = True - # M must be defined - ret = params["M"] <= dims and dims % params["M"] == 0 - if "use_raft" in params and params["use_raft"]: - return ret - pq_bits = 8 - if "bitsPerCode" in params: - pq_bits = params["bitsPerCode"] - lookup_table_size = 4 - if "useFloat16" in params and params["useFloat16"]: - lookup_table_size = 2 - # FAISS constraint to check if lookup table fits in shared memory - # for now hard code maximum shared memory per block to 49 kB (the value for A100 and V100) - return ret and lookup_table_size * params["M"] * (2**pq_bits) <= 49152 - - -def faiss_gpu_ivf_pq_search_constraints(params, build_params, k, batch_size): - ret = True - if "nlist" in build_params and "nprobe" in params: - ret = ret and build_params["nlist"] >= params["nprobe"] - return ret diff --git a/python/raft-ann-bench/src/raft_ann_bench/data_export/__main__.py 
b/python/raft-ann-bench/src/raft_ann_bench/data_export/__main__.py deleted file mode 100644 index c8a6375577..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/data_export/__main__.py +++ /dev/null @@ -1,257 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import argparse -import json -import os -import sys -import traceback -import warnings - -import pandas as pd - -skip_build_cols = set( - [ - "algo_name", - "index_name", - "time", - "name", - "family_index", - "per_family_instance_index", - "run_name", - "run_type", - "repetitions", - "repetition_index", - "iterations", - "real_time", - "time_unit", - "index_size", - ] -) - -skip_search_cols = ( - set(["recall", "qps", "latency", "items_per_second", "Recall", "Latency"]) - | skip_build_cols -) - -metrics = { - "k-nn": { - "description": "Recall", - "worst": float("-inf"), - "lim": [0.0, 1.03], - }, - "throughput": { - "description": "Queries per second (1/s)", - "worst": float("-inf"), - }, - "latency": { - "description": "Search Latency (s)", - "worst": float("inf"), - }, -} - - -def read_file(dataset, dataset_path, method): - dir = os.path.join(dataset_path, dataset, "result", method) - for file in os.listdir(dir): - if file.endswith(".json"): - with open( - os.path.join(dir, file), "r", encoding="ISO-8859-1" - ) as f: - try: - data = json.load(f) - df = pd.DataFrame(data["benchmarks"]) - filename_split = file.split(",") - algo_name = 
(filename_split[0], filename_split[1]) - yield os.path.join(dir, file), algo_name, df - except Exception as e: - print( - "An error occurred processing file %s (%s). " - "Skipping..." % (file, e) - ) - - -def convert_json_to_csv_build(dataset, dataset_path): - for file, algo_name, df in read_file(dataset, dataset_path, "build"): - try: - if "base" in algo_name[1]: - algo_name = algo_name[0] - else: - algo_name = "_".join(algo_name) - df["name"] = df["name"].str.split("/").str[0] - write = pd.DataFrame( - { - "algo_name": [algo_name] * len(df), - "index_name": df["name"], - "time": df["real_time"], - } - ) - for name in df: - if name not in skip_build_cols: - write[name] = df[name] - write.to_csv(file.replace(".json", ".csv"), index=False) - except Exception as e: - print( - "An error occurred processing file %s (%s). Skipping..." - % (file, e) - ) - traceback.print_exc() - - -def create_pointset(data, xn, yn): - xm, ym = (metrics[xn], metrics[yn]) - rev_y = -1 if ym["worst"] < 0 else 1 - rev_x = -1 if xm["worst"] < 0 else 1 - - y_idx = 3 if yn == "throughput" else 4 - data.sort(key=lambda t: (rev_y * t[y_idx], rev_x * t[2])) - - lines = [] - last_x = xm["worst"] - comparator = ( - (lambda xv, lx: xv > lx) if last_x < 0 else (lambda xv, lx: xv < lx) - ) - for d in data: - if comparator(d[2], last_x): - last_x = d[2] - lines.append(d) - return lines - - -def get_frontier(df, metric): - lines = create_pointset(df.values.tolist(), "k-nn", metric) - return pd.DataFrame(lines, columns=df.columns) - - -def convert_json_to_csv_search(dataset, dataset_path): - for file, algo_name, df in read_file(dataset, dataset_path, "search"): - try: - build_file = os.path.join( - dataset_path, - dataset, - "result", - "build", - f"{','.join(algo_name)}.csv", - ) - print(build_file) - if "base" in algo_name[1]: - algo_name = algo_name[0] - else: - algo_name = "_".join(algo_name) - df["name"] = df["name"].str.split("/").str[0] - try: - write = pd.DataFrame( - { - "algo_name": [algo_name] 
* len(df), - "index_name": df["name"], - "recall": df["Recall"], - "throughput": df["items_per_second"], - "latency": df["Latency"], - } - ) - except Exception as e: - print( - "Search file %s (%s) missing a key. Skipping..." - % (file, e) - ) - for name in df: - if name not in skip_search_cols: - write[name] = df[name] - - if os.path.exists(build_file): - build_df = pd.read_csv(build_file) - write_ncols = len(write.columns) - write["build time"] = None - write["build threads"] = None - write["build cpu_time"] = None - write["build GPU"] = None - - try: - for col_idx in range(6, len(build_df.columns)): - col_name = build_df.columns[col_idx] - write[col_name] = None - - for s_index, search_row in write.iterrows(): - for b_index, build_row in build_df.iterrows(): - if ( - search_row["index_name"] - == build_row["index_name"] - ): - write.iloc[ - s_index, write_ncols - ] = build_df.iloc[b_index, 2] - write.iloc[ - s_index, write_ncols + 1 : - ] = build_df.iloc[b_index, 3:] - break - except Exception as e: - print( - "Build file %s (%s) missing a key. Skipping..." - % (build_file, e) - ) - else: - warnings.warn( - f"Build CSV not found for {algo_name}, " - f"build params won't be " - "appended in the Search CSV" - ) - - write.to_csv(file.replace(".json", ",raw.csv"), index=False) - throughput = get_frontier(write, "throughput") - throughput.to_csv( - file.replace(".json", ",throughput.csv"), index=False - ) - latency = get_frontier(write, "latency") - latency.to_csv(file.replace(".json", ",latency.csv"), index=False) - except Exception as e: - print( - "An error occurred processing file %s (%s). Skipping..." 
- % (file, e) - ) - traceback.print_exc() - - -def main(): - - call_path = os.getcwd() - if "RAPIDS_DATASET_ROOT_DIR" in os.environ: - default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") - else: - default_dataset_path = os.path.join(call_path, "datasets/") - - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - "--dataset", help="dataset to download", default="glove-100-inner" - ) - parser.add_argument( - "--dataset-path", - help="path to dataset folder", - default=default_dataset_path, - ) - - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - args = parser.parse_args() - - convert_json_to_csv_build(args.dataset, args.dataset_path) - convert_json_to_csv_search(args.dataset, args.dataset_path) - - -if __name__ == "__main__": - main() diff --git a/python/raft-ann-bench/src/raft_ann_bench/generate_groundtruth/__main__.py b/python/raft-ann-bench/src/raft_ann_bench/generate_groundtruth/__main__.py deleted file mode 100644 index e6f7aaf99c..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/generate_groundtruth/__main__.py +++ /dev/null @@ -1,240 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -import argparse -import os -import sys - -import cupy as cp -import numpy as np -import rmm -from pylibraft.common import DeviceResources -from pylibraft.neighbors.brute_force import knn -from rmm.allocators.cupy import rmm_cupy_allocator - -from .utils import memmap_bin_file, suffix_from_dtype, write_bin - - -def generate_random_queries(n_queries, n_features, dtype=np.float32): - print("Generating random queries") - if np.issubdtype(dtype, np.integer): - queries = cp.random.randint( - 0, 255, size=(n_queries, n_features), dtype=dtype - ) - else: - queries = cp.random.uniform(size=(n_queries, n_features)).astype(dtype) - return queries - - -def choose_random_queries(dataset, n_queries): - print("Choosing random vector from dataset as query vectors") - query_idx = np.random.choice( - dataset.shape[0], size=(n_queries,), replace=False - ) - return dataset[query_idx, :] - - -def calc_truth(dataset, queries, k, metric="sqeuclidean"): - handle = DeviceResources() - n_samples = dataset.shape[0] - n = 500000 # batch size for processing neighbors - i = 0 - indices = None - distances = None - queries = cp.asarray(queries, dtype=cp.float32) - - while i < n_samples: - print("Step {0}/{1}:".format(i // n, n_samples // n)) - n_batch = n if i + n <= n_samples else n_samples - i - - X = cp.asarray(dataset[i : i + n_batch, :], cp.float32) - - D, Ind = knn(X, queries, k, metric=metric, handle=handle) - handle.sync() - - D, Ind = cp.asarray(D), cp.asarray(Ind) - Ind += i # shift neighbor index by offset i - - if distances is None: - distances = D - indices = Ind - else: - distances = cp.concatenate([distances, D], axis=1) - indices = cp.concatenate([indices, Ind], axis=1) - idx = cp.argsort(distances, axis=1)[:, :k] - distances = cp.take_along_axis(distances, idx, axis=1) - indices = cp.take_along_axis(indices, idx, axis=1) - - i += n_batch - - return distances, indices - - -def main(): - pool = rmm.mr.PoolMemoryResource( - rmm.mr.CudaMemoryResource(), initial_pool_size=2**30 - 
) - rmm.mr.set_current_device_resource(pool) - cp.cuda.set_allocator(rmm_cupy_allocator) - - parser = argparse.ArgumentParser( - prog="generate_groundtruth", - description="Generate true neighbors using exact NN search. " - "The input and output files are in big-ann-benchmark's binary format.", - epilog="""Example usage - # With existing query file - python -m raft_ann_bench.generate_groundtruth --dataset /dataset/base.\ -fbin --output=groundtruth_dir --queries=/dataset/query.public.10K.fbin - - # With randomly generated queries - python -m raft_ann_bench.generate_groundtruth --dataset /dataset/base.\ -fbin --output=groundtruth_dir --queries=random --n_queries=10000 - - # Using only a subset of the dataset. Define queries by randomly - # selecting vectors from the (subset of the) dataset. - python -m raft_ann_bench.generate_groundtruth --dataset /dataset/base.\ -fbin --nrows=2000000 --cols=128 --output=groundtruth_dir \ ---queries=random-choice --n_queries=10000 - """, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - - parser.add_argument("dataset", type=str, help="input dataset file name") - parser.add_argument( - "--queries", - type=str, - default="random", - help="Queries file name, or one of 'random-choice' or 'random' " - "(default). 'random-choice': select n_queries vectors from the input " - "dataset. 'random': generate n_queries as uniform random numbers.", - ) - parser.add_argument( - "--output", - type=str, - default="", - help="output directory name (default current dir)", - ) - - parser.add_argument( - "--n_queries", - type=int, - default=10000, - help="Number of quries to generate (if no query file is given). " - "Default: 10000.", - ) - - parser.add_argument( - "-N", - "--rows", - default=None, - type=int, - help="use only first N rows from dataset, by default the whole " - "dataset is used", - ) - parser.add_argument( - "-D", - "--cols", - default=None, - type=int, - help="number of features (dataset columns). 
" - "Default: read from dataset file.", - ) - parser.add_argument( - "--dtype", - type=str, - help="Dataset dtype. When not specified, then derived from extension." - " Supported types: 'float32', 'float16', 'uint8', 'int8'", - ) - - parser.add_argument( - "-k", - type=int, - default=100, - help="Number of neighbors (per query) to calculate", - ) - parser.add_argument( - "--metric", - type=str, - default="sqeuclidean", - help="Metric to use while calculating distances. Valid metrics are " - "those that are accepted by pylibraft.neighbors.brute_force.knn. Most" - " commonly used with RAFT ANN are 'sqeuclidean' and 'inner_product'", - ) - - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - args = parser.parse_args() - - if args.rows is not None: - print("Reading subset of the data, nrows=", args.rows) - else: - print("Reading whole dataset") - - # Load input data - dataset = memmap_bin_file( - args.dataset, args.dtype, shape=(args.rows, args.cols) - ) - n_features = dataset.shape[1] - dtype = dataset.dtype - - print( - "Dataset size {:6.1f} GB, shape {}, dtype {}".format( - dataset.size * dataset.dtype.itemsize / 1e9, - dataset.shape, - np.dtype(dtype), - ) - ) - - if len(args.output) > 0: - os.makedirs(args.output, exist_ok=True) - - if args.queries == "random" or args.queries == "random-choice": - if args.n_queries is None: - raise RuntimeError( - "n_queries must be given to generate random queries" - ) - if args.queries == "random": - queries = generate_random_queries( - args.n_queries, n_features, dtype - ) - elif args.queries == "random-choice": - queries = choose_random_queries(dataset, args.n_queries) - - queries_filename = os.path.join( - args.output, "queries" + suffix_from_dtype(dtype) - ) - print("Writing queries file", queries_filename) - write_bin(queries_filename, queries) - else: - print("Reading queries from file", args.queries) - queries = memmap_bin_file(args.queries, dtype) - - print("Calculating true nearest neighbors") - distances, 
indices = calc_truth(dataset, queries, args.k, args.metric) - - write_bin( - os.path.join(args.output, "groundtruth.neighbors.ibin"), - indices.astype(np.uint32), - ) - write_bin( - os.path.join(args.output, "groundtruth.distances.fbin"), - distances.astype(np.float32), - ) - - -if __name__ == "__main__": - main() diff --git a/python/raft-ann-bench/src/raft_ann_bench/generate_groundtruth/utils.py b/python/raft-ann-bench/src/raft_ann_bench/generate_groundtruth/utils.py deleted file mode 100644 index 3f2dd11a16..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/generate_groundtruth/utils.py +++ /dev/null @@ -1,103 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import os - -import numpy as np - - -def dtype_from_filename(filename): - ext = os.path.splitext(filename)[1] - if ext == ".fbin": - return np.float32 - if ext == ".hbin": - return np.float16 - elif ext == ".ibin": - return np.int32 - elif ext == ".u8bin": - return np.ubyte - elif ext == ".i8bin": - return np.byte - else: - raise RuntimeError("Not supported file extension" + ext) - - -def suffix_from_dtype(dtype): - if dtype == np.float32: - return ".fbin" - if dtype == np.float16: - return ".hbin" - elif dtype == np.int32: - return ".ibin" - elif dtype == np.ubyte: - return ".u8bin" - elif dtype == np.byte: - return ".i8bin" - else: - raise RuntimeError("Not supported dtype extension" + dtype) - - -def memmap_bin_file( - bin_file, dtype, shape=None, mode="r", size_dtype=np.uint32 -): - extent_itemsize = np.dtype(size_dtype).itemsize - offset = int(extent_itemsize) * 2 - if bin_file is None: - return None - if dtype is None: - dtype = dtype_from_filename(bin_file) - - if mode[0] == "r": - a = np.memmap(bin_file, mode=mode, dtype=size_dtype, shape=(2,)) - if shape is None: - shape = (a[0], a[1]) - else: - shape = tuple( - [ - aval if sval is None else sval - for aval, sval in zip(a, shape) - ] - ) - - return np.memmap( - bin_file, mode=mode, dtype=dtype, offset=offset, shape=shape - ) - elif mode[0] == "w": - if shape is None: - raise ValueError("Need to specify shape to map file in write mode") - - print("creating file", bin_file) - dirname = os.path.dirname(bin_file) - if len(dirname) > 0: - os.makedirs(dirname, exist_ok=True) - a = np.memmap(bin_file, mode=mode, dtype=size_dtype, shape=(2,)) - a[0] = shape[0] - a[1] = shape[1] - a.flush() - del a - fp = np.memmap( - bin_file, mode="r+", dtype=dtype, offset=offset, shape=shape - ) - return fp - - # print('# {}: shape: {}, dtype: {}'.format(bin_file, shape, dtype)) - - -def write_bin(fname, data): - print("writing", fname, data.shape, data.dtype, "...") - with open(fname, "wb") as f: - np.asarray(data.shape, 
dtype=np.uint32).tofile(f) - data.tofile(f) diff --git a/python/raft-ann-bench/src/raft_ann_bench/get_dataset/__main__.py b/python/raft-ann-bench/src/raft_ann_bench/get_dataset/__main__.py deleted file mode 100644 index 0a6c37aabc..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/get_dataset/__main__.py +++ /dev/null @@ -1,115 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import subprocess -import sys -from urllib.request import urlretrieve - - -def get_dataset_path(name, ann_bench_data_path): - if not os.path.exists(ann_bench_data_path): - os.mkdir(ann_bench_data_path) - return os.path.join(ann_bench_data_path, f"{name}.hdf5") - - -def download_dataset(url, path): - if not os.path.exists(path): - print(f"downloading {url} -> {path}...") - urlretrieve(url, path) - - -def convert_hdf5_to_fbin(path, normalize): - scripts_path = os.path.dirname(os.path.realpath(__file__)) - ann_bench_scripts_path = os.path.join(scripts_path, "hdf5_to_fbin.py") - print(f"calling script {ann_bench_scripts_path}") - if normalize and "angular" in path: - subprocess.run( - ["python", ann_bench_scripts_path, "-n", "%s" % path], check=True - ) - else: - subprocess.run( - ["python", ann_bench_scripts_path, "%s" % path], check=True - ) - - -def move(name, ann_bench_data_path): - if "angular" in name: - new_name = name.replace("angular", "inner") - else: - new_name = name - new_path = 
os.path.join(ann_bench_data_path, new_name) - if not os.path.exists(new_path): - os.mkdir(new_path) - for bin_name in [ - "base.fbin", - "query.fbin", - "groundtruth.neighbors.ibin", - "groundtruth.distances.fbin", - ]: - os.rename( - f"{ann_bench_data_path}/{name}.{bin_name}", - f"{new_path}/{bin_name}", - ) - - -def download(name, normalize, ann_bench_data_path): - path = get_dataset_path(name, ann_bench_data_path) - try: - url = f"http://ann-benchmarks.com/{name}.hdf5" - download_dataset(url, path) - - convert_hdf5_to_fbin(path, normalize) - - move(name, ann_bench_data_path) - except Exception: - print(f"Cannot download {url}") - raise - - -def main(): - call_path = os.getcwd() - if "RAPIDS_DATASET_ROOT_DIR" in os.environ: - default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") - else: - default_dataset_path = os.path.join(call_path, "datasets/") - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - "--dataset", help="dataset to download", default="glove-100-angular" - ) - parser.add_argument( - "--dataset-path", - help="path to download dataset", - default=default_dataset_path, - ) - parser.add_argument( - "--normalize", - help="normalize cosine distance to inner product", - action="store_true", - ) - - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - args = parser.parse_args() - - download(args.dataset, args.normalize, args.dataset_path) - - -if __name__ == "__main__": - main() diff --git a/python/raft-ann-bench/src/raft_ann_bench/get_dataset/fbin_to_f16bin.py b/python/raft-ann-bench/src/raft_ann_bench/get_dataset/fbin_to_f16bin.py deleted file mode 100755 index ee7410e0cc..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/get_dataset/fbin_to_f16bin.py +++ /dev/null @@ -1,49 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from __future__ import absolute_import, division, print_function - -import sys - -import numpy as np - - -def read_fbin(fname): - shape = np.fromfile(fname, dtype=np.uint32, count=2) - if float(shape[0]) * shape[1] * 4 > 2000000000: - data = np.memmap(fname, dtype=np.float32, offset=8, mode="r").reshape( - shape - ) - else: - data = np.fromfile(fname, dtype=np.float32, offset=8).reshape(shape) - return data - - -def write_bin(fname, data): - with open(fname, "wb") as f: - np.asarray(data.shape, dtype=np.uint32).tofile(f) - data.tofile(f) - - -if len(sys.argv) != 3: - print( - "usage: %s input.fbin output.f16bin" % (sys.argv[0]), - file=sys.stderr, - ) - sys.exit(-1) - -data = read_fbin(sys.argv[1]).astype(np.float16) -write_bin(sys.argv[2], data) diff --git a/python/raft-ann-bench/src/raft_ann_bench/get_dataset/hdf5_to_fbin.py b/python/raft-ann-bench/src/raft_ann_bench/get_dataset/hdf5_to_fbin.py deleted file mode 100755 index ba853c63f5..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/get_dataset/hdf5_to_fbin.py +++ /dev/null @@ -1,90 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - - -import sys - -import h5py -import numpy as np - - -def normalize(x): - norm = np.linalg.norm(x, axis=1) - return (x.T / norm).T - - -def write_bin(fname, data): - with open(fname, "wb") as f: - np.asarray(data.shape, dtype=np.uint32).tofile(f) - data.tofile(f) - - -if __name__ == "__main__": - if len(sys.argv) != 2 and len(sys.argv) != 3: - print( - "usage: %s [-n] .hdf5\n" % (sys.argv[0]), - " -n: normalize base/query set\n", - "outputs: .base.fbin\n", - " .query.fbin\n", - " .groundtruth.neighbors.ibin\n", - " .groundtruth.distances.fbin", - file=sys.stderr, - ) - sys.exit(-1) - - need_normalize = False - if len(sys.argv) == 3: - assert sys.argv[1] == "-n" - need_normalize = True - fname_prefix = sys.argv[-1] - assert fname_prefix.endswith(".hdf5") - fname_prefix = fname_prefix[:-5] - - hdf5 = h5py.File(sys.argv[-1], "r") - assert ( - hdf5.attrs["distance"] == "angular" - or hdf5.attrs["distance"] == "euclidean" - ) - assert hdf5["train"].dtype == np.float32 - assert hdf5["test"].dtype == np.float32 - assert hdf5["neighbors"].dtype == np.int32 - assert hdf5["distances"].dtype == np.float32 - - base = hdf5["train"][:] - query = hdf5["test"][:] - if need_normalize: - base = normalize(base) - query = normalize(query) - elif hdf5.attrs["distance"] == "angular": - print( - "warning: input has angular distance, ", - "specify -n to normalize base/query set!\n", - ) - - output_fname = fname_prefix + ".base.fbin" - print("writing", output_fname, "...") - write_bin(output_fname, base) - - output_fname = fname_prefix + ".query.fbin" - print("writing", output_fname, "...") - write_bin(output_fname, query) - - output_fname = fname_prefix + ".groundtruth.neighbors.ibin" - print("writing", output_fname, "...") - write_bin(output_fname, hdf5["neighbors"][:]) - - output_fname = fname_prefix + ".groundtruth.distances.fbin" - print("writing", output_fname, "...") - 
write_bin(output_fname, hdf5["distances"][:]) diff --git a/python/raft-ann-bench/src/raft_ann_bench/plot/__main__.py b/python/raft-ann-bench/src/raft_ann_bench/plot/__main__.py deleted file mode 100644 index 86fd527f5f..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/plot/__main__.py +++ /dev/null @@ -1,623 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This script is inspired by -# 1: https://github.com/erikbern/ann-benchmarks/blob/main/plot.py -# 2: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/utils.py # noqa: E501 -# 3: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/metrics.py # noqa: E501 -# Licence: https://github.com/erikbern/ann-benchmarks/blob/main/LICENSE - -import argparse -import itertools -import os -import sys -from collections import OrderedDict - -import matplotlib as mpl -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd - -mpl.use("Agg") - -metrics = { - "k-nn": { - "description": "Recall", - "worst": float("-inf"), - "lim": [0.0, 1.03], - }, - "throughput": { - "description": "Queries per second (1/s)", - "worst": float("-inf"), - }, - "latency": { - "description": "Search Latency (s)", - "worst": float("inf"), - }, -} - - -def positive_int(input_str: str) -> int: - try: - i = int(input_str) - if i < 1: - raise ValueError - except ValueError: - raise argparse.ArgumentTypeError( - f"{input_str} is 
not a positive integer" - ) - - return i - - -def positive_float(input_str: str) -> float: - try: - i = float(input_str) - if i < 0.0: - raise ValueError - except ValueError: - raise argparse.ArgumentTypeError( - f"{input_str} is not a positive float" - ) - - return i - - -def generate_n_colors(n): - vs = np.linspace(0.3, 0.9, 7) - colors = [(0.9, 0.4, 0.4, 1.0)] - - def euclidean(a, b): - return sum((x - y) ** 2 for x, y in zip(a, b)) - - while len(colors) < n: - new_color = max( - itertools.product(vs, vs, vs), - key=lambda a: min(euclidean(a, b) for b in colors), - ) - colors.append(new_color + (1.0,)) - return colors - - -def create_linestyles(unique_algorithms): - colors = dict( - zip(unique_algorithms, generate_n_colors(len(unique_algorithms))) - ) - linestyles = dict( - (algo, ["--", "-.", "-", ":"][i % 4]) - for i, algo in enumerate(unique_algorithms) - ) - markerstyles = dict( - (algo, ["+", "<", "o", "*", "x"][i % 5]) - for i, algo in enumerate(unique_algorithms) - ) - faded = dict( - (algo, (r, g, b, 0.3)) for algo, (r, g, b, a) in colors.items() - ) - return dict( - ( - algo, - (colors[algo], faded[algo], linestyles[algo], markerstyles[algo]), - ) - for algo in unique_algorithms - ) - - -def create_plot_search( - all_data, - x_scale, - y_scale, - fn_out, - linestyles, - dataset, - k, - batch_size, - mode, - time_unit, - x_start, -): - xn = "k-nn" - xm, ym = (metrics[xn], metrics[mode]) - xm["lim"][0] = x_start - # Now generate each plot - handles = [] - labels = [] - plt.figure(figsize=(12, 9)) - - # Sorting by mean y-value helps aligning plots with labels - def mean_y(algo): - points = np.array(all_data[algo], dtype=object) - return -np.log(np.array(points[:, 3], dtype=np.float32)).mean() - - # Find range for logit x-scale - min_x, max_x = 1, 0 - for algo in sorted(all_data.keys(), key=mean_y): - points = np.array(all_data[algo], dtype=object) - xs = points[:, 2] - ys = points[:, 3] - min_x = min([min_x] + [x for x in xs if x > 0]) - max_x = 
max([max_x] + [x for x in xs if x < 1]) - color, faded, linestyle, marker = linestyles[algo] - (handle,) = plt.plot( - xs, - ys, - "-", - label=algo, - color=color, - ms=7, - mew=3, - lw=3, - marker=marker, - ) - handles.append(handle) - - labels.append(algo) - - ax = plt.gca() - y_description = ym["description"] - if mode == "latency": - y_description = y_description.replace("(s)", f"({time_unit})") - ax.set_ylabel(y_description) - ax.set_xlabel("Recall") - # Custom scales of the type --x-scale a3 - if x_scale[0] == "a": - alpha = float(x_scale[1:]) - - def fun(x): - return 1 - (1 - x) ** (1 / alpha) - - def inv_fun(x): - return 1 - (1 - x) ** alpha - - ax.set_xscale("function", functions=(fun, inv_fun)) - if alpha <= 3: - ticks = [inv_fun(x) for x in np.arange(0, 1.2, 0.2)] - plt.xticks(ticks) - if alpha > 3: - from matplotlib import ticker - - ax.xaxis.set_major_formatter(ticker.LogitFormatter()) - # plt.xticks(ticker.LogitLocator().tick_values(min_x, max_x)) - plt.xticks([0, 1 / 2, 1 - 1e-1, 1 - 1e-2, 1 - 1e-3, 1 - 1e-4, 1]) - # Other x-scales - else: - ax.set_xscale(x_scale) - ax.set_yscale(y_scale) - ax.set_title(f"{dataset} k={k} batch_size={batch_size}") - plt.gca().get_position() - # plt.gca().set_position([box.x0, box.y0, box.width * 0.8, box.height]) - ax.legend( - handles, - labels, - loc="center left", - bbox_to_anchor=(1, 0.5), - prop={"size": 9}, - ) - plt.grid(visible=True, which="major", color="0.65", linestyle="-") - plt.setp(ax.get_xminorticklabels(), visible=True) - - # Logit scale has to be a subset of (0,1) - if "lim" in xm and x_scale != "logit": - x0, x1 = xm["lim"] - plt.xlim(max(x0, 0), min(x1, 1)) - elif x_scale == "logit": - plt.xlim(min_x, max_x) - if "lim" in ym: - plt.ylim(ym["lim"]) - - # Workaround for bug https://github.com/matplotlib/matplotlib/issues/6789 - ax.spines["bottom"]._adjust_location() - - print(f"writing search output to {fn_out}") - plt.savefig(fn_out, bbox_inches="tight") - plt.close() - - -def create_plot_build( - 
build_results, search_results, linestyles, fn_out, dataset, k, batch_size -): - bt_80 = [0] * len(linestyles) - - bt_90 = [0] * len(linestyles) - - bt_95 = [0] * len(linestyles) - - bt_99 = [0] * len(linestyles) - - data = OrderedDict() - colors = OrderedDict() - - # Sorting by mean y-value helps aligning plots with labels - - def mean_y(algo): - points = np.array(search_results[algo], dtype=object) - return -np.log(np.array(points[:, 3], dtype=np.float32)).mean() - - for pos, algo in enumerate(sorted(search_results.keys(), key=mean_y)): - points = np.array(search_results[algo], dtype=object) - # x is recall, ls is algo_name, idxs is index_name - xs = points[:, 2] - ls = points[:, 0] - idxs = points[:, 1] - - len_80, len_90, len_95, len_99 = 0, 0, 0, 0 - for i in range(len(xs)): - if xs[i] >= 0.80 and xs[i] < 0.90: - bt_80[pos] = bt_80[pos] + build_results[(ls[i], idxs[i])][0][2] - len_80 = len_80 + 1 - elif xs[i] >= 0.9 and xs[i] < 0.95: - bt_90[pos] = bt_90[pos] + build_results[(ls[i], idxs[i])][0][2] - len_90 = len_90 + 1 - elif xs[i] >= 0.95 and xs[i] < 0.99: - bt_95[pos] = bt_95[pos] + build_results[(ls[i], idxs[i])][0][2] - len_95 = len_95 + 1 - elif xs[i] >= 0.99: - bt_99[pos] = bt_99[pos] + build_results[(ls[i], idxs[i])][0][2] - len_99 = len_99 + 1 - if len_80 > 0: - bt_80[pos] = bt_80[pos] / len_80 - if len_90 > 0: - bt_90[pos] = bt_90[pos] / len_90 - if len_95 > 0: - bt_95[pos] = bt_95[pos] / len_95 - if len_99 > 0: - bt_99[pos] = bt_99[pos] / len_99 - data[algo] = [ - bt_80[pos], - bt_90[pos], - bt_95[pos], - bt_99[pos], - ] - colors[algo] = linestyles[algo][0] - - index = [ - "@80% Recall", - "@90% Recall", - "@95% Recall", - "@99% Recall", - ] - - df = pd.DataFrame(data, index=index) - df.replace(0.0, np.nan, inplace=True) - df = df.dropna(how="all") - plt.figure(figsize=(12, 9)) - ax = df.plot.bar(rot=0, color=colors) - fig = ax.get_figure() - print(f"writing build output to {fn_out}") - plt.title( - "Average Build Time within Recall Range " - f"for 
k={k} batch_size={batch_size}" - ) - plt.suptitle(f"{dataset}") - plt.ylabel("Build Time (s)") - fig.savefig(fn_out) - - -def load_lines(results_path, result_files, method, index_key, mode, time_unit): - results = dict() - - for result_filename in result_files: - try: - with open(os.path.join(results_path, result_filename), "r") as f: - lines = f.readlines() - lines = lines[:-1] if lines[-1] == "\n" else lines - - if method == "build": - key_idx = [2] - elif method == "search": - y_idx = 3 if mode == "throughput" else 4 - key_idx = [2, y_idx] - - for line in lines[1:]: - split_lines = line.split(",") - - algo_name = split_lines[0] - index_name = split_lines[1] - - if index_key == "algo": - dict_key = algo_name - elif index_key == "index": - dict_key = (algo_name, index_name) - if dict_key not in results: - results[dict_key] = [] - to_add = [algo_name, index_name] - for key_i in key_idx: - to_add.append(float(split_lines[key_i])) - if ( - mode == "latency" - and time_unit != "s" - and method == "search" - ): - to_add[-1] = ( - to_add[-1] * (10**3) - if time_unit == "ms" - else to_add[-1] * (10**6) - ) - results[dict_key].append(to_add) - except Exception: - print( - f"An error occurred processing file {result_filename}. " - "Skipping..." 
- ) - - return results - - -def load_all_results( - dataset_path, - algorithms, - groups, - algo_groups, - k, - batch_size, - method, - index_key, - raw, - mode, - time_unit, -): - results_path = os.path.join(dataset_path, "result", method) - result_files = os.listdir(results_path) - if method == "build": - result_files = [ - result_file - for result_file in result_files - if ".csv" in result_file - ] - elif method == "search": - if raw: - suffix = ",raw" - else: - suffix = f",{mode}" - result_files = [ - result_file - for result_file in result_files - if f"{suffix}.csv" in result_file - ] - if len(result_files) == 0: - raise FileNotFoundError(f"No CSV result files found in {results_path}") - - if method == "search": - filter_k_bs = [] - for result_filename in result_files: - filename_split = result_filename.split(",") - if ( - int(filename_split[-3][1:]) == k - and int(filename_split[-2][2:]) == batch_size - ): - filter_k_bs.append(result_filename) - result_files = filter_k_bs - - algo_group_files = [ - result_filename.replace(".csv", "").split(",")[:2] - for result_filename in result_files - ] - algo_group_files = list(zip(*algo_group_files)) - - if len(algorithms) > 0: - final_results = [ - result_files[i] - for i in range(len(result_files)) - if (algo_group_files[0][i] in algorithms) - and (algo_group_files[1][i] in groups) - ] - else: - final_results = [ - result_files[i] - for i in range(len(result_files)) - if (algo_group_files[1][i] in groups) - ] - - if len(algo_groups) > 0: - split_algo_groups = [ - algo_group.split(".") for algo_group in algo_groups - ] - split_algo_groups = list(zip(*split_algo_groups)) - final_algo_groups = [ - result_files[i] - for i in range(len(result_files)) - if (algo_group_files[0][i] in split_algo_groups[0]) - and (algo_group_files[1][i] in split_algo_groups[1]) - ] - final_results = final_results + final_algo_groups - final_results = set(final_results) - - results = load_lines( - results_path, final_results, method, index_key, 
mode, time_unit - ) - - return results - - -def main(): - call_path = os.getcwd() - if "RAPIDS_DATASET_ROOT_DIR" in os.environ: - default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") - else: - default_dataset_path = os.path.join(call_path, "datasets/") - - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - "--dataset", help="dataset to plot", default="glove-100-inner" - ) - parser.add_argument( - "--dataset-path", - help="path to dataset folder", - default=default_dataset_path, - ) - parser.add_argument( - "--output-filepath", - help="directory for PNG to be saved", - default=os.getcwd(), - ) - parser.add_argument( - "--algorithms", - help="plot only comma separated list of named \ - algorithms. If parameters `groups` and `algo-groups \ - are both undefined, then group `base` is plot by default", - default=None, - ) - parser.add_argument( - "--groups", - help="plot only comma separated groups of parameters", - default="base", - ) - parser.add_argument( - "--algo-groups", - "--algo-groups", - help='add comma separated . to plot. \ - Example usage: "--algo-groups=raft_cagra.large,hnswlib.large"', - ) - parser.add_argument( - "-k", - "--count", - default=10, - type=positive_int, - help="the number of nearest neighbors to search for", - ) - parser.add_argument( - "-bs", - "--batch-size", - default=10000, - type=positive_int, - help="number of query vectors to use in each query trial", - ) - parser.add_argument("--build", action="store_true") - parser.add_argument("--search", action="store_true") - parser.add_argument( - "--x-scale", - help="Scale to use when drawing the X-axis. 
\ - Typically linear, logit or a2", - default="linear", - ) - parser.add_argument( - "--y-scale", - help="Scale to use when drawing the Y-axis", - choices=["linear", "log", "symlog", "logit"], - default="linear", - ) - parser.add_argument( - "--x-start", - help="Recall values to start the x-axis from", - default=0.8, - type=positive_float, - ) - parser.add_argument( - "--mode", - help="search mode whose Pareto frontier is used on the y-axis", - choices=["throughput", "latency"], - default="throughput", - ) - parser.add_argument( - "--time-unit", - help="time unit to plot when mode is latency", - choices=["s", "ms", "us"], - default="ms", - ) - parser.add_argument( - "--raw", - help="Show raw results (not just Pareto frontier) of mode arg", - action="store_true", - ) - - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - args = parser.parse_args() - - if args.algorithms: - algorithms = args.algorithms.split(",") - else: - algorithms = [] - groups = args.groups.split(",") - if args.algo_groups: - algo_groups = args.algo_groups.split(",") - else: - algo_groups = [] - k = args.count - batch_size = args.batch_size - if not args.build and not args.search: - build = True - search = True - else: - build = args.build - search = args.search - - search_output_filepath = os.path.join( - args.output_filepath, - f"search-{args.dataset}-k{k}-batch_size{batch_size}.png", - ) - build_output_filepath = os.path.join( - args.output_filepath, - f"build-{args.dataset}-k{k}-batch_size{batch_size}.png", - ) - - search_results = load_all_results( - os.path.join(args.dataset_path, args.dataset), - algorithms, - groups, - algo_groups, - k, - batch_size, - "search", - "algo", - args.raw, - args.mode, - args.time_unit, - ) - linestyles = create_linestyles(sorted(search_results.keys())) - if search: - create_plot_search( - search_results, - args.x_scale, - args.y_scale, - search_output_filepath, - linestyles, - args.dataset, - k, - batch_size, - args.mode, - args.time_unit, - 
args.x_start, - ) - if build: - build_results = load_all_results( - os.path.join(args.dataset_path, args.dataset), - algorithms, - groups, - algo_groups, - k, - batch_size, - "build", - "index", - args.raw, - args.mode, - args.time_unit, - ) - create_plot_build( - build_results, - search_results, - linestyles, - build_output_filepath, - args.dataset, - k, - batch_size, - ) - - -if __name__ == "__main__": - main() diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/__main__.py b/python/raft-ann-bench/src/raft_ann_bench/run/__main__.py deleted file mode 100644 index c34377d733..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/__main__.py +++ /dev/null @@ -1,614 +0,0 @@ -# -# Copyright (c) 2023-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import itertools -import json -import os -import subprocess -import sys -import uuid -import warnings -from importlib import import_module - -import yaml - -log_levels = { - "off": 0, - "error": 1, - "warn": 2, - "info": 3, - "debug": 4, - "trace": 5, -} - - -def parse_log_level(level_str): - if level_str not in log_levels: - raise ValueError("Invalid log level: %s" % level_str) - return log_levels[level_str.lower()] - - -def positive_int(input_str: str) -> int: - try: - i = int(input_str) - if i < 1: - raise ValueError - except ValueError: - raise argparse.ArgumentTypeError( - f"{input_str} is not a positive integer" - ) - - return i - - -def merge_build_files(build_dir, build_file, temp_build_file): - - build_dict = {} - - # If build file exists, read it - build_json_path = os.path.join(build_dir, build_file) - tmp_build_json_path = os.path.join(build_dir, temp_build_file) - if os.path.isfile(build_json_path): - try: - with open(build_json_path, "r") as f: - build_dict = json.load(f) - except Exception as e: - print( - "Error loading existing build file: %s (%s)" - % (build_json_path, e) - ) - - temp_build_dict = {} - if os.path.isfile(tmp_build_json_path): - with open(tmp_build_json_path, "r") as f: - temp_build_dict = json.load(f) - else: - raise ValueError("Temp build file not found: %s" % tmp_build_json_path) - - tmp_benchmarks = ( - temp_build_dict["benchmarks"] - if "benchmarks" in temp_build_dict - else {} - ) - benchmarks = build_dict["benchmarks"] if "benchmarks" in build_dict else {} - - # If the build time is absolute 0 then an error occurred - final_bench_dict = {} - for b in benchmarks: - if b["real_time"] > 0: - final_bench_dict[b["name"]] = b - - for tmp_bench in tmp_benchmarks: - if tmp_bench["real_time"] > 0: - final_bench_dict[tmp_bench["name"]] = tmp_bench - - temp_build_dict["benchmarks"] = [v for k, v in final_bench_dict.items()] - with open(build_json_path, "w") as f: - json_str = json.dumps(temp_build_dict, indent=2) - 
f.write(json_str) - - -def validate_algorithm(algos_conf, algo, gpu_present): - algos_conf_keys = set(algos_conf.keys()) - if gpu_present: - return algo in algos_conf_keys - else: - return ( - algo in algos_conf_keys - and algos_conf[algo]["requires_gpu"] is False - ) - - -def find_executable(algos_conf, algo, group, k, batch_size): - executable = algos_conf[algo]["executable"] - - file_name = (f"{algo},{group}", f"{algo},{group},k{k},bs{batch_size}") - - build_path = os.getenv("RAFT_HOME") - if build_path is not None: - build_path = os.path.join( - build_path, "cpp", "build", "release", executable - ) - if os.path.exists(build_path): - print(f"-- Using RAFT bench from repository in {build_path}. ") - return (executable, build_path, file_name) - - # if there is no build folder present, we look in the conda environment - conda_path = os.getenv("CONDA_PREFIX") - if conda_path is not None: - conda_path = os.path.join(conda_path, "bin", "ann", executable) - if os.path.exists(conda_path): - print("-- Using RAFT bench found in conda environment. 
") - return (executable, conda_path, file_name) - - else: - raise FileNotFoundError(executable) - - -def run_build_and_search( - conf_file, - conf_filename, - conf_filedir, - executables_to_run, - dataset_path, - force, - build, - search, - dry_run, - k, - batch_size, - search_threads, - mode="throughput", - raft_log_level="info", -): - for ( - executable, - ann_executable_path, - output_filename, - ) in executables_to_run.keys(): - # Need to write temporary configuration - temp_conf_filename = ( - f"{conf_filename}_{output_filename[1]}_{uuid.uuid1()}.json" - ) - with open(temp_conf_filename, "w") as f: - temp_conf = dict() - temp_conf["dataset"] = conf_file["dataset"] - temp_conf["search_basic_param"] = conf_file["search_basic_param"] - temp_conf["index"] = executables_to_run[ - (executable, ann_executable_path, output_filename) - ]["index"] - json_str = json.dumps(temp_conf, indent=2) - f.write(json_str) - - legacy_result_folder = os.path.join( - dataset_path, conf_file["dataset"]["name"], "result" - ) - os.makedirs(legacy_result_folder, exist_ok=True) - if build: - build_folder = os.path.join(legacy_result_folder, "build") - os.makedirs(build_folder, exist_ok=True) - build_file = f"{output_filename[0]}.json" - temp_build_file = f"{build_file}.lock" - cmd = [ - ann_executable_path, - "--build", - "--data_prefix=" + dataset_path, - "--benchmark_out_format=json", - "--benchmark_counters_tabular=true", - "--benchmark_out=" - + f"{os.path.join(build_folder, temp_build_file)}", - "--raft_log_level=" + f"{parse_log_level(raft_log_level)}", - ] - if force: - cmd = cmd + ["--force"] - cmd = cmd + [temp_conf_filename] - - if dry_run: - print( - "Benchmark command for %s:\n%s\n" - % (output_filename[0], " ".join(cmd)) - ) - else: - try: - subprocess.run(cmd, check=True) - merge_build_files( - build_folder, build_file, temp_build_file - ) - except Exception as e: - print("Error occurred running benchmark: %s" % e) - finally: - os.remove(os.path.join(build_folder, 
temp_build_file)) - if not search: - os.remove(temp_conf_filename) - - if search: - search_folder = os.path.join(legacy_result_folder, "search") - os.makedirs(search_folder, exist_ok=True) - search_file = f"{output_filename[1]}.json" - cmd = [ - ann_executable_path, - "--search", - "--data_prefix=" + dataset_path, - "--benchmark_counters_tabular=true", - "--override_kv=k:%s" % k, - "--override_kv=n_queries:%s" % batch_size, - "--benchmark_min_warmup_time=1", - "--benchmark_out_format=json", - "--mode=%s" % mode, - "--benchmark_out=" - + f"{os.path.join(search_folder, search_file)}", - "--raft_log_level=" + f"{parse_log_level(raft_log_level)}", - ] - if force: - cmd = cmd + ["--force"] - - if search_threads: - cmd = cmd + ["--threads=%s" % search_threads] - - cmd = cmd + [temp_conf_filename] - if dry_run: - print( - "Benchmark command for %s:\n%s\n" - % (output_filename[1], " ".join(cmd)) - ) - else: - try: - subprocess.run(cmd, check=True) - except Exception as e: - print("Error occurred running benchmark: %s" % e) - finally: - os.remove(temp_conf_filename) - - -def main(): - scripts_path = os.path.dirname(os.path.realpath(__file__)) - call_path = os.getcwd() - - # Read list of allowed algorithms - try: - import rmm # noqa: F401 - - gpu_present = True - except ImportError: - gpu_present = False - - with open(f"{scripts_path}/algos.yaml", "r") as f: - algos_yaml = yaml.safe_load(f) - - if "RAPIDS_DATASET_ROOT_DIR" in os.environ: - default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") - else: - default_dataset_path = os.path.join(call_path, "datasets/") - - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - - parser.add_argument( - "--subset-size", - type=positive_int, - help="the number of subset rows of the dataset to build the index", - ) - parser.add_argument( - "-k", - "--count", - default=10, - type=positive_int, - help="the number of nearest neighbors to search for", - ) - parser.add_argument( - "-bs", - 
"--batch-size", - default=10000, - type=positive_int, - help="number of query vectors to use in each query trial", - ) - parser.add_argument( - "--dataset-configuration", - help="path to YAML configuration file for datasets", - ) - parser.add_argument( - "--configuration", - help="path to YAML configuration file or directory for algorithms\ - Any run groups found in the specified file/directory will \ - automatically override groups of the same name present in the \ - default configurations, including `base`", - ) - parser.add_argument( - "--dataset", - help="name of dataset", - default="glove-100-inner", - ) - parser.add_argument( - "--dataset-path", - help="path to dataset folder, by default will look in " - "RAPIDS_DATASET_ROOT_DIR if defined, otherwise a datasets " - "subdirectory from the calling directory", - default=default_dataset_path, - ) - parser.add_argument("--build", action="store_true") - parser.add_argument("--search", action="store_true") - parser.add_argument( - "--algorithms", - help="run only comma separated list of named \ - algorithms. If parameters `groups` and `algo-groups \ - are both undefined, then group `base` is run by default", - default=None, - ) - parser.add_argument( - "--groups", - help="run only comma separated groups of parameters", - default="base", - ) - parser.add_argument( - "--algo-groups", - help='add comma separated . to run. \ - Example usage: "--algo-groups=raft_cagra.large,hnswlib.large"', - ) - parser.add_argument( - "-f", - "--force", - help="re-run algorithms even if their results \ - already exist", - action="store_true", - ) - - parser.add_argument( - "-m", - "--search-mode", - help="run search in 'latency' (measure individual batches) or " - "'throughput' (pipeline batches and measure end-to-end) mode", - default="latency", - ) - - parser.add_argument( - "-t", - "--search-threads", - help="specify the number threads to use for throughput benchmark." - " Single value or a pair of min and max separated by ':'. 
" - "Example: --search-threads=1:4. Power of 2 values between 'min' " - "and 'max' will be used. If only 'min' is specified, then a " - "single test is run with 'min' threads. By default min=1, " - "max=.", - default=None, - ) - - parser.add_argument( - "-r", - "--dry-run", - help="dry-run mode will convert the yaml config for the specified " - "algorithms and datasets to the json format that's consumed " - "by the lower-level c++ binaries and then print the command " - "to run execute the benchmarks but will not actually execute " - "the command.", - action="store_true", - ) - parser.add_argument( - "--raft-log-level", - help="Log level, possible values are " - "[off, error, warn, info, debug, trace]. " - "Default: 'info'. Note that 'debug' or more detailed " - "logging level requires that the library is compiled with " - "-DRAFT_ACTIVE_LEVEL= where >= ", - default="info", - ) - - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - args = parser.parse_args() - - # If both build and search are not provided, - # run both - if not args.build and not args.search: - build = True - search = True - else: - build = args.build - search = args.search - - dry_run = args.dry_run - - mode = args.search_mode - k = args.count - batch_size = args.batch_size - - # Read configuration file associated to datasets - if args.dataset_configuration: - dataset_conf_f = args.dataset_configuration - else: - dataset_conf_f = os.path.join(scripts_path, "conf", "datasets.yaml") - with open(dataset_conf_f, "r") as f: - dataset_conf_all = yaml.safe_load(f) - - dataset_conf = None - for dataset in dataset_conf_all: - if args.dataset == dataset["name"]: - dataset_conf = dataset - break - if not dataset_conf: - raise ValueError("Could not find a dataset configuration") - - conf_file = dict() - conf_file["dataset"] = dataset_conf - if args.subset_size: - conf_file["dataset"]["subset_size"] = args.subset_size - - conf_file["search_basic_param"] = {} - conf_file["search_basic_param"]["k"] = k 
- conf_file["search_basic_param"]["batch_size"] = batch_size - - algos_conf_fs = os.listdir(os.path.join(scripts_path, "conf", "algos")) - algos_conf_fs = [ - os.path.join(scripts_path, "conf", "algos", f) - for f in algos_conf_fs - if ".json" not in f - ] - conf_filedir = os.path.join(scripts_path, "conf", "algos") - if args.configuration: - if os.path.isdir(args.configuration): - conf_filedir = args.configuration - algos_conf_fs = algos_conf_fs + [ - os.path.join(args.configuration, f) - for f in os.listdir(args.configuration) - if ".json" not in f - ] - elif os.path.isfile(args.configuration): - conf_filedir = os.path.normpath(args.configuration).split(os.sep) - conf_filedir = os.path.join(*conf_filedir[:-1]) - algos_conf_fs = algos_conf_fs + [args.configuration] - - filter_algos = True if args.algorithms else False - if filter_algos: - allowed_algos = args.algorithms.split(",") - named_groups = args.groups.split(",") - filter_algo_groups = True if args.algo_groups else False - allowed_algo_groups = None - if filter_algo_groups: - allowed_algo_groups = [ - algo_group.split(".") for algo_group in args.algo_groups.split(",") - ] - allowed_algo_groups = list(zip(*allowed_algo_groups)) - algos_conf = dict() - for algo_f in algos_conf_fs: - with open(algo_f, "r") as f: - try: - algo = yaml.safe_load(f) - except Exception as e: - warnings.warn( - f"Could not load YAML config {algo_f} due to " - + e.with_traceback() - ) - continue - insert_algo = True - insert_algo_group = False - if filter_algos: - if algo["name"] not in allowed_algos: - insert_algo = False - if filter_algo_groups: - if algo["name"] in allowed_algo_groups[0]: - insert_algo_group = True - - def add_algo_group(group_list): - if algo["name"] not in algos_conf: - algos_conf[algo["name"]] = {"groups": {}} - for group in algo["groups"].keys(): - if group in group_list: - algos_conf[algo["name"]]["groups"][group] = algo[ - "groups" - ][group] - if "constraints" in algo: - 
algos_conf[algo["name"]]["constraints"] = algo[ - "constraints" - ] - - if insert_algo: - add_algo_group(named_groups) - if insert_algo_group: - add_algo_group(allowed_algo_groups[1]) - - executables_to_run = dict() - for algo in algos_conf.keys(): - validate_algorithm(algos_yaml, algo, gpu_present) - for group in algos_conf[algo]["groups"].keys(): - executable = find_executable( - algos_yaml, algo, group, k, batch_size - ) - if executable not in executables_to_run: - executables_to_run[executable] = {"index": []} - build_params = algos_conf[algo]["groups"][group]["build"] or {} - search_params = algos_conf[algo]["groups"][group]["search"] or {} - - param_names = [] - param_lists = [] - for param in build_params.keys(): - param_names.append(param) - param_lists.append(build_params[param]) - - all_build_params = itertools.product(*param_lists) - - search_param_names = [] - search_param_lists = [] - for search_param in search_params.keys(): - search_param_names.append(search_param) - search_param_lists.append(search_params[search_param]) - - for params in all_build_params: - index = {"algo": algo, "build_param": {}} - if group != "base": - index_name = f"{algo}_{group}" - else: - index_name = f"{algo}" - for i in range(len(params)): - index["build_param"][param_names[i]] = params[i] - index_name += "." 
+ f"{param_names[i]}{params[i]}" - - if "constraints" in algos_conf[algo]: - if "build" in algos_conf[algo]["constraints"]: - importable = algos_conf[algo]["constraints"]["build"] - importable = importable.split(".") - module = ".".join(importable[:-1]) - func = importable[-1] - validator = import_module(module) - build_constraints = getattr(validator, func) - if "dims" not in conf_file["dataset"]: - raise ValueError( - "`dims` needed for build constraints but not " - "specified in datasets.yaml" - ) - if not build_constraints( - index["build_param"], conf_file["dataset"]["dims"] - ): - continue - index_filename = ( - index_name - if len(index_name) < 128 - else str(hash(index_name)) - ) - index["name"] = index_name - index["file"] = os.path.join( - args.dataset_path, args.dataset, "index", index_filename - ) - index["search_params"] = [] - all_search_params = itertools.product(*search_param_lists) - for search_params in all_search_params: - search_dict = dict() - for i in range(len(search_params)): - search_dict[search_param_names[i]] = search_params[i] - if "constraints" in algos_conf[algo]: - if "search" in algos_conf[algo]["constraints"]: - importable = algos_conf[algo]["constraints"][ - "search" - ] - importable = importable.split(".") - module = ".".join(importable[:-1]) - func = importable[-1] - validator = import_module(module) - search_constraints = getattr(validator, func) - if search_constraints( - search_dict, - index["build_param"], - k, - batch_size, - ): - index["search_params"].append(search_dict) - else: - index["search_params"].append(search_dict) - executables_to_run[executable]["index"].append(index) - - if len(index["search_params"]) == 0: - print("No search parameters were added to configuration") - - run_build_and_search( - conf_file, - f"{args.dataset}", - conf_filedir, - executables_to_run, - args.dataset_path, - args.force, - build, - search, - dry_run, - k, - batch_size, - args.search_threads, - mode, - args.raft_log_level, - ) - - -if 
__name__ == "__main__": - main() diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/algos.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/algos.yaml deleted file mode 100644 index e382bdcba6..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/algos.yaml +++ /dev/null @@ -1,42 +0,0 @@ -faiss_gpu_flat: - executable: FAISS_GPU_FLAT_ANN_BENCH - requires_gpu: true -faiss_gpu_ivf_flat: - executable: FAISS_GPU_IVF_FLAT_ANN_BENCH - requires_gpu: true -faiss_gpu_ivf_pq: - executable: FAISS_GPU_IVF_PQ_ANN_BENCH - requires_gpu: true -faiss_gpu_ivf_sq: - executable: FAISS_GPU_IVF_PQ_ANN_BENCH - requires_gpu: true -faiss_cpu_flat: - executable: FAISS_CPU_FLAT_ANN_BENCH - requires_gpu: false -faiss_cpu_ivf_flat: - executable: FAISS_CPU_IVF_FLAT_ANN_BENCH - requires_gpu: false -faiss_cpu_ivf_pq: - executable: FAISS_CPU_IVF_PQ_ANN_BENCH - requires_gpu: false -raft_ivf_flat: - executable: RAFT_IVF_FLAT_ANN_BENCH - requires_gpu: true -raft_ivf_pq: - executable: RAFT_IVF_PQ_ANN_BENCH - requires_gpu: true -raft_cagra: - executable: RAFT_CAGRA_ANN_BENCH - requires_gpu: true -raft_brute_force: - executable: RAFT_BRUTE_FORCE_ANN_BENCH - requires_gpu: true -ggnn: - executable: GGNN_ANN_BENCH - requires_gpu: true -hnswlib: - executable: HNSWLIB_ANN_BENCH - requires_gpu: false -raft_cagra_hnswlib: - executable: RAFT_CAGRA_HNSWLIB_ANN_BENCH - requires_gpu: true diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_flat.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_flat.yaml deleted file mode 100644 index 25eaf03d40..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_flat.yaml +++ /dev/null @@ -1,5 +0,0 @@ -name: faiss_cpu_flat -groups: - base: - build: - search: diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_flat.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_flat.yaml deleted file mode 100644 index 29c145f86d..0000000000 
--- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_flat.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: faiss_cpu_ivf_flat -groups: - base: - build: - nlist: [2048] - ratio: [10] - useFloat16: [False] - search: - nprobe: [1, 5, 10, 50, 100, 200] - refine_ratio: [1] \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_pq.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_pq.yaml deleted file mode 100644 index a531ec8294..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_cpu_ivf_pq.yaml +++ /dev/null @@ -1,18 +0,0 @@ -name: faiss_cpu_ivf_pq -groups: - base: - build: - nlist: [1024, 2048, 4096, 8192] - M: [48, 32, 16] - ratio: [10] - bitsPerCode: [8, 6, 5, 4] - search: - nprobe: [1, 5, 10, 50, 100, 200] - large: - build: - nlist: [8192, 16384, 32768, 65536] - M: [48, 32, 16] - ratio: [10] - bitsPerCode: [8, 6, 5, 4] - search: - nprobe: [20, 30, 40, 50, 100, 200, 500, 1000] diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_flat.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_flat.yaml deleted file mode 100644 index a722e1b91c..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_flat.yaml +++ /dev/null @@ -1,5 +0,0 @@ -name: faiss_gpu_flat -groups: - base: - build: - search: diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_flat.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_flat.yaml deleted file mode 100644 index e4abc35f5c..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_flat.yaml +++ /dev/null @@ -1,21 +0,0 @@ -name: faiss_gpu_ivf_flat -groups: - base: - build: - nlist: [2048] - ratio: [10] - useFloat16: [False, True] - use_raft: [False] - search: - nprobe: [1, 5, 10, 50, 100, 200] - refine_ratio: [1] -groups: - baseraft: - build: - nlist: [2048] - 
ratio: [10] - useFloat16: [False, True] - use_raft: [True] - search: - nprobe: [1, 5, 10, 50, 100, 200] - refine_ratio: [1] \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_pq.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_pq.yaml deleted file mode 100644 index 7560ceaa9c..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/faiss_gpu_ivf_pq.yaml +++ /dev/null @@ -1,77 +0,0 @@ -name: faiss_gpu_ivf_pq -constraints: - build: raft-ann-bench.constraints.faiss_gpu_ivf_pq_build_constraints - search: raft-ann-bench.constraints.faiss_gpu_ivf_pq_search_constraints -groups: - base: - build: - nlist: [1024, 2048, 4096, 8192] - M: [64, 32, 16] - ratio: [10] - usePrecomputed: [False, True] - useFloat16: [False, True] - use_raft: [False] - bitsPerCode: [8] - search: - nprobe: [1, 5, 10, 50, 100, 200] - refine_ratio: [1, 2, 4] - baseraft: - build: - nlist: [1024, 2048, 4096, 8192] - M: [64, 32, 16] - ratio: [10] - usePrecomputed: [False] - useFloat16: [False, True] - use_raft: [True] - bitsPerCode: [8, 6, 5, 4] - search: - nprobe: [1, 5, 10, 50, 100, 200] - refine_ratio: [1, 2, 4] - large: - build: - nlist: [8192, 16384, 32768, 65536] - M: [48, 32, 16] - ratio: [4] - usePrecomputed: [False, True] - useFloat16: [False, True] - use_raft: [False] - bitsPerCode: [8] - search: - nprobe: [20, 30, 40, 50, 100, 200, 500, 1000] - refine_ratio: [1, 2, 4] - largeraft: - build: - nlist: [8192, 16384, 32768, 65536] - M: [48, 32, 16] - ratio: [4] - usePrecomputed: [False] - useFloat16: [False, True] - use_raft: [True] - bitsPerCode: [8, 6, 5, 4] - search: - nprobe: [20, 30, 40, 50, 100, 200, 500, 1000] - refine_ratio: [1, 2, 4] - 100M: - build: - nlist: [50000] - M: [48] - ratio: [10] - usePrecomputed: [False, True] - useFloat16: [False, True] - use_raft: [False] - bitsPerCode: [8] - search: - nprobe: [20, 30, 40, 50, 100, 200, 500, 1000] - refine_ratio: [1] - 100Mraft: - build: - 
nlist: [50000] - M: [48] - ratio: [10] - usePrecomputed: [False, True] - useFloat16: [False, True] - use_raft: [True] - bitsPerCode: [8, 6, 5, 4] - search: - nprobe: [20, 30, 40, 50, 100, 200, 500, 1000] - refine_ratio: [1] \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/hnswlib.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/hnswlib.yaml deleted file mode 100644 index e7a4e6b506..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/hnswlib.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: hnswlib -constraints: - search: raft_ann_bench.constraints.hnswlib_search_constraints -groups: - base: - build: - M: [12, 16, 24, 36] - efConstruction: [64, 128, 256, 512] - search: - ef: [10, 20, 40, 60, 80, 120, 200, 400, 600, 800] diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_brute_force.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_brute_force.yaml deleted file mode 100644 index da99841f9b..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_brute_force.yaml +++ /dev/null @@ -1,5 +0,0 @@ -name: raft_brute_force -groups: - base: - build: - search: diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_cagra.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_cagra.yaml deleted file mode 100644 index bb66b4b232..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_cagra.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: raft_cagra -constraints: - build: raft_ann_bench.constraints.raft_cagra_build_constraints - search: raft_ann_bench.constraints.raft_cagra_search_constraints -groups: - base: - build: - graph_degree: [32, 64, 128, 256] - intermediate_graph_degree: [32, 64, 96, 128] - graph_build_algo: ["NN_DESCENT"] - search: - itopk: [32, 64, 128, 256, 512] - search_width: [1, 2, 4, 8, 16, 32, 64] diff --git 
a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_cagra_hnswlib.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_cagra_hnswlib.yaml deleted file mode 100644 index 3ac2d16b68..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_cagra_hnswlib.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: raft_cagra_hnswlib -constraints: - search: raft_ann_bench.constraints.hnswlib_search_constraints -groups: - base: - build: - graph_degree: [32, 64, 128, 256] - intermediate_graph_degree: [32, 64, 96, 128] - graph_build_algo: ["NN_DESCENT"] - search: - ef: [10, 20, 40, 60, 80, 120, 200, 400, 600, 800] diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_ivf_flat.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_ivf_flat.yaml deleted file mode 100644 index c36a26514d..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_ivf_flat.yaml +++ /dev/null @@ -1,9 +0,0 @@ -name: raft_ivf_flat -groups: - base: - build: - nlist: [1024, 2048, 4096, 8192, 16384, 32000, 64000] - ratio: [1, 2, 4] - niter: [20, 25] - search: - nprobe: [1, 5, 10, 50, 100, 200, 500, 1000, 2000] \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_ivf_pq.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_ivf_pq.yaml deleted file mode 100644 index bcdcde42a2..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/algos/raft_ivf_pq.yaml +++ /dev/null @@ -1,41 +0,0 @@ -name: raft_ivf_pq -constraints: - build: raft_ann_bench.constraints.raft_ivf_pq_build_constraints - search: raft_ann_bench.constraints.raft_ivf_pq_search_constraints -groups: - base: - build: - nlist: [1024, 2048, 4096, 8192] - pq_dim: [64, 32, 16] - pq_bits: [8, 6, 5, 4] - ratio: [10] - niter: [25] - search: - nprobe: [1, 5, 10, 50, 100, 200] - internalDistanceDtype: ["float"] - smemLutDtype: ["float", "fp8", "half"] - refine_ratio: [1, 2, 4] - large: - build: - 
nlist: [8192, 16384, 32768, 65536] - pq_dim: [48, 32, 16] - pq_bits: [8, 6, 5, 4] - ratio: [4] - niter: [20] - search: - nprobe: [20, 30, 40, 50, 100, 200, 500, 1000] - internalDistanceDtype: ["float"] - smemLutDtype: ["float", "fp8", "half"] - refine_ratio: [1, 2, 4] - 100M: - build: - nlist: [50000] - pq_dim: [48] - pq_bits: [8, 6, 5, 4] - ratio: [10] - niter: [10] - search: - nprobe: [20, 30, 40, 50, 100, 200, 500, 1000] - internalDistanceDtype: ["float"] - smemLutDtype: ["float", "fp8", "half"] - refine_ratio: [1] diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/bigann-100M.json b/python/raft-ann-bench/src/raft_ann_bench/run/conf/bigann-100M.json deleted file mode 100644 index 55abca25d2..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/bigann-100M.json +++ /dev/null @@ -1,192 +0,0 @@ -{ - "dataset": { - "name": "bigann-100M", - "base_file": "bigann-1B/base.1B.u8bin", - "subset_size": 100000000, - "query_file": "bigann-1B/query.public.10K.u8bin", - "groundtruth_neighbors_file": "bigann-100M/groundtruth.neighbors.ibin", - "distance": "euclidean" - }, - - "search_basic_param": { - "batch_size": 10000, - "k": 10 - }, - - "index": [ - { - "name": "raft_ivf_pq.dimpq64-cluster5K", - "algo": "raft_ivf_pq", - "build_param": {"niter": 25, "nlist": 5000, "pq_dim": 64, "ratio": 10}, - "file": "bigann-100M/raft_ivf_pq/dimpq64-cluster5K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 1000, 
"internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } - ] - }, - { - "name": "raft_ivf_pq.dimpq64-cluster10K", - "algo": "raft_ivf_pq", - "build_param": {"niter": 25, "nlist": 10000, "pq_dim": 64, "ratio": 10}, - "file": "bigann-100M/raft_ivf_pq/dimpq64-cluster5K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": 
"float" }, - { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } - ] - }, - { - "name": "hnswlib.M12", - "algo": "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file": "bigann-100M/hnswlib/M12", - "search_params": [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ] - }, - { - "name": "hnswlib.M16", - "algo": "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file": "bigann-100M/hnswlib/M16", - "search_params": [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - 
{"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ] - }, - { - "name": "hnswlib.M24", - "algo": "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file": "bigann-100M/hnswlib/M24", - "search_params": [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ] - }, - { - "name": "hnswlib.M36", - "algo": "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file": "bigann-100M/hnswlib/M36", - "search_params": [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ] - }, - { - "name": "raft_ivf_flat.nlist100K", - "algo": "raft_ivf_flat", - "build_param": {"nlist": 100000, "niter": 25, "ratio": 5}, - "file": "bigann-100M/raft_ivf_flat/nlist100K", - "search_params": [ - {"max_batch":10000, "max_k":10, "nprobe":20}, - {"max_batch":10000, "max_k":10, "nprobe":30}, - {"max_batch":10000, "max_k":10, "nprobe":40}, - {"max_batch":10000, "max_k":10, "nprobe":50}, - {"max_batch":10000, "max_k":10, "nprobe":100}, - {"max_batch":10000, "max_k":10, "nprobe":200}, - {"max_batch":10000, "max_k":10, "nprobe":500}, - {"max_batch":10000, "max_k":10, "nprobe":1000} - ] - }, - { - "name": "raft_cagra.dim32", - "algo": "raft_cagra", - "build_param": {"graph_degree": 32}, - "file": "bigann-100M/raft_cagra/dim32", - "search_params": [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ] - }, - { - "name": "raft_cagra.dim64", - "algo": "raft_cagra", - "build_param": {"graph_degree": 64}, - "file": "bigann-100M/raft_cagra/dim64", - "search_params": [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ] - } - ] -} diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/datasets.yaml b/python/raft-ann-bench/src/raft_ann_bench/run/conf/datasets.yaml deleted file mode 100644 index 188d24d20f..0000000000 --- 
a/python/raft-ann-bench/src/raft_ann_bench/run/conf/datasets.yaml +++ /dev/null @@ -1,127 +0,0 @@ -- name: bigann-1B - base_file: bigann-1B/base.1B.u8bin - subset_size: 100000000 - dims: 128 - query_file: bigann-1B/query.public.10K.u8bin - groundtruth_neighbors_file: bigann-1B/groundtruth.neighbors.ibin - distance: euclidean - -- name: deep-1B - base_file: deep-1B/base.1B.fbin - query_file: deep-1B/query.public.10K.fbin - dims: 96 - groundtruth_neighbors_file: deep-1B/groundtruth.neighbors.ibin - distance: inner_product - -- name: bigann-100M - base_file: bigann-100M/base.1B.u8bin - subset_size: 100000000 - dims: 128 - query_file: bigann-100M/query.public.10K.u8bin - groundtruth_neighbors_file: bigann-100M/groundtruth.neighbors.ibin - distance: euclidean - -- name: deep-image-96-inner - base_file: deep-image-96-inner/base.fbin - query_file: deep-image-96-inner/query.fbin - dims: 96 - groundtruth_neighbors_file: deep-image-96-inner/groundtruth.neighbors.ibin - distance: euclidean - -- name: fashion-mnist-784-euclidean - dims: 784 - base_file: fashion-mnist-784-euclidean/base.fbin - query_file: fashion-mnist-784-euclidean/query.fbin - groundtruth_neighbors_file: fashion-mnist-784-euclidean/groundtruth.neighbors.ibin - distance: euclidean - -- name: gist-960-euclidean - dims: 960 - base_file: gist-960-euclidean/base.fbin - query_file: gist-960-euclidean/query.fbin - groundtruth_neighbors_file: gist-960-euclidean/groundtruth.neighbors.ibin - distance: euclidean - -- name: glove-50-angular - dims: 50 - base_file: glove-50-angular/base.fbin - query_file: glove-50-angular/query.fbin - groundtruth_neighbors_file: glove-50-angular/groundtruth.neighbors.ibin - distance: euclidean - -- name: glove-50-inner - dims: 50 - base_file: glove-50-inner/base.fbin - query_file: glove-50-inner/query.fbin - groundtruth_neighbors_file: glove-50-inner/groundtruth.neighbors.ibin - distance: euclidean - -- name: glove-100-angular - dims: 100 - base_file: glove-100-angular/base.fbin - 
query_file: glove-100-angular/query.fbin - groundtruth_neighbors_file: glove-100-angular/groundtruth.neighbors.ibin - distance: euclidean - -- name: glove-100-inner - dims: 100 - base_file: glove-100-inner/base.fbin - query_file: glove-100-inner/query.fbin - groundtruth_neighbors_file: glove-100-inner/groundtruth.neighbors.ibin - distance: euclidean - -- name: lastfm-65-angular - dims: 65 - base_file: lastfm-65-angular/base.fbin - query_file: lastfm-65-angular/query.fbin - groundtruth_neighbors_file: lastfm-65-angular/groundtruth.neighbors.ibin - distance: euclidean - -- name: mnist-784-euclidean - dims: 784 - base_file: mnist-784-euclidean/base.fbin - query_file: mnist-784-euclidean/query.fbin - groundtruth_neighbors_file: mnist-784-euclidean/groundtruth.neighbors.ibin - distance: euclidean - -- name: nytimes-256-angular - dims: 256 - base_file: nytimes-256-angular/base.fbin - query_file: nytimes-256-angular/query.fbin - groundtruth_neighbors_file: nytimes-256-angular/groundtruth.neighbors.ibin - distance: euclidean - -- name: nytimes-256-inner - dims: 256 - base_file: nytimes-256-inner/base.fbin - query_file: nytimes-256-inner/query.fbin - groundtruth_neighbors_file: nytimes-256-inner/groundtruth.neighbors.ibin - distance: euclidean - -- name: sift-128-euclidean - dims: 128 - base_file: sift-128-euclidean/base.fbin - query_file: sift-128-euclidean/query.fbin - groundtruth_neighbors_file: sift-128-euclidean/groundtruth.neighbors.ibin - distance: euclidean - -- name: wiki_all_1M - dims: 768 - base_file: wiki_all_1M/base.1M.fbin - query_file: wiki_all_1M/queries.fbin - groundtruth_neighbors_file: wiki_all_1M/groundtruth.1M.neighbors.ibin - distance: euclidean - -- name: wiki_all_10M - dims: 768 - base_file: wiki_all_10M/base.10M.fbin - query_file: wiki_all_10M/queries.fbin - groundtruth_neighbors_file: wiki_all_10M/groundtruth.10M.neighbors.ibin - distance: euclidean - -- name: wiki_all_88M - dims: 768 - base_file: wiki_all_88M/base.88M.fbin - query_file: 
wiki_all_88M/queries.fbin - groundtruth_neighbors_file: wiki_all_88M/groundtruth.88M.neighbors.ibin - distance: euclidean diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/deep-100M.json b/python/raft-ann-bench/src/raft_ann_bench/run/conf/deep-100M.json deleted file mode 100644 index ea92a0de18..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/deep-100M.json +++ /dev/null @@ -1,458 +0,0 @@ -{ - "dataset": { - "name": "deep-100M", - "base_file": "deep-100M/base.1B.fbin", - "subset_size": 100000000, - "query_file": "deep-100M/query.public.10K.fbin", - "groundtruth_neighbors_file": "deep-100M/groundtruth.neighbors.ibin", - "distance": "euclidean" - }, - - "search_basic_param": { - "batch_size": 10000, - "k": 10 - }, - - "index": [ - { - "name": "hnswlib.M12", - "algo": "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file": "deep-100M/hnswlib/M12", - "search_params": [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ] - }, - { - "name": "hnswlib.M16", - "algo": "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file": "deep-100M/hnswlib/M16", - "search_params": [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ] - }, - { - "name": "hnswlib.M24", - "algo": "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file": "deep-100M/hnswlib/M24", - "search_params": [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ] - }, - { - "name": "hnswlib.M36", - "algo": "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file": "deep-100M/hnswlib/M36", - "search_params": [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - 
{"ef":600}, - {"ef":800} - ] - }, - { - "name": "faiss_gpu_ivf_flat.nlist50K", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":50000}, - "file": "deep-100M/faiss_gpu_ivf_flat/nlist50K", - "search_params": [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_gpu_ivf_flat.nlist100K", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":100000}, - "file": "deep-100M/faiss_gpu_ivf_flat/nlist100K", - "search_params": [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_gpu_ivf_flat.nlist200K", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":200000}, - "file": "deep-100M/faiss_gpu_ivf_flat/nlist200K", - "search_params": [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_gpu_ivf_pq.M48-nlist16K", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":16384, "M":48}, - "file": "deep-100M/faiss_gpu_ivf_pq/M48-nlist16K", - "search_params": [ - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500} - ] - }, - { - "name": "faiss_gpu_ivf_pq.M48-nlist50K", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":50000, "M":48}, - "file": "deep-100M/faiss_gpu_ivf_pq/M48-nlist50K", - "search_params": [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_gpu_ivf_pq.M48-nlist100K", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":100000, "M":48}, - "file": "deep-100M/faiss_gpu_ivf_pq/M48-nlist100K", - "search_params": [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, 
- {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_gpu_ivf_pq.M48-nlist200K", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":200000, "M":48}, - "file": "deep-100M/faiss_gpu_ivf_pq/M48-nlist200K", - "search_params": [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - - - { - "name": "raft_ivf_flat.nlist50K", - "algo": "raft_ivf_flat", - "build_param": {"nlist": 50000, "niter": 25, "ratio": 5}, - "file": "deep-100M/raft_ivf_flat/nlist50K", - "search_params": [ - {"max_batch":10000, "max_k":10, "nprobe":20}, - {"max_batch":10000, "max_k":10, "nprobe":30}, - {"max_batch":10000, "max_k":10, "nprobe":40}, - {"max_batch":10000, "max_k":10, "nprobe":50}, - {"max_batch":10000, "max_k":10, "nprobe":100}, - {"max_batch":10000, "max_k":10, "nprobe":200}, - {"max_batch":10000, "max_k":10, "nprobe":500}, - {"max_batch":10000, "max_k":10, "nprobe":1000} - ] - }, - { - "name": "raft_ivf_flat.nlist100K", - "algo": "raft_ivf_flat", - "build_param": {"nlist": 100000, "niter": 25, "ratio": 5}, - "file": "deep-100M/raft_ivf_flat/nlist100K", - "search_params": [ - {"max_batch":10000, "max_k":10, "nprobe":20}, - {"max_batch":10000, "max_k":10, "nprobe":30}, - {"max_batch":10000, "max_k":10, "nprobe":40}, - {"max_batch":10000, "max_k":10, "nprobe":50}, - {"max_batch":10000, "max_k":10, "nprobe":100}, - {"max_batch":10000, "max_k":10, "nprobe":200}, - {"max_batch":10000, "max_k":10, "nprobe":500}, - {"max_batch":10000, "max_k":10, "nprobe":1000} - ] - }, - { - "name": "raft_ivf_flat.nlist200K", - "algo": "raft_ivf_flat", - "build_param": {"nlist": 200000, "niter": 25, "ratio": 5}, - "file": "deep-100M/raft_ivf_flat/nlist200K", - "search_params": [ - {"max_batch":10000, "max_k":10, "nprobe":20}, - {"max_batch":10000, "max_k":10, "nprobe":30}, - {"max_batch":10000, "max_k":10, "nprobe":40}, - {"max_batch":10000, "max_k":10, "nprobe":50}, - 
{"max_batch":10000, "max_k":10, "nprobe":100}, - {"max_batch":10000, "max_k":10, "nprobe":200}, - {"max_batch":10000, "max_k":10, "nprobe":500}, - {"max_batch":10000, "max_k":10, "nprobe":1000} - ] - }, -{ - "name": "raft_ivf_pq.d96b5n50K", - "algo": "raft_ivf_pq", - "build_param": {"nlist": 50000, "pq_dim": 96, "pq_bits": 5, "ratio": 10, "niter": 25}, - "file": "deep-100M/raft_ivf_pq/d96b5n50K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, - { "nprobe": 2000, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, - { "nprobe": 5000, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, - { 
"nprobe": 2000, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 5000, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 2000, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 5000, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 2000, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 5000, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, - { "nprobe": 20, "internalDistanceDtype": "half", 
"smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 2000, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 }, - { "nprobe": 5000, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 } - ] - }, - { - "name": "raft_ivf_pq.d64b5n50K", - "algo": "raft_ivf_pq", - "build_param": {"nlist": 50000, "pq_dim": 64, "pq_bits": 5, "ratio": 10, "niter": 25}, - "file": "deep-100M/raft_ivf_pq/d64b5n50K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, - { "nprobe": 2000, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, - { "nprobe": 5000, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": 
"half", "refine_ratio": 4 }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 2000, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 5000, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 2000, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 5000, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 50, 
"internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 2000, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 5000, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 2000, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 }, - { "nprobe": 5000, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 } - ] - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 200, "internalDistanceDtype": 
"float", "smemLutDtype": "float"}, - {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float"} - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_cagra.dim32", - "algo": "raft_cagra", - "build_param": {"graph_degree": 32, "intermediate_graph_degree": 48}, - "file": "deep-100M/raft_cagra/dim32", - "search_params": [ - {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "single_cta"}, - {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "single_cta"}, - {"itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "single_cta"}, - {"itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "single_cta"}, - {"itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "single_cta"}, - {"itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "single_cta"}, - {"itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "single_cta"}, - {"itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "single_cta"}, - {"itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "single_cta"}, - {"itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "single_cta"}, - {"itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "single_cta"}, - {"itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "single_cta"} - ] - }, - { - "name": "raft_cagra.dim32.multi_cta", - "algo": "raft_cagra", - "build_param": {"graph_degree": 32, "intermediate_graph_degree": 48}, - "file": "deep-100M/raft_cagra/dim32", - "search_params": [ - {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_cta"}, - {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_cta"}, - {"itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_cta"}, - {"itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_cta"}, - {"itopk": 96, 
"search_width": 2, "max_iterations": 48, "algo": "multi_cta"}, - {"itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_cta"}, - {"itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_cta"}, - {"itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_cta"}, - {"itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_cta"}, - {"itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_cta"}, - {"itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_cta"}, - {"itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_cta"} - ] - }, - { - "name": "raft_cagra.dim32.multi_kernel", - "algo": "raft_cagra", - "build_param": {"graph_degree": 32, "intermediate_graph_degree": 48}, - "file": "deep-100M/raft_cagra/dim32", - "search_params": [ - {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_kernel"}, - {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_kernel"}, - {"itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_kernel"}, - {"itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_kernel"}, - {"itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_kernel"}, - {"itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_kernel"}, - {"itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_kernel"}, - {"itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_kernel"}, - {"itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_kernel"}, - {"itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_kernel"}, - {"itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_kernel"}, - {"itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_kernel"} - ] - }, - { - "name": "raft_cagra.dim64", - "algo": "raft_cagra", - "build_param": {"graph_degree": 64}, - "file": "deep-100M/raft_cagra/dim64", - "search_params": [ - 
{"itopk": 32, "search_width": 1, "max_iterations": 0}, - {"itopk": 32, "search_width": 1, "max_iterations": 32}, - {"itopk": 64, "search_width": 4, "max_iterations": 16}, - {"itopk": 64, "search_width": 1, "max_iterations": 64}, - {"itopk": 96, "search_width": 2, "max_iterations": 48}, - {"itopk": 128, "search_width": 8, "max_iterations": 16}, - {"itopk": 128, "search_width": 2, "max_iterations": 64}, - {"itopk": 192, "search_width": 8, "max_iterations": 24}, - {"itopk": 192, "search_width": 2, "max_iterations": 96}, - {"itopk": 256, "search_width": 8, "max_iterations": 32}, - {"itopk": 384, "search_width": 8, "max_iterations": 48}, - {"itopk": 512, "search_width": 8, "max_iterations": 64} - ] - } - ] -} diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/deep-1B.json b/python/raft-ann-bench/src/raft_ann_bench/run/conf/deep-1B.json deleted file mode 100644 index e5190e073e..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/deep-1B.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "dataset": { - "name": "deep-1B", - "base_file": "deep-1B/base.1B.fbin", - "query_file": "deep-1B/query.public.10K.fbin", - "groundtruth_neighbors_file": "deep-1B/groundtruth.neighbors.ibin", - "distance": "inner_product" - }, - - "search_basic_param": { - "batch_size": 10000, - "k": 10 - }, - - "index": [ - { - "name": "faiss_gpu_ivf_pq.M48-nlist50K", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":50000, "M":48}, - "file": "deep-1B/faiss_gpu_ivf_pq/M48-nlist50K", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] - } - ] -} diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/deep-image-96-inner.json b/python/raft-ann-bench/src/raft_ann_bench/run/conf/deep-image-96-inner.json deleted file mode 100644 index 3d69e775a1..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/deep-image-96-inner.json +++ /dev/null 
@@ -1,1013 +0,0 @@ -{ - "dataset": { - "name": "deep-image-96-inner", - "base_file": "deep-image-96-inner/base.fbin", - "query_file": "deep-image-96-inner/query.fbin", - "groundtruth_neighbors_file": "deep-image-96-inner/groundtruth.neighbors.ibin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-image-96-inner/hnswlib/M12", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/deep-image-96-inner/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-image-96-inner/hnswlib/M16", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/deep-image-96-inner/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-image-96-inner/hnswlib/M24", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/deep-image-96-inner/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-image-96-inner/hnswlib/M36", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/deep-image-96-inner/hnswlib/M36" - }, - - - - - { - 
"name": "raft_bfknn", - "algo": "raft_bfknn", - - "build_param": {}, - "file": "index/deep-image-96-inner/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/deep-image-96-inner/raft_bfknn/bfknn" - }, - { - "name": "faiss_gpu_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/deep-image-96-inner/faiss_gpu_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_flat/nlist1024" - }, - { - "name": "faiss_gpu_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/deep-image-96-inner/faiss_gpu_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_flat/nlist2048" - }, - { - "name": "faiss_gpu_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/deep-image-96-inner/faiss_gpu_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_flat/nlist4096" - }, - { - "name": "faiss_gpu_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/deep-image-96-inner/faiss_gpu_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - 
}, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_flat/nlist8192" - }, - { - "name": "faiss_gpu_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/deep-image-96-inner/faiss_gpu_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_flat/nlist16384" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/deep-image-96-inner/faiss_gpu_ivf_pq/M64-nlist1024", - "search_params": [ - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/deep-image-96-inner/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { 
- "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": 
"index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist4096-int8" - }, - { - "name": 
"faiss_gpu_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_gpu_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/deep-image-96-inner/faiss_gpu_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/deep-image-96-inner/faiss_gpu_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - - "build_param": {"nlist": 1024, "pq_dim": 128, "ratio": 1, "niter": 25 - }, - "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "half"}, - {"nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half"}, - {"nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half"}, - {"nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half"}, - {"nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half"}, - {"nprobe": 1024, "internalDistanceDtype": "half", 
"smemLutDtype": "half"} - ], - "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - {"nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 5, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float"} - ], - "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half"} - ], - "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": 
"raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} - ], - "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} - ], - "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half"}, 
- {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half"} - ], - "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} - ], - "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 1024, 
"internalDistanceDtype": "float", "smemLutDtype": "fp8"} - ], - "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "float"}, - {"nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "float"}, - {"nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "float"}, - {"nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "float"}, - {"nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "float"}, - {"nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "float"} - ], - "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float"} - ], - "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - 
"file": "index/deep-image-96-inner/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-inner/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/deep-image-96-inner/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/deep-image-96-inner/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 32 - }, - "file" : "index/deep-image-96-inner/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/deep-image-96-inner/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 64 - }, - "file" : "index/deep-image-96-inner/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/deep-image-96-inner/raft_cagra/dim64" - } - ] -} diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/fashion-mnist-784-euclidean.json b/python/raft-ann-bench/src/raft_ann_bench/run/conf/fashion-mnist-784-euclidean.json deleted file mode 100644 index 2c86b0c4ee..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/fashion-mnist-784-euclidean.json +++ /dev/null @@ -1,1352 +0,0 @@ -{ - "dataset": { - "name": "fashion-mnist-784-euclidean", - "base_file": 
"fashion-mnist-784-euclidean/base.fbin", - "query_file": "fashion-mnist-784-euclidean/query.fbin", - "groundtruth_neighbors_file": "fashion-mnist-784-euclidean/groundtruth.neighbors.ibin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/fashion-mnist-784-euclidean/hnswlib/M12", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/fashion-mnist-784-euclidean/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/fashion-mnist-784-euclidean/hnswlib/M16", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/fashion-mnist-784-euclidean/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/fashion-mnist-784-euclidean/hnswlib/M24", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/fashion-mnist-784-euclidean/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/fashion-mnist-784-euclidean/hnswlib/M36", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/fashion-mnist-784-euclidean/hnswlib/M36" - }, - - - - - { - 
"name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "index/fashion-mnist-784-euclidean/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_bfknn/bfknn" - }, - { - "name": "faiss_gpu_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist1024" - }, - { - "name": "faiss_gpu_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist2048" - }, - { - "name": "faiss_gpu_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist4096" - }, - { - "name": "faiss_gpu_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist8192", - 
"search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist8192" - }, - { - "name": "faiss_gpu_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist16384" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-fp16", - "algo": 
"faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - 
"nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": 
"index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_gpu_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/fashion-mnist-784-euclidean/faiss_gpu_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": 
"index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - 
"search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - 
"search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - 
"search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - 
"search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": 
"float" - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 32 - }, - "file" : "index/fashion-mnist-784-euclidean/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/fashion-mnist-784-euclidean/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 64 - }, - "file" : "index/fashion-mnist-784-euclidean/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/fashion-mnist-784-euclidean/raft_cagra/dim64" - } - ] -} diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/gist-960-euclidean.json 
b/python/raft-ann-bench/src/raft_ann_bench/run/conf/gist-960-euclidean.json deleted file mode 100644 index c5480900a7..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/gist-960-euclidean.json +++ /dev/null @@ -1,1351 +0,0 @@ -{ - "dataset": { - "name": "gist-960-euclidean", - "base_file": "gist-960-euclidean/base.fbin", - "query_file": "gist-960-euclidean/query.fbin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/gist-960-euclidean/hnswlib/M12", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/gist-960-euclidean/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/gist-960-euclidean/hnswlib/M16", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/gist-960-euclidean/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/gist-960-euclidean/hnswlib/M24", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/gist-960-euclidean/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/gist-960-euclidean/hnswlib/M36", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - 
{"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/gist-960-euclidean/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "index/gist-960-euclidean/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/gist-960-euclidean/raft_bfknn/bfknn" - }, - { - "name": "faiss_gpu_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/gist-960-euclidean/faiss_gpu_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_flat/nlist1024" - }, - { - "name": "faiss_gpu_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/gist-960-euclidean/faiss_gpu_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_flat/nlist2048" - }, - { - "name": "faiss_gpu_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/gist-960-euclidean/faiss_gpu_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_flat/nlist4096" - }, - { - "name": "faiss_gpu_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": 
"index/gist-960-euclidean/faiss_gpu_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_flat/nlist8192" - }, - { - "name": "faiss_gpu_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/gist-960-euclidean/faiss_gpu_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_flat/nlist16384" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/gist-960-euclidean/faiss_gpu_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/gist-960-euclidean/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", 
- "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": 
"result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 
- }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_gpu_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/gist-960-euclidean/faiss_gpu_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/gist-960-euclidean/faiss_gpu_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - 
"numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": 
"index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": 
"index/gist-960-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": 
"index/gist-960-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": 
"index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_flat/nlist1024", - 
"search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/gist-960-euclidean/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 32 - }, - "file" : "index/gist-960-euclidean/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/gist-960-euclidean/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 64 - }, - "file" : "index/gist-960-euclidean/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/gist-960-euclidean/raft_cagra/dim64" - } - ] -} diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-100-angular.json b/python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-100-angular.json deleted file mode 100644 index 2074ef13a3..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-100-angular.json +++ /dev/null @@ -1,1351 +0,0 @@ -{ - "dataset": { - "name": "glove-100-angular", - "base_file": "glove-100-angular/base.fbin", - "query_file": "glove-100-angular/query.fbin", - "distance": "euclidean" - }, - "search_basic_param": { - 
"batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-100-angular/hnswlib/M12", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/glove-100-angular/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-100-angular/hnswlib/M16", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/glove-100-angular/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-100-angular/hnswlib/M24", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/glove-100-angular/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-100-angular/hnswlib/M36", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/glove-100-angular/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "index/glove-100-angular/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/glove-100-angular/raft_bfknn/bfknn" - }, - { - "name": "faiss_gpu_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - 
"build_param": { - "nlist": 1024 - }, - "file": "index/glove-100-angular/faiss_gpu_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_flat/nlist1024" - }, - { - "name": "faiss_gpu_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/glove-100-angular/faiss_gpu_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_flat/nlist2048" - }, - { - "name": "faiss_gpu_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/glove-100-angular/faiss_gpu_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_flat/nlist4096" - }, - { - "name": "faiss_gpu_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/glove-100-angular/faiss_gpu_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_flat/nlist8192" - }, - { - "name": "faiss_gpu_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - 
"file": "index/glove-100-angular/faiss_gpu_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_flat/nlist16384" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/glove-100-angular/faiss_gpu_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/glove-100-angular/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist1024-fp16" - }, - { - "name": 
"faiss_gpu_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - 
"nprobe": 2000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - 
"nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_gpu_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/glove-100-angular/faiss_gpu_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/glove-100-angular/faiss_gpu_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", 
- "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "nprobe": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 1024, - 
"internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - 
"search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": 
"result/glove-100-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - 
"name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/glove-100-angular/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_flat/nlist16384" - }, - - { 
- "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 32 - }, - "file" : "index/glove-100-angular/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/glove-100-angular/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 64 - }, - "file" : "index/glove-100-angular/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/glove-100-angular/raft_cagra/dim64" - } - ] -} diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-100-inner.json b/python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-100-inner.json deleted file mode 100644 index 5da3fa18d3..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-100-inner.json +++ /dev/null @@ -1,1314 +0,0 @@ -{ - "dataset": { - "name": "glove-100-inner", - "base_file": "glove-100-inner/base.fbin", - "query_file": "glove-100-inner/query.fbin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-100-inner/hnswlib/M12", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-100-inner/hnswlib/M16", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M16" - 
}, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-100-inner/hnswlib/M24", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-100-inner/hnswlib/M36", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "index/glove-100-inner/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/glove-100-inner/raft_bfknn/bfknn" - }, - { - "name": "faiss_gpu_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":1024}, - "file": "glove-100-inner/faiss_gpu_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-inner/faiss_ivf_flat/nlist1024" - }, - { - "name": "faiss_gpu_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":2048}, - "file": "glove-100-inner/faiss_gpu_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-inner/faiss_ivf_flat/nlist2048" - }, - { - "name": 
"faiss_gpu_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":4096}, - "file": "glove-100-inner/faiss_gpu_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-inner/faiss_ivf_flat/nlist4096" - }, - { - "name": "faiss_gpu_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":8192}, - "file": "glove-100-inner/faiss_gpu_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-inner/faiss_ivf_flat/nlist8192" - }, - { - "name": "faiss_gpu_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/glove-100-inner/faiss_gpu_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-100-inner/faiss_gpu_ivf_flat/nlist16384" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/glove-100-inner/faiss_gpu_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-inner/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", - 
"algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/glove-100-inner/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-inner/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/glove-100-inner/faiss_gpu_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-inner/faiss_gpu_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":2048, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":4096, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist4096-fp16" - }, - { - "name": 
"faiss_gpu_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":8192, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":16384, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":1024, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":2048, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": 
"result/glove-100-inner/faiss_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":4096, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":8192, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":16384, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_gpu_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "glove-100-inner/faiss_gpu_flat/flat", - "search_params": [{}], - "search_result_file": "result/glove-100-inner/faiss_gpu_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": 
"index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-100-inner/raft_gpu_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "nprobe": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": 
"result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - 
"name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-inner/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": 
{ - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": 
"index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-inner/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-inner/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - 
"nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-inner/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/glove-100-inner/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-100-inner/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 32 - }, - "file" : "index/glove-100-inner/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/glove-100-inner/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 64 - }, - "file" : "index/glove-100-inner/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/glove-100-inner/raft_cagra/dim64" - } - ] -} diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-50-angular.json b/python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-50-angular.json deleted file mode 100644 index 11fa07c5c9..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-50-angular.json +++ /dev/null @@ -1,1351 +0,0 @@ -{ - "dataset": { - "name": "glove-50-angular", - "base_file": "glove-50-angular/base.fbin", - "query_file": "glove-50-angular/query.fbin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - 
"name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-50-angular/hnswlib/M12", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/glove-50-angular/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-50-angular/hnswlib/M16", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/glove-50-angular/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-50-angular/hnswlib/M24", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/glove-50-angular/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-50-angular/hnswlib/M36", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/glove-50-angular/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "index/glove-50-angular/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/glove-50-angular/raft_bfknn/bfknn" - }, - { - "name": "faiss_gpu_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": 
"index/glove-50-angular/faiss_gpu_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_flat/nlist1024" - }, - { - "name": "faiss_gpu_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/glove-50-angular/faiss_gpu_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_flat/nlist2048" - }, - { - "name": "faiss_gpu_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/glove-50-angular/faiss_gpu_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_flat/nlist4096" - }, - { - "name": "faiss_gpu_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/glove-50-angular/faiss_gpu_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_flat/nlist8192" - }, - { - "name": "faiss_gpu_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": 
"index/glove-50-angular/faiss_gpu_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_flat/nlist16384" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/glove-50-angular/faiss_gpu_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/glove-50-angular/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-fp16", - "algo": 
"faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": 
"result/glove-50-angular/faiss_gpu_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - 
"nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_gpu_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/glove-50-angular/faiss_gpu_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/glove-50-angular/faiss_gpu_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 
1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "nprobe": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": 
"half" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": 
"result/glove-50-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": 
"raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - 
"build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/glove-50-angular/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - 
"graph_degree" : 32 - }, - "file" : "index/glove-50-angular/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/glove-50-angular/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 64 - }, - "file" : "index/glove-50-angular/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/glove-50-angular/raft_cagra/dim64" - } - ] -} diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-50-inner.json b/python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-50-inner.json deleted file mode 100644 index 32613b7c16..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/glove-50-inner.json +++ /dev/null @@ -1,1351 +0,0 @@ -{ - "dataset": { - "name": "glove-50-inner", - "base_file": "glove-50-inner/base.fbin", - "query_file": "glove-50-inner/query.fbin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-50-inner/hnswlib/M12", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/glove-50-inner/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-50-inner/hnswlib/M16", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/glove-50-inner/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, 
"efConstruction":500, "numThreads":32}, - "file" : "index/glove-50-inner/hnswlib/M24", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/glove-50-inner/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-50-inner/hnswlib/M36", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/glove-50-inner/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "index/glove-50-inner/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/glove-50-inner/raft_bfknn/bfknn" - }, - { - "name": "faiss_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/glove-50-inner/faiss_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_flat/nlist1024" - }, - { - "name": "faiss_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/glove-50-inner/faiss_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_flat/nlist2048" - }, - { - "name": "faiss_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - 
"nlist": 4096 - }, - "file": "index/glove-50-inner/faiss_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_flat/nlist4096" - }, - { - "name": "faiss_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/glove-50-inner/faiss_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_flat/nlist8192" - }, - { - "name": "faiss_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/glove-50-inner/faiss_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_flat/nlist16384" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/glove-50-inner/faiss_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - 
"usePrecomputed": false - }, - "file": "index/glove-50-inner/faiss_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/glove-50-inner/faiss_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/glove-50-inner/faiss_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/glove-50-inner/faiss_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - 
"quantizer_type": "fp16" - }, - "file": "index/glove-50-inner/faiss_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/glove-50-inner/faiss_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/glove-50-inner/faiss_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/glove-50-inner/faiss_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_ivf_sq.nlist4096-int8", - "algo": 
"faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/glove-50-inner/faiss_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/glove-50-inner/faiss_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/glove-50-inner/faiss_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/glove-50-inner/faiss_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/glove-50-inner/faiss_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "nprobe": 
10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "nprobe": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - 
"build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": 
"index/glove-50-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-inner/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 
10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - 
"nprobe": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-inner/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "nprobe": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "nprobe": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-inner/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": 
"result/glove-50-inner/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/glove-50-inner/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-50-inner/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 32 - }, - "file" : "index/glove-50-inner/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/glove-50-inner/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 64 - }, - "file" : "index/glove-50-inner/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/glove-50-inner/raft_cagra/dim64" - } - ] -} diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/lastfm-65-angular.json b/python/raft-ann-bench/src/raft_ann_bench/run/conf/lastfm-65-angular.json deleted file mode 100644 index 943d09231a..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/lastfm-65-angular.json +++ /dev/null @@ -1,1351 +0,0 @@ -{ - "dataset": { - "name": "lastfm-65-angular", - "base_file": "lastfm-65-angular/base.fbin", - "query_file": "lastfm-65-angular/query.fbin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/lastfm-65-angular/hnswlib/M12", - "search_params" : [ - {"ef":10}, - 
{"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/lastfm-65-angular/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/lastfm-65-angular/hnswlib/M16", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/lastfm-65-angular/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/lastfm-65-angular/hnswlib/M24", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/lastfm-65-angular/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/lastfm-65-angular/hnswlib/M36", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/lastfm-65-angular/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "index/lastfm-65-angular/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/lastfm-65-angular/raft_bfknn/bfknn" - }, - { - "name": "faiss_gpu_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/lastfm-65-angular/faiss_gpu_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - 
"nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_flat/nlist1024" - }, - { - "name": "faiss_gpu_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/lastfm-65-angular/faiss_gpu_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_flat/nlist2048" - }, - { - "name": "faiss_gpu_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/lastfm-65-angular/faiss_gpu_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_flat/nlist4096" - }, - { - "name": "faiss_gpu_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/lastfm-65-angular/faiss_gpu_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_flat/nlist8192" - }, - { - "name": "faiss_gpu_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/lastfm-65-angular/faiss_gpu_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 
- }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_flat/nlist16384" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/lastfm-65-angular/faiss_gpu_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/lastfm-65-angular/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - 
"nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": 
"index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-int8", - "algo": 
"faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_gpu_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/lastfm-65-angular/faiss_gpu_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/lastfm-65-angular/faiss_gpu_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 1, - 
"internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": 
"raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": 
"raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, 
- "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 
25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/lastfm-65-angular/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 32 - }, - "file" : 
"index/lastfm-65-angular/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/lastfm-65-angular/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 64 - }, - "file" : "index/lastfm-65-angular/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/lastfm-65-angular/raft_cagra/dim64" - } - ] -} diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/mnist-784-euclidean.json b/python/raft-ann-bench/src/raft_ann_bench/run/conf/mnist-784-euclidean.json deleted file mode 100644 index 04e7ecb469..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/mnist-784-euclidean.json +++ /dev/null @@ -1,1352 +0,0 @@ -{ - "dataset": { - "name": "mnist-784-euclidean", - "base_file": "mnist-784-euclidean/base.fbin", - "query_file": "mnist-784-euclidean/query.fbin", - "groundtruth_neighbors_file": "mnist-784-euclidean/groundtruth.neighbors.ibin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/mnist-784-euclidean/hnswlib/M12", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/mnist-784-euclidean/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/mnist-784-euclidean/hnswlib/M16", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : 
"result/mnist-784-euclidean/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/mnist-784-euclidean/hnswlib/M24", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/mnist-784-euclidean/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/mnist-784-euclidean/hnswlib/M36", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/mnist-784-euclidean/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "index/mnist-784-euclidean/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_bfknn/bfknn" - }, - { - "name": "faiss_gpu_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist1024" - }, - { - "name": "faiss_gpu_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 
- } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist2048" - }, - { - "name": "faiss_gpu_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist4096" - }, - { - "name": "faiss_gpu_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist8192" - }, - { - "name": "faiss_gpu_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist16384" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { 
- "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - 
"nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - 
"file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-int8" - }, - { - 
"name": "faiss_gpu_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/mnist-784-euclidean/faiss_gpu_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/mnist-784-euclidean/faiss_gpu_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - 
"numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - 
"internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - 
"smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - 
"numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - 
"internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/mnist-784-euclidean/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 32 - }, - "file" : "index/mnist-784-euclidean/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/mnist-784-euclidean/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 64 - }, - "file" : "index/mnist-784-euclidean/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : 
"result/mnist-784-euclidean/raft_cagra/dim64" - } - ] -} diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/nytimes-256-angular.json b/python/raft-ann-bench/src/raft_ann_bench/run/conf/nytimes-256-angular.json deleted file mode 100644 index df2a16f1f8..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/nytimes-256-angular.json +++ /dev/null @@ -1,1352 +0,0 @@ -{ - "dataset": { - "name": "nytimes-256-angular", - "base_file": "nytimes-256-angular/base.fbin", - "query_file": "nytimes-256-angular/query.fbin", - "groundtruth_neighbors_file": "nytimes-256-angular/groundtruth.neighbors.ibin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/nytimes-256-angular/hnswlib/M12", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/nytimes-256-angular/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/nytimes-256-angular/hnswlib/M16", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/nytimes-256-angular/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/nytimes-256-angular/hnswlib/M24", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/nytimes-256-angular/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : 
"hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/nytimes-256-angular/hnswlib/M36", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/nytimes-256-angular/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "index/nytimes-256-angular/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/nytimes-256-angular/raft_bfknn/bfknn" - }, - { - "name": "faiss_gpu_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_flat/nlist1024" - }, - { - "name": "faiss_gpu_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_flat/nlist2048" - }, - { - "name": "faiss_gpu_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 
1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_flat/nlist4096" - }, - { - "name": "faiss_gpu_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_flat/nlist8192" - }, - { - "name": "faiss_gpu_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_flat/nlist16384" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - 
"nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist8192-fp16", - "search_params": [ - { - 
"nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - 
"nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_gpu_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/nytimes-256-angular/faiss_gpu_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/nytimes-256-angular/faiss_gpu_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024", - 
"search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": 
"result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": 
"result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": 
"result/nytimes-256-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": 
"result/nytimes-256-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": 
"result/nytimes-256-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/nytimes-256-angular/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 32 - }, - "file" : "index/nytimes-256-angular/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/nytimes-256-angular/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 64 - }, - "file" : "index/nytimes-256-angular/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/nytimes-256-angular/raft_cagra/dim64" - } - ] -} diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/nytimes-256-inner.json b/python/raft-ann-bench/src/raft_ann_bench/run/conf/nytimes-256-inner.json deleted file mode 100644 index 18942a95c3..0000000000 --- 
a/python/raft-ann-bench/src/raft_ann_bench/run/conf/nytimes-256-inner.json +++ /dev/null @@ -1,1352 +0,0 @@ -{ - "dataset": { - "name": "nytimes-256-inner", - "base_file": "nytimes-256-inner/base.fbin", - "query_file": "nytimes-256-inner/query.fbin", - "groundtruth_neighbors_file": "nytimes-256-inner/groundtruth.neighbors.ibin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/nytimes-256-inner/hnswlib/M12", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/nytimes-256-inner/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/nytimes-256-inner/hnswlib/M16", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/nytimes-256-inner/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/nytimes-256-inner/hnswlib/M24", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - "search_result_file" : "result/nytimes-256-inner/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/nytimes-256-inner/hnswlib/M36", - "search_params" : [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ], - 
"search_result_file" : "result/nytimes-256-inner/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "index/nytimes-256-inner/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/nytimes-256-inner/raft_bfknn/bfknn" - }, - { - "name": "faiss_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/nytimes-256-inner/faiss_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_flat/nlist1024" - }, - { - "name": "faiss_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/nytimes-256-inner/faiss_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_flat/nlist2048" - }, - { - "name": "faiss_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/nytimes-256-inner/faiss_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_flat/nlist4096" - }, - { - "name": "faiss_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/nytimes-256-inner/faiss_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { 
- "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_flat/nlist8192" - }, - { - "name": "faiss_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/nytimes-256-inner/faiss_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_flat/nlist16384" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/nytimes-256-inner/faiss_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/nytimes-256-inner/faiss_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - 
{ - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": 
"index/nytimes-256-inner/faiss_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - 
"build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/nytimes-256-inner/faiss_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/nytimes-256-inner/faiss_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - 
"internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - 
"smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - 
"numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": 
"float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 
10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-inner/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-inner/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/nytimes-256-inner/raft_ivf_flat/nlist16384", - 
"search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/nytimes-256-inner/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 32 - }, - "file" : "index/nytimes-256-inner/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/nytimes-256-inner/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "graph_degree" : 64 - }, - "file" : "index/nytimes-256-inner/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/nytimes-256-inner/raft_cagra/dim64" - } - ] -} diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/sift-128-euclidean.json b/python/raft-ann-bench/src/raft_ann_bench/run/conf/sift-128-euclidean.json deleted file mode 100644 index 791261251a..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/sift-128-euclidean.json +++ /dev/null @@ -1,498 +0,0 @@ -{ - "dataset": { - "name": "sift-128-euclidean", - "base_file": "sift-128-euclidean/base.fbin", - "query_file": "sift-128-euclidean/query.fbin", - "groundtruth_neighbors_file": "sift-128-euclidean/groundtruth.neighbors.ibin", - "distance": "euclidean" - }, - - "search_basic_param": { - "batch_size": 5000, - "k": 10 - }, - - "index": [ - { - "name": "hnswlib.M12", - "algo": "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file": "sift-128-euclidean/hnswlib/M12", - "search_params": [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ] - }, - { - "name": "hnswlib.M16", - "algo": 
"hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file": "sift-128-euclidean/hnswlib/M16", - "search_params": [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ] - }, - { - "name": "hnswlib.M24", - "algo": "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file": "sift-128-euclidean/hnswlib/M24", - "search_params": [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ] - }, - { - "name": "hnswlib.M36", - "algo": "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file": "sift-128-euclidean/hnswlib/M36", - "search_params": [ - {"ef":10}, - {"ef":20}, - {"ef":40}, - {"ef":60}, - {"ef":80}, - {"ef":120}, - {"ef":200}, - {"ef":400}, - {"ef":600}, - {"ef":800} - ] - }, - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "sift-128-euclidean/raft_bfknn/bfknn", - "search_params": [{"probe": 1}] - }, - { - "name": "faiss_gpu_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist": 1024}, - "file": "sift-128-euclidean/faiss_gpu_ivf_flat/nlist1024", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_gpu_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist": 2048}, - "file": "sift-128-euclidean/faiss_gpu_ivf_flat/nlist2048", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_gpu_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist": 4096}, - "file": "sift-128-euclidean/faiss_gpu_ivf_flat/nlist4096", - "search_params": [ - {"nprobe": 
1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_gpu_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist": 8192}, - "file": "sift-128-euclidean/faiss_gpu_ivf_flat/nlist8192", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_gpu_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist": 16384}, - "file": "sift-128-euclidean/faiss_gpu_ivf_flat/nlist16384", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000}, - {"nprobe": 2000} - ] - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist": 1024, "M": 64, "useFloat16": true, "usePrecomputed": true}, - "file": "sift-128-euclidean/faiss_gpu_ivf_pq/M64-nlist1024", - "search_params": [ - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "sift-128-euclidean/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 1024, "quantizer_type": "fp16"}, - "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 
1000} - ] - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 2048, "quantizer_type": "fp16"}, - "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 4096, "quantizer_type": "fp16"}, - "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 8192, "quantizer_type": "fp16"}, - "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 16384, "quantizer_type": "fp16"}, - "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000}, - {"nprobe": 2000} - ] - }, - { - "name": "faiss_gpu_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 1024, "quantizer_type": "int8"}, - "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist1024-int8", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_gpu_ivf_sq.nlist2048-int8", - "algo": 
"faiss_gpu_ivf_sq", - "build_param": {"nlist": 2048,"quantizer_type": "int8"}, - "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist2048-int8", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_gpu_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 4096, "quantizer_type": "int8"}, - "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist4096-int8", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_gpu_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 8192, "quantizer_type": "int8"}, - "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist8192-int8", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_gpu_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 16384, "quantizer_type": "int8"}, - "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist16384-int8", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000}, - {"nprobe": 2000} - ] - }, - { - "name": "faiss_gpu_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "sift-128-euclidean/faiss_gpu_flat/flat", - "search_params": [{}] - }, - { - "name": "raft_ivf_pq.dimpq64-bitpq8-cluster1K", - "algo": "raft_ivf_pq", - "build_param": {"niter": 25, "nlist": 1000, "pq_dim": 64, "pq_bits": 8, "ratio": 1}, - "file": "sift-128-euclidean/raft_ivf_pq/dimpq64-bitpq8-cluster1K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 30, 
"internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } - ] - }, - { - "name": "raft_ivf_pq.dimpq128-bitpq6-cluster1K", - "algo": "raft_ivf_pq", - "build_param": {"niter": 25, "nlist": 1000, "pq_dim": 128, "pq_bits": 6, "ratio": 1}, - "file": 
"sift-128-euclidean/raft_ivf_pq/dimpq128-bitpq6-cluster1K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } - ] - }, - { - "name": 
"raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": {"nlist": 1024, "ratio": 1, "niter": 25}, - "file": "sift-128-euclidean/raft_ivf_flat/nlist1024", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": {"nlist": 16384, "ratio": 2, "niter": 20}, - "file": "sift-128-euclidean/raft_ivf_flat/nlist16384", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000}, - {"nprobe": 2000} - ] - }, - { - "name": "raft_cagra.dim32", - "algo": "raft_cagra", - "build_param": {"graph_degree": 32}, - "file": "sift-128-euclidean/raft_cagra/dim32", - "search_params": [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ] - }, - { - "name": "raft_cagra.dim64", - "algo": "raft_cagra", - "build_param": {"graph_degree": 64}, - "file": "sift-128-euclidean/raft_cagra/dim64", - "search_params": [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ] - } - ] -} diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/wiki_all_10M.json b/python/raft-ann-bench/src/raft_ann_bench/run/conf/wiki_all_10M.json deleted file mode 100644 index e5f77e7858..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/wiki_all_10M.json +++ /dev/null @@ -1,200 +0,0 @@ -{ - "dataset": { - "name": "wiki_all_10M", - "base_file": "wiki_all_10M/base.88M.fbin", - "query_file": "wiki_all_10M/queries.fbin", - "groundtruth_neighbors_file": "wiki_all_10M/groundtruth.88M.neighbors.ibin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 10000, - "k": 10 - }, - "index": [ - { - "name": "hnswlib.M16.ef50", - "algo": "hnswlib", - "build_param": { "M": 16, "efConstruction": 50, "numThreads": 56 }, - "file": "wiki_all_10M/hnswlib/M16.ef50", - 
"search_params": [ - { "ef": 10, "numThreads": 56 }, - { "ef": 20, "numThreads": 56 }, - { "ef": 40, "numThreads": 56 }, - { "ef": 60, "numThreads": 56 }, - { "ef": 80, "numThreads": 56 }, - { "ef": 120, "numThreads": 56 }, - { "ef": 200, "numThreads": 56 }, - { "ef": 400, "numThreads": 56 }, - { "ef": 600, "numThreads": 56 }, - { "ef": 800, "numThreads": 56 } - ] - }, - { - "name": "faiss_ivf_pq.M32-nlist16K", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "M": 32, - "nlist": 16384, - "ratio": 2 - }, - "file": "wiki_all_10M/faiss_ivf_pq/M32-nlist16K_ratio2", - "search_params": [ - { "nprobe": 10 }, - { "nprobe": 20 }, - { "nprobe": 30 }, - { "nprobe": 40 }, - { "nprobe": 50 }, - { "nprobe": 100 }, - { "nprobe": 200 }, - { "nprobe": 500 } - ] - }, - { - "name": "faiss_ivf_pq.M64-nlist16K", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "M": 64, - "nlist": 16384, - "ratio": 2 - }, - "file": "wiki_all_10M/faiss_ivf_pq/M64-nlist16K_ratio2", - "search_params": [ - { "nprobe": 10 }, - { "nprobe": 20 }, - { "nprobe": 30 }, - { "nprobe": 40 }, - { "nprobe": 50 }, - { "nprobe": 100 }, - { "nprobe": 200 }, - { "nprobe": 500 } - ] - }, - { - "name": "raft_ivf_pq.d128-nlist16K", - "algo": "raft_ivf_pq", - "build_param": { - "pq_dim": 128, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "file": "wiki_all_10M/raft_ivf_pq/d128-nlist16K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 500, 
"internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 } - ] - }, - { - "name": "raft_ivf_pq.d64-nlist16K", - "algo": "raft_ivf_pq", - "build_param": { - "pq_dim": 64, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "file": "wiki_all_10M/raft_ivf_pq/d64-nlist16K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 } - ] - }, - { - "name": "raft_ivf_pq.d32-nlist16K", - "algo": "raft_ivf_pq", - "build_param": { - "pq_dim": 32, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "file": "wiki_all_10M/raft_ivf_pq/d32-nlist16K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 } - ] - }, - { - "name": "raft_ivf_pq.d32X-nlist16K", - "algo": 
"raft_ivf_pq", - "build_param": { - "pq_dim": 32, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "file": "wiki_all_10M/raft_ivf_pq/d32-nlist16K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 500, "internalDistanceDtype": "half", 
"smemLutDtype": "half", "refine_ratio": 4 } - - ] - }, - { - "name": "raft_cagra.dim32.multi_cta", - "algo": "raft_cagra", - "build_param": { "graph_degree": 32, "intermediate_graph_degree": 48 }, - "file": "wiki_all_10M/raft_cagra/dim32.ibin", - "search_params": [ - { "itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 36, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 40, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 44, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 48, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 16, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 24, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 26, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 32, "algo": "multi_cta" }, - { "itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_cta" }, - { "itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_cta" }, - { "itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_cta" }, - { "itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_cta" }, - { "itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_cta" }, - { "itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_cta" }, - { "itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_cta" }, - { "itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_cta" }, - { "itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_cta" }, - { "itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_cta" } - ] - } - - ] -} - diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/wiki_all_1M.json 
b/python/raft-ann-bench/src/raft_ann_bench/run/conf/wiki_all_1M.json deleted file mode 100644 index 2d1ec1e322..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/wiki_all_1M.json +++ /dev/null @@ -1,216 +0,0 @@ -{ - "dataset": { - "name": "wiki_all_1M", - "base_file": "wiki_all_1M/base.1M.fbin", - "subset_size": 1000000, - "query_file": "wiki_all_1M/queries.fbin", - "groundtruth_neighbors_file": "wiki_all_1M/groundtruth.1M.neighbors.ibin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 10000, - "k": 10 - }, - "index": [ - { - "name": "hnswlib.M16.ef50", - "algo": "hnswlib", - "build_param": { "M": 16, "efConstruction": 50, "numThreads": 56 }, - "file": "wiki_all_1M/hnswlib/M16.ef50", - "search_params": [ - { "ef": 10, "numThreads": 56 }, - { "ef": 20, "numThreads": 56 }, - { "ef": 40, "numThreads": 56 }, - { "ef": 60, "numThreads": 56 }, - { "ef": 80, "numThreads": 56 }, - { "ef": 120, "numThreads": 56 }, - { "ef": 200, "numThreads": 56 }, - { "ef": 400, "numThreads": 56 }, - { "ef": 600, "numThreads": 56 }, - { "ef": 800, "numThreads": 56 } - ] - }, - { - "name": "faiss_ivf_pq.M32-nlist16K", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "M": 32, - "nlist": 16384, - "ratio": 2 - }, - "file": "wiki_all_1M/faiss_ivf_pq/M32-nlist16K_ratio2", - "search_params": [ - { "nprobe": 10 }, - { "nprobe": 20 }, - { "nprobe": 30 }, - { "nprobe": 40 }, - { "nprobe": 50 }, - { "nprobe": 100 }, - { "nprobe": 200 }, - { "nprobe": 500 } - ] - }, - { - "name": "faiss_ivf_pq.M64-nlist16K", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "M": 64, - "nlist": 16384, - "ratio": 2 - }, - "file": "wiki_all_1M/faiss_ivf_pq/M64-nlist16K_ratio2", - "search_params": [ - { "nprobe": 10 }, - { "nprobe": 20 }, - { "nprobe": 30 }, - { "nprobe": 40 }, - { "nprobe": 50 }, - { "nprobe": 100 }, - { "nprobe": 200 }, - { "nprobe": 500 } - ] - }, - { - "name": "raft_ivf_pq.d128-nlist16K", - "algo": "raft_ivf_pq", - "build_param": { - "pq_dim": 128, - 
"pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "file": "wiki_all_1M/raft_ivf_pq/d128-nlist16K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 } - ] - }, - { - "name": "raft_ivf_pq.d64-nlist16K", - "algo": "raft_ivf_pq", - "build_param": { - "pq_dim": 64, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "file": "wiki_all_1M/raft_ivf_pq/d64-nlist16K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 } - ] - }, - { - "name": "raft_ivf_pq.d32-nlist16K", - "algo": "raft_ivf_pq", - "build_param": { - "pq_dim": 32, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "file": "wiki_all_1M/raft_ivf_pq/d32-nlist16K", - "search_params": [ - { "nprobe": 
20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 } - ] - }, - { - "name": "raft_ivf_pq.d32X-nlist16K", - "algo": "raft_ivf_pq", - "build_param": { - "pq_dim": 32, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "file": "wiki_all_1M/raft_ivf_pq/d32-nlist16K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 
200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 } - - ] - }, - { - "name": "raft_cagra.dim32.multi_cta", - "algo": "raft_cagra", - "build_param": { "graph_degree": 32, - "intermediate_graph_degree": 48, - "graph_build_algo": "NN_DESCENT", - "ivf_pq_build_pq_dim": 32, - "ivf_pq_build_pq_bits": 8, - "ivf_pq_build_nlist": 16384, - "ivf_pq_build_niter": 10, - "ivf_pq_build_ratio": 10, - "ivf_pq_search_nprobe": 30, - "ivf_pq_search_internalDistanceDtype": "half", - "ivf_pq_search_smemLutDtype": "half", - "ivf_pq_search_refine_ratio": 8, - "nn_descent_max_iterations": 10, - "nn_descent_intermediate_graph_degree": 72, - "nn_descent_termination_threshold": 0.001 - }, - "file": "wiki_all_1M/raft_cagra/dim32.ibin", - "search_params": [ - { "itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 36, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 40, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 44, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 48, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 16, "algo": 
"multi_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 24, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 26, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 32, "algo": "multi_cta" }, - { "itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_cta" }, - { "itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_cta" }, - { "itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_cta" }, - { "itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_cta" }, - { "itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_cta" }, - { "itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_cta" }, - { "itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_cta" }, - { "itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_cta" }, - { "itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_cta" }, - { "itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_cta" } - ] - } - - ] -} - diff --git a/python/raft-ann-bench/src/raft_ann_bench/run/conf/wiki_all_88M.json b/python/raft-ann-bench/src/raft_ann_bench/run/conf/wiki_all_88M.json deleted file mode 100644 index e50b40f554..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/run/conf/wiki_all_88M.json +++ /dev/null @@ -1,200 +0,0 @@ -{ - "dataset": { - "name": "wiki_all_88M", - "base_file": "wiki_all_88M/base.88M.fbin", - "query_file": "wiki_all_88M/queries.fbin", - "groundtruth_neighbors_file": "wiki_all_88M/groundtruth.88M.neighbors.ibin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 10000, - "k": 10 - }, - "index": [ - { - "name": "hnswlib.M16.ef50", - "algo": "hnswlib", - "build_param": { "M": 16, "efConstruction": 50, "numThreads": 56 }, - "file": "wiki_all_88M/hnswlib/M16.ef50", - "search_params": [ - { "ef": 10, "numThreads": 56 }, - { "ef": 20, "numThreads": 56 }, - { "ef": 40, 
"numThreads": 56 }, - { "ef": 60, "numThreads": 56 }, - { "ef": 80, "numThreads": 56 }, - { "ef": 120, "numThreads": 56 }, - { "ef": 200, "numThreads": 56 }, - { "ef": 400, "numThreads": 56 }, - { "ef": 600, "numThreads": 56 }, - { "ef": 800, "numThreads": 56 } - ] - }, - { - "name": "faiss_ivf_pq.M32-nlist16K", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "M": 32, - "nlist": 16384, - "ratio": 2 - }, - "file": "wiki_all_88M/faiss_ivf_pq/M32-nlist16K_ratio2", - "search_params": [ - { "nprobe": 10 }, - { "nprobe": 20 }, - { "nprobe": 30 }, - { "nprobe": 40 }, - { "nprobe": 50 }, - { "nprobe": 100 }, - { "nprobe": 200 }, - { "nprobe": 500 } - ] - }, - { - "name": "faiss_ivf_pq.M64-nlist16K", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "M": 64, - "nlist": 16384, - "ratio": 2 - }, - "file": "wiki_all_88M/faiss_ivf_pq/M64-nlist16K_ratio2", - "search_params": [ - { "nprobe": 10 }, - { "nprobe": 20 }, - { "nprobe": 30 }, - { "nprobe": 40 }, - { "nprobe": 50 }, - { "nprobe": 100 }, - { "nprobe": 200 }, - { "nprobe": 500 } - ] - }, - { - "name": "raft_ivf_pq.d128-nlist16K", - "algo": "raft_ivf_pq", - "build_param": { - "pq_dim": 128, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "file": "wiki_all_88M/raft_ivf_pq/d128-nlist16K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 } - ] - }, - { - "name": 
"raft_ivf_pq.d64-nlist16K", - "algo": "raft_ivf_pq", - "build_param": { - "pq_dim": 64, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "file": "wiki_all_88M/raft_ivf_pq/d64-nlist16K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 } - ] - }, - { - "name": "raft_ivf_pq.d32-nlist16K", - "algo": "raft_ivf_pq", - "build_param": { - "pq_dim": 32, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "file": "wiki_all_88M/raft_ivf_pq/d32-nlist16K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 } - ] - }, - { - "name": "raft_ivf_pq.d32X-nlist16K", - "algo": "raft_ivf_pq", - "build_param": { - "pq_dim": 32, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - 
"ratio": 10 - }, - "file": "wiki_all_88M/raft_ivf_pq/d32-nlist16K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 } - - ] - }, - { - "name": "raft_cagra.dim32.multi_cta", - "algo": 
"raft_cagra", - "build_param": { "graph_degree": 32, "intermediate_graph_degree": 48 }, - "file": "wiki_all_88M/raft_cagra/dim32.ibin", - "search_params": [ - { "itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 36, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 40, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 44, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 48, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 16, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 24, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 26, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 32, "algo": "multi_cta" }, - { "itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_cta" }, - { "itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_cta" }, - { "itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_cta" }, - { "itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_cta" }, - { "itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_cta" }, - { "itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_cta" }, - { "itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_cta" }, - { "itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_cta" }, - { "itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_cta" }, - { "itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_cta" } - ] - } - - ] -} - diff --git a/python/raft-ann-bench/src/raft_ann_bench/split_groundtruth/__main__.py b/python/raft-ann-bench/src/raft_ann_bench/split_groundtruth/__main__.py deleted file mode 100644 index 
c65360ebb0..0000000000 --- a/python/raft-ann-bench/src/raft_ann_bench/split_groundtruth/__main__.py +++ /dev/null @@ -1,57 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import subprocess -import sys - - -def split_groundtruth(groundtruth_filepath): - ann_bench_scripts_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "split_groundtruth.pl" - ) - pwd = os.getcwd() - path_to_groundtruth = os.path.normpath(groundtruth_filepath).split(os.sep) - if len(path_to_groundtruth) > 1: - os.chdir(os.path.join(*path_to_groundtruth[:-1])) - groundtruth_filename = path_to_groundtruth[-1] - subprocess.run( - [ann_bench_scripts_path, groundtruth_filename, "groundtruth"], - check=True, - ) - os.chdir(pwd) - - -def main(): - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - "--groundtruth", - help="Path to billion-scale dataset groundtruth file", - required=True, - ) - - if len(sys.argv) == 1: - parser.print_help() - sys.exit(1) - args = parser.parse_args() - - split_groundtruth(args.groundtruth) - - -if __name__ == "__main__": - main() diff --git a/python/raft-ann-bench/src/raft_ann_bench/split_groundtruth/split_groundtruth.pl b/python/raft-ann-bench/src/raft_ann_bench/split_groundtruth/split_groundtruth.pl deleted file mode 100755 index b0a59f806c..0000000000 --- 
a/python/raft-ann-bench/src/raft_ann_bench/split_groundtruth/split_groundtruth.pl +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/perl - -# ============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. - -use warnings; -use strict; -use autodie qw(open close); - - -@ARGV == 2 - or die "usage: $0 input output_prefix\n"; - -open my $fh, '<:raw', $ARGV[0]; - -my $raw; -read($fh, $raw, 8); -my ($nrows, $dim) = unpack('LL', $raw); - -my $expected_size = 8 + $nrows * $dim * (4 + 4); -my $size = (stat($fh))[7]; -$size == $expected_size - or die("error: expected size is $expected_size, but actual size is $size\n"); - - -open my $fh_out1, '>:raw', "$ARGV[1].neighbors.ibin"; -open my $fh_out2, '>:raw', "$ARGV[1].distances.fbin"; - -print {$fh_out1} $raw; -print {$fh_out2} $raw; - -read($fh, $raw, $nrows * $dim * 4); -print {$fh_out1} $raw; -read($fh, $raw, $nrows * $dim * 4); -print {$fh_out2} $raw; diff --git a/python/raft-dask/CMakeLists.txt b/python/raft-dask/CMakeLists.txt index 197ddae05f..9ebbaa5298 100644 --- a/python/raft-dask/CMakeLists.txt +++ b/python/raft-dask/CMakeLists.txt @@ -45,7 +45,6 @@ if(NOT raft_FOUND) # raft-dask doesn't actually use raft libraries, it just needs the headers, so we can turn off all # library compilation and we don't need to install anything here. 
set(BUILD_TESTS OFF) - set(BUILD_ANN_BENCH OFF) set(BUILD_PRIMS_BENCH OFF) set(RAFT_COMPILE_LIBRARIES OFF) set(RAFT_COMPILE_DIST_LIBRARY OFF)