From bd55afe071ac69dba54562955902964d86b3c96c Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 19 Oct 2023 00:30:00 +0200 Subject: [PATCH 01/10] Add IVF-PQ config options for CAGRA index building --- cpp/bench/ann/src/raft/raft_benchmark.cu | 32 ++++++--- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 72 +++++++++++++++++---- 2 files changed, 85 insertions(+), 19 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index fa20c5c223..2120f295af 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -34,8 +34,10 @@ extern template class raft::bench::ann::RaftIvfFlatGpu; extern template class raft::bench::ann::RaftIvfFlatGpu; extern template class raft::bench::ann::RaftIvfFlatGpu; #endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_PQ +#if defined(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) #include "raft_ivf_pq_wrapper.h" +#endif +#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_PQ extern template class raft::bench::ann::RaftIvfPQ; extern template class raft::bench::ann::RaftIvfPQ; extern template class raft::bench::ann::RaftIvfPQ; @@ -69,7 +71,7 @@ void parse_search_param(const nlohmann::json& conf, } #endif -#ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_PQ +#if defined(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ) || defined(RAFT_ANN_BENCH_USE_RAFT_CAGRA) template void parse_build_param(const nlohmann::json& conf, typename raft::bench::ann::RaftIvfPQ::BuildParam& param) @@ -141,20 +143,34 @@ void parse_build_param(const nlohmann::json& conf, typename raft::bench::ann::RaftCagra::BuildParam& param) { if (conf.contains("graph_degree")) { - param.graph_degree = conf.at("graph_degree"); - param.intermediate_graph_degree = param.graph_degree * 2; + param.cagra_params.graph_degree = conf.at("graph_degree"); + param.cagra_params.intermediate_graph_degree = param.cagra_params.graph_degree * 2; } if (conf.contains("intermediate_graph_degree")) { - param.intermediate_graph_degree 
= conf.at("intermediate_graph_degree"); + param.cagra_params.intermediate_graph_degree = conf.at("intermediate_graph_degree"); } if (conf.contains("graph_build_algo")) { if (conf.at("graph_build_algo") == "IVF_PQ") { - param.build_algo = raft::neighbors::cagra::graph_build_algo::IVF_PQ; + param.cagra_params.build_algo = raft::neighbors::cagra::graph_build_algo::IVF_PQ; } else if (conf.at("graph_build_algo") == "NN_DESCENT") { - param.build_algo = raft::neighbors::cagra::graph_build_algo::NN_DESCENT; + param.cagra_params.build_algo = raft::neighbors::cagra::graph_build_algo::NN_DESCENT; } } - if (conf.contains("nn_descent_niter")) { param.nn_descent_niter = conf.at("nn_descent_niter"); } + if (conf.contains("nn_descent_niter")) { + param.cagra_params.nn_descent_niter = conf.at("nn_descent_niter"); + } + if (conf.contains("ivf_pq_build_params")) { + raft::neighbors::ivf_pq::index_params bparam; + parse_build_param(conf.at("ivf_pq_build_params"), bparam); + param.ivf_pq_build_params = bparam; + std::cout << "Parsed ivf_pq build params, pq_dim=" << param.ivf_pq_build_params->pq_dim + << std::endl; + } + if (conf.contains("ivf_pq_search_params")) { + typename raft::bench::ann::RaftIvfPQ::SearchParam sparam; + parse_search_param(conf.at("ivf_pq_search_params"), sparam); + param.ivf_pq_search_params = sparam.pq_param; + } } template diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 19c5151186..6c688c6d62 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -50,7 +52,12 @@ class RaftCagra : public ANN { auto needs_dataset() const -> bool override { return true; } }; - using BuildParam = raft::neighbors::cagra::index_params; + struct BuildParam { + raft::neighbors::cagra::index_params cagra_params; 
+ std::optional ivf_pq_refine_rate = std::nullopt; + std::optional ivf_pq_build_params = std::nullopt; + std::optional ivf_pq_search_params = std::nullopt; + }; RaftCagra(Metric metric, int dim, const BuildParam& param, int concurrent_searches = 1) : ANN(metric, dim), @@ -59,7 +66,8 @@ class RaftCagra : public ANN { mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) { rmm::mr::set_current_device_resource(&mr_); - index_params_.metric = parse_metric_type(metric); + index_params_.cagra_params.metric = parse_metric_type(metric); + index_params_.ivf_pq_build_params->metric = parse_metric_type(metric); RAFT_CUDA_TRY(cudaGetDevice(&device_)); } @@ -105,17 +113,59 @@ class RaftCagra : public ANN { template void RaftCagra::build(const T* dataset, size_t nrow, cudaStream_t) { - if (raft::get_device_for_address(dataset) == -1) { - auto dataset_view = - raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_); - index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); - return; + auto dataset_view = + raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_); + + auto& params = index_params_.cagra_params; + + // Copied from cagra.cuh + size_t intermediate_degree = params.intermediate_graph_degree; + size_t graph_degree = params.graph_degree; + if (intermediate_degree >= static_cast(nrow)) { + RAFT_LOG_WARN( + "Intermediate graph degree cannot be larger than dataset size, reducing it to %lu", nrow); + intermediate_degree = nrow - 1; + } + if (intermediate_degree < graph_degree) { + RAFT_LOG_WARN( + "Graph degree (%lu) cannot be larger than intermediate graph degree (%lu), reducing " + "graph_degree.", + graph_degree, + intermediate_degree); + graph_degree = intermediate_degree; + } + + std::optional> knn_graph( + raft::make_host_matrix(nrow, intermediate_degree)); + + if (params.build_algo == raft::neighbors::cagra::graph_build_algo::IVF_PQ) { + raft::neighbors::cagra::build_knn_graph(handle_, + dataset_view, + 
knn_graph->view(), + index_params_.ivf_pq_refine_rate, + index_params_.ivf_pq_build_params, + index_params_.ivf_pq_search_params); + } else { - auto dataset_view = - raft::make_device_matrix_view(dataset, IdxT(nrow), dimension_); - index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); - return; + // Use nn-descent to build CAGRA knn graph + auto nn_descent_params = raft::neighbors::experimental::nn_descent::index_params(); + nn_descent_params.graph_degree = intermediate_degree; + nn_descent_params.intermediate_graph_degree = 1.5 * intermediate_degree; + nn_descent_params.max_iterations = params.nn_descent_niter; + raft::neighbors::cagra::build_knn_graph( + handle_, dataset_view, knn_graph->view(), nn_descent_params); } + + auto cagra_graph = raft::make_host_matrix(nrow, graph_degree); + + raft::neighbors::cagra::optimize(handle_, knn_graph->view(), cagra_graph.view()); + + // free intermediate graph before trying to create the index + knn_graph.reset(); + + index_.emplace(raft::neighbors::cagra::index( + handle_, params.metric, dataset_view, raft::make_const_mdspan(cagra_graph.view()))); + return; } template From 7779b0d7a2699911bcbb83c80cff5aded60eebc0 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 19 Oct 2023 00:57:54 +0200 Subject: [PATCH 02/10] Refactor cagra::build move implementation to detail --- cpp/bench/ann/src/raft/raft_benchmark.cu | 2 - cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 54 ++----------- cpp/include/raft/neighbors/cagra.cuh | 59 +------------- .../neighbors/detail/cagra/cagra_build.cuh | 78 +++++++++++++++++++ 4 files changed, 87 insertions(+), 106 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index 2120f295af..b82f8c0b4c 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -163,8 +163,6 @@ void parse_build_param(const nlohmann::json& conf, raft::neighbors::ivf_pq::index_params bparam; 
parse_build_param(conf.at("ivf_pq_build_params"), bparam); param.ivf_pq_build_params = bparam; - std::cout << "Parsed ivf_pq build params, pq_dim=" << param.ivf_pq_build_params->pq_dim - << std::endl; } if (conf.contains("ivf_pq_search_params")) { typename raft::bench::ann::RaftIvfPQ::SearchParam sparam; diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 6c688c6d62..a9e01d62ea 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -118,53 +119,12 @@ void RaftCagra::build(const T* dataset, size_t nrow, cudaStream_t) auto& params = index_params_.cagra_params; - // Copied from cagra.cuh - size_t intermediate_degree = params.intermediate_graph_degree; - size_t graph_degree = params.graph_degree; - if (intermediate_degree >= static_cast(nrow)) { - RAFT_LOG_WARN( - "Intermediate graph degree cannot be larger than dataset size, reducing it to %lu", nrow); - intermediate_degree = nrow - 1; - } - if (intermediate_degree < graph_degree) { - RAFT_LOG_WARN( - "Graph degree (%lu) cannot be larger than intermediate graph degree (%lu), reducing " - "graph_degree.", - graph_degree, - intermediate_degree); - graph_degree = intermediate_degree; - } - - std::optional> knn_graph( - raft::make_host_matrix(nrow, intermediate_degree)); - - if (params.build_algo == raft::neighbors::cagra::graph_build_algo::IVF_PQ) { - raft::neighbors::cagra::build_knn_graph(handle_, - dataset_view, - knn_graph->view(), - index_params_.ivf_pq_refine_rate, - index_params_.ivf_pq_build_params, - index_params_.ivf_pq_search_params); - - } else { - // Use nn-descent to build CAGRA knn graph - auto nn_descent_params = raft::neighbors::experimental::nn_descent::index_params(); - nn_descent_params.graph_degree = intermediate_degree; - nn_descent_params.intermediate_graph_degree = 1.5 * intermediate_degree; - 
nn_descent_params.max_iterations = params.nn_descent_niter; - raft::neighbors::cagra::build_knn_graph( - handle_, dataset_view, knn_graph->view(), nn_descent_params); - } - - auto cagra_graph = raft::make_host_matrix(nrow, graph_degree); - - raft::neighbors::cagra::optimize(handle_, knn_graph->view(), cagra_graph.view()); - - // free intermediate graph before trying to create the index - knn_graph.reset(); - - index_.emplace(raft::neighbors::cagra::index( - handle_, params.metric, dataset_view, raft::make_const_mdspan(cagra_graph.view()))); + index_.emplace(raft::neighbors::cagra::detail::build(handle_, + params, + dataset_view, + index_params_.ivf_pq_refine_rate, + index_params_.ivf_pq_build_params, + index_params_.ivf_pq_search_params)); return; } diff --git a/cpp/include/raft/neighbors/cagra.cuh b/cpp/include/raft/neighbors/cagra.cuh index 1efb4da95e..73fb012f37 100644 --- a/cpp/include/raft/neighbors/cagra.cuh +++ b/cpp/include/raft/neighbors/cagra.cuh @@ -224,22 +224,7 @@ void optimize(raft::resources const& res, mdspan, row_major, g_accessor> knn_graph, raft::host_matrix_view new_graph) { - using internal_IdxT = typename std::make_unsigned::type; - - auto new_graph_internal = raft::make_host_matrix_view( - reinterpret_cast(new_graph.data_handle()), - new_graph.extent(0), - new_graph.extent(1)); - - using g_accessor_internal = - host_device_accessor, memory_type::host>; - auto knn_graph_internal = - mdspan, row_major, g_accessor_internal>( - reinterpret_cast(knn_graph.data_handle()), - knn_graph.extent(0), - knn_graph.extent(1)); - - cagra::detail::graph::optimize(res, knn_graph_internal, new_graph_internal); + detail::optimize(res, knn_graph, new_graph); } /** @@ -290,47 +275,7 @@ index build(raft::resources const& res, const index_params& params, mdspan, row_major, Accessor> dataset) { - size_t intermediate_degree = params.intermediate_graph_degree; - size_t graph_degree = params.graph_degree; - if (intermediate_degree >= static_cast(dataset.extent(0))) { - 
RAFT_LOG_WARN( - "Intermediate graph degree cannot be larger than dataset size, reducing it to %lu", - dataset.extent(0)); - intermediate_degree = dataset.extent(0) - 1; - } - if (intermediate_degree < graph_degree) { - RAFT_LOG_WARN( - "Graph degree (%lu) cannot be larger than intermediate graph degree (%lu), reducing " - "graph_degree.", - graph_degree, - intermediate_degree); - graph_degree = intermediate_degree; - } - - std::optional> knn_graph( - raft::make_host_matrix(dataset.extent(0), intermediate_degree)); - - if (params.build_algo == graph_build_algo::IVF_PQ) { - build_knn_graph(res, dataset, knn_graph->view()); - - } else { - // Use nn-descent to build CAGRA knn graph - auto nn_descent_params = experimental::nn_descent::index_params(); - nn_descent_params.graph_degree = intermediate_degree; - nn_descent_params.intermediate_graph_degree = 1.5 * intermediate_degree; - nn_descent_params.max_iterations = params.nn_descent_niter; - build_knn_graph(res, dataset, knn_graph->view(), nn_descent_params); - } - - auto cagra_graph = raft::make_host_matrix(dataset.extent(0), graph_degree); - - optimize(res, knn_graph->view(), cagra_graph.view()); - - // free intermediate graph before trying to create the index - knn_graph.reset(); - - // Construct an index from dataset and optimized knn graph. 
- return index(res, params.metric, dataset, raft::make_const_mdspan(cagra_graph.view())); + return detail::build(res, params, dataset); } /** diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh index 40024a3deb..acc3f995d9 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh @@ -264,4 +264,82 @@ void build_knn_graph(raft::resources const& res, graph::sort_knn_graph(res, dataset, knn_graph_internal); } +template , memory_type::host>> +void optimize(raft::resources const& res, + mdspan, row_major, g_accessor> knn_graph, + raft::host_matrix_view new_graph) +{ + using internal_IdxT = typename std::make_unsigned::type; + + auto new_graph_internal = raft::make_host_matrix_view( + reinterpret_cast(new_graph.data_handle()), + new_graph.extent(0), + new_graph.extent(1)); + + using g_accessor_internal = + host_device_accessor, memory_type::host>; + auto knn_graph_internal = + mdspan, row_major, g_accessor_internal>( + reinterpret_cast(knn_graph.data_handle()), + knn_graph.extent(0), + knn_graph.extent(1)); + + cagra::detail::graph::optimize(res, knn_graph_internal, new_graph_internal); +} + +template , memory_type::host>> +index build(raft::resources const& res, + const index_params& params, + mdspan, row_major, Accessor> dataset, + std::optional refine_rate = std::nullopt, + std::optional build_params = std::nullopt, + std::optional search_params = std::nullopt) +{ + size_t intermediate_degree = params.intermediate_graph_degree; + size_t graph_degree = params.graph_degree; + if (intermediate_degree >= static_cast(dataset.extent(0))) { + RAFT_LOG_WARN( + "Intermediate graph degree cannot be larger than dataset size, reducing it to %lu", + dataset.extent(0)); + intermediate_degree = dataset.extent(0) - 1; + } + if (intermediate_degree < graph_degree) { + RAFT_LOG_WARN( + "Graph degree (%lu) cannot be larger than intermediate 
graph degree (%lu), reducing " + "graph_degree.", + graph_degree, + intermediate_degree); + graph_degree = intermediate_degree; + } + + std::optional> knn_graph( + raft::make_host_matrix(dataset.extent(0), intermediate_degree)); + + if (params.build_algo == graph_build_algo::IVF_PQ) { + build_knn_graph(res, dataset, knn_graph->view(), refine_rate, build_params, search_params); + + } else { + // Use nn-descent to build CAGRA knn graph + auto nn_descent_params = experimental::nn_descent::index_params(); + nn_descent_params.graph_degree = intermediate_degree; + nn_descent_params.intermediate_graph_degree = 1.5 * intermediate_degree; + nn_descent_params.max_iterations = params.nn_descent_niter; + build_knn_graph(res, dataset, knn_graph->view(), nn_descent_params); + } + + auto cagra_graph = raft::make_host_matrix(dataset.extent(0), graph_degree); + + optimize(res, knn_graph->view(), cagra_graph.view()); + + // free intermediate graph before trying to create the index + knn_graph.reset(); + + // Construct an index from dataset and optimized knn graph. 
+ return index(res, params.metric, dataset, raft::make_const_mdspan(cagra_graph.view())); +} } // namespace raft::neighbors::cagra::detail From 4f46e1f3e7586d8a1cae34f7ca3b92bb7e64667a Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 19 Oct 2023 01:21:56 +0200 Subject: [PATCH 03/10] Process refine ratio input param --- cpp/bench/ann/src/raft/raft_benchmark.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index b82f8c0b4c..6e5c71ae69 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -168,6 +168,7 @@ void parse_build_param(const nlohmann::json& conf, typename raft::bench::ann::RaftIvfPQ::SearchParam sparam; parse_search_param(conf.at("ivf_pq_search_params"), sparam); param.ivf_pq_search_params = sparam.pq_param; + param.ivf_pq_refine_rate = sparam.refine_ratio; } } From a281964117b14956d2dc4015807b5b2b4baa1ed1 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 19 Oct 2023 01:24:55 +0200 Subject: [PATCH 04/10] add wiki_all.json config --- .../src/raft-ann-bench/run/conf/wiki_all.json | 248 ++++++++++++++++++ 1 file changed, 248 insertions(+) create mode 100755 python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all.json diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all.json new file mode 100755 index 0000000000..1de3757f6d --- /dev/null +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all.json @@ -0,0 +1,248 @@ +{ + "dataset": { + "name": "wiki_all-88M", + "base_file": "wiki/all/base.88M.fbin", + "query_file": "wiki/all/queries.fbin", + "groundtruth_neighbors_file": "wiki/all/groundtruth.88M.neighbors.ibin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 10000, + "k": 10 + }, + "index": [ + { + "name": "hnswlib.M16.ef50", + "algo": "hnswlib", + "build_param": { "M": 16, 
"efConstruction": 50, "numThreads": 56 }, + "file": "wiki_all/hnswlib/M16.ef50", + "search_params": [ + { "ef": 10, "numThreads": 56 }, + { "ef": 20, "numThreads": 56 }, + { "ef": 40, "numThreads": 56 }, + { "ef": 60, "numThreads": 56 }, + { "ef": 80, "numThreads": 56 }, + { "ef": 120, "numThreads": 56 }, + { "ef": 200, "numThreads": 56 }, + { "ef": 400, "numThreads": 56 }, + { "ef": 600, "numThreads": 56 }, + { "ef": 800, "numThreads": 56 } + ] + }, + { + "name": "faiss_ivf_pq.M32-nlist16K", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "M": 32, + "nlist": 16384, + "ratio": 2 + }, + "file": "wiki_all/faiss_ivf_pq/M32-nlist16K_ratio2", + "search_params": [ + { "nprobe": 10 }, + { "nprobe": 20 }, + { "nprobe": 30 }, + { "nprobe": 40 }, + { "nprobe": 50 }, + { "nprobe": 100 }, + { "nprobe": 200 }, + { "nprobe": 500 } + ] + }, + { + "name": "faiss_ivf_pq.M64-nlist16K", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "M": 64, + "nlist": 16384, + "ratio": 2 + }, + "file": "wiki_all/faiss_ivf_pq/M64-nlist16K_ratio2", + "search_params": [ + { "nprobe": 10 }, + { "nprobe": 20 }, + { "nprobe": 30 }, + { "nprobe": 40 }, + { "nprobe": 50 }, + { "nprobe": 100 }, + { "nprobe": 200 }, + { "nprobe": 500 } + ] + }, + { + "name": "raft_ivf_pq.d128-nlist16K", + "algo": "raft_ivf_pq", + "build_param": { + "pq_dim": 128, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "file": "wiki_all/raft_ivf_pq/d128-nlist16K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 200, "internalDistanceDtype": 
"half", "smemLutDtype": "half", "refine_ratio": 1 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 } + ] + }, + { + "name": "raft_ivf_pq.d64-nlist16K", + "algo": "raft_ivf_pq", + "build_param": { + "pq_dim": 64, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "file": "wiki_all/raft_ivf_pq/d64-nlist16K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 } + ] + }, + { + "name": "raft_ivf_pq.d32-nlist16K", + "algo": "raft_ivf_pq", + "build_param": { + "pq_dim": 32, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "file": "wiki_all/raft_ivf_pq/d32-nlist16K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 } + 
] + }, + { + "name": "raft_ivf_pq.d32X-nlist16K", + "algo": "raft_ivf_pq", + "build_param": { + "pq_dim": 32, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "file": "wiki_all/raft_ivf_pq/d32-nlist16K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { 
"nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 } + + ] + }, + { + "name": "raft_cagra.dim32.single_cta", + "algo": "raft_cagra", + "build_param": { + "graph_degree": 32, + "intermediate_graph_degree": 48 , + "ivf_pq_build_params": { + "pq_dim": 32, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "ivf_pq_search_params": { "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }}, + "file": "/workspace1/index/wiki_all/raft_cagra/dim32.ibin", + "search_params": [ + { "itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "single_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "single_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 36, "algo": "single_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 40, "algo": "single_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 44, "algo": "single_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 48, "algo": "single_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 16, "algo": "single_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 24, "algo": "single_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 26, "algo": "single_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 32, "algo": "single_cta" }, + { "itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "single_cta" }, + { "itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "single_cta" }, + { "itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "single_cta" }, + { "itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "single_cta" }, + { "itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "single_cta" }, + { "itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "single_cta" }, + { "itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "single_cta" }, + { "itopk": 256, "search_width": 8, 
"max_iterations": 32, "algo": "single_cta" }, + { "itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "single_cta" }, + { "itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "single_cta" } + ] + }, + { + "name": "raft_cagra.dim32.multi_cta", + "algo": "raft_cagra", + "build_param": { + "graph_degree": 32, + "intermediate_graph_degree": 48, + "ivf_pq_build_params": { + "pq_dim": 32, + "pq_bits": 8, + "nlist": 16384, + "niter": 10, + "ratio": 10 + }, + "ivf_pq_search_params": { "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 } + }, + "file": "/workspace1/index/wiki_all/raft_cagra/dim32.ibin", + "search_params": [ + { "itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 36, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 40, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 44, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 1, "max_iterations": 48, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 16, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 24, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 26, "algo": "multi_cta" }, + { "itopk": 32, "search_width": 2, "max_iterations": 32, "algo": "multi_cta" }, + { "itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_cta" }, + { "itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_cta" }, + { "itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_cta" }, + { "itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_cta" }, + { "itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_cta" }, + { "itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_cta" }, + { "itopk": 192, 
"search_width": 2, "max_iterations": 96, "algo": "multi_cta" }, + { "itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_cta" }, + { "itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_cta" }, + { "itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_cta" } + ] + } + + ] +} From 96946456a60d1d56ee7d0df0529411026194c13d Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Mon, 6 Nov 2023 00:29:05 +0100 Subject: [PATCH 05/10] Specify nn_descent_params through benchmark config file --- cpp/bench/ann/src/raft/raft_benchmark.cu | 19 +++++++++++++ cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 4 +++ .../neighbors/detail/cagra/cagra_build.cuh | 28 +++++++++++-------- 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index 404e56d036..e9cbfb4811 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -139,6 +139,20 @@ void parse_search_param(const nlohmann::json& conf, #endif #ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA +template +void parse_build_param(const nlohmann::json& conf, + raft::neighbors::experimental::nn_descent::index_params& param) +{ + if (conf.contains("graph_degree")) { param.graph_degree = conf.at("graph_degree"); } + if (conf.contains("intermediate_graph_degree")) { + param.graph_degree = conf.at("intermediate_graph_degree"); + } + if (conf.contains("max_iterations")) { param.graph_degree = conf.at("max_iterations"); } + if (conf.contains("termination_threshold")) { + param.graph_degree = conf.at("termination_threshold"); + } +} + template void parse_build_param(const nlohmann::json& conf, typename raft::bench::ann::RaftCagra::BuildParam& param) @@ -171,6 +185,11 @@ void parse_build_param(const nlohmann::json& conf, param.ivf_pq_search_params = sparam.pq_param; param.ivf_pq_refine_rate = sparam.refine_ratio; } + if (conf.contains("nn_descent_params")) { + 
raft::neighbors::experimental::nn_descent::index_params nn_param; + parse_build_param(conf.at("nn_descent_params"), nn_param); + param.nn_descent_params = nn_param; + } } template diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 337c597b0c..73fae027bc 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +56,8 @@ class RaftCagra : public ANN { struct BuildParam { raft::neighbors::cagra::index_params cagra_params; + std::optional nn_descent_params = + std::nullopt; std::optional ivf_pq_refine_rate = std::nullopt; std::optional ivf_pq_build_params = std::nullopt; std::optional ivf_pq_search_params = std::nullopt; @@ -116,6 +119,7 @@ void RaftCagra::build(const T* dataset, size_t nrow, cudaStream_t) index_.emplace(raft::neighbors::cagra::detail::build(handle_, params, dataset_view, + index_params_.nn_descent_params, index_params_.ivf_pq_refine_rate, index_params_.ivf_pq_build_params, index_params_.ivf_pq_search_params)); diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh index acc3f995d9..ddaf77a22f 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh @@ -293,12 +293,14 @@ template , memory_type::host>> -index build(raft::resources const& res, - const index_params& params, - mdspan, row_major, Accessor> dataset, - std::optional refine_rate = std::nullopt, - std::optional build_params = std::nullopt, - std::optional search_params = std::nullopt) +index build( + raft::resources const& res, + const index_params& params, + mdspan, row_major, Accessor> dataset, + std::optional nn_descent_params = std::nullopt, + std::optional refine_rate = std::nullopt, + std::optional pq_build_params = std::nullopt, + std::optional 
search_params = std::nullopt) { size_t intermediate_degree = params.intermediate_graph_degree; size_t graph_degree = params.graph_degree; @@ -321,15 +323,17 @@ index build(raft::resources const& res, raft::make_host_matrix(dataset.extent(0), intermediate_degree)); if (params.build_algo == graph_build_algo::IVF_PQ) { - build_knn_graph(res, dataset, knn_graph->view(), refine_rate, build_params, search_params); + build_knn_graph(res, dataset, knn_graph->view(), refine_rate, pq_build_params, search_params); } else { // Use nn-descent to build CAGRA knn graph - auto nn_descent_params = experimental::nn_descent::index_params(); - nn_descent_params.graph_degree = intermediate_degree; - nn_descent_params.intermediate_graph_degree = 1.5 * intermediate_degree; - nn_descent_params.max_iterations = params.nn_descent_niter; - build_knn_graph(res, dataset, knn_graph->view(), nn_descent_params); + if (!nn_descent_params) { + nn_descent_params = experimental::nn_descent::index_params(); + nn_descent_params->graph_degree = intermediate_degree; + nn_descent_params->intermediate_graph_degree = 1.5 * intermediate_degree; + nn_descent_params->max_iterations = params.nn_descent_niter; + } + build_knn_graph(res, dataset, knn_graph->view(), *nn_descent_params); } auto cagra_graph = raft::make_host_matrix(dataset.extent(0), graph_degree); From 37e35e834e3c023e69f82e6e9450eb31f5b1458d Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Mon, 6 Nov 2023 15:05:45 +0100 Subject: [PATCH 06/10] Fix template params --- cpp/include/raft/neighbors/cagra.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/raft/neighbors/cagra.cuh b/cpp/include/raft/neighbors/cagra.cuh index 73fb012f37..384ed05e1f 100644 --- a/cpp/include/raft/neighbors/cagra.cuh +++ b/cpp/include/raft/neighbors/cagra.cuh @@ -275,7 +275,7 @@ index build(raft::resources const& res, const index_params& params, mdspan, row_major, Accessor> dataset) { - return detail::build(res, params, dataset); + return 
detail::build(res, params, dataset); } /** From 5704e97cd65b7bd2cd35bc68a74da5fb96d9ec06 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 7 Nov 2023 02:29:18 +0100 Subject: [PATCH 07/10] Flatten CAGRA build config --- cpp/bench/ann/src/raft/raft_benchmark.cu | 51 +++- .../src/raft-ann-bench/run/conf/wiki_all.json | 248 ------------------ .../raft-ann-bench/run/conf/wiki_all_1M.json | 23 +- 3 files changed, 57 insertions(+), 265 deletions(-) delete mode 100755 python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all.json diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index e9cbfb4811..fb7d83a829 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -77,7 +78,7 @@ template void parse_build_param(const nlohmann::json& conf, typename raft::bench::ann::RaftIvfPQ::BuildParam& param) { - param.n_lists = conf.at("nlist"); + if (conf.contains("nlist")) { param.n_lists = conf.at("nlist"); } if (conf.contains("niter")) { param.kmeans_n_iters = conf.at("niter"); } if (conf.contains("ratio")) { param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio"); } if (conf.contains("pq_bits")) { param.pq_bits = conf.at("pq_bits"); } @@ -99,7 +100,7 @@ template void parse_search_param(const nlohmann::json& conf, typename raft::bench::ann::RaftIvfPQ::SearchParam& param) { - param.pq_param.n_probes = conf.at("nprobe"); + if (conf.contains("nprobe")) { param.pq_param.n_probes = conf.at("nprobe"); } if (conf.contains("internalDistanceDtype")) { std::string type = conf.at("internalDistanceDtype"); if (type == "float") { @@ -145,12 +146,28 @@ void parse_build_param(const nlohmann::json& conf, { if (conf.contains("graph_degree")) { param.graph_degree = conf.at("graph_degree"); } if (conf.contains("intermediate_graph_degree")) { - param.graph_degree = conf.at("intermediate_graph_degree"); + 
param.intermediate_graph_degree = conf.at("intermediate_graph_degree"); } - if (conf.contains("max_iterations")) { param.graph_degree = conf.at("max_iterations"); } + // we allow niter shorthand for max_iterations + if (conf.contains("niter")) { param.max_iterations = conf.at("niter"); } + if (conf.contains("max_iterations")) { param.max_iterations = conf.at("max_iterations"); } if (conf.contains("termination_threshold")) { - param.graph_degree = conf.at("termination_threshold"); + param.termination_threshold = conf.at("termination_threshold"); + } +} + +nlohmann::json collect_conf_with_prefix(const nlohmann::json& conf, + const std::string& prefix, + bool remove_prefix = true) +{ + nlohmann::json out; + for (auto& i : conf.items()) { + if (i.key().compare(0, prefix.size(), prefix) == 0) { + auto new_key = remove_prefix ? i.key().substr(prefix.size()) : i.key(); + out[new_key] = i.value(); + } } + return out; } template @@ -171,23 +188,29 @@ void parse_build_param(const nlohmann::json& conf, param.cagra_params.build_algo = raft::neighbors::cagra::graph_build_algo::NN_DESCENT; } } - if (conf.contains("nn_descent_niter")) { - param.cagra_params.nn_descent_niter = conf.at("nn_descent_niter"); - } - if (conf.contains("ivf_pq_build_params")) { + nlohmann::json ivf_pq_build_conf = collect_conf_with_prefix(conf, "ivf_pq_build_"); + if (!ivf_pq_build_conf.empty()) { raft::neighbors::ivf_pq::index_params bparam; - parse_build_param(conf.at("ivf_pq_build_params"), bparam); + parse_build_param(ivf_pq_build_conf, bparam); param.ivf_pq_build_params = bparam; } - if (conf.contains("ivf_pq_search_params")) { + nlohmann::json ivf_pq_search_conf = collect_conf_with_prefix(conf, "ivf_pq_search_"); + if (!ivf_pq_search_conf.empty()) { typename raft::bench::ann::RaftIvfPQ::SearchParam sparam; - parse_search_param(conf.at("ivf_pq_search_params"), sparam); + parse_search_param(ivf_pq_search_conf, sparam); param.ivf_pq_search_params = sparam.pq_param; param.ivf_pq_refine_rate = 
sparam.refine_ratio; } - if (conf.contains("nn_descent_params")) { + nlohmann::json nn_descent_conf = collect_conf_with_prefix(conf, "nn_descent_"); + if (!nn_descent_conf.empty()) { raft::neighbors::experimental::nn_descent::index_params nn_param; - parse_build_param(conf.at("nn_descent_params"), nn_param); + nn_param.intermediate_graph_degree = 1.5 * param.cagra_params.intermediate_graph_degree; + parse_build_param(nn_descent_conf, nn_param); + if (nn_param.graph_degree != param.cagra_params.intermediate_graph_degree) { + RAFT_LOG_WARN( + "nn_descent_graph_degree has to be equal to CAGRA intermediate_grpah_degree, overriding"); + nn_param.graph_degree = param.cagra_params.intermediate_graph_degree; + } param.nn_descent_params = nn_param; } } diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all.json deleted file mode 100755 index 1de3757f6d..0000000000 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all.json +++ /dev/null @@ -1,248 +0,0 @@ -{ - "dataset": { - "name": "wiki_all-88M", - "base_file": "wiki/all/base.88M.fbin", - "query_file": "wiki/all/queries.fbin", - "groundtruth_neighbors_file": "wiki/all/groundtruth.88M.neighbors.ibin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 10000, - "k": 10 - }, - "index": [ - { - "name": "hnswlib.M16.ef50", - "algo": "hnswlib", - "build_param": { "M": 16, "efConstruction": 50, "numThreads": 56 }, - "file": "wiki_all/hnswlib/M16.ef50", - "search_params": [ - { "ef": 10, "numThreads": 56 }, - { "ef": 20, "numThreads": 56 }, - { "ef": 40, "numThreads": 56 }, - { "ef": 60, "numThreads": 56 }, - { "ef": 80, "numThreads": 56 }, - { "ef": 120, "numThreads": 56 }, - { "ef": 200, "numThreads": 56 }, - { "ef": 400, "numThreads": 56 }, - { "ef": 600, "numThreads": 56 }, - { "ef": 800, "numThreads": 56 } - ] - }, - { - "name": "faiss_ivf_pq.M32-nlist16K", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "M": 
32, - "nlist": 16384, - "ratio": 2 - }, - "file": "wiki_all/faiss_ivf_pq/M32-nlist16K_ratio2", - "search_params": [ - { "nprobe": 10 }, - { "nprobe": 20 }, - { "nprobe": 30 }, - { "nprobe": 40 }, - { "nprobe": 50 }, - { "nprobe": 100 }, - { "nprobe": 200 }, - { "nprobe": 500 } - ] - }, - { - "name": "faiss_ivf_pq.M64-nlist16K", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "M": 64, - "nlist": 16384, - "ratio": 2 - }, - "file": "wiki_all/faiss_ivf_pq/M64-nlist16K_ratio2", - "search_params": [ - { "nprobe": 10 }, - { "nprobe": 20 }, - { "nprobe": 30 }, - { "nprobe": 40 }, - { "nprobe": 50 }, - { "nprobe": 100 }, - { "nprobe": 200 }, - { "nprobe": 500 } - ] - }, - { - "name": "raft_ivf_pq.d128-nlist16K", - "algo": "raft_ivf_pq", - "build_param": { - "pq_dim": 128, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "file": "wiki_all/raft_ivf_pq/d128-nlist16K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 1 } - ] - }, - { - "name": "raft_ivf_pq.d64-nlist16K", - "algo": "raft_ivf_pq", - "build_param": { - "pq_dim": 64, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "file": "wiki_all/raft_ivf_pq/d64-nlist16K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": 
"half", "refine_ratio": 4 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 } - ] - }, - { - "name": "raft_ivf_pq.d32-nlist16K", - "algo": "raft_ivf_pq", - "build_param": { - "pq_dim": 32, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "file": "wiki_all/raft_ivf_pq/d32-nlist16K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 32 } - ] - }, - { - "name": "raft_ivf_pq.d32X-nlist16K", - "algo": "raft_ivf_pq", - "build_param": { - "pq_dim": 32, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "file": "wiki_all/raft_ivf_pq/d32-nlist16K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 50, 
"internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 16 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 8 }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 } - - ] - }, - { - "name": "raft_cagra.dim32.single_cta", - "algo": "raft_cagra", - "build_param": { - "graph_degree": 32, - "intermediate_graph_degree": 48 , - "ivf_pq_build_params": { - "pq_dim": 32, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "ivf_pq_search_params": { "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }}, - "file": "/workspace1/index/wiki_all/raft_cagra/dim32.ibin", - "search_params": 
[ - { "itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "single_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "single_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 36, "algo": "single_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 40, "algo": "single_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 44, "algo": "single_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 48, "algo": "single_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 16, "algo": "single_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 24, "algo": "single_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 26, "algo": "single_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 32, "algo": "single_cta" }, - { "itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "single_cta" }, - { "itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "single_cta" }, - { "itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "single_cta" }, - { "itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "single_cta" }, - { "itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "single_cta" }, - { "itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "single_cta" }, - { "itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "single_cta" }, - { "itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "single_cta" }, - { "itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "single_cta" }, - { "itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "single_cta" } - ] - }, - { - "name": "raft_cagra.dim32.multi_cta", - "algo": "raft_cagra", - "build_param": { - "graph_degree": 32, - "intermediate_graph_degree": 48, - "ivf_pq_build_params": { - "pq_dim": 32, - "pq_bits": 8, - "nlist": 16384, - "niter": 10, - "ratio": 10 - }, - "ivf_pq_search_params": { "nprobe": 10, "internalDistanceDtype": "half", 
"smemLutDtype": "half", "refine_ratio": 4 } - }, - "file": "/workspace1/index/wiki_all/raft_cagra/dim32.ibin", - "search_params": [ - { "itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 36, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 40, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 44, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 1, "max_iterations": 48, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 16, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 24, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 26, "algo": "multi_cta" }, - { "itopk": 32, "search_width": 2, "max_iterations": 32, "algo": "multi_cta" }, - { "itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_cta" }, - { "itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_cta" }, - { "itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_cta" }, - { "itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_cta" }, - { "itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_cta" }, - { "itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_cta" }, - { "itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_cta" }, - { "itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_cta" }, - { "itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_cta" }, - { "itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_cta" } - ] - } - - ] -} diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_1M.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_1M.json index 6eb72a65a1..8c8d3c3da0 100644 --- 
a/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_1M.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_1M.json @@ -1,9 +1,10 @@ { "dataset": { "name": "wiki_all_1M", - "base_file": "wiki_all_1M/base.88M.fbin", + "base_file": "wiki_all_1M/base.1M.fbin", + "subset_size": 1000000, "query_file": "wiki_all_1M/queries.fbin", - "groundtruth_neighbors_file": "wiki_all_1M/groundtruth.88M.neighbors.ibin", + "groundtruth_neighbors_file": "wiki_all_1M/groundtruth.1M.neighbors.ibin", "distance": "euclidean" }, "search_basic_param": { @@ -169,7 +170,23 @@ { "name": "raft_cagra.dim32.multi_cta", "algo": "raft_cagra", - "build_param": { "graph_degree": 32, "intermediate_graph_degree": 48 }, + "build_param": { "graph_degree": 32, + "intermediate_graph_degree": 48, + "graph_build_algo": "NN_DESCENT", + "ivf_pq_build_pq_dim": 32, + "ivf_pq_build_pq_bits": 8, + "ivf_pq_build_nlist": 16384, + "ivf_pq_build_niter": 10, + "ivf_pq_build_ratio": 10, + "ivf_pq_search_nprobe": 30, + "ivf_pq_search_internalDistanceDtype": "half", + "ivf_pq_search_smemLutDtype": "half", + "ivf_pq_search_refine_ratio": 8, + "nn_descent_max_iterations": 10, + "nn_descent_graph_degree": 48, + "nn_descent_intermediate_graph_degree": 72, + "nn_descent_termination_threshold": 0.001 + }, "file": "wiki_all_1M/raft_cagra/dim32.ibin", "search_params": [ { "itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_cta" }, From 9a7c36c21b77c24431f5a83dbae20fa8ccf01b14 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 7 Nov 2023 03:03:14 +0100 Subject: [PATCH 08/10] Edited tuning guide --- docs/source/ann_benchmarks_param_tuning.md | 24 +++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index d787a96955..39d0227d1c 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -53,7 +53,6 @@ CAGRA uses a 
graph-based index, which creates an intermediate, approximate kNN g | `graph_degree` | `build_param` | N | Positive Integer >0 | 64 | Degree of the final kNN graph index. | | `intermediate_graph_degree` | `build_param` | N | Positive Integer >0 | 128 | Degree of the intermediate kNN graph. | | `graph_build_algo` | `build_param` | N | ["IVF_PQ", "NN_DESCENT"] | "IVF_PQ" | Algorithm to use for search | -| `nn_descent_niter` | `build_param` | N | Positive Integer>0 | 20 | Number of iterations if using NN_DESCENT. | | `dataset_memory_type` | `build_param` | N | ["device", "host", "mmap"] | "device" | What memory type should the dataset reside? | | `query_memory_type` | `search_params` | N | ["device", "host", "mmap"] | "device | What memory type should the queries reside? | | `itopk` | `search_wdith` | N | Positive Integer >0 | 64 | Number of intermediate search results retained during the search. Higher values improve search accuracy at the cost of speed. | @@ -61,6 +60,29 @@ CAGRA uses a graph-based index, which creates an intermediate, approximate kNN g | `max_iterations` | `search_param` | N | Integer >=0 | 0 | Upper limit of search iterations. Auto select when 0. | | `algo` | `search_param` | N | string | "auto" | Algorithm to use for search. Possible values: {"auto", "single_cta", "multi_cta", "multi_kernel"} | +To fine tune CAGRA index building we can customize IVF-PQ index builder options using the following settings. These take effect only if `graph_build_algo == "IVF_PQ"`. It is recommended to experiment using a separate IVF-PQ index to find the config that gives the largest QPS for large batch. Recall does not need to be very high, since CAGRA further optimizes the kNN neighbor graph. Some of the default values are derived from the dataset size which is assumed to be [n_vecs, dim]. 
+ +| Parameter | Type | Required | Data Type | Default | Description | +|------------------------|----------------|---|----------------------------------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `ivf_pq_build_nlist` | `build_param` | N | Positive Integer >0 | n_vecs / 2500 | Number of clusters to partition the vectors into. Larger values will put fewer points into each cluster but this will impact index build time as more clusters need to be trained. | +| `ivf_pq_build_niter` | `build_param` | N | Positive Integer >0 | 25 | Number of k-means iterations to use when training the clusters. | +| `ivf_pq_build_ratio` | `build_param` | N | Positive Integer >0 | 10 | `1/ratio` is the fraction of the dataset points which should be used to train the clusters. | +| `ivf_pq_build_pq_dim` | `build_param` | N | Positive Integer. Multiple of 8. | dim/2 rounded up to 8 | Dimensionality of the vector after product quantization. When 0, a heuristic is used to select this value. `pq_dim` * `pq_bits` must be a multiple of 8. | +| `ivf_pq_build_pq_bits` | `build_param` | N | Positive Integer. [4-8] | 8 | Bit length of the vector element after quantization. | +| `ivf_pq_build_codebook_kind` | `build_param` | N | ["cluster", "subspace"] | "subspace" | Type of codebook. See the [API docs](https://docs.rapids.ai/api/raft/nightly/cpp_api/neighbors_ivf_pq/#_CPPv412codebook_gen) for more detail | +| `ivf_pq_search_nprobe` | `build_params` | N | Positive Integer >0 | min(2*dim, nlist) | The number of closest clusters to search for each query vector. | +| `ivf_pq_search_internalDistanceDtype` | `build_params` | N | [`float`, `half`] | `fp8` | The precision to use for the distance computations. Lower precision can increase performance at the cost of accuracy. 
| +| `ivf_pq_search_smemLutDtype` | `build_params` | N | [`float`, `half`, `fp8`] | `half` | The precision to use for the lookup table in shared memory. Lower precision can increase performance at the cost of accuracy. | +| `ivf_pq_search_refine_ratio` | `build_params` | N| Positive Number >=0 | 2 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. | + +Alternatively, if `graph_build_algo == "NN_DESCENT"`, then we can customize the following parameters +| Parameter | Type | Required | Data Type | Default | Description | +|-----------------------------|----------------|----------|----------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `nn_descent_niter` | `build_param` | N | Positive Integer>0 | 20 | Number of iterations if using NN_DESCENT. 
| +| `nn_descent_intermediate_graph_degree` | `build_param` | N | Positive Integer>0 | `intermediate_graph_degree` * 1.5 | Number of iterations if +| `nn_descent_max_iterations` | `build_param` | N | Positive Integer>0 | 20 | Alias for `nn_descent_niter` +| `nn_descent_termination_threshold` | `build_param` | N | Positive float>0 | 0.0001 | Number of iterations if +ch | ## FAISS Indexes From f38a7c2b1110aa3c9e5f6ea3ba8e30a43d82b689 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 7 Nov 2023 03:07:41 +0100 Subject: [PATCH 09/10] Edited tuning guide --- docs/source/ann_benchmarks_param_tuning.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index 39d0227d1c..cdc7958714 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -78,11 +78,10 @@ To fine tune CAGRA index building we can customize IVF-PQ index builder options Alternatively, if `graph_build_algo == "NN_DESCENT"`, then we can customize the following parameters | Parameter | Type | Required | Data Type | Default | Description | |-----------------------------|----------------|----------|----------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `nn_descent_niter` | `build_param` | N | Positive Integer>0 | 20 | Number of iterations if using NN_DESCENT. 
| -| `nn_descent_intermediate_graph_degree` | `build_param` | N | Positive Integer>0 | `intermediate_graph_degree` * 1.5 | Number of iterations if -| `nn_descent_max_iterations` | `build_param` | N | Positive Integer>0 | 20 | Alias for `nn_descent_niter` -| `nn_descent_termination_threshold` | `build_param` | N | Positive float>0 | 0.0001 | Number of iterations if -ch | +| `nn_descent_niter` | `build_param` | N | Positive Integer>0 | 20 | Number of NN Descent iterations. | +| `nn_descent_intermediate_graph_degree` | `build_param` | N | Positive Integer>0 | `intermediate_graph_degree` * 1.5 | Intermediate graph degree during NN descent iterations | +| `nn_descent_max_iterations` | `build_param` | N | Positive Integer>0 | 20 | Alias for `nn_descent_niter` | +| `nn_descent_termination_threshold` | `build_param` | N | Positive float>0 | 0.0001 | Termination threshold for NN descent. | ## FAISS Indexes From bd48c4fd49b111ba7bc5fa354e11aa42a4098416 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 7 Nov 2023 19:00:38 +0100 Subject: [PATCH 10/10] remove nn_descent_graph_degree from json --- .../raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_1M.json | 1 - 1 file changed, 1 deletion(-) diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_1M.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_1M.json index 8c8d3c3da0..2d1ec1e322 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_1M.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/wiki_all_1M.json @@ -183,7 +183,6 @@ "ivf_pq_search_smemLutDtype": "half", "ivf_pq_search_refine_ratio": 8, "nn_descent_max_iterations": 10, - "nn_descent_graph_degree": 48, "nn_descent_intermediate_graph_degree": 72, "nn_descent_termination_threshold": 0.001 },