From 61f0d943696f1676e1451a035d3da87106529a80 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Fri, 21 Jul 2023 05:04:25 +0200 Subject: [PATCH] Cagra ANN benchmark improvements (#1658) This PR improves CAGRA ANN benchmarks: - fixes search itopk parameter handling - adds more search and build parameters - improves logging Authors: - Tamas Bela Feher (https://github.com/tfeher) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1658 --- cpp/bench/ann/src/raft/raft_benchmark.cu | 7 +++- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 23 ++++++++----- .../neighbors/detail/cagra/cagra_build.cuh | 32 ++++++++++++------- .../raft/spatial/knn/detail/ann_utils.cuh | 2 +- 4 files changed, 42 insertions(+), 22 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index 22204c2b61..b43f52eb5c 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -132,13 +132,18 @@ void parse_build_param(const nlohmann::json& conf, param.graph_degree = conf.at("index_dim"); param.intermediate_graph_degree = param.graph_degree * 2; } + if (conf.contains("intermediate_graph_degree")) { + param.intermediate_graph_degree = conf.at("intermediate_graph_degree"); + } } template void parse_search_param(const nlohmann::json& conf, typename raft::bench::ann::RaftCagra::SearchParam& param) { - param.itopk_size = conf.at("itopk"); + if (conf.contains("itopk")) { param.p.itopk_size = conf.at("itopk"); } + if (conf.contains("search_width")) { param.p.num_parents = conf.at("search_width"); } + if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); } } #endif diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 399fd6a0a8..79ae746078 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -47,7 +47,7 @@ class RaftCagra : public ANN { using typename ANN::AnnSearchParam; struct SearchParam : public AnnSearchParam { - unsigned itopk_size; + raft::neighbors::experimental::cagra::search_params p; }; using BuildParam = raft::neighbors::experimental::cagra::index_params; @@ -71,7 +71,7 @@ class RaftCagra : public ANN { AlgoProperty get_property() const override { AlgoProperty property; - property.dataset_memory_type = MemoryType::Device; + property.dataset_memory_type = MemoryType::Host; property.query_memory_type = MemoryType::Device; property.need_dataset_when_search = true; return property; @@ -104,14 +104,24 @@ RaftCagra::RaftCagra(Metric metric, int dim, const BuildParam& param) template void RaftCagra::build(const T* dataset, size_t nrow, cudaStream_t) { - auto dataset_view = raft::make_device_matrix_view(dataset, IdxT(nrow), dimension_); - index_.emplace(raft::neighbors::experimental::cagra::build(handle_, index_params_, dataset_view)); + if (get_property().dataset_memory_type == MemoryType::Host) { + auto dataset_view = raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_); + index_.emplace( + raft::neighbors::experimental::cagra::build(handle_, index_params_, dataset_view)); + } else { + auto dataset_view = + raft::make_device_matrix_view(dataset, IdxT(nrow), dimension_); + index_.emplace( + raft::neighbors::experimental::cagra::build(handle_, index_params_, dataset_view)); + } return; } template void RaftCagra::set_search_param(const AnnSearchParam& param) { + auto search_param = dynamic_cast(param); + search_params_ = search_param.p; return; } @@ -146,11 +156,8 @@ void RaftCagra::search( auto neighbors_view = raft::make_device_matrix_view(neighbors_IdxT, batch_size, k); auto distances_view = raft::make_device_matrix_view(distances, batch_size, k); - raft::neighbors::experimental::cagra::search_params search_params; - search_params.max_queries = batch_size; - search_params.itopk_size = search_params_.max_queries; raft::neighbors::experimental::cagra::search( - handle_, search_params, *index_, queries_view, neighbors_view, distances_view); + handle_, search_params_, *index_, queries_view, neighbors_view, distances_view); if (!std::is_same::value) { raft::linalg::unaryOp(neighbors, diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh index 5c196471aa..d2bf7bf1ed 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh @@ -135,6 +135,9 @@ void build_knn_graph(raft::resources const& res, resource::get_cuda_stream(res), device_memory); + size_t next_report_offset = 0; + size_t d_report_offset = dataset.extent(0) / 100; // Report progress in 1% steps. + for (const auto& batch : vec_batches) { auto queries_view = raft::make_device_matrix_view( batch.data(), batch.size(), batch.row_width()); @@ -212,18 +215,23 @@ void build_knn_graph(raft::resources const& res, size_t num_queries_done = batch.offset() + batch.size(); const auto end_clock = std::chrono::system_clock::now(); - const auto time = - std::chrono::duration_cast(end_clock - start_clock).count() * 1e-6; - const auto throughput = num_queries_done / time; - RAFT_LOG_DEBUG( - "# Search %12lu / %12lu (%3.2f %%), %e queries/sec, %.2f minutes ETA, self included = " - "%3.2f %% \r", - num_queries_done, - dataset.extent(0), - num_queries_done / static_cast(dataset.extent(0)) * 100, - throughput, - (num_queries - num_queries_done) / throughput / 60, - static_cast(num_self_included) / num_queries_done * 100.); + if (batch.offset() > next_report_offset) { + next_report_offset += d_report_offset; + const auto time = + std::chrono::duration_cast(end_clock - start_clock).count() * + 1e-6; + const auto throughput = num_queries_done / time; + + RAFT_LOG_INFO( + "# Search %12lu / %12lu (%3.2f %%), %e queries/sec, %.2f minutes ETA, self included = " + "%3.2f %% \r", + num_queries_done, + dataset.extent(0), + num_queries_done / static_cast(dataset.extent(0)) * 100, + throughput, + (num_queries - num_queries_done) / throughput / 60, + static_cast(num_self_included) / num_queries_done * 100.); + } first = false; } if (!first) RAFT_LOG_DEBUG("# Finished building kNN graph"); diff --git a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh index 850b741dfd..1ce041d8da 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_utils.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_utils.cuh @@ -466,7 +466,7 @@ struct batch_load_iterator { if (source_ == nullptr) { return; } if (needs_copy_) { if (size() > 0) { - RAFT_LOG_DEBUG("batch_load_iterator::copy(offset = %zu, size = %zu, row_width = %zu)", + RAFT_LOG_TRACE("batch_load_iterator::copy(offset = %zu, size = %zu, row_width = %zu)", size_t(offset()), size_t(size()), size_t(row_width()));