Skip to content

Commit

Permalink
Cagra ANN benchmark improvements (#1658)
Browse files Browse the repository at this point in the history
This PR improves CAGRA ANN benchmarks:
- fixes search itopk parameter handling
- adds more search and build parameters
- improves logging

Authors:
  - Tamas Bela Feher (https://github.com/tfeher)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: #1658
  • Loading branch information
tfeher authored Jul 21, 2023
1 parent dad78de commit 61f0d94
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 22 deletions.
7 changes: 6 additions & 1 deletion cpp/bench/ann/src/raft/raft_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,18 @@ void parse_build_param(const nlohmann::json& conf,
param.graph_degree = conf.at("index_dim");
param.intermediate_graph_degree = param.graph_degree * 2;
}
if (conf.contains("intermediate_graph_degree")) {
param.intermediate_graph_degree = conf.at("intermediate_graph_degree");
}
}

/**
 * @brief Fill the CAGRA search parameters from a JSON benchmark configuration.
 *
 * Every key is optional. A field of `param.p` is overwritten only when its key is present in
 * `conf`; absent keys leave the value already stored in `param.p` untouched.
 *
 * Keys and the field each one sets:
 *   - "itopk"          -> param.p.itopk_size
 *   - "search_width"   -> param.p.num_parents
 *   - "max_iterations" -> param.p.max_iterations
 *
 * @tparam T     Element type of the RaftCagra wrapper.
 * @tparam IdxT  Index type of the RaftCagra wrapper.
 * @param[in]  conf   JSON object holding the search settings.
 * @param[out] param  Search parameter wrapper whose nested `p` member is updated.
 */
template <typename T, typename IdxT>
void parse_search_param(const nlohmann::json& conf,
                        typename raft::bench::ann::RaftCagra<T, IdxT>::SearchParam& param)
{
  // Deliberately no unconditional conf.at("itopk"): at() throws when the key is missing, which
  // would make "itopk" mandatory and render the contains() guard below pointless.
  if (conf.contains("itopk")) { param.p.itopk_size = conf.at("itopk"); }
  if (conf.contains("search_width")) { param.p.num_parents = conf.at("search_width"); }
  if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); }
}
#endif

Expand Down
23 changes: 15 additions & 8 deletions cpp/bench/ann/src/raft/raft_cagra_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class RaftCagra : public ANN<T> {
using typename ANN<T>::AnnSearchParam;

// Search-time settings for RaftCagra, layered on top of the generic AnnSearchParam.
struct SearchParam : public AnnSearchParam {
// NOTE(review): this page is a flattened diff; this member looks like the pre-change field
// that `p` supersedes. set_search_param() only copies `p` — confirm before relying on it.
unsigned itopk_size;
// CAGRA search parameters; set_search_param() copies this member into search_params_.
raft::neighbors::experimental::cagra::search_params p;
};

using BuildParam = raft::neighbors::experimental::cagra::index_params;
Expand All @@ -71,7 +71,7 @@ class RaftCagra : public ANN<T> {
AlgoProperty get_property() const override
{
AlgoProperty property;
property.dataset_memory_type = MemoryType::Device;
property.dataset_memory_type = MemoryType::Host;
property.query_memory_type = MemoryType::Device;
property.need_dataset_when_search = true;
return property;
Expand Down Expand Up @@ -104,14 +104,24 @@ RaftCagra<T, IdxT>::RaftCagra(Metric metric, int dim, const BuildParam& param)
/**
 * @brief Build the CAGRA index over a row-major dataset of `nrow` x `dimension_` elements.
 *
 * The dataset pointer is wrapped in a host or a device matrix view according to the memory type
 * reported by get_property(), and the index is built exactly once from that view. Wrapping the
 * pointer in a device view before checking the memory type would build the index a second time
 * and would treat a host pointer as device memory.
 *
 * @param[in] dataset  Pointer to the first element of the dataset; must reside in the memory
 *                     space reported by get_property().dataset_memory_type.
 * @param[in] nrow     Number of rows in the dataset.
 *
 * The trailing cudaStream_t argument is unnamed and unused by this function.
 */
template <typename T, typename IdxT>
void RaftCagra<T, IdxT>::build(const T* dataset, size_t nrow, cudaStream_t)
{
  if (get_property().dataset_memory_type == MemoryType::Host) {
    auto dataset_view = raft::make_host_matrix_view<const T, IdxT>(dataset, IdxT(nrow), dimension_);
    index_.emplace(
      raft::neighbors::experimental::cagra::build(handle_, index_params_, dataset_view));
  } else {
    auto dataset_view =
      raft::make_device_matrix_view<const T, IdxT>(dataset, IdxT(nrow), dimension_);
    index_.emplace(
      raft::neighbors::experimental::cagra::build(handle_, index_params_, dataset_view));
  }
}

/**
 * @brief Store the CAGRA search parameters carried by `param` for use by later searches.
 *
 * @param[in] param  Must refer to a RaftCagra::SearchParam; its `p` member is copied into
 *                   search_params_.
 * @throws std::bad_cast if `param` is not a SearchParam (reference dynamic_cast failure).
 */
template <typename T, typename IdxT>
void RaftCagra<T, IdxT>::set_search_param(const AnnSearchParam& param)
{
  // Bind by const reference: a plain `auto` would decay the reference and copy the whole
  // SearchParam just to read one member.
  const auto& search_param = dynamic_cast<const SearchParam&>(param);
  search_params_           = search_param.p;
}

Expand Down Expand Up @@ -146,11 +156,8 @@ void RaftCagra<T, IdxT>::search(
auto neighbors_view = raft::make_device_matrix_view<IdxT, IdxT>(neighbors_IdxT, batch_size, k);
auto distances_view = raft::make_device_matrix_view<float, IdxT>(distances, batch_size, k);

raft::neighbors::experimental::cagra::search_params search_params;
search_params.max_queries = batch_size;
search_params.itopk_size = search_params_.max_queries;
raft::neighbors::experimental::cagra::search(
handle_, search_params, *index_, queries_view, neighbors_view, distances_view);
handle_, search_params_, *index_, queries_view, neighbors_view, distances_view);

if (!std::is_same<IdxT, size_t>::value) {
raft::linalg::unaryOp(neighbors,
Expand Down
32 changes: 20 additions & 12 deletions cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ void build_knn_graph(raft::resources const& res,
resource::get_cuda_stream(res),
device_memory);

size_t next_report_offset = 0;
size_t d_report_offset = dataset.extent(0) / 100; // Report progress in 1% steps.

for (const auto& batch : vec_batches) {
auto queries_view = raft::make_device_matrix_view<const DataT, uint32_t>(
batch.data(), batch.size(), batch.row_width());
Expand Down Expand Up @@ -212,18 +215,23 @@ void build_knn_graph(raft::resources const& res,

size_t num_queries_done = batch.offset() + batch.size();
const auto end_clock = std::chrono::system_clock::now();
const auto time =
std::chrono::duration_cast<std::chrono::microseconds>(end_clock - start_clock).count() * 1e-6;
const auto throughput = num_queries_done / time;
RAFT_LOG_DEBUG(
"# Search %12lu / %12lu (%3.2f %%), %e queries/sec, %.2f minutes ETA, self included = "
"%3.2f %% \r",
num_queries_done,
dataset.extent(0),
num_queries_done / static_cast<double>(dataset.extent(0)) * 100,
throughput,
(num_queries - num_queries_done) / throughput / 60,
static_cast<double>(num_self_included) / num_queries_done * 100.);
if (batch.offset() > next_report_offset) {
next_report_offset += d_report_offset;
const auto time =
std::chrono::duration_cast<std::chrono::microseconds>(end_clock - start_clock).count() *
1e-6;
const auto throughput = num_queries_done / time;

RAFT_LOG_INFO(
"# Search %12lu / %12lu (%3.2f %%), %e queries/sec, %.2f minutes ETA, self included = "
"%3.2f %% \r",
num_queries_done,
dataset.extent(0),
num_queries_done / static_cast<double>(dataset.extent(0)) * 100,
throughput,
(num_queries - num_queries_done) / throughput / 60,
static_cast<double>(num_self_included) / num_queries_done * 100.);
}
first = false;
}
if (!first) RAFT_LOG_DEBUG("# Finished building kNN graph");
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/raft/spatial/knn/detail/ann_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ struct batch_load_iterator {
if (source_ == nullptr) { return; }
if (needs_copy_) {
if (size() > 0) {
RAFT_LOG_DEBUG("batch_load_iterator::copy(offset = %zu, size = %zu, row_width = %zu)",
RAFT_LOG_TRACE("batch_load_iterator::copy(offset = %zu, size = %zu, row_width = %zu)",
size_t(offset()),
size_t(size()),
size_t(row_width()));
Expand Down

0 comments on commit 61f0d94

Please sign in to comment.