From c59c9d194a7d255d1be3dd7083a3098b4a2a1672 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 11 Sep 2023 14:56:28 -0400 Subject: [PATCH] Various fixes to reproducible benchmarks (#1800) Authors: - Corey J. Nolet (https://github.com/cjnolet) - Artem M. Chirkin (https://github.com/achirkin) - Divye Gala (https://github.com/divyegala) - Dante Gama Dessavre (https://github.com/dantegd) Approvers: - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/raft/pull/1800 --- cpp/bench/ann/src/ggnn/ggnn_benchmark.cu | 3 +-- cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh | 22 ++++----------------- cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h | 6 ++++-- docs/source/ann_benchmarks_param_tuning.md | 2 +- 4 files changed, 10 insertions(+), 23 deletions(-) diff --git a/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu b/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu index 99481c2921..3b2e97062f 100644 --- a/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu +++ b/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu @@ -33,8 +33,7 @@ template void parse_build_param(const nlohmann::json& conf, typename raft::bench::ann::Ggnn::BuildParam& param) { - param.dataset_size = conf.at("dataset_size"); - param.k = conf.at("k"); + param.k = conf.at("k"); if (conf.contains("k_build")) { param.k_build = conf.at("k_build"); } if (conf.contains("segment_size")) { param.segment_size = conf.at("segment_size"); } diff --git a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh index 74c7cddc3c..664ec511dd 100644 --- a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh +++ b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh @@ -38,8 +38,6 @@ class Ggnn : public ANN { int num_layers{4}; // L float tau{0.5}; int refine_iterations{2}; - - size_t dataset_size; int k; // GGNN requires to know k during building }; @@ -182,12 +180,6 @@ GgnnImpl::GgnnImpl(Metric metric, } if (dim != D) { throw std::runtime_error("mis-matched dim"); } - - int device; - RAFT_CUDA_TRY(cudaGetDevice(&device)); - - ggnn_ = 
std::make_unique( - device, build_param_.dataset_size, build_param_.num_layers, true, build_param_.tau); } template @@ -195,11 +187,10 @@ void GgnnImpl::build(const T* dataset, size_t nrow, cudaStream_t stream) { - if (nrow != build_param_.dataset_size) { - throw std::runtime_error( - "build_param_.dataset_size = " + std::to_string(build_param_.dataset_size) + - " , but nrow = " + std::to_string(nrow)); - } + int device; + RAFT_CUDA_TRY(cudaGetDevice(&device)); + ggnn_ = std::make_unique( + device, nrow, build_param_.num_layers, true, build_param_.tau); ggnn_->set_base_data(dataset); ggnn_->set_stream(stream); @@ -212,11 +203,6 @@ void GgnnImpl::build(const T* dataset, template void GgnnImpl::set_search_dataset(const T* dataset, size_t nrow) { - if (nrow != build_param_.dataset_size) { - throw std::runtime_error( - "build_param_.dataset_size = " + std::to_string(build_param_.dataset_size) + - " , but nrow = " + std::to_string(nrow)); - } ggnn_->set_base_data(dataset); } diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h index 5cd33ef94d..4d7b993aa1 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h +++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h @@ -31,6 +31,8 @@ #include #include +#include + #include "../common/ann_types.hpp" #include @@ -164,13 +166,13 @@ class HnswLib : public ANN { struct BuildParam { int M; int ef_construction; - int num_threads{1}; + int num_threads = omp_get_num_procs(); }; using typename ANN::AnnSearchParam; struct SearchParam : public AnnSearchParam { int ef; - int num_threads{1}; + int num_threads = omp_get_num_procs(); }; HnswLib(Metric metric, int dim, const BuildParam& param); diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index 712d22f0aa..dd6090c5e2 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -99,4 +99,4 @@ IVF-pq is an inverted-file index, which partitions 
the vectors into a series of | `ef` | `search_param` | Y | Positive Integer >0 | | Size of the dynamic list for the nearest neighbors used for search. Higher value leads to more accurate but slower search. Cannot be lower than `k`. | | `numThreads` | `search_params` | N | Positive Integer >0 | 1 | Number of threads to use for queries. | -Please refer to [HNSW algorithm parameters guide](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md) from `hnswlib` to learn more about these arguments. +Please refer to [HNSW algorithm parameters guide](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md) from `hnswlib` to learn more about these arguments. \ No newline at end of file