diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index ecb5e366b5..7a9f0b99f8 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -410,7 +410,6 @@ void register_search(std::shared_ptr> dataset, auto* b = ::benchmark::RegisterBenchmark( index.name + suf, bench_search, index, i, dataset, metric_objective) ->Unit(benchmark::kMillisecond) - ->ThreadRange(threads[0], threads[1]) /** * The following are important for getting accuracy QPS measurements on both CPU * and GPU These make sure that @@ -420,6 +419,8 @@ void register_search(std::shared_ptr> dataset, */ ->MeasureProcessCPUTime() ->UseRealTime(); + + if (metric_objective == Objective::THROUGHPUT) { b->ThreadRange(threads[0], threads[1]); } } } } diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h index 23cae6352c..364da81f77 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h +++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h @@ -147,7 +147,6 @@ void HnswLib::build(const T* dataset, size_t nrow, cudaStream_t) char buf[20]; std::time_t now = std::time(nullptr); std::strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", std::localtime(&now)); - printf("%s building %zu / %zu\n", buf, i, items_per_thread); fflush(stdout); } @@ -163,13 +162,11 @@ void HnswLib::set_search_param(const AnnSearchParam& param_) auto param = dynamic_cast(param_); appr_alg_->ef_ = param.ef; metric_objective_ = param.metric_objective; + num_threads_ = param.num_threads; - bool use_pool = (metric_objective_ == Objective::LATENCY && param.num_threads > 1) && - (!thread_pool_ || num_threads_ != param.num_threads); - if (use_pool) { - num_threads_ = param.num_threads; - thread_pool_ = std::make_unique(num_threads_); - } + // Create a pool if multiple query threads have been set and the pool hasn't been created already + bool create_pool = (metric_objective_ == Objective::LATENCY && num_threads_ > 1 && !thread_pool_); + if (create_pool) { thread_pool_ = std::make_unique(num_threads_); } } template @@ -180,7 +177,7 @@ void HnswLib::search( // hnsw can only handle a single vector at a time. get_search_knn_results_(query + i * dim_, k, indices + i * k, distances + i * k); }; - if (metric_objective_ == Objective::LATENCY) { + if (metric_objective_ == Objective::LATENCY && num_threads_ > 1) { thread_pool_->submit(f, batch_size); } else { for (int i = 0; i < batch_size; i++) {