From c75cbc4e5a3af65693616c2543cc4e58dad9142c Mon Sep 17 00:00:00 2001 From: achirkin Date: Wed, 20 Nov 2024 15:29:02 +0100 Subject: [PATCH] Add tests --- cpp/test/CMakeLists.txt | 13 + cpp/test/neighbors/dynamic_batching.cuh | 286 ++++++++++++++++++ .../neighbors/dynamic_batching/test_cagra.cu | 82 +++++ .../dynamic_batching/test_ivf_flat.cu | 42 +++ .../neighbors/dynamic_batching/test_ivf_pq.cu | 39 +++ 5 files changed, 462 insertions(+) create mode 100644 cpp/test/neighbors/dynamic_batching.cuh create mode 100644 cpp/test/neighbors/dynamic_batching/test_cagra.cu create mode 100644 cpp/test/neighbors/dynamic_batching/test_ivf_flat.cu create mode 100644 cpp/test/neighbors/dynamic_batching/test_ivf_pq.cu diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 7754a5043..f45e28ae5 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -175,6 +175,19 @@ if(BUILD_TESTS) 100 ) + ConfigureTest( + NAME + NEIGHBORS_DYNAMIC_BATCHING_TEST + PATH + neighbors/dynamic_batching/test_cagra.cu + neighbors/dynamic_batching/test_ivf_flat.cu + neighbors/dynamic_batching/test_ivf_pq.cu + GPUS + 1 + PERCENT + 100 + ) + if(BUILD_CAGRA_HNSWLIB) ConfigureTest(NAME NEIGHBORS_HNSW_TEST PATH neighbors/hnsw.cu GPUS 1 PERCENT 100) target_link_libraries(NEIGHBORS_HNSW_TEST PRIVATE hnswlib::hnswlib) diff --git a/cpp/test/neighbors/dynamic_batching.cuh b/cpp/test/neighbors/dynamic_batching.cuh new file mode 100644 index 000000000..23c11eee4 --- /dev/null +++ b/cpp/test/neighbors/dynamic_batching.cuh @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "ann_utils.cuh" + +#include + +#include + +#include +#include +#include + +#include + +#include +#include +#include + +namespace cuvs::neighbors::dynamic_batching { + +struct dynamic_batching_spec { + int64_t n_queries = 1000; + int64_t n_rows = 100000; + int64_t dim = 128; + int64_t k = 10; + int64_t max_batch_size = 64; + size_t n_queues = 3; + bool conservative_dispatch = false; + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded; + int64_t max_concurrent_threads = 128; +}; + +inline ::std::ostream& operator<<(::std::ostream& os, const dynamic_batching_spec& p) +{ + os << "{n_queries=" << p.n_queries; + os << ", dataset shape=" << p.n_rows << "x" << p.dim; + os << ", metric=" << print_metric{p.metric}; + os << ", k=" << p.k; + os << ", max_batch_size=" << p.max_batch_size; + os << ", n_queues=" << p.n_queues; + os << ", conservative_dispatch=" << p.conservative_dispatch; + os << '}' << std::endl; + return os; +} + +template +using build_function = UpstreamT(const raft::resources&, + const typename UpstreamT::index_params_type&, + raft::device_matrix_view); + +template +using search_function = void(const raft::resources&, + const typename UpstreamT::search_params_type& params, + const UpstreamT& index, + raft::device_matrix_view, + raft::device_matrix_view, + raft::device_matrix_view, + const cuvs::neighbors::filtering::base_filter&); + +template UpstreamBuildF, + search_function UpstreamSearchF> +struct dynamic_batching_test : public ::testing::TestWithParam { + using distance_type = 
float; + using data_type = DataT; + using index_type = IdxT; + using upstream_type = UpstreamT; + + dynamic_batching_spec ps = ::testing::TestWithParam::GetParam(); + raft::resources res; + + // input data + std::optional> dataset = std::nullopt; + std::optional> queries = std::nullopt; + std::optional> neighbors_upsm = std::nullopt; + std::optional> neighbors_dynb = std::nullopt; + std::optional> distances_upsm = std::nullopt; + std::optional> distances_dynb = std::nullopt; + + // build parameters + cuvs::neighbors::index_params build_params_base{ps.metric}; + typename upstream_type::index_params_type build_params_upsm{build_params_base}; + dynamic_batching::index_params build_params_dynb{ + build_params_base, ps.k, ps.max_batch_size, ps.n_queues, ps.conservative_dispatch}; + + // search parameters + typename upstream_type::search_params_type search_params_upsm{}; + dynamic_batching::search_params search_params_dynb{}; + + // indexes + std::optional index_upsm = std::nullopt; + std::optional> index_dynb = std::nullopt; + + void build_all() + { + index_dynb.reset(); + index_upsm.reset(); + index_upsm = UpstreamBuildF(res, build_params_upsm, dataset->view()); + index_dynb.emplace(res, build_params_dynb, index_upsm.value(), search_params_upsm); + } + + void search_all() + { + // Search using upstream index - all queries at once + UpstreamSearchF(res, + search_params_upsm, + index_upsm.value(), + queries->view(), + neighbors_upsm->view(), + distances_upsm->view(), + filtering::none_sample_filter{}); + raft::resource::sync_stream(res); + + // Search with dynamic batching + // Streaming scenario: prepare concurrent resources + rmm::cuda_stream_pool worker_streams(ps.max_concurrent_threads); + std::vector> futures(ps.max_concurrent_threads); + std::vector resource_pool(0); + for (int64_t i = 0; i < ps.max_concurrent_threads; i++) { + resource_pool.push_back(res); // copies the resource + raft::resource::set_cuda_stream(resource_pool[i], worker_streams.get_stream(i)); + } 
+ + for (int64_t i = 0; i < ps.n_queries + ps.max_concurrent_threads; i++) { + auto j = i % ps.max_concurrent_threads; + // wait for previous job in the same slot to finish + if (i >= ps.max_concurrent_threads) { futures[j].wait(); } + // submit a new job + if (i < ps.n_queries) { + futures[j] = + std::async(std::launch::async, + [&res = resource_pool[j], + ¶ms = search_params_dynb, + index = index_dynb.value(), + query_view = raft::make_device_matrix_view( + queries->data_handle() + i * ps.dim, 1, ps.dim), + neighbors_view = raft::make_device_matrix_view( + neighbors_dynb->data_handle() + i * ps.k, 1, ps.k), + distances_view = raft::make_device_matrix_view( + distances_dynb->data_handle() + i * ps.k, 1, ps.k)]() { + dynamic_batching::search( + res, params, index, query_view, neighbors_view, distances_view); + }); + } else { + // finalize all resources + raft::resource::sync_stream(resource_pool[j]); + } + } + + raft::resource::sync_stream(res); + } + + /* + Check the dynamic batching generated neighbors against the upstream index. They both may be + imperfect w.r.t. the ground truth, but they shouldn't differ too much. 
+ */ + void check_neighbors() + { + auto stream = raft::resource::get_cuda_stream(res); + size_t queries_size = ps.n_queries * ps.k; + std::vector neighbors_upsm_host(queries_size); + std::vector neighbors_dynb_host(queries_size); + std::vector distances_upsm_host(queries_size); + std::vector distances_dynb_host(queries_size); + raft::copy(neighbors_upsm_host.data(), neighbors_upsm->data_handle(), queries_size, stream); + raft::copy(neighbors_dynb_host.data(), neighbors_dynb->data_handle(), queries_size, stream); + raft::copy(distances_upsm_host.data(), distances_upsm->data_handle(), queries_size, stream); + raft::copy(distances_dynb_host.data(), distances_dynb->data_handle(), queries_size, stream); + raft::resource::sync_stream(res); + ASSERT_TRUE(eval_neighbours(neighbors_upsm_host, + neighbors_dynb_host, + distances_upsm_host, + distances_dynb_host, + ps.n_queries, + ps.k, + 0.001, + 0.9)) + << ps; + } + + void SetUp() override + { + dataset.emplace(raft::make_device_matrix(res, ps.n_rows, ps.dim)); + queries.emplace(raft::make_device_matrix(res, ps.n_queries, ps.dim)); + neighbors_upsm.emplace(raft::make_device_matrix(res, ps.n_queries, ps.k)); + neighbors_dynb.emplace(raft::make_device_matrix(res, ps.n_queries, ps.k)); + distances_upsm.emplace( + raft::make_device_matrix(res, ps.n_queries, ps.k)); + distances_dynb.emplace( + raft::make_device_matrix(res, ps.n_queries, ps.k)); + + raft::random::RngState rng(666ULL); + if constexpr (std::is_same_v || std::is_same_v) { + raft::random::uniform( + res, rng, dataset->data_handle(), dataset->size(), data_type(0.1), data_type(2.0)); + raft::random::uniform( + res, rng, queries->data_handle(), queries->size(), data_type(0.1), data_type(2.0)); + } else { + raft::random::uniformInt( + res, rng, dataset->data_handle(), dataset->size(), data_type(1), data_type(20)); + raft::random::uniformInt( + res, rng, queries->data_handle(), queries->size(), data_type(1), data_type(20)); + } + raft::resource::sync_stream(res); + } + + 
void TearDown() override + { + index_dynb.reset(); + index_upsm.reset(); + dataset.reset(); + queries.reset(); + neighbors_upsm.reset(); + neighbors_dynb.reset(); + distances_upsm.reset(); + distances_dynb.reset(); + raft::resource::sync_stream(res); + } +}; + +inline std::vector generate_inputs() +{ + std::vector inputs{dynamic_batching_spec{}}; + + for (auto alt_n_queries : {10, 50, 100}) { + dynamic_batching_spec input{}; + input.n_queries = alt_n_queries; + inputs.push_back(input); + } + + for (auto alt_k : {100, 200}) { + dynamic_batching_spec input{}; + input.k = alt_k; + inputs.push_back(input); + } + + for (auto alt_max_batch_size : {4, 16, 128, 256, 512, 1024}) { + dynamic_batching_spec input{}; + input.max_batch_size = alt_max_batch_size; + inputs.push_back(input); + } + + for (auto alt_n_queues : {1, 2, 16, 32}) { + dynamic_batching_spec input{}; + input.n_queues = alt_n_queues; + inputs.push_back(input); + } + + for (auto alt_max_concurrent_threads : {1, 2, 16, 32}) { + dynamic_batching_spec input{}; + input.max_concurrent_threads = alt_max_concurrent_threads; + inputs.push_back(input); + } + + { + auto n = inputs.size(); + for (size_t i = 0; i < n; i++) { + auto input = inputs[i]; + input.conservative_dispatch = !input.conservative_dispatch; + inputs.push_back(input); + } + } + + return inputs; +} + +const std::vector inputs = generate_inputs(); + +} // namespace cuvs::neighbors::dynamic_batching diff --git a/cpp/test/neighbors/dynamic_batching/test_cagra.cu b/cpp/test/neighbors/dynamic_batching/test_cagra.cu new file mode 100644 index 000000000..03d8b3ced --- /dev/null +++ b/cpp/test/neighbors/dynamic_batching/test_cagra.cu @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include "../dynamic_batching.cuh"
+
+namespace cuvs::neighbors::dynamic_batching {
+
+// Dynamic batching fixtures using cagra::build / cagra::search as the upstream index.
+using cagra_F32 = dynamic_batching_test,
+                                        cagra::build,
+                                        cagra::search>;
+
+using cagra_U8 = dynamic_batching_test,
+                                       cagra::build,
+                                       cagra::search>;
+
+// Apply the CAGRA parameters shared by all tests in this file: fixed graph degrees for the build
+// and an itopk_size derived from k (power-of-two bound of k times 16, clamped to [128, 512]).
+// Must be called before build_all(), which reads these parameters.
+template
+static void set_default_cagra_params(fixture& that)
+{
+  that.build_params_upsm.intermediate_graph_degree = 128;
+  that.build_params_upsm.graph_degree              = 64;
+  that.search_params_upsm.itopk_size =
+    std::clamp(raft::bound_by_power_of_two(that.ps.k) * 16, 128, 512);
+}
+
+// Compare dynamic batching against the upstream index with the SINGLE_CTA search algorithm.
+TEST_P(cagra_F32, single_cta)
+{
+  set_default_cagra_params(*this);
+  search_params_upsm.algo = cagra::search_algo::SINGLE_CTA;
+  build_all();
+  search_all();
+  check_neighbors();
+}
+
+// Same as above with the MULTI_CTA search algorithm.
+TEST_P(cagra_F32, multi_cta)
+{
+  set_default_cagra_params(*this);
+  search_params_upsm.algo = cagra::search_algo::MULTI_CTA;
+  build_all();
+  search_all();
+  check_neighbors();
+}
+
+// Same as above with the MULTI_KERNEL search algorithm.
+TEST_P(cagra_F32, multi_kernel)
+{
+  set_default_cagra_params(*this);
+  search_params_upsm.algo = cagra::search_algo::MULTI_KERNEL;
+  build_all();
+  search_all();
+  check_neighbors();
+}
+
+// cagra_U8 fixture; the search algorithm is left at its default value.
+TEST_P(cagra_U8, defaults)
+{
+  set_default_cagra_params(*this);
+  build_all();
+  search_all();
+  check_neighbors();
+}
+
+// Run every test above once per spec in `inputs`.
+INSTANTIATE_TEST_CASE_P(dynamic_batching, cagra_F32, ::testing::ValuesIn(inputs));
+INSTANTIATE_TEST_CASE_P(dynamic_batching, cagra_U8, ::testing::ValuesIn(inputs));
+
+}  // namespace cuvs::neighbors::dynamic_batching
diff --git a/cpp/test/neighbors/dynamic_batching/test_ivf_flat.cu b/cpp/test/neighbors/dynamic_batching/test_ivf_flat.cu
new file mode 100644
index 000000000..0c9c5a701
--- /dev/null
+++ b/cpp/test/neighbors/dynamic_batching/test_ivf_flat.cu
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include "../dynamic_batching.cuh"
+
+namespace cuvs::neighbors::dynamic_batching {
+
+// Dynamic batching fixture using ivf_flat::build / ivf_flat::search as the upstream index.
+using ivf_flat_i8 = dynamic_batching_test,
+                                          ivf_flat::build,
+                                          ivf_flat::search>;
+
+// Compare dynamic batching against the upstream IVF-Flat index.
+TEST_P(ivf_flat_i8, defaults)
+{
+  // the number of lists scales with the square root of the dataset size
+  build_params_upsm.n_lists = std::round(std::sqrt(ps.n_rows));
+  // NOTE(review): presumably min(n_lists, 10) probes, or ceil(n_lists / 50) if that is larger -
+  // confirm the rounding of raft::div_rounding_up_safe
+  search_params_upsm.n_probes =
+    std::max(std::min(build_params_upsm.n_lists, 10),
+             raft::div_rounding_up_safe(build_params_upsm.n_lists, 50));
+  build_all();
+  search_all();
+  check_neighbors();
+}
+
+// Run the test above once per spec in `inputs`.
+INSTANTIATE_TEST_CASE_P(dynamic_batching, ivf_flat_i8, ::testing::ValuesIn(inputs));
+
+}  // namespace cuvs::neighbors::dynamic_batching
diff --git a/cpp/test/neighbors/dynamic_batching/test_ivf_pq.cu b/cpp/test/neighbors/dynamic_batching/test_ivf_pq.cu
new file mode 100644
index 000000000..8f4324957
--- /dev/null
+++ b/cpp/test/neighbors/dynamic_batching/test_ivf_pq.cu
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include "../dynamic_batching.cuh"
+
+namespace cuvs::neighbors::dynamic_batching {
+
+// Dynamic batching fixture using ivf_pq::build / ivf_pq::search as the upstream index.
+using ivf_pq_f16 =
+  dynamic_batching_test, ivf_pq::build, ivf_pq::search>;
+
+// Compare dynamic batching against the upstream IVF-PQ index.
+TEST_P(ivf_pq_f16, defaults)
+{
+  // the number of lists scales with the square root of the dataset size
+  build_params_upsm.n_lists = std::round(std::sqrt(ps.n_rows));
+  // NOTE(review): presumably min(n_lists, 10) probes, or ceil(n_lists / 50) if that is larger -
+  // confirm the rounding of raft::div_rounding_up_safe
+  search_params_upsm.n_probes =
+    std::max(std::min(build_params_upsm.n_lists, 10),
+             raft::div_rounding_up_safe(build_params_upsm.n_lists, 50));
+  build_all();
+  search_all();
+  check_neighbors();
+}
+
+// Run the test above once per spec in `inputs`.
+INSTANTIATE_TEST_CASE_P(dynamic_batching, ivf_pq_f16, ::testing::ValuesIn(inputs));
+
+}  // namespace cuvs::neighbors::dynamic_batching