From 28b789404bedfa8dd82675fc4221f6db927c0422 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 11 Sep 2023 21:03:32 -0400 Subject: [PATCH 01/12] Printing actual exception in `require base set` (#1816) Authors: - Corey J. Nolet (https://github.com/cjnolet) - AJ Schmidt (https://github.com/ajschmidt8) Approvers: - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/raft/pull/1816 --- cpp/bench/ann/src/common/benchmark.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 4e91ee0690..4ec977700d 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -211,9 +211,10 @@ void bench_search(::benchmark::State& state, try { algo->set_search_dataset(dataset->base_set(algo_property.dataset_memory_type), dataset->base_set_size()); - } catch (const std::exception&) { + } catch (const std::exception& ex) { state.SkipWithError("The algorithm '" + index.name + - "' requires the base set, but it's not available."); + "' requires the base set, but it's not available. " + + "Exception: " + std::string(ex.what())); return; } } From b9cf9171d2c9db480a845853726387390f3e3d75 Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Mon, 18 Sep 2023 17:36:50 +0200 Subject: [PATCH 02/12] matrix::select_k: extra tests and benchmarks (#1821) Add a few extra test and benchmark cases; in particular: 1. Allow specifying non-trivial input indices 2. Allow filling the input data with infinities to see how algorithms perform in edge cases These tests are borrowed from the controversial workaround https://github.com/rapidsai/raft/pull/1742 Authors: - Artem M. Chirkin (https://github.com/achirkin) Approvers: - Tamas Bela Feher (https://github.com/tfeher) URL: https://github.com/rapidsai/raft/pull/1821 --- cpp/bench/prims/matrix/select_k.cu | 64 ++++++++- .../raft_internal/matrix/select_k.cuh | 7 +- cpp/test/matrix/select_k.cu | 34 +++++ cpp/test/matrix/select_k.cuh | 123 +++++++++++++++--- 4 files changed, 204 insertions(+), 24 deletions(-) diff --git a/cpp/bench/prims/matrix/select_k.cu b/cpp/bench/prims/matrix/select_k.cu index 1bff66cac4..992fda8a38 100644 --- a/cpp/bench/prims/matrix/select_k.cu +++ b/cpp/bench/prims/matrix/select_k.cu @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -38,6 +39,19 @@ namespace raft::matrix { using namespace raft::bench; // NOLINT +template +struct replace_with_mask { + KeyT replacement; + int64_t line_length; + int64_t spared_inputs; + constexpr auto inline operator()(int64_t offset, KeyT x, uint8_t mask) -> KeyT + { + auto i = offset % line_length; + // don't replace all the inputs, spare a few elements at the beginning of the input + return (mask && i >= spared_inputs) ? replacement : x; + } +}; + template struct selection : public fixture { explicit selection(const select::params& p) @@ -67,6 +81,21 @@ struct selection : public fixture { } } raft::random::uniform(handle, state, in_dists_.data(), in_dists_.size(), min_value, max_value); + if (p.frac_infinities > 0.0) { + rmm::device_uvector mask_buf(p.batch_size * p.len, stream); + auto mask = make_device_vector_view(mask_buf.data(), mask_buf.size()); + raft::random::bernoulli(handle, state, mask, p.frac_infinities); + KeyT bound = p.select_min ? raft::upper_bound() : raft::lower_bound(); + auto mask_in = + make_device_vector_view(mask_buf.data(), mask_buf.size()); + auto dists_in = make_device_vector_view(in_dists_.data(), in_dists_.size()); + auto dists_out = make_device_vector_view(in_dists_.data(), in_dists_.size()); + raft::linalg::map_offset(handle, + dists_out, + replace_with_mask{bound, int64_t(p.len), int64_t(p.k / 2)}, + dists_in, + mask_in); + } } void run_benchmark(::benchmark::State& state) override // NOLINT @@ -75,8 +104,12 @@ struct selection : public fixture { std::ostringstream label_stream; label_stream << params_.batch_size << "#" << params_.len << "#" << params_.k; if (params_.use_same_leading_bits) { label_stream << "#same-leading-bits"; } + if (params_.frac_infinities > 0) { label_stream << "#infs-" << params_.frac_infinities; } state.SetLabel(label_stream.str()); - loop_on_state(state, [this]() { + common::nvtx::range case_scope("%s - %s", state.name().c_str(), label_stream.str().c_str()); + int iter = 0; + loop_on_state(state, [&iter, this]() { + common::nvtx::range lap_scope("lap-", iter++); select::select_k_impl(handle, Algo, in_dists_.data(), @@ -149,6 +182,35 @@ const std::vector kInputs{ {10, 1000000, 64, true, false, true}, {10, 1000000, 128, true, false, true}, {10, 1000000, 256, true, false, true}, + + {10, 1000000, 1, true, false, false, true, 0.1}, + {10, 1000000, 16, true, false, false, true, 0.1}, + {10, 1000000, 64, true, false, false, true, 0.1}, + {10, 1000000, 128, true, false, false, true, 0.1}, + {10, 1000000, 256, true, false, false, true, 0.1}, + + {10, 1000000, 1, true, false, false, true, 0.9}, + {10, 1000000, 16, true, false, false, true, 0.9}, + {10, 1000000, 64, true, false, false, true, 0.9}, + {10, 1000000, 128, true, false, false, true, 0.9}, + {10, 1000000, 256, true, false, false, true, 0.9}, + {1000, 10000, 1, true, false, false, true, 0.9}, + {1000, 10000, 16, true, false, false, true, 0.9}, + {1000, 10000, 64, true, false, false, true, 0.9}, + {1000, 10000, 128, true, false, false, true, 0.9}, + {1000, 10000, 256, true, false, false, true, 0.9}, + + {10, 1000000, 1, true, false, false, true, 1.0}, + {10, 1000000, 16, true, false, false, true, 1.0}, + {10, 1000000, 64, true, false, false, true, 1.0}, + {10, 1000000, 128, true, false, false, true, 1.0}, + {10, 1000000, 256, true, false, false, true, 1.0}, + {1000, 10000, 1, true, false, false, true, 1.0}, + {1000, 10000, 16, true, false, false, true, 1.0}, + {1000, 10000, 64, true, false, false, true, 1.0}, + {1000, 10000, 128, true, false, false, true, 1.0}, + {1000, 10000, 256, true, false, false, true, 1.0}, + {1000, 10000, 256, true, false, false, true, 0.999}, }; #define SELECTION_REGISTER(KeyT, IdxT, A) \ diff --git a/cpp/internal/raft_internal/matrix/select_k.cuh b/cpp/internal/raft_internal/matrix/select_k.cuh index b72e67580a..1d15c5fc03 100644 --- a/cpp/internal/raft_internal/matrix/select_k.cuh +++ b/cpp/internal/raft_internal/matrix/select_k.cuh @@ -33,6 +33,7 @@ struct params { bool use_index_input = true; bool use_same_leading_bits = false; bool use_memory_pool = true; + double frac_infinities = 0.0; }; inline auto operator<<(std::ostream& os, const params& ss) -> std::ostream& @@ -41,8 +42,10 @@ inline auto operator<<(std::ostream& os, const params& ss) -> std::ostream& os << ", len: " << ss.len; os << ", k: " << ss.k; os << (ss.select_min ? ", asc" : ", dsc"); - os << (ss.use_index_input ? "" : ", no-input-index"); - os << (ss.use_same_leading_bits ? ", same-leading-bits}" : "}"); + if (!ss.use_index_input) { os << ", no-input-index"; } + if (ss.use_same_leading_bits) { os << ", same-leading-bits"; } + if (ss.frac_infinities > 0) { os << ", infs: " << ss.frac_infinities; } + os << "}"; return os; } diff --git a/cpp/test/matrix/select_k.cu b/cpp/test/matrix/select_k.cu index 63f020b420..ce4e3e867e 100644 --- a/cpp/test/matrix/select_k.cu +++ b/cpp/test/matrix/select_k.cu @@ -70,6 +70,28 @@ auto inputs_random_largek = testing::Values(select::params{100, 100000, 1000, tr select::params{100, 100000, 2048, false}, select::params{100, 100000, 1237, true}); +auto inputs_random_many_infs = + testing::Values(select::params{10, 100000, 1, true, false, false, true, 0.9}, + select::params{10, 100000, 16, true, false, false, true, 0.9}, + select::params{10, 100000, 64, true, false, false, true, 0.9}, + select::params{10, 100000, 128, true, false, false, true, 0.9}, + select::params{10, 100000, 256, true, false, false, true, 0.9}, + select::params{1000, 10000, 1, true, false, false, true, 0.9}, + select::params{1000, 10000, 16, true, false, false, true, 0.9}, + select::params{1000, 10000, 64, true, false, false, true, 0.9}, + select::params{1000, 10000, 128, true, false, false, true, 0.9}, + select::params{1000, 10000, 256, true, false, false, true, 0.9}, + select::params{10, 100000, 1, true, false, false, true, 0.999}, + select::params{10, 100000, 16, true, false, false, true, 0.999}, + select::params{10, 100000, 64, true, false, false, true, 0.999}, + select::params{10, 100000, 128, true, false, false, true, 0.999}, + select::params{10, 100000, 256, true, false, false, true, 0.999}, + select::params{1000, 10000, 1, true, false, false, true, 0.999}, + select::params{1000, 10000, 16, true, false, false, true, 0.999}, + select::params{1000, 10000, 64, true, false, false, true, 0.999}, + select::params{1000, 10000, 128, true, false, false, true, 0.999}, + select::params{1000, 10000, 256, true, false, false, true, 0.999}); + using ReferencedRandomFloatInt = SelectK::params_random>; TEST_P(ReferencedRandomFloatInt, Run) { run(); } // NOLINT @@ -111,4 +133,16 @@ INSTANTIATE_TEST_CASE_P( // NOLINT select::Algo::kRadix8bits, select::Algo::kRadix11bits, select::Algo::kRadix11bitsExtraPass))); + +using ReferencedRandomFloatIntkWarpsortAsGT = + SelectK::params_random>; +TEST_P(ReferencedRandomFloatIntkWarpsortAsGT, Run) { run(); } // NOLINT +INSTANTIATE_TEST_CASE_P( // NOLINT + SelectK, + ReferencedRandomFloatIntkWarpsortAsGT, + testing::Combine(inputs_random_many_infs, + testing::Values(select::Algo::kRadix8bits, + select::Algo::kRadix11bits, + select::Algo::kRadix11bitsExtraPass))); + } // namespace raft::matrix diff --git a/cpp/test/matrix/select_k.cuh b/cpp/test/matrix/select_k.cuh index e0e0cad225..e94a6d029e 100644 --- a/cpp/test/matrix/select_k.cuh +++ b/cpp/test/matrix/select_k.cuh @@ -49,14 +49,16 @@ auto gen_simple_ids(uint32_t batch_size, uint32_t len) -> std::vector template struct io_simple { public: - bool not_supported = false; + bool not_supported = false; + std::optional algo = std::nullopt; io_simple(const select::params& spec, const std::vector& in_dists, + const std::optional>& in_ids, const std::vector& out_dists, const std::vector& out_ids) : in_dists_(in_dists), - in_ids_(gen_simple_ids(spec.batch_size, spec.len)), + in_ids_(in_ids.value_or(gen_simple_ids(spec.batch_size, spec.len))), out_dists_(out_dists), out_ids_(out_ids) { @@ -78,12 +80,14 @@ template struct io_computed { public: bool not_supported = false; + select::Algo algo; io_computed(const select::params& spec, const select::Algo& algo, const std::vector& in_dists, const std::optional>& in_ids = std::nullopt) - : in_dists_(in_dists), + : algo(algo), + in_dists_(in_dists), in_ids_(in_ids.value_or(gen_simple_ids(spec.batch_size, spec.len))), out_dists_(spec.batch_size * spec.k), out_ids_(spec.batch_size * spec.k) @@ -223,32 +227,62 @@ struct SelectK // NOLINT if (ref.not_supported || res.not_supported) { GTEST_SKIP(); } ASSERT_TRUE(hostVecMatch(ref.get_out_dists(), res.get_out_dists(), Compare())); - // If the dists (keys) are the same, different corresponding ids may end up in the selection due - // to non-deterministic nature of some implementations. - auto& in_ids = ref.get_in_ids(); - auto& in_dists = ref.get_in_dists(); - auto compare_ids = [&in_ids, &in_dists](const IdxT& i, const IdxT& j) { + // If the dists (keys) are the same, different corresponding ids may end up in the selection + // due to non-deterministic nature of some implementations. + auto compare_ids = [this](const IdxT& i, const IdxT& j) { if (i == j) return true; + auto& in_ids = ref.get_in_ids(); + auto& in_dists = ref.get_in_dists(); auto ix_i = static_cast(std::find(in_ids.begin(), in_ids.end(), i) - in_ids.begin()); auto ix_j = static_cast(std::find(in_ids.begin(), in_ids.end(), j) - in_ids.begin()); - if (static_cast(ix_i) >= in_ids.size() || static_cast(ix_j) >= in_ids.size()) - return false; + auto forgive_i = forgive_algo(ref.algo, i); + auto forgive_j = forgive_algo(res.algo, j); + // Some algorithms return invalid indices in special cases. + // TODO: https://github.com/rapidsai/raft/issues/1822 + if (static_cast(ix_i) >= in_ids.size()) return forgive_i; + if (static_cast(ix_j) >= in_ids.size()) return forgive_j; auto dist_i = in_dists[ix_i]; auto dist_j = in_dists[ix_j]; if (dist_i == dist_j) return true; + const auto bound = spec.select_min ? raft::upper_bound() : raft::lower_bound(); + if (forgive_i && dist_i == bound) return true; + if (forgive_j && dist_j == bound) return true; + // Otherwise really fail std::cout << "ERROR: ref[" << ix_i << "] = " << dist_i << " != " << "res[" << ix_j << "] = " << dist_j << std::endl; return false; }; ASSERT_TRUE(hostVecMatch(ref.get_out_ids(), res.get_out_ids(), compare_ids)); } + + auto forgive_algo(const std::optional& algo, IdxT ix) const -> bool + { + if (!algo.has_value()) { return false; } + switch (algo.value()) { + // not sure which algo this is. + case select::Algo::kPublicApi: return true; + // warp-sort-based algos currently return zero index for inf distances. + case select::Algo::kWarpAuto: + case select::Algo::kWarpImmediate: + case select::Algo::kWarpFiltered: + case select::Algo::kWarpDistributed: + case select::Algo::kWarpDistributedShm: return ix == 0; + // FAISS version returns a special invalid value: + case select::Algo::kFaissBlockSelect: return ix == std::numeric_limits::max(); + // Do not forgive by default + default: return false; + } + } }; template struct params_simple { - using io_t = io_simple; - using input_t = - std::tuple, std::vector, std::vector>; + using io_t = io_simple; + using input_t = std::tuple, + std::optional>, + std::vector, + std::vector>; using params_t = std::tuple; static auto read(params_t ps) -> Params @@ -259,15 +293,17 @@ struct params_simple { std::get<0>(ins), algo, io_simple( - std::get<0>(ins), std::get<1>(ins), std::get<2>(ins), std::get<3>(ins))); + std::get<0>(ins), std::get<1>(ins), std::get<2>(ins), std::get<3>(ins), std::get<4>(ins))); } }; +auto inf_f = std::numeric_limits::max(); auto inputs_simple_f = testing::Values( params_simple::input_t( {5, 5, 5, true, true}, {5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 5.0, 1.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0}, + std::nullopt, {1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0}, {4, 3, 2, 1, 0, 0, 1, 2, 3, 4, 3, 0, 1, 4, 2, 4, 2, 1, 3, 0, 0, 2, 1, 4, 3}), @@ -275,12 +311,14 @@ auto inputs_simple_f = testing::Values( {5, 5, 3, true, true}, {5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 5.0, 1.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0}, + std::nullopt, {1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, {4, 3, 2, 0, 1, 2, 3, 0, 1, 4, 2, 1, 0, 2, 1}), params_simple::input_t( {5, 5, 5, true, false}, {5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 5.0, 1.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0}, + std::nullopt, {1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0}, {4, 3, 2, 1, 0, 0, 1, 2, 3, 4, 3, 0, 1, 4, 2, 4, 2, 1, 3, 0, 0, 2, 1, 4, 3}), @@ -288,20 +326,31 @@ auto inputs_simple_f = testing::Values( {5, 5, 3, true, false}, {5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 5.0, 1.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0}, + std::nullopt, {1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, {4, 3, 2, 0, 1, 2, 3, 0, 1, 4, 2, 1, 0, 2, 1}), params_simple::input_t( {5, 7, 3, true, true}, {5.0, 4.0, 3.0, 2.0, 1.3, 7.5, 19.0, 9.0, 2.0, 3.0, 3.0, 5.0, 6.0, 4.0, 2.0, 3.0, 5.0, 1.0, 4.0, 1.0, 1.0, 5.0, 7.0, 2.5, 4.0, 7.0, 8.0, 8.0, 1.0, 3.0, 2.0, 5.0, 4.0, 1.1, 1.2}, + std::nullopt, {1.3, 2.0, 3.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 2.5, 4.0, 5.0, 1.0, 1.1, 1.2}, {4, 3, 2, 1, 2, 3, 3, 5, 6, 2, 3, 0, 0, 5, 6}), - params_simple::input_t( - {1, 7, 3, true, true}, {2.0, 3.0, 5.0, 1.0, 4.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {3, 5, 6}), - params_simple::input_t( - {1, 7, 3, false, false}, {2.0, 3.0, 5.0, 1.0, 4.0, 1.0, 1.0}, {5.0, 4.0, 3.0}, {2, 4, 1}), - params_simple::input_t( - {1, 7, 3, false, true}, {2.0, 3.0, 5.0, 9.0, 4.0, 9.0, 9.0}, {9.0, 9.0, 9.0}, {3, 5, 6}), + params_simple::input_t({1, 7, 3, true, true}, + {2.0, 3.0, 5.0, 1.0, 4.0, 1.0, 1.0}, + std::nullopt, + {1.0, 1.0, 1.0}, + {3, 5, 6}), + params_simple::input_t({1, 7, 3, false, false}, + {2.0, 3.0, 5.0, 1.0, 4.0, 1.0, 1.0}, + std::nullopt, + {5.0, 4.0, 3.0}, + {2, 4, 1}), + params_simple::input_t({1, 7, 3, false, true}, + {2.0, 3.0, 5.0, 9.0, 4.0, 9.0, 9.0}, + std::nullopt, + {9.0, 9.0, 9.0}, + {3, 5, 6}), params_simple::input_t( {1, 130, 5, false, true}, {19, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, @@ -309,6 +358,7 @@ auto inputs_simple_f = testing::Values( 0, 1, 0, 1, 0, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 4, 4, 2, 3, 2, 3, 2, 3, 2, 3, 2, 20}, + std::nullopt, {20, 19, 18, 17, 16}, {129, 0, 117, 116, 115}), params_simple::input_t( @@ -318,8 +368,20 @@ auto inputs_simple_f = testing::Values( 0, 1, 0, 1, 0, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 4, 4, 2, 3, 2, 3, 2, 3, 2, 3, 2, 20}, + std::nullopt, {20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6}, - {129, 0, 117, 116, 115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105})); + {129, 0, 117, 116, 115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105}), + params_simple::input_t( + select::params{1, 32, 31, true, true}, + {0, 1, 2, 3, inf_f, inf_f, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, + std::optional{std::vector{31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, + 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, + 9, 8, 7, 6, 75, 74, 3, 2, 1, 0}}, + {0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, inf_f}, + {31, 30, 29, 28, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, + 13, 12, 11, 10, 9, 8, 7, 6, 75, 74, 3, 2, 1, 0, 27})); using SimpleFloatInt = SelectK; TEST_P(SimpleFloatInt, Run) { run(); } // NOLINT @@ -335,6 +397,12 @@ INSTANTIATE_TEST_CASE_P( // NOLINT select::Algo::kWarpFiltered, select::Algo::kWarpDistributed))); +template +struct replace_with_mask { + KeyT replacement; + constexpr auto inline operator()(KeyT x, uint8_t mask) -> KeyT { return mask ? replacement : x; } +}; + template struct with_ref { template @@ -354,6 +422,19 @@ struct with_ref { rmm::device_uvector dists_d(spec.len * spec.batch_size, s); raft::random::RngState r(42); normal(handle, r, dists_d.data(), dists_d.size(), KeyT(10.0), KeyT(100.0)); + + if (spec.frac_infinities > 0.0) { + rmm::device_uvector mask_buf(dists_d.size(), s); + auto mask = make_device_vector_view(mask_buf.data(), mask_buf.size()); + raft::random::bernoulli(handle, r, mask, spec.frac_infinities); + KeyT bound = spec.select_min ? raft::upper_bound() : raft::lower_bound(); + auto mask_in = + make_device_vector_view(mask_buf.data(), mask_buf.size()); + auto dists_in = make_device_vector_view(dists_d.data(), dists_d.size()); + auto dists_out = make_device_vector_view(dists_d.data(), dists_d.size()); + raft::linalg::map(handle, dists_out, replace_with_mask{bound}, dists_in, mask_in); + } + update_host(dists.data(), dists_d.data(), dists_d.size(), s); s.synchronize(); } From 6bbcf1fef7b60de2f85f055fb1acba7d27515b67 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 19 Sep 2023 07:50:33 -0500 Subject: [PATCH 03/12] Update to clang 16.0.6. (#1829) This PR updates raft to use clang 16.0.6. The previous version 16.0.1 has some minor formatting issues affecting several RAPIDS repos. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Ray Douglass (https://github.com/raydouglass) - Dante Gama Dessavre (https://github.com/dantegd) URL: https://github.com/rapidsai/raft/pull/1829 --- .pre-commit-config.yaml | 2 +- .../all_cuda-118_arch-x86_64.yaml | 4 +- .../all_cuda-120_arch-x86_64.yaml | 4 +- .../bench_ann_cuda-118_arch-x86_64.yaml | 4 +- cpp/bench/prims/matrix/select_k.cu | 32 +++++++------- .../raft/cluster/detail/kmeans_balanced.cuh | 2 +- .../core/detail/mdspan_numpy_serializer.hpp | 2 +- cpp/include/raft/core/detail/nvtx.hpp | 2 +- cpp/include/raft/core/kvp.hpp | 4 +- .../raft/core/resource/resource_types.hpp | 2 +- .../custom_epilogue_with_broadcast.h | 4 +- .../predicated_tile_iterator_normvec_smem.h | 4 +- .../raft/distance/detail/fused_l2_nn.cuh | 10 ++--- .../distance/detail/masked_distance_base.cuh | 2 +- .../detail/pairwise_distance_base.cuh | 2 +- .../detail/pairwise_matrix/dispatch-ext.cuh | 2 +- .../detail/predicated_tile_iterator_normvec.h | 4 +- cpp/include/raft/distance/distance-ext.cuh | 4 +- cpp/include/raft/linalg/add.cuh | 4 +- cpp/include/raft/linalg/binary_op.cuh | 4 +- .../raft/linalg/coalesced_reduction.cuh | 4 +- cpp/include/raft/linalg/contractions.cuh | 2 +- .../raft/linalg/detail/cublas_wrappers.hpp | 4 +- cpp/include/raft/linalg/divide.cuh | 4 +- cpp/include/raft/linalg/eig.cuh | 4 +- cpp/include/raft/linalg/gemv.cuh | 4 +- cpp/include/raft/linalg/lstsq.cuh | 4 +- cpp/include/raft/linalg/matrix_vector_op.cuh | 4 +- .../raft/linalg/mean_squared_error.cuh | 4 +- cpp/include/raft/linalg/multiply.cuh | 4 +- cpp/include/raft/linalg/power.cuh | 4 +- cpp/include/raft/linalg/reduce.cuh | 4 +- .../raft/linalg/reduce_cols_by_key.cuh | 4 +- .../raft/linalg/reduce_rows_by_key.cuh | 4 +- cpp/include/raft/linalg/rsvd.cuh | 4 +- cpp/include/raft/linalg/sqrt.cuh | 4 +- cpp/include/raft/linalg/strided_reduction.cuh | 4 +- cpp/include/raft/linalg/subtract.cuh | 4 +- cpp/include/raft/linalg/svd.cuh | 4 +- cpp/include/raft/linalg/ternary_op.cuh | 4 +- cpp/include/raft/linalg/transpose.cuh | 4 +- cpp/include/raft/linalg/unary_op.cuh | 4 +- cpp/include/raft/matrix/col_wise_sort.cuh | 2 +- .../raft/matrix/detail/select_k-ext.cuh | 4 +- .../raft/matrix/detail/select_warpsort.cuh | 2 +- cpp/include/raft/neighbors/ann_types.hpp | 2 +- .../detail/cagra/compute_distance.hpp | 2 +- .../detail/cagra/search_multi_cta.cuh | 6 +-- .../cagra/search_multi_cta_kernel-inl.cuh | 14 +++---- .../detail/cagra/search_multi_kernel.cuh | 38 ++++++++--------- .../neighbors/detail/cagra/search_plan.cuh | 2 +- .../detail/cagra/search_single_cta.cuh | 6 +-- .../cagra/search_single_cta_kernel-ext.cuh | 6 +-- .../cagra/search_single_cta_kernel-inl.cuh | 18 ++++---- .../detail/cagra/topk_for_cagra/topk_core.cuh | 14 +++---- .../neighbors/detail/ivf_flat_search-inl.cuh | 2 +- .../raft/neighbors/detail/ivf_pq_build.cuh | 2 +- .../detail/ivf_pq_compute_similarity-inl.cuh | 2 +- .../raft/neighbors/detail/refine_host-ext.hpp | 2 +- .../neighbors/detail/selection_faiss-ext.cuh | 2 +- cpp/include/raft/neighbors/ivf_flat-ext.cuh | 6 +-- cpp/include/raft/neighbors/ivf_pq-ext.cuh | 2 +- cpp/include/raft/neighbors/refine-ext.cuh | 2 +- cpp/include/raft/sparse/distance/distance.cuh | 6 +-- .../raft/sparse/linalg/detail/norm.cuh | 20 ++++----- .../raft/sparse/linalg/detail/spectral.cuh | 2 +- cpp/include/raft/sparse/linalg/norm.cuh | 8 ++-- .../raft/sparse/neighbors/detail/knn.cuh | 2 +- cpp/include/raft/sparse/solver/mst_solver.cuh | 8 ++-- .../knn/detail/ball_cover/registers-ext.cuh | 2 +- .../raft/stats/adjusted_rand_index.cuh | 4 +- cpp/include/raft/stats/completeness_score.cuh | 4 +- cpp/include/raft/stats/cov.cuh | 4 +- cpp/include/raft/stats/entropy.cuh | 4 +- cpp/include/raft/stats/histogram.cuh | 4 +- cpp/include/raft/stats/homogeneity_score.cuh | 4 +- cpp/include/raft/stats/kl_divergence.cuh | 4 +- cpp/include/raft/stats/mean.cuh | 4 +- cpp/include/raft/stats/mean_center.cuh | 4 +- cpp/include/raft/stats/meanvar.cuh | 2 +- cpp/include/raft/stats/minmax.cuh | 4 +- cpp/include/raft/stats/mutual_info_score.cuh | 4 +- cpp/include/raft/stats/rand_index.cuh | 4 +- cpp/include/raft/stats/stddev.cuh | 4 +- cpp/include/raft/stats/sum.cuh | 4 +- cpp/include/raft/stats/v_measure.cuh | 4 +- cpp/include/raft/stats/weighted_mean.cuh | 4 +- cpp/include/raft/util/cache.cuh | 6 +-- cpp/include/raft/util/cache_util.cuh | 6 +-- cpp/include/raft/util/device_loads_stores.cuh | 2 +- cpp/scripts/run-clang-tidy.py | 4 +- cpp/test/distance/distance_base.cuh | 8 ++-- cpp/test/linalg/rsvd.cu | 42 +++++++++---------- cpp/test/neighbors/ann_cagra.cuh | 2 +- cpp/test/util/bitonic_sort.cu | 22 +++++----- cpp/test/util/reduction.cu | 10 ++--- dependencies.yaml | 4 +- 97 files changed, 270 insertions(+), 270 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2a70632497..66862ada5e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,7 +43,7 @@ repos: additional_dependencies: [toml] args: ["--config=pyproject.toml"] - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v16.0.1 + rev: v16.0.6 hooks: - id: clang-format types_or: [c, c++, cuda] diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 7e921decd5..65b4232d83 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - breathe - c-compiler -- clang-tools=16.0.1 -- clang=16.0.1 +- clang-tools=16.0.6 +- clang=16.0.6 - cmake>=3.26.4 - cuda-profiler-api=11.8.86 - cuda-python>=11.7.1,<12.0a0 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 2ea685b529..9db38ed1de 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - breathe - c-compiler -- clang-tools=16.0.1 -- clang=16.0.1 +- clang-tools=16.0.6 +- clang=16.0.6 - cmake>=3.26.4 - cuda-cudart-dev - cuda-profiler-api diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 742040ad50..5a9ef5bd32 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - benchmark>=1.8.2 - c-compiler -- clang-tools=16.0.1 -- clang=16.0.1 +- clang-tools=16.0.6 +- clang=16.0.6 - cmake>=3.26.4 - cuda-profiler-api=11.8.86 - cuda-version=11.8 diff --git a/cpp/bench/prims/matrix/select_k.cu b/cpp/bench/prims/matrix/select_k.cu index 992fda8a38..d3994e59c5 100644 --- a/cpp/bench/prims/matrix/select_k.cu +++ b/cpp/bench/prims/matrix/select_k.cu @@ -219,28 +219,28 @@ const std::vector kInputs{ RAFT_BENCH_REGISTER(SelectK, #KeyT "/" #IdxT "/" #A, kInputs); \ } -SELECTION_REGISTER(float, uint32_t, kPublicApi); // NOLINT -SELECTION_REGISTER(float, uint32_t, kRadix8bits); // NOLINT -SELECTION_REGISTER(float, uint32_t, kRadix11bits); // NOLINT -SELECTION_REGISTER(float, uint32_t, kRadix11bitsExtraPass); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpAuto); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpImmediate); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpFiltered); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpDistributed); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpDistributedShm); // NOLINT +SELECTION_REGISTER(float, uint32_t, kPublicApi); // NOLINT +SELECTION_REGISTER(float, uint32_t, kRadix8bits); // NOLINT +SELECTION_REGISTER(float, uint32_t, kRadix11bits); // NOLINT +SELECTION_REGISTER(float, uint32_t, kRadix11bitsExtraPass); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpAuto); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpImmediate); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpFiltered); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpDistributed); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpDistributedShm); // NOLINT SELECTION_REGISTER(double, uint32_t, kRadix8bits); // NOLINT SELECTION_REGISTER(double, uint32_t, kRadix11bits); // NOLINT SELECTION_REGISTER(double, uint32_t, kRadix11bitsExtraPass); // NOLINT SELECTION_REGISTER(double, uint32_t, kWarpAuto); // NOLINT -SELECTION_REGISTER(double, int64_t, kRadix8bits); // NOLINT -SELECTION_REGISTER(double, int64_t, kRadix11bits); // NOLINT -SELECTION_REGISTER(double, int64_t, kRadix11bitsExtraPass); // NOLINT -SELECTION_REGISTER(double, int64_t, kWarpImmediate); // NOLINT -SELECTION_REGISTER(double, int64_t, kWarpFiltered); // NOLINT -SELECTION_REGISTER(double, int64_t, kWarpDistributed); // NOLINT -SELECTION_REGISTER(double, int64_t, kWarpDistributedShm); // NOLINT +SELECTION_REGISTER(double, int64_t, kRadix8bits); // NOLINT +SELECTION_REGISTER(double, int64_t, kRadix11bits); // NOLINT +SELECTION_REGISTER(double, int64_t, kRadix11bitsExtraPass); // NOLINT +SELECTION_REGISTER(double, int64_t, kWarpImmediate); // NOLINT +SELECTION_REGISTER(double, int64_t, kWarpFiltered); // NOLINT +SELECTION_REGISTER(double, int64_t, kWarpDistributed); // NOLINT +SELECTION_REGISTER(double, int64_t, kWarpDistributedShm); // NOLINT // For learning a heuristic of which selection algorithm to use, we // have a couple of additional constraints when generating the dataset: diff --git a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh index 866a0ebdfa..ade3a6e348 100644 --- a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh +++ b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh @@ -438,7 +438,7 @@ __global__ void __launch_bounds__((WarpSize * BlockDimY)) adjust_centers_kernel(MathT* centers, // [n_clusters, dim] IdxT n_clusters, IdxT dim, - const T* dataset, // [n_rows, dim] + const T* dataset, // [n_rows, dim] IdxT n_rows, const LabelT* labels, // [n_rows] const CounterT* cluster_sizes, // [n_clusters] diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index 328080da1f..8e41aa96f3 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -75,7 +75,7 @@ namespace numpy_serializer { #if RAFT_SYSTEM_LITTLE_ENDIAN == 1 #define RAFT_NUMPY_HOST_ENDIAN_CHAR RAFT_NUMPY_LITTLE_ENDIAN_CHAR -#else // RAFT_SYSTEM_LITTLE_ENDIAN == 1 +#else // RAFT_SYSTEM_LITTLE_ENDIAN == 1 #define RAFT_NUMPY_HOST_ENDIAN_CHAR RAFT_NUMPY_BIG_ENDIAN_CHAR #endif // RAFT_SYSTEM_LITTLE_ENDIAN == 1 diff --git a/cpp/include/raft/core/detail/nvtx.hpp b/cpp/include/raft/core/detail/nvtx.hpp index e734c99029..f077a49b77 100644 --- a/cpp/include/raft/core/detail/nvtx.hpp +++ b/cpp/include/raft/core/detail/nvtx.hpp @@ -193,7 +193,7 @@ inline void pop_range() } // namespace raft::common::nvtx::detail -#else // NVTX_ENABLED +#else // NVTX_ENABLED namespace raft::common::nvtx::detail { diff --git a/cpp/include/raft/core/kvp.hpp b/cpp/include/raft/core/kvp.hpp index 2e0d1117a1..192d160d45 100644 --- a/cpp/include/raft/core/kvp.hpp +++ b/cpp/include/raft/core/kvp.hpp @@ -32,8 +32,8 @@ struct KeyValuePair { typedef _Key Key; ///< Key data type typedef _Value Value; ///< Value data type - Key key; ///< Item key - Value value; ///< Item value + Key key; ///< Item key + Value value; ///< Item value /// Constructor RAFT_INLINE_FUNCTION KeyValuePair() {} diff --git a/cpp/include/raft/core/resource/resource_types.hpp b/cpp/include/raft/core/resource/resource_types.hpp index 2dc4eb1f9d..8e331293bf 100644 --- a/cpp/include/raft/core/resource/resource_types.hpp +++ b/cpp/include/raft/core/resource/resource_types.hpp @@ -42,7 +42,7 @@ enum resource_type { THRUST_POLICY, // thrust execution policy WORKSPACE_RESOURCE, // rmm device memory resource - LAST_KEY // reserved for the last key + LAST_KEY // reserved for the last key }; /** diff --git a/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h b/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h index 10827a8778..f659ed256d 100644 --- a/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h +++ b/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h @@ -397,7 +397,7 @@ class EpilogueWithBroadcastCustom : public EpilogueBase diff --git a/cpp/include/raft/distance/detail/fused_l2_nn.cuh b/cpp/include/raft/distance/detail/fused_l2_nn.cuh index 68922943f4..f0f12acdb1 100644 --- a/cpp/include/raft/distance/detail/fused_l2_nn.cuh +++ b/cpp/include/raft/distance/detail/fused_l2_nn.cuh @@ -16,11 +16,11 @@ #pragma once -#include // size_t -#include // std::numeric_limits -#include // raft::KeyValuePair -#include // raft::identity_op -#include // ops::l2_exp_distance_op +#include // size_t +#include // std::numeric_limits +#include // raft::KeyValuePair +#include // raft::identity_op +#include // ops::l2_exp_distance_op #include #include // PairwiseDistances #include // Policy diff --git a/cpp/include/raft/distance/detail/masked_distance_base.cuh b/cpp/include/raft/distance/detail/masked_distance_base.cuh index 5a33c9ce4a..55da634145 100644 --- a/cpp/include/raft/distance/detail/masked_distance_base.cuh +++ b/cpp/include/raft/distance/detail/masked_distance_base.cuh @@ -217,7 +217,7 @@ struct MaskedDistances : public BaseClass { } // tile_idx_n } // idx_g rowEpilog_op(tile_idx_m); - } // tile_idx_m + } // tile_idx_m } private: diff --git a/cpp/include/raft/distance/detail/pairwise_distance_base.cuh b/cpp/include/raft/distance/detail/pairwise_distance_base.cuh index 58b5daa8ca..c6b09be31e 100644 --- a/cpp/include/raft/distance/detail/pairwise_distance_base.cuh +++ b/cpp/include/raft/distance/detail/pairwise_distance_base.cuh @@ -18,7 +18,7 @@ #include // ceildiv #include // RAFT_CUDA_TRY -#include // size_t +#include // size_t namespace raft { namespace distance { diff --git a/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh b/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh index dd58ab4328..e1dc6f9b37 100644 --- a/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh +++ b/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh @@ -45,7 +45,7 @@ void pairwise_matrix_dispatch(OpT distance_op, cudaStream_t stream, bool is_row_major) RAFT_EXPLICIT; -}; // namespace raft::distance::detail +}; // namespace raft::distance::detail #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY diff --git a/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h b/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h index cd748b9e6b..951f8a0132 100644 --- a/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h +++ b/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h @@ -57,8 +57,8 @@ namespace threadblock { /// /// Satisfies: ReadableTileIterator | PredicatedTileIterator | ForwardTileIterator /// -template diff --git a/cpp/include/raft/distance/distance-ext.cuh b/cpp/include/raft/distance/distance-ext.cuh index 3f7f2b0a23..7171ba605f 100644 --- a/cpp/include/raft/distance/distance-ext.cuh +++ b/cpp/include/raft/distance/distance-ext.cuh @@ -140,8 +140,8 @@ void pairwise_distance(raft::resources const& handle, raft::distance::DistanceType metric, Type metric_arg = 2.0f) RAFT_EXPLICIT; -}; // namespace distance -}; // namespace raft +}; // namespace distance +}; // namespace raft #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY diff --git a/cpp/include/raft/linalg/add.cuh b/cpp/include/raft/linalg/add.cuh index 30f4a2d167..b2cd736c57 100644 --- a/cpp/include/raft/linalg/add.cuh +++ b/cpp/include/raft/linalg/add.cuh @@ -217,7 +217,7 @@ void add_scalar(raft::resources const& handle, /** @} */ // end of group add -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/binary_op.cuh b/cpp/include/raft/linalg/binary_op.cuh index f6889e959b..03beb1d1d1 100644 --- a/cpp/include/raft/linalg/binary_op.cuh +++ b/cpp/include/raft/linalg/binary_op.cuh @@ -82,7 +82,7 @@ void binary_op(raft::resources const& handle, InType in1, InType in2, OutType ou /** @} */ // end of group binary_op -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/coalesced_reduction.cuh b/cpp/include/raft/linalg/coalesced_reduction.cuh index 5609656234..afa58d73fc 100644 --- a/cpp/include/raft/linalg/coalesced_reduction.cuh +++ b/cpp/include/raft/linalg/coalesced_reduction.cuh @@ -160,7 +160,7 @@ void coalesced_reduction(raft::resources const& handle, /** @} */ // end of group coalesced_reduction -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/contractions.cuh b/cpp/include/raft/linalg/contractions.cuh index 3b1e8c41c4..cb6488bedf 100644 --- a/cpp/include/raft/linalg/contractions.cuh +++ b/cpp/include/raft/linalg/contractions.cuh @@ -100,7 +100,7 @@ struct KernelPolicy { SmemSize = 2 * SmemPage * sizeof(DataT), }; // enum -}; // struct KernelPolicy +}; // struct KernelPolicy template struct ColKernelPolicy { diff --git a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp index 5a7356a4c2..d15e343c9a 100644 --- a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp +++ b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp @@ -550,7 +550,7 @@ cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, template <> inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, // NOLINT int n, - float* const A[], // NOLINT + float* const A[], // NOLINT int lda, int* P, int* info, @@ -564,7 +564,7 @@ inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, // NOLINT template <> inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, // NOLINT int n, - double* const A[], // NOLINT + double* const A[], // NOLINT int lda, int* P, int* info, diff --git a/cpp/include/raft/linalg/divide.cuh b/cpp/include/raft/linalg/divide.cuh index d617b065da..17ec5c3136 100644 --- a/cpp/include/raft/linalg/divide.cuh +++ b/cpp/include/raft/linalg/divide.cuh @@ -96,7 +96,7 @@ void divide_scalar(raft::resources const& handle, /** @} */ // end of group add -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh index 954bf19334..57f3b61388 100644 --- a/cpp/include/raft/linalg/eig.cuh +++ b/cpp/include/raft/linalg/eig.cuh @@ -220,7 +220,7 @@ void eig_jacobi(raft::resources const& handle, /** @} */ // end of eig -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/gemv.cuh b/cpp/include/raft/linalg/gemv.cuh index 640964d018..610ea07f96 100644 --- a/cpp/include/raft/linalg/gemv.cuh +++ b/cpp/include/raft/linalg/gemv.cuh @@ -305,6 +305,6 @@ void gemv(raft::resources const& handle, } /** @} */ // end of gemv -}; // namespace linalg -}; // namespace raft +}; // namespace linalg +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/lstsq.cuh b/cpp/include/raft/linalg/lstsq.cuh index 20588cbe17..21575d7806 100644 --- a/cpp/include/raft/linalg/lstsq.cuh +++ b/cpp/include/raft/linalg/lstsq.cuh @@ -245,7 +245,7 @@ void lstsq_qr(raft::resources const& handle, /** @} */ // end of lstsq -}; // namespace linalg -}; // namespace raft +}; // namespace linalg +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/matrix_vector_op.cuh b/cpp/include/raft/linalg/matrix_vector_op.cuh index e620d227eb..a65f6ed390 100644 --- a/cpp/include/raft/linalg/matrix_vector_op.cuh +++ b/cpp/include/raft/linalg/matrix_vector_op.cuh @@ -240,7 +240,7 @@ void matrix_vector_op(raft::resources const& handle, /** @} */ // end of group matrix_vector_op -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/mean_squared_error.cuh b/cpp/include/raft/linalg/mean_squared_error.cuh index d45f11524d..b59a0fcef7 100644 --- a/cpp/include/raft/linalg/mean_squared_error.cuh +++ b/cpp/include/raft/linalg/mean_squared_error.cuh @@ -79,7 +79,7 @@ void mean_squared_error(raft::resources const& handle, /** @} */ // end of group mean_squared_error -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/multiply.cuh b/cpp/include/raft/linalg/multiply.cuh index 3ade108235..9973a3cc6c 100644 --- a/cpp/include/raft/linalg/multiply.cuh +++ b/cpp/include/raft/linalg/multiply.cuh @@ -98,7 +98,7 @@ void multiply_scalar( /** @} */ // end of group multiply -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/power.cuh b/cpp/include/raft/linalg/power.cuh index 26ac1035ca..5c7dcbd5cf 100644 --- a/cpp/include/raft/linalg/power.cuh +++ b/cpp/include/raft/linalg/power.cuh @@ -154,7 +154,7 @@ void power_scalar( /** @} */ // end of group add -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/reduce.cuh b/cpp/include/raft/linalg/reduce.cuh index a3d0ef71d0..3181dd0224 100644 --- a/cpp/include/raft/linalg/reduce.cuh +++ b/cpp/include/raft/linalg/reduce.cuh @@ -162,7 +162,7 @@ void reduce(raft::resources const& handle, /** @} */ // end of group reduction -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/reduce_cols_by_key.cuh b/cpp/include/raft/linalg/reduce_cols_by_key.cuh index 6eaf1e2ba7..5ed0fb7407 100644 --- a/cpp/include/raft/linalg/reduce_cols_by_key.cuh +++ b/cpp/include/raft/linalg/reduce_cols_by_key.cuh @@ -113,7 +113,7 @@ void reduce_cols_by_key( /** @} */ // end of group reduce_cols_by_key -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/reduce_rows_by_key.cuh b/cpp/include/raft/linalg/reduce_rows_by_key.cuh index fa624b2191..7d93c3946f 100644 --- a/cpp/include/raft/linalg/reduce_rows_by_key.cuh +++ b/cpp/include/raft/linalg/reduce_rows_by_key.cuh @@ -192,7 +192,7 @@ void reduce_rows_by_key( /** @} */ // end of group reduce_rows_by_key -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/rsvd.cuh b/cpp/include/raft/linalg/rsvd.cuh index 2dece5b957..163f360481 100644 --- a/cpp/include/raft/linalg/rsvd.cuh +++ b/cpp/include/raft/linalg/rsvd.cuh @@ -876,7 +876,7 @@ void randomized_svd(const raft::resources& handle, /** @} */ // end of group rsvd -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/sqrt.cuh b/cpp/include/raft/linalg/sqrt.cuh index 99754c4eb2..81b7ab7dec 100644 --- a/cpp/include/raft/linalg/sqrt.cuh +++ b/cpp/include/raft/linalg/sqrt.cuh @@ -84,7 +84,7 @@ void sqrt(raft::resources const& handle, InType in, OutType out) /** @} */ // end of group add -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/strided_reduction.cuh b/cpp/include/raft/linalg/strided_reduction.cuh index f971d0e40b..c7ff000e00 100644 --- a/cpp/include/raft/linalg/strided_reduction.cuh +++ b/cpp/include/raft/linalg/strided_reduction.cuh @@ -171,7 +171,7 @@ void strided_reduction(raft::resources const& handle, /** @} */ // end of group strided_reduction -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/subtract.cuh b/cpp/include/raft/linalg/subtract.cuh index 688e60a806..f4243f9dc1 100644 --- a/cpp/include/raft/linalg/subtract.cuh +++ b/cpp/include/raft/linalg/subtract.cuh @@ -223,7 +223,7 @@ void subtract_scalar( /** @} */ // end of group subtract -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/svd.cuh b/cpp/include/raft/linalg/svd.cuh index 08f9462ba9..f7071de75b 100644 --- a/cpp/include/raft/linalg/svd.cuh +++ b/cpp/include/raft/linalg/svd.cuh @@ -416,7 +416,7 @@ void svd_reconstruction(raft::resources const& handle, /** @} */ // end of group svd -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/ternary_op.cuh b/cpp/include/raft/linalg/ternary_op.cuh index f46133abd9..67b04c6791 100644 --- a/cpp/include/raft/linalg/ternary_op.cuh +++ b/cpp/include/raft/linalg/ternary_op.cuh @@ -83,7 +83,7 @@ void ternary_op( /** @} */ // end of group ternary_op -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh index afe1962223..1b46082fbe 100644 --- a/cpp/include/raft/linalg/transpose.cuh +++ b/cpp/include/raft/linalg/transpose.cuh @@ -103,7 +103,7 @@ auto transpose(raft::resources const& handle, /** @} */ // end of group transpose -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/unary_op.cuh b/cpp/include/raft/linalg/unary_op.cuh index 47a432f415..5ebe27923a 100644 --- a/cpp/include/raft/linalg/unary_op.cuh +++ b/cpp/include/raft/linalg/unary_op.cuh @@ -125,7 +125,7 @@ void write_only_unary_op(const raft::resources& handle, OutType out, Lambda op) /** @} */ // end of group unary_op -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/matrix/col_wise_sort.cuh b/cpp/include/raft/matrix/col_wise_sort.cuh index 887741ad71..c94b2506d3 100644 --- a/cpp/include/raft/matrix/col_wise_sort.cuh +++ b/cpp/include/raft/matrix/col_wise_sort.cuh @@ -134,6 +134,6 @@ void sort_cols_per_row(Args... args) /** @} */ // end of group col_wise_sort -}; // end namespace raft::matrix +}; // end namespace raft::matrix #endif \ No newline at end of file diff --git a/cpp/include/raft/matrix/detail/select_k-ext.cuh b/cpp/include/raft/matrix/detail/select_k-ext.cuh index f934d7e3b4..870f0c3240 100644 --- a/cpp/include/raft/matrix/detail/select_k-ext.cuh +++ b/cpp/include/raft/matrix/detail/select_k-ext.cuh @@ -16,8 +16,8 @@ #pragma once -#include // uint32_t -#include // __half +#include // uint32_t +#include // __half #include #include // RAFT_EXPLICIT #include // rmm:cuda_stream_view diff --git a/cpp/include/raft/matrix/detail/select_warpsort.cuh b/cpp/include/raft/matrix/detail/select_warpsort.cuh index dc86a04733..2927604e7d 100644 --- a/cpp/include/raft/matrix/detail/select_warpsort.cuh +++ b/cpp/include/raft/matrix/detail/select_warpsort.cuh @@ -959,7 +959,7 @@ void calc_launch_parameter( if (batch_size >= size_t(another_min_grid_size) // still have enough work && another_block_size < block_size // protect against an infinite loop && another_min_grid_size * another_block_size > - min_grid_size * block_size // improve occupancy + min_grid_size * block_size // improve occupancy ) { block_size = another_block_size; min_grid_size = another_min_grid_size; diff --git a/cpp/include/raft/neighbors/ann_types.hpp b/cpp/include/raft/neighbors/ann_types.hpp index 469d3c09d4..c17be4a8ff 100644 --- a/cpp/include/raft/neighbors/ann_types.hpp +++ b/cpp/include/raft/neighbors/ann_types.hpp @@ -49,4 +49,4 @@ struct search_params {}; /** @} */ // end group ann_types -}; // namespace raft::neighbors::ann +}; // namespace raft::neighbors::ann diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp index 2758148942..47e976e252 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp @@ -53,7 +53,7 @@ _RAFT_DEVICE void compute_distance_to_random_nodes( INDEX_T* const result_indices_ptr, // [num_pickup] DISTANCE_T* const result_distances_ptr, // [num_pickup] const float* const query_buffer, - const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] + const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] const std::size_t dataset_dim, const std::size_t dataset_size, const std::size_t dataset_ld, diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh index 314ab6e6a6..6ea1e34032 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh @@ -190,9 +190,9 @@ struct search : public search_plan_impl { void operator()(raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index 0015b4a791..4fc051ac09 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -48,7 +48,7 @@ namespace multi_cta_search { template __device__ void pickup_next_parents(INDEX_T* const next_parent_indices, // [search_width] const uint32_t search_width, - INDEX_T* const itopk_indices, // [num_itopk] + INDEX_T* const itopk_indices, // [num_itopk] const size_t num_itopk, uint32_t* const terminate_flag) { @@ -86,8 +86,8 @@ __device__ void pickup_next_parents(INDEX_T* const next_parent_indices, // [sea } template -__device__ inline void topk_by_bitonic_sort(float* distances, // [num_elements] - INDEX_T* indices, // [num_elements] +__device__ inline void topk_by_bitonic_sort(float* distances, // [num_elements] + INDEX_T* indices, // [num_elements] const uint32_t num_elements, const uint32_t num_itopk // num_itopk <= num_elements ) @@ -144,7 +144,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel( const uint32_t graph_degree, const unsigned num_distilation, const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] + const INDEX_T* seed_ptr, // [num_queries, num_seeds] const uint32_t num_seeds, INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const uint32_t hash_bitlen, @@ -454,9 +454,9 @@ template dataset, raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index e664764721..f312226f42 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -98,7 +98,7 @@ __global__ void random_pickup_kernel( const std::size_t num_pickup, const unsigned num_distilation, const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] + const INDEX_T* seed_ptr, // [num_queries, num_seeds] const uint32_t num_seeds, INDEX_T* const result_indices_ptr, // [num_queries, ldr] DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] @@ -170,7 +170,7 @@ void random_pickup(const DATA_T* const dataset_ptr, // [dataset_size, dataset_d const std::size_t num_pickup, const unsigned num_distilation, const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] + const INDEX_T* seed_ptr, // [num_queries, num_seeds] const uint32_t num_seeds, INDEX_T* const result_indices_ptr, // [num_queries, ldr] DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] @@ -310,18 +310,18 @@ template = search_width * graph_degree + INDEX_T* const result_indices_ptr, // [num_queries, ldd] + DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] + const std::uint32_t ldd // (*) ldd >= search_width * graph_degree ) { const uint32_t ldb = hashmap::get_size(hash_bitlen); @@ -371,19 +371,19 @@ template = search_width * graph_degree + INDEX_T* const result_indices_ptr, // [num_queries, ldd] + DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] + const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree cudaStream_t cuda_stream = 0) { const auto block_size = 128; @@ -437,7 +437,7 @@ void remove_parent_bit(const std::uint32_t num_queries, } template -__global__ void batched_memcpy_kernel(T* const dst, // [batch_size, ld_dst] +__global__ void batched_memcpy_kernel(T* const dst, // [batch_size, ld_dst] const uint64_t ld_dst, const T* const src, // [batch_size, ld_src] const uint64_t ld_src, @@ -452,7 +452,7 @@ __global__ void batched_memcpy_kernel(T* const dst, // [batch_size, ld_ds } template -void batched_memcpy(T* const dst, // [batch_size, ld_dst] +void batched_memcpy(T* const dst, // [batch_size, ld_dst] const uint64_t ld_dst, const T* const src, // [batch_size, ld_src] const uint64_t ld_src, @@ -596,9 +596,9 @@ struct search : search_plan_impl { void operator()(raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh index e6966987c8..33c77db61e 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh @@ -111,7 +111,7 @@ struct search_plan_impl : public search_plan_impl_base { DISTANCE_T* const result_distances_ptr, // [num_queries, topk] const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const std::uint32_t num_queries, - const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] std::uint32_t* const num_executed_iterations, // [num_queries] uint32_t topk){}; diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh index 96de83369d..45dd535e1d 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh @@ -228,9 +228,9 @@ struct search : search_plan_impl { void operator()(raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, - INDEX_T* const result_indices_ptr, // [num_queries, topk] - DISTANCE_T* const result_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const result_indices_ptr, // [num_queries, topk] + DISTANCE_T* const result_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const std::uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] std::uint32_t* const num_executed_iterations, // [num_queries] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh index f7c43fe11c..5f5df1a818 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh @@ -29,9 +29,9 @@ template dataset, raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index 31d9c9fffa..81325fd5da 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -192,8 +192,8 @@ __device__ inline void topk_by_bitonic_sort_1st(float* candidate_distances, // } template -__device__ inline void topk_by_bitonic_sort_2nd(float* itopk_distances, // [num_itopk] - IdxT* itopk_indices, // [num_itopk] +__device__ inline void topk_by_bitonic_sort_2nd(float* itopk_distances, // [num_itopk] + IdxT* itopk_indices, // [num_itopk] const std::uint32_t num_itopk, float* candidate_distances, // [num_candidates] IdxT* candidate_indices, // [num_candidates] @@ -401,8 +401,8 @@ template -__device__ void topk_by_bitonic_sort(float* itopk_distances, // [num_itopk] - IdxT* itopk_indices, // [num_itopk] +__device__ void topk_by_bitonic_sort(float* itopk_distances, // [num_itopk] + IdxT* itopk_indices, // [num_itopk] const std::uint32_t num_itopk, float* candidate_distances, // [num_candidates] IdxT* candidate_indices, // [num_candidates] @@ -463,7 +463,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel(INDEX_T* const result_indices_ptr, // [num_queries, top_k] DISTANCE_T* const result_distances_ptr, // [num_queries, top_k] const std::uint32_t top_k, - const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] + const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] const std::size_t dataset_dim, const std::size_t dataset_size, const std::size_t dataset_ld, // stride of dataset @@ -472,7 +472,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ const std::uint32_t graph_degree, const unsigned num_distilation, const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] + const INDEX_T* seed_ptr, // [num_queries, num_seeds] const uint32_t num_seeds, INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const std::uint32_t internal_topk, @@ -830,9 +830,9 @@ template dataset, raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] diff --git a/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh b/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh index dd73558f86..0fcfe2cc16 100644 --- a/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh @@ -183,9 +183,9 @@ __device__ inline void update_histogram(int itr, uint32_t threshold, uint32_t& num_bins, uint32_t& shift, - const T* x, // [nx,] + const T* x, // [nx,] uint32_t nx, - uint32_t* hist, // [num_bins] + uint32_t* hist, // [num_bins] uint8_t* state, uint32_t* output, // [topk] uint32_t* output_count) @@ -761,16 +761,16 @@ __launch_bounds__(1024, 1) __global__ void kern_topk_cta_11(uint32_t topk, uint32_t size_batch, uint32_t len_x, - const uint32_t* _x, // [size_batch, ld_x,] + const uint32_t* _x, // [size_batch, ld_x,] uint32_t ld_x, const ValT* _in_vals, // [size_batch, ld_iv,] uint32_t ld_iv, - uint32_t* _y, // [size_batch, ld_y,] + uint32_t* _y, // [size_batch, ld_y,] uint32_t ld_y, - ValT* _out_vals, // [size_batch, ld_ov,] + ValT* _out_vals, // [size_batch, ld_ov,] uint32_t ld_ov, - uint8_t* _state, // [size_batch, ...,] - uint32_t* _hints, // [size_batch,] + uint8_t* _state, // [size_batch, ...,] + uint32_t* _hints, // [size_batch,] bool sort) { const uint32_t i_batch = blockIdx.x; diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh index 93eeb0dead..c0f856103a 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh @@ -16,7 +16,7 @@ #pragma once -#include // RAFT_LOG_TRACE +#include // RAFT_LOG_TRACE #include #include // raft::resources #include // is_min_close, DistanceType diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index 199cb74fbe..47c10de200 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -220,7 +220,7 @@ void select_residuals(raft::resources const& handle, template void flat_compute_residuals( raft::resources const& handle, - float* residuals, // [n_rows, rot_dim] + float* residuals, // [n_rows, rot_dim] IdxT n_rows, device_matrix_view rotation_matrix, // [rot_dim, dim] device_matrix_view centers, // [n_lists, dim_ext] diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh index 2ab216b13b..7c5b523a8b 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh @@ -898,7 +898,7 @@ auto compute_similarity_select(const cudaDeviceProp& dev_props, } { - if (selected_perf.occupancy <= 0.0 // no candidate yet + if (selected_perf.occupancy <= 0.0 // no candidate yet || (selected_perf.occupancy < cur.occupancy * kTargetOccupancy && selected_perf.shmem_use >= cur.shmem_use) // much improved occupancy ) { diff --git a/cpp/include/raft/neighbors/detail/refine_host-ext.hpp b/cpp/include/raft/neighbors/detail/refine_host-ext.hpp index 3ce2dc3eb5..2a863b47b3 100644 --- a/cpp/include/raft/neighbors/detail/refine_host-ext.hpp +++ b/cpp/include/raft/neighbors/detail/refine_host-ext.hpp @@ -16,7 +16,7 @@ #pragma once -#include // int64_t +#include // int64_t #include // raft::host_matrix_view #include // raft::distance::DistanceType diff --git a/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh b/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh index c000a4810b..a6ed17e251 100644 --- a/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh +++ b/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh @@ -36,7 +36,7 @@ void select_k(const key_t* inK, bool select_min, int k, cudaStream_t stream) RAFT_EXPLICIT; -}; // namespace raft::neighbors::detail +}; // namespace raft::neighbors::detail #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY diff --git a/cpp/include/raft/neighbors/ivf_flat-ext.cuh b/cpp/include/raft/neighbors/ivf_flat-ext.cuh index 848703c9b5..8dbe7587ff 100644 --- a/cpp/include/raft/neighbors/ivf_flat-ext.cuh +++ b/cpp/include/raft/neighbors/ivf_flat-ext.cuh @@ -16,10 +16,10 @@ #pragma once -#include // int64_t +#include // int64_t -#include // raft::device_matrix_view -#include // raft::resources +#include // raft::device_matrix_view +#include // raft::resources #include #include // raft::neighbors::ivf_flat::index #include // RAFT_EXPLICIT diff --git a/cpp/include/raft/neighbors/ivf_pq-ext.cuh b/cpp/include/raft/neighbors/ivf_pq-ext.cuh index fcfe837e2d..4a60cfc09d 100644 --- a/cpp/include/raft/neighbors/ivf_pq-ext.cuh +++ b/cpp/include/raft/neighbors/ivf_pq-ext.cuh @@ -16,7 +16,7 @@ #pragma once -#include // int64_t +#include // int64_t #include // raft::device_matrix_view #include // raft::resources diff --git a/cpp/include/raft/neighbors/refine-ext.cuh b/cpp/include/raft/neighbors/refine-ext.cuh index c1fd4676dc..3ccd3891b7 100644 --- a/cpp/include/raft/neighbors/refine-ext.cuh +++ b/cpp/include/raft/neighbors/refine-ext.cuh @@ -16,7 +16,7 @@ #pragma once -#include // int64_t +#include // int64_t #include // raft::device_matrix_view #include // // raft::host_matrix_view diff --git a/cpp/include/raft/sparse/distance/distance.cuh b/cpp/include/raft/sparse/distance/distance.cuh index b60940341a..702846f586 100644 --- a/cpp/include/raft/sparse/distance/distance.cuh +++ b/cpp/include/raft/sparse/distance/distance.cuh @@ -218,8 +218,8 @@ void pairwise_distance(raft::resources const& handle, /** @} */ // end of sparse_distance -}; // namespace distance -}; // namespace sparse -}; // namespace raft +}; // namespace distance +}; // namespace sparse +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh index 3cb4a3e353..56ca2ebfa7 100644 --- a/cpp/include/raft/sparse/linalg/detail/norm.cuh +++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh @@ -49,10 +49,10 @@ __global__ void csr_row_normalize_l1_kernel( // over each row and then divide the values in parallel. const int* ia, // csr row ex_scan (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num rows in csr + int nnz, // array of values and number of non-zeros + int m, // num rows in csr T* result) -{ // output array +{ // output array // row-based matrix 1 thread per row int row = (blockIdx.x * TPB_X) + threadIdx.x; @@ -95,8 +95,8 @@ __global__ void csr_row_normalize_l1_kernel( template void csr_row_normalize_l1(const int* ia, // csr row ex_scan (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num rows in csr + int nnz, // array of values and number of non-zeros + int m, // num rows in csr T* result, cudaStream_t stream) { // output array @@ -115,10 +115,10 @@ __global__ void csr_row_normalize_max_kernel( // over each row and then divide the values in parallel. const int* ia, // csr row ind array (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num total rows in csr + int nnz, // array of values and number of non-zeros + int m, // num total rows in csr T* result) -{ // output array +{ // output array // row-based matrix 1 thread per row int row = (blockIdx.x * TPB_X) + threadIdx.x; @@ -163,8 +163,8 @@ __global__ void csr_row_normalize_max_kernel( template void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num total rows in csr + int nnz, // array of values and number of non-zeros + int m, // num total rows in csr T* result, cudaStream_t stream) { diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh index 545f218e63..51836ca9aa 100644 --- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh @@ -69,7 +69,7 @@ void fit_embedding(raft::resources const& handle, handle, ro, ci, vs, n, nnz}; index_type neigvs = n_components + 1; - index_type maxiter = 4000; // default reset value (when set to 0); + index_type maxiter = 4000; // default reset value (when set to 0); value_type tol = 0.01; index_type restart_iter = 15 + neigvs; // what cugraph is using diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh index f7ebf50db0..43dd182fe5 100644 --- a/cpp/include/raft/sparse/linalg/norm.cuh +++ b/cpp/include/raft/sparse/linalg/norm.cuh @@ -39,8 +39,8 @@ namespace linalg { template void csr_row_normalize_l1(const int* ia, // csr row ex_scan (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num rows in csr + int nnz, // array of values and number of non-zeros + int m, // num rows in csr T* result, cudaStream_t stream) { // output array @@ -60,8 +60,8 @@ void csr_row_normalize_l1(const int* ia, // csr row ex_scan (sorted by row) template void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num total rows in csr + int nnz, // array of values and number of non-zeros + int m, // num total rows in csr T* result, cudaStream_t stream) { diff --git a/cpp/include/raft/sparse/neighbors/detail/knn.cuh b/cpp/include/raft/sparse/neighbors/detail/knn.cuh index f2be427367..ff644c000e 100644 --- a/cpp/include/raft/sparse/neighbors/detail/knn.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/knn.cuh @@ -58,7 +58,7 @@ struct csr_batcher_t { void set_batch(int batch_num) { batch_start_ = batch_num * batch_size_; - batch_stop_ = batch_start_ + batch_size_ - 1; // zero-based indexing + batch_stop_ = batch_start_ + batch_size_ - 1; // zero-based indexing if (batch_stop_ >= total_rows_) batch_stop_ = total_rows_ - 1; // zero-based indexing diff --git a/cpp/include/raft/sparse/solver/mst_solver.cuh b/cpp/include/raft/sparse/solver/mst_solver.cuh index bfedb9ce2a..193431251f 100644 --- a/cpp/include/raft/sparse/solver/mst_solver.cuh +++ b/cpp/include/raft/sparse/solver/mst_solver.cuh @@ -78,10 +78,10 @@ class MST_solver { rmm::device_uvector altered_weights; // weights to be used for mst rmm::device_scalar mst_edge_count; // total number of edges added after every iteration rmm::device_scalar - prev_mst_edge_count; // total number of edges up to the previous iteration - rmm::device_uvector mst_edge; // mst output - true if the edge belongs in mst - rmm::device_uvector next_color; // next iteration color - rmm::device_uvector color; // index of color that vertex points to + prev_mst_edge_count; // total number of edges up to the previous iteration + rmm::device_uvector mst_edge; // mst output - true if the edge belongs in mst + rmm::device_uvector next_color; // next iteration color + rmm::device_uvector color; // index of color that vertex points to // new src-dst pairs found per iteration rmm::device_uvector temp_src; diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh index 95aeca64e5..70c5cec23f 100644 --- a/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh +++ b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh @@ -61,7 +61,7 @@ void rbc_low_dim_pass_two(raft::resources const& handle, float weight, value_int* post_dists_counter) RAFT_EXPLICIT; -}; // namespace raft::spatial::knn::detail +}; // namespace raft::spatial::knn::detail #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index 7daa1792b1..1f97cd5f76 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -83,7 +83,7 @@ double adjusted_rand_index(raft::resources const& handle, /** @} */ // end group stats_adj_rand_index -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index 07fd61411d..b669e0de32 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -85,7 +85,7 @@ double completeness_score(raft::resources const& handle, /** @} */ // end group stats_completeness -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index 67f44b0fde..ad5d233c0e 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -116,7 +116,7 @@ void cov(raft::resources const& handle, /** @} */ // end group stats_cov -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index fcc49fefd2..fe432569ee 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -80,7 +80,7 @@ double entropy(raft::resources const& handle, /** @} */ // end group stats_entropy -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh index c374251359..8480d16316 100644 --- a/cpp/include/raft/stats/histogram.cuh +++ b/cpp/include/raft/stats/histogram.cuh @@ -115,7 +115,7 @@ void histogram(raft::resources const& handle, /** @} */ // end group stats_histogram -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh index ce7872d55d..311cd599f8 100644 --- a/cpp/include/raft/stats/homogeneity_score.cuh +++ b/cpp/include/raft/stats/homogeneity_score.cuh @@ -88,7 +88,7 @@ double homogeneity_score(raft::resources const& handle, /** @} */ // end group stats_homogeneity_score -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh index 2e01918d2a..b417cbd509 100644 --- a/cpp/include/raft/stats/kl_divergence.cuh +++ b/cpp/include/raft/stats/kl_divergence.cuh @@ -76,7 +76,7 @@ value_t kl_divergence(raft::resources const& handle, /** @} */ // end group kl_divergence -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index 96c9ca3b5c..43d39cfd6c 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -93,7 +93,7 @@ void mean(raft::resources const& handle, /** @} */ // end group stats_mean -}; // namespace stats -}; // namespace raft +}; // namespace stats +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 48f5eb667f..83f9a8a941 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -160,7 +160,7 @@ void mean_add(raft::resources const& handle, /** @} */ // end group stats_mean_center -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh index f6127df701..5c27a6caf6 100644 --- a/cpp/include/raft/stats/meanvar.cuh +++ b/cpp/include/raft/stats/meanvar.cuh @@ -107,6 +107,6 @@ void meanvar(raft::resources const& handle, /** @} */ // end group stats_mean_var -}; // namespace raft::stats +}; // namespace raft::stats #endif diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh index 0c5a62257d..5c5ff346a4 100644 --- a/cpp/include/raft/stats/minmax.cuh +++ b/cpp/include/raft/stats/minmax.cuh @@ -139,6 +139,6 @@ void minmax(raft::resources const& handle, /** @} */ // end group stats_minmax -}; // namespace stats -}; // namespace raft +}; // namespace stats +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/mutual_info_score.cuh b/cpp/include/raft/stats/mutual_info_score.cuh index 5c4ae43e09..5a334e9280 100644 --- a/cpp/include/raft/stats/mutual_info_score.cuh +++ b/cpp/include/raft/stats/mutual_info_score.cuh @@ -86,7 +86,7 @@ double mutual_info_score(raft::resources const& handle, /** @} */ // end group stats_mutual_info -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/rand_index.cuh b/cpp/include/raft/stats/rand_index.cuh index 6a208c5492..a21a0c0dc5 100644 --- a/cpp/include/raft/stats/rand_index.cuh +++ b/cpp/include/raft/stats/rand_index.cuh @@ -72,7 +72,7 @@ double rand_index(raft::resources const& handle, /** @} */ // end group stats_rand_index -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/stddev.cuh b/cpp/include/raft/stats/stddev.cuh index 6349f8fd11..0a67bd2325 100644 --- a/cpp/include/raft/stats/stddev.cuh +++ b/cpp/include/raft/stats/stddev.cuh @@ -182,7 +182,7 @@ void vars(raft::resources const& handle, /** @} */ // end group stats_variance -}; // namespace stats -}; // namespace raft +}; // namespace stats +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/sum.cuh b/cpp/include/raft/stats/sum.cuh index 2ac9cd9eb5..2c3ed1b83e 100644 --- a/cpp/include/raft/stats/sum.cuh +++ b/cpp/include/raft/stats/sum.cuh @@ -85,7 +85,7 @@ void sum(raft::resources const& handle, /** @} */ // end group stats_sum -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/v_measure.cuh b/cpp/include/raft/stats/v_measure.cuh index 8ea5c65600..041adb5e38 100644 --- a/cpp/include/raft/stats/v_measure.cuh +++ b/cpp/include/raft/stats/v_measure.cuh @@ -92,7 +92,7 @@ double v_measure(raft::resources const& handle, /** @} */ // end group stats_vmeasure -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/weighted_mean.cuh b/cpp/include/raft/stats/weighted_mean.cuh index 7d06d5dff1..da22f0163c 100644 --- a/cpp/include/raft/stats/weighted_mean.cuh +++ b/cpp/include/raft/stats/weighted_mean.cuh @@ -186,7 +186,7 @@ void col_weighted_mean(raft::resources const& handle, /** @} */ // end group stats_weighted_mean -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/util/cache.cuh b/cpp/include/raft/util/cache.cuh index a9cfe64568..255dea16bb 100644 --- a/cpp/include/raft/util/cache.cuh +++ b/cpp/include/raft/util/cache.cuh @@ -362,9 +362,9 @@ class Cache { int GetSize() const { return cached_keys.size(); } protected: - int n_vec; //!< Number of elements in a cached vector - float cache_size; //!< in MiB - int n_cache_sets; //!< number of cache sets + int n_vec; //!< Number of elements in a cached vector + float cache_size; //!< in MiB + int n_cache_sets; //!< number of cache sets const int TPB = 256; //!< threads per block for kernel launch int n_iter = 0; //!< Counter for time stamping cache operation diff --git a/cpp/include/raft/util/cache_util.cuh b/cpp/include/raft/util/cache_util.cuh index a7dcc22b02..79a94d9563 100644 --- a/cpp/include/raft/util/cache_util.cuh +++ b/cpp/include/raft/util/cache_util.cuh @@ -46,7 +46,7 @@ __global__ void get_vecs( const math_t* cache, int_t n_vec, const idx_t* cache_idx, int_t n, math_t* out) { int tid = threadIdx.x + blockIdx.x * blockDim.x; - int row = tid % n_vec; // row idx + int row = tid % n_vec; // row idx if (tid < n_vec * n) { size_t out_col = tid / n_vec; // col idx size_t cache_col = cache_idx[out_col]; @@ -93,7 +93,7 @@ __global__ void store_vecs(const math_t* tile, int n_cache_vecs) { int tid = threadIdx.x + blockIdx.x * blockDim.x; - int row = tid % n_vec; // row idx + int row = tid % n_vec; // row idx if (tid < n_vec * n) { int tile_col = tid / n_vec; // col idx int data_col = tile_idx ? tile_idx[tile_col] : tile_col; @@ -357,7 +357,7 @@ __global__ void get_cache_idx(int* keys, cache_time[cidx] = time; // update time stamp cache_idx[tid] = cidx; // exact cache idx } else { - cache_idx[tid] = sidx; // assign cache set + cache_idx[tid] = sidx; // assign cache set } } } diff --git a/cpp/include/raft/util/device_loads_stores.cuh b/cpp/include/raft/util/device_loads_stores.cuh index e3d54c51f5..65936b2f66 100644 --- a/cpp/include/raft/util/device_loads_stores.cuh +++ b/cpp/include/raft/util/device_loads_stores.cuh @@ -16,7 +16,7 @@ #pragma once -#include // uintX_t +#include // uintX_t #include #include // DI diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py index 49f96aa18b..3d8bbcec4a 100644 --- a/cpp/scripts/run-clang-tidy.py +++ b/cpp/scripts/run-clang-tidy.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ import subprocess -EXPECTED_VERSIONS = ("11.1.0",) +EXPECTED_VERSIONS = ("16.0.6",) VERSION_REGEX = re.compile(r"clang version ([0-9.]+)") CMAKE_COMPILER_REGEX = re.compile( r"^\s*CMAKE_CXX_COMPILER:FILEPATH=(.+)\s*$", re.MULTILINE) diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh index 20d78c7bb5..8f616ada98 100644 --- a/cpp/test/distance/distance_base.cuh +++ b/cpp/test/distance/distance_base.cuh @@ -19,13 +19,13 @@ #include // common::nvtx::range #include -#include // make_device_matrix_view -#include // raft::sqrt -#include // raft::resources +#include // make_device_matrix_view +#include // raft::sqrt +#include // raft::resources #include #include // raft::distance::DistanceType #include -#include // rmm::device_uvector +#include // rmm::device_uvector namespace raft { namespace distance { diff --git a/cpp/test/linalg/rsvd.cu b/cpp/test/linalg/rsvd.cu index 0c66f47c7f..b5d10d215c 100644 --- a/cpp/test/linalg/rsvd.cu +++ b/cpp/test/linalg/rsvd.cu @@ -160,24 +160,24 @@ class RsvdTest : public ::testing::TestWithParam> { const std::vector> inputs_fx = { // Test with ratios - {0.20f, 256, 256, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Square + BBT - {0.20f, 2048, 256, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Tall + BBT + {0.20f, 256, 256, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Square + BBT + {0.20f, 2048, 256, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Tall + BBT - {0.20f, 256, 256, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Square + non-BBT - {0.20f, 2048, 256, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Tall + non-BBT + {0.20f, 256, 256, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Square + non-BBT + {0.20f, 2048, 256, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Tall + non-BBT - {0.20f, 2048, 2048, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Square + BBT - {0.60f, 16384, 2048, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Tall + BBT + {0.20f, 2048, 2048, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Square + BBT + {0.60f, 16384, 2048, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Tall + BBT - {0.20f, 2048, 2048, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Square + non-BBT - {0.60f, 16384, 2048, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL} // Tall + non-BBT + {0.20f, 2048, 2048, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Square + non-BBT + {0.60f, 16384, 2048, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL} // Tall + non-BBT - , // Test with fixed ranks - {0.10f, 256, 256, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Square + BBT - {0.12f, 2048, 256, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Tall + BBT + , // Test with fixed ranks + {0.10f, 256, 256, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Square + BBT + {0.12f, 2048, 256, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Tall + BBT - {0.10f, 256, 256, 0.25f, 0.0f, 0.0f, 100, 5, false, 4321ULL}, // Square + non-BBT - {0.12f, 2048, 256, 0.25f, 0.0f, 0.0f, 100, 5, false, 4321ULL}, // Tall + non-BBT + {0.10f, 256, 256, 0.25f, 0.0f, 0.0f, 100, 5, false, 4321ULL}, // Square + non-BBT + {0.12f, 2048, 256, 0.25f, 0.0f, 0.0f, 100, 5, false, 4321ULL}, // Tall + non-BBT {0.60f, 2048, 2048, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Square + BBT {1.00f, 16384, 2048, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Tall + BBT @@ -188,14 +188,14 @@ const std::vector> inputs_fx = { const std::vector> inputs_dx = { // Test with ratios - {0.20, 256, 256, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Square + BBT - {0.20, 2048, 256, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Tall + BBT - {0.20, 256, 256, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Square + non-BBT - {0.20, 2048, 256, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Tall + non-BBT - {0.20, 2048, 2048, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Square + BBT - {0.60, 16384, 2048, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Tall + BBT - {0.20, 2048, 2048, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Square + non-BBT - {0.60, 16384, 2048, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL} // Tall + non-BBT + {0.20, 256, 256, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Square + BBT + {0.20, 2048, 256, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Tall + BBT + {0.20, 256, 256, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Square + non-BBT + {0.20, 2048, 256, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Tall + non-BBT + {0.20, 2048, 2048, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Square + BBT + {0.60, 16384, 2048, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Tall + BBT + {0.20, 2048, 2048, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Square + non-BBT + {0.60, 16384, 2048, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL} // Tall + non-BBT , // Test with fixed ranks {0.10, 256, 256, 0.25f, 0.0, 0.0, 100, 5, true, 4321ULL}, // Square + BBT diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index ea905d2089..eadc88085f 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -372,7 +372,7 @@ inline std::vector generate_inputs() {100}, {1000}, {1, 8, 17}, - {1, 16}, // k + {1, 16}, // k {search_algo::SINGLE_CTA, search_algo::MULTI_CTA, search_algo::MULTI_KERNEL}, {0, 1, 10, 100}, // query size {0}, diff --git a/cpp/test/util/bitonic_sort.cu b/cpp/test/util/bitonic_sort.cu index d1f03f78b5..2cf5420334 100644 --- a/cpp/test/util/bitonic_sort.cu +++ b/cpp/test/util/bitonic_sort.cu @@ -103,12 +103,12 @@ struct bitonic_launch { }; template -class BitonicTest : public testing::TestWithParam { // NOLINT +class BitonicTest : public testing::TestWithParam { // NOLINT protected: - const test_spec spec; // NOLINT - std::vector in; // NOLINT - std::vector out; // NOLINT - std::vector ref; // NOLINT + const test_spec spec; // NOLINT + std::vector in; // NOLINT + std::vector out; // NOLINT + std::vector ref; // NOLINT void segmented_sort(std::vector& vec, int k, bool ascending) // NOLINT { @@ -184,13 +184,13 @@ auto inputs = ::testing::Values(test_spec{1, 1, 1, true}, test_spec{70, 1, 64, true}, test_spec{70, 2, 128, false}); -using Floats = BitonicTest; // NOLINT -TEST_P(Floats, Run) { run(); } // NOLINT -INSTANTIATE_TEST_CASE_P(BitonicTest, Floats, inputs); // NOLINT +using Floats = BitonicTest; // NOLINT +TEST_P(Floats, Run) { run(); } // NOLINT +INSTANTIATE_TEST_CASE_P(BitonicTest, Floats, inputs); // NOLINT -using Ints = BitonicTest; // NOLINT -TEST_P(Ints, Run) { run(); } // NOLINT -INSTANTIATE_TEST_CASE_P(BitonicTest, Ints, inputs); // NOLINT +using Ints = BitonicTest; // NOLINT +TEST_P(Ints, Run) { run(); } // NOLINT +INSTANTIATE_TEST_CASE_P(BitonicTest, Ints, inputs); // NOLINT using Doubles = BitonicTest; // NOLINT TEST_P(Doubles, Run) { run(); } // NOLINT diff --git a/cpp/test/util/reduction.cu b/cpp/test/util/reduction.cu index 17deaf99eb..548d3b9d53 100644 --- a/cpp/test/util/reduction.cu +++ b/cpp/test/util/reduction.cu @@ -147,9 +147,9 @@ struct reduction_launch { template class ReductionTest : public testing::TestWithParam> { // NOLINT protected: - const std::vector input; // NOLINT - rmm::cuda_stream_view stream; // NOLINT - rmm::device_uvector arr_d; // NOLINT + const std::vector input; // NOLINT + rmm::cuda_stream_view stream; // NOLINT + rmm::device_uvector arr_d; // NOLINT public: explicit ReductionTest() @@ -184,8 +184,8 @@ const std::vector binary_test_vector{ auto reduction_input = ::testing::Values(test_vector); auto binary_reduction_input = ::testing::Values(binary_test_vector); -using ReductionTestInt = ReductionTest; // NOLINT -using BinaryReductionTestInt = ReductionTest; // NOLINT +using ReductionTestInt = ReductionTest; // NOLINT +using BinaryReductionTestInt = ReductionTest; // NOLINT TEST_P(ReductionTestInt, REDUCTIONS) { run_reduction(); } INSTANTIATE_TEST_CASE_P(ReductionTest, ReductionTestInt, reduction_input); // NOLINT TEST_P(BinaryReductionTestInt, BINARY_REDUCTION) { run_binary_reduction(); } // NOLINT diff --git a/dependencies.yaml b/dependencies.yaml index 6f64287f54..700a6db1bf 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -169,10 +169,10 @@ dependencies: common: - output_types: [conda, requirements] packages: - - clang=16.0.1 + - clang=16.0.6 - output_types: [conda] packages: - - clang-tools=16.0.1 + - clang-tools=16.0.6 nn_bench: common: - output_types: [conda, pyproject, requirements] From 8292ef10ca93d444f0b89970b852c861ee2c0882 Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Thu, 21 Sep 2023 13:53:10 -0500 Subject: [PATCH 04/12] Update image names (#1835) PR updates `rapidsai/ci` references to `rapidsai/ci-conda` Authors: - Jake Awe (https://github.com/AyodeAwe) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/raft/pull/1835 --- .github/workflows/build.yaml | 2 +- .github/workflows/pr.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 00004c4e4d..107823d5ee 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -62,7 +62,7 @@ jobs: arch: "amd64" branch: ${{ inputs.branch }} build_type: ${{ inputs.build_type || 'branch' }} - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci-conda:latest" date: ${{ inputs.date }} node_type: "gpu-v100-latest-1" run_script: "ci/build_docs.sh" diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 4437e0dc85..4fa3c5df86 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -62,7 +62,7 @@ jobs: build_type: pull-request node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" wheel-build-pylibraft: needs: checks From 4f0a2d2d6e30eea0c036ca3b531e03e44e760fbe Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Thu, 21 Sep 2023 15:39:33 -0700 Subject: [PATCH 05/12] Add NVTX ranges for cagra search/serialize functions (#1737) * Add NVTX ranges for cagra search/serialize functions * Add warn_non_pool_workspace for cagra build/search Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1737 --- cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh | 2 ++ cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh | 8 +++++++- .../raft/neighbors/detail/cagra/cagra_serialize.cuh | 5 +++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh index d19d7e7904..80e964df57 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -46,6 +47,7 @@ void build_knn_graph(raft::resources const& res, std::optional build_params = std::nullopt, std::optional search_params = std::nullopt) { + resource::detail::warn_non_pool_workspace(res, "raft::neighbors::cagra::build"); RAFT_EXPECTS(!build_params || build_params->metric == distance::DistanceType::L2Expanded, "Currently only L2Expanded metric is supported"); diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 8190817b5b..b484fa55f9 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -22,6 +22,8 @@ #include #include +#include +#include #include #include #include @@ -60,17 +62,21 @@ void search_main(raft::resources const& res, raft::device_matrix_view neighbors, raft::device_matrix_view distances) { + resource::detail::warn_non_pool_workspace(res, "raft::neighbors::cagra::search"); RAFT_LOG_DEBUG("# dataset size = %lu, dim = %lu\n", static_cast(index.dataset().extent(0)), static_cast(index.dataset().extent(1))); RAFT_LOG_DEBUG("# query size = %lu, dim = %lu\n", static_cast(queries.extent(0)), static_cast(queries.extent(1))); - RAFT_EXPECTS(queries.extent(1) == index.dim(), "Querise and index dim must match"); + RAFT_EXPECTS(queries.extent(1) == index.dim(), "Queries and index dim must match"); const uint32_t topk = neighbors.extent(1); if (params.max_queries == 0) { params.max_queries = queries.extent(0); } + common::nvtx::range fun_scope( + "cagra::search(max_queries = %u, k = %u, dim = %zu)", params.max_queries, topk, index.dim()); + std::unique_ptr> plan = factory::create( res, params, index.dim(), index.graph_degree(), topk); diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index 2c9cbd2563..234911e15c 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -54,6 +55,8 @@ void serialize(raft::resources const& res, const index& index_, bool include_dataset) { + common::nvtx::range fun_scope("cagra::serialize"); + RAFT_LOG_DEBUG( "Saving CAGRA index, size %zu, dim %u", static_cast(index_.size()), index_.dim()); @@ -113,6 +116,8 @@ void serialize(raft::resources const& res, template auto deserialize(raft::resources const& res, std::istream& is) -> index { + common::nvtx::range fun_scope("cagra::deserialize"); + char dtype_string[4]; is.read(dtype_string, 4); From e1db49dcce59fa141470e4d5a374c9bcb067d7cc Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Fri, 22 Sep 2023 12:15:19 +0200 Subject: [PATCH 06/12] Add ivf-flat notebook (#1758) Example jupyter notebook to demonstrate usage of the IVF-Flat API Authors: - Tamas Bela Feher (https://github.com/tfeher) Approvers: - Artem M. Chirkin (https://github.com/achirkin) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1758 --- notebooks/ivf_flat_example.ipynb | 674 +++++++++++++++++++++++++++++++ notebooks/tutorial_ivf_pq.ipynb | 42 +- notebooks/utils.py | 103 +++++ 3 files changed, 788 insertions(+), 31 deletions(-) create mode 100644 notebooks/ivf_flat_example.ipynb create mode 100644 notebooks/utils.py diff --git a/notebooks/ivf_flat_example.ipynb b/notebooks/ivf_flat_example.ipynb new file mode 100644 index 0000000000..08b9d78169 --- /dev/null +++ b/notebooks/ivf_flat_example.ipynb @@ -0,0 +1,674 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4f49c5c4-1170-42a7-9d6a-b90acd00c3c3", + "metadata": {}, + "source": [ + "# RAFT IVF Flat Example Notebook" + ] + }, + { + "cell_type": "markdown", + "id": "4bcfe810-f120-422c-b2bb-72cc43d0c4ca", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "This notebook demonstrates how to run approximate nearest neighbor search using RAFT IVF-Flat algorithm.\n", + "It builds and searches an index using a dataset from the ann-benchmarks million-scale datasets, saves/loads the index to disk, and explores important parameters for fine-tuning the search performance and accuracy of the index." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "fe73ada7-7b7f-4005-9440-85428194311b", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import cupy as cp\n", + "import numpy as np\n", + "from pylibraft.common import DeviceResources\n", + "from pylibraft.neighbors import ivf_flat\n", + "import matplotlib.pyplot as plt\n", + "import tempfile\n", + "from utils import BenchmarkTimer, calc_recall, load_dataset" + ] + }, + { + "cell_type": "markdown", + "id": "da9e8615-ea9f-4735-b70f-15ccab36c0d9", + "metadata": {}, + "source": [ + "For best performance it is recommended to use an RMM pooling allocator, to minimize the overheads of repeated CUDA allocations." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5350e4d9-0993-406a-80af-29538b5677c2", + "metadata": {}, + "outputs": [], + "source": [ + "import rmm\n", + "from rmm.allocators.cupy import rmm_cupy_allocator\n", + "mr = rmm.mr.PoolMemoryResource(\n", + " rmm.mr.CudaMemoryResource(),\n", + " initial_pool_size=2**30\n", + ")\n", + "rmm.mr.set_current_device_resource(mr)\n", + "cp.cuda.set_allocator(rmm_cupy_allocator)" + ] + }, + { + "cell_type": "markdown", + "id": "b0d935f2-ba24-44fc-bdfe-a769b7fcd8e6", + "metadata": {}, + "source": [ + "The following GPU is used for this notebook" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a5daa4b4-96de-4e74-bfd6-505b13595f62", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Thu Sep 21 02:30:53 2023 \n", + "+---------------------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |\n", + "|-----------------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", + "| | | MIG M. |\n", + "|=========================================+======================+======================|\n", + "| 0 NVIDIA H100 PCIe On | 00000000:41:00.0 Off | 0 |\n", + "| N/A 35C P0 69W / 350W | 1487MiB / 81559MiB | 0% Default |\n", + "| | | Disabled |\n", + "+-----------------------------------------+----------------------+----------------------+\n", + " \n", + "+---------------------------------------------------------------------------------------+\n", + "| Processes: |\n", + "| GPU GI CI PID Type Process name GPU Memory |\n", + "| ID ID Usage |\n", + "|=======================================================================================|\n", + "| 0 N/A N/A 3940 C /opt/conda/envs/rapids/bin/python 1474MiB |\n", + "+---------------------------------------------------------------------------------------+\n" + ] + } + ], + "source": [ + "# Report the GPU in use\n", + "!nvidia-smi" + ] + }, + { + "cell_type": "markdown", + "id": "88a654cc-6389-4526-a3e6-826de5606a09", + "metadata": {}, + "source": [ + "## Load dataset\n", + "\n", + "The ANN benchmarks website provides the datasets in HDF5 format.\n", + "\n", + "The list of prepared datasets can be found at https://github.com/erikbern/ann-benchmarks/#data-sets" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "5f529ad6-b0bd-495c-bf7c-43f10fb6aa14", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The index and data will be saved in /tmp/raft_example\n" + ] + } + ], + "source": [ + "WORK_FOLDER = os.path.join(tempfile.gettempdir(), \"raft_example\")\n", + "f = load_dataset(\"http://ann-benchmarks.com/sift-128-euclidean.hdf5\", work_folder=WORK_FOLDER)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3d68a7db-bcf4-449c-96c3-1e8ab146c84d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded dataset of size (1000000, 128), 0.5 GiB; metric: 'euclidean'.\n", + "Number of test queries: 10000\n" + ] + } + ], + "source": [ + "metric = f.attrs['distance']\n", + "\n", + "dataset = cp.array(f['train'])\n", + "queries = cp.array(f['test'])\n", + "gt_neighbors = cp.array(f['neighbors'])\n", + "gt_distances = cp.array(f['distances'])\n", + "\n", + "itemsize = dataset.dtype.itemsize \n", + "\n", + "print(f\"Loaded dataset of size {dataset.shape}, {dataset.size*itemsize/(1<<30):4.1f} GiB; metric: '{metric}'.\")\n", + "print(f\"Number of test queries: {queries.shape[0]}\")" + ] + }, + { + "cell_type": "markdown", + "id": "9f463c50-d1d3-49be-bcfe-952602efa603", + "metadata": {}, + "source": [ + "## Build index\n", + "We set [IndexParams](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#pylibraft.neighbors.ivf_flat.IndexParams) and build the index. The index parameters will be discussed in more detail in later sections of this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "737f8841-93f9-4c8e-b2e1-787d4474ef94", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 120 ms, sys: 5.33 ms, total: 125 ms\n", + "Wall time: 124 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "build_params = ivf_flat.IndexParams(\n", + " n_lists=1024,\n", + " metric=\"euclidean\",\n", + " kmeans_trainset_fraction=0.1,\n", + " kmeans_n_iters=20,\n", + " add_data_on_build=True\n", + " )\n", + "\n", + "index = ivf_flat.build(build_params, dataset)" + ] + }, + { + "cell_type": "markdown", + "id": "a16a0cf6-3b05-4afd-9bb8-54431e0d7439", + "metadata": {}, + "source": [ + "The index is built. We can print some basic information of the index" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1aec7024-6e5d-4d2c-82e6-7b5734aec958", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(type=IVF-FLAT, metric=euclidean, size=1000000, dim=128, n_lists=1024, adaptive_centers=False)\n" + ] + } + ], + "source": [ + "print(index)" + ] + }, + { + "cell_type": "markdown", + "id": "df7d4958-56a3-48ea-bd64-3486fdb57fb7", + "metadata": {}, + "source": [ + "## Search neighbors" + ] + }, + { + "cell_type": "markdown", + "id": "89ba2eaa-4c85-4e1c-b07c-920394e55dce", + "metadata": {}, + "source": [ + "It is recommended to reuse [device recosources](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/common/#pylibraft.common.DeviceResources) across multiple invocations of search, since constructing these can be time consuming. We will reuse the resources by passing the same handle to each RAFT API call." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "46e0421b-9335-47a2-8451-a91f56c2f086", + "metadata": {}, + "outputs": [], + "source": [ + "handle = DeviceResources()" + ] + }, + { + "cell_type": "markdown", + "id": "a6365229-18fd-468f-af30-e24b950cbd6e", + "metadata": {}, + "source": [ + "After setting [SearchParams](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#pylibraft.neighbors.ivf_flat.SearchParams) we search for for `k=10` neighbors." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "595454e1-7240-4b43-9a73-963d5670b00c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 171 ms, sys: 52.6 ms, total: 224 ms\n", + "Wall time: 236 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "n_queries=10000\n", + "# n_probes is the number of clusters we select in the first (coarse) search step. This is the only hyper parameter for search.\n", + "search_params = ivf_flat.SearchParams(n_probes=30)\n", + "\n", + "# Search 10 nearest neighbors.\n", + "distances, indices = ivf_flat.search(search_params, index, cp.asarray(queries[:n_queries,:]), k=10, handle=handle)\n", + " \n", + "# RAFT calls are asynchronous (when handle arg is provided), we need to sync before accessing the results.\n", + "handle.sync()\n", + "distances, neighbors = cp.asnumpy(distances), cp.asnumpy(indices)" + ] + }, + { + "cell_type": "markdown", + "id": "43d20ca7-7b9e-4046-bb52-640a2744db75", + "metadata": {}, + "source": [ + "The returned arrays have shape {n_queries x 10] and store the distance values and the indices of the searched vectors. We check how accurate the search is. The accuracy of the search is quantified as `recall`, which is a value between 0 and 1 and tells us what fraction of the returned neighbors are actual k nearest neighbors. " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "8cd9cd20-ca00-4a35-a0a0-86636521b31a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.97406" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "calc_recall(neighbors, gt_neighbors)" + ] + }, + { + "cell_type": "markdown", + "id": "cde5079c-9777-45a1-9545-cffbcc59988f", + "metadata": {}, + "source": [ + "## Save and load the index\n", + "You can serialize the index to file using [save](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#pylibraft.neighbors.ivf_flat.save), and [load](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#pylibraft.neighbors.ivf_flat.load) it later." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "bf94e45c-e7fb-4aa3-a611-ddaee7ac41ae", + "metadata": {}, + "outputs": [], + "source": [ + "index_file = os.path.join(WORK_FOLDER, \"my_ivf_flat_index.bin\")\n", + "ivf_flat.save(index_file, index)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "1622d9be-be41-4d25-be99-d348c5e54957", + "metadata": {}, + "outputs": [], + "source": [ + "index = ivf_flat.load(index_file)" + ] + }, + { + "cell_type": "markdown", + "id": "15d503e5-05e8-47ce-8501-e13fc512099c", + "metadata": {}, + "source": [ + "## Tune search parameters\n", + "Search has a single hyper parameter: `n_probes`, which describes how many neighboring cluster is searched (probed) for each query. Within a probed cluster, the distance is computed between all the vectors in the cluster and the query point, and the top-k neighbors are selected. Finally, the top-k neighbors are selected from all the neighbor candidates from the probed clusters.\n", + "\n", + "Let's see how search accuracy and latency changes when we change the `n_probes` parameter." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "ace0c31f-af75-4352-a438-123a9a03612c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Benchmarking search with n_probes = 10\n", + "recall 0.86625\n", + "Average search time: 0.026 +/- 0.000259 s\n", + "Queries per second (QPS): 384968\n", + "\n", + "Benchmarking search with n_probes = 20\n", + "recall 0.94705\n", + "Average search time: 0.050 +/- 5.43e-05 s\n", + "Queries per second (QPS): 198880\n", + "\n", + "Benchmarking search with n_probes = 30\n", + "recall 0.97406\n", + "Average search time: 0.075 +/- 8.59e-05 s\n", + "Queries per second (QPS): 133954\n", + "\n", + "Benchmarking search with n_probes = 50\n", + "recall 0.99169\n", + "Average search time: 0.123 +/- 4.78e-05 s\n", + "Queries per second (QPS): 80997\n", + "\n", + "Benchmarking search with n_probes = 100\n", + "recall 0.99844\n", + "Average search time: 0.244 +/- 0.000249 s\n", + "Queries per second (QPS): 40934\n", + "\n", + "Benchmarking search with n_probes = 200\n", + "recall 0.99932\n", + "Average search time: 0.468 +/- 0.000367 s\n", + "Queries per second (QPS): 21382\n", + "\n", + "Benchmarking search with n_probes = 500\n", + "recall 0.99933\n", + "Average search time: 1.039 +/- 0.000209 s\n", + "Queries per second (QPS): 9625\n", + "\n", + "Benchmarking search with n_probes = 1024\n", + "recall 0.99935\n", + "Average search time: 0.701 +/- 0.00579 s\n", + "Queries per second (QPS): 14273\n" + ] + } + ], + "source": [ + "n_probes = np.asarray([10, 20, 30, 50, 100, 200, 500, 1024]);\n", + "qps = np.zeros(n_probes.shape);\n", + "recall = np.zeros(n_probes.shape);\n", + "\n", + "for i in range(len(n_probes)):\n", + " print(\"\\nBenchmarking search with n_probes =\", n_probes[i])\n", + " timer = BenchmarkTimer(reps=1, warmup=1)\n", + " for rep in timer.benchmark_runs():\n", + " distances, neighbors = ivf_flat.search(\n", + " ivf_flat.SearchParams(n_probes=n_probes[i]),\n", + " index,\n", + " cp.asarray(queries),\n", + " k=10,\n", + " handle=handle,\n", + " )\n", + " handle.sync()\n", + " \n", + " recall[i] = calc_recall(cp.asnumpy(neighbors), gt_neighbors)\n", + " print(\"recall\", recall[i])\n", + "\n", + " timings = np.asarray(timer.timings)\n", + " avg_time = timings.mean()\n", + " std_time = timings.std()\n", + " qps[i] = queries.shape[0] / avg_time\n", + " print(\"Average search time: {0:7.3f} +/- {1:7.3} s\".format(avg_time, std_time))\n", + " print(\"Queries per second (QPS): {0:8.0f}\".format(qps[i]))" + ] + }, + { + "cell_type": "markdown", + "id": "20b2498c-7231-4211-990e-600d5c26a9a1", + "metadata": {}, + "source": [ + "The plots below illustrate how the accuracy (recall) and the throughput (queries per second) depends on the `n_probes` parameter." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1ac370f-91c8-4054-95c7-a749df5f16d2", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure(figsize=(12,3))\n", + "ax = fig.add_subplot(131)\n", + "ax.plot(n_probes, recall,'o-')\n", + "#ax.set_xticks(bench_k, bench_k)\n", + "ax.set_xlabel('n_probes')\n", + "ax.grid()\n", + "ax.set_ylabel('recall (@k=10)')\n", + "\n", + "ax = fig.add_subplot(132)\n", + "ax.plot(n_probes, qps,'o-')\n", + "#ax.set_xticks(bench_k, bench_k)\n", + "ax.set_xlabel('n_probes')\n", + "ax.grid()\n", + "ax.set_ylabel('queries per second');\n", + "\n", + "ax = fig.add_subplot(133)\n", + "ax.plot(recall, qps,'o-')\n", + "#ax.set_xticks(bench_k, bench_k)\n", + "ax.set_xlabel('recall')\n", + "ax.grid()\n", + "ax.set_ylabel('queries per second');\n", + "#ax.set_yscale('log')" + ] + }, + { + "cell_type": "markdown", + "id": "81e7ad6a-bddc-45de-9cce-0fb913f91efe", + "metadata": {}, + "source": [ + "## Adjust build parameters\n", + "### n_lists\n", + "The number of clusters (or lists) is set by the n_list parameter. Let's change it to 100 clusters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "addbfff3-7773-4290-9608-5489edf4886d", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "build_params = ivf_flat.IndexParams(\n", + " n_lists=100,\n", + " metric=\"euclidean\",\n", + " kmeans_trainset_fraction=1,\n", + " kmeans_n_iters=20,\n", + " add_data_on_build=True\n", + " )\n", + "\n", + "index = ivf_flat.build(build_params, dataset, handle=handle)" + ] + }, + { + "cell_type": "markdown", + "id": "48db27f9-54c8-4dac-839b-af94ada8885f", + "metadata": {}, + "source": [ + "The ratio of n_probes / n_list will determine how large fraction of the dataset is searched for each query. The right combination depends on the use case. Here we will search 10 of the clusters for each query." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a0149ad-de38-4195-97a5-ce5d5d877036", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "n_queries=10000\n", + "\n", + "search_params = ivf_flat.SearchParams(n_probes=10)\n", + "\n", + "# Search 10 nearest neighbors.\n", + "distances, indices = ivf_flat.search(search_params, index, cp.asarray(queries[:n_queries,:]), k=10, handle=handle)\n", + " \n", + "handle.sync()\n", + "distances, neighbors = cp.asnumpy(distances), cp.asnumpy(indices)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eedc3ec4-06af-42c5-8cdf-490a5c2bc49a", + "metadata": {}, + "outputs": [], + "source": [ + "calc_recall(neighbors, gt_neighbors)" + ] + }, + { + "cell_type": "markdown", + "id": "0c44800f-1e9e-4f7b-87fe-0f25e6590faa", + "metadata": {}, + "source": [ + "### trainset_fraction\n", + "During clustering we can sub-sample the dataset. The parameter `trainset_fraction` determines what fraction to use. Often we get good results by using only 1/10th of the dataset for clustering. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a54d190-64d4-4cd4-a497-365cbffda871", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "build_params = ivf_flat.IndexParams( \n", + " n_lists=100, \n", + " metric=\"sqeuclidean\", \n", + " kmeans_trainset_fraction=0.1, \n", + " kmeans_n_iters=20 \n", + " ) \n", + "index = ivf_flat.build(build_params, dataset, handle=handle)" + ] + }, + { + "cell_type": "markdown", + "id": "9d86a213-d6ae-4fca-9082-cb5a4d1dab36", + "metadata": {}, + "source": [ + "We see only a minimal change in the recall" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cc992e8-a5e5-4508-b790-0e934160b660", + "metadata": {}, + "outputs": [], + "source": [ + "search_params = ivf_flat.SearchParams(n_probes=10)\n", + "\n", + "distances, indices = ivf_flat.search(search_params, index, cp.asarray(queries[:n_queries,:]), k=10, handle=handle)\n", + " \n", + "handle.sync()\n", + "distances, neighbors = cp.asnumpy(distances), cp.asnumpy(indices)\n", + "calc_recall(neighbors, gt_neighbors)" + ] + }, + { + "cell_type": "markdown", + "id": "25289ebc-7d89-4fa6-bc62-e25b6e77750c", + "metadata": {}, + "source": [ + "### Add vectors on build\n", + "Currently you cannot configure how RAFT sub-samples the input. If you want to have a fine control on how the training set is selected, then create the index in two steps:\n", + "1. Define cluster centers on a training set, but do not add any vector to the index\n", + "2. Add vectors to the index (extend)\n", + "\n", + "This workflow shall be familiar to FAISS users. Note that raft does not require adding the data in batches, internal batching is used when necessary.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ebcf970-94ed-4825-9885-277bd984b90c", + "metadata": {}, + "outputs": [], + "source": [ + "# subsample the dataset\n", + "n_train = 10000\n", + "train_set = dataset[cp.random.choice(dataset.shape[0], n_train, replace=False),:]\n", + "\n", + "# build using training set\n", + "build_params = ivf_flat.IndexParams(\n", + " n_lists=1024,\n", + " metric=\"sqeuclidean\",\n", + " kmeans_trainset_fraction=1,\n", + " kmeans_n_iters=20,\n", + " add_data_on_build=False\n", + " )\n", + "index = ivf_flat.build(build_params, train_set)\n", + "\n", + "print(\"Index before adding vectors\", index)\n", + "\n", + "ivf_flat.extend(index, dataset, cp.arange(dataset.shape[0], dtype=cp.int64))\n", + "\n", + "print(\"Index after adding vectors\", index)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "029d48a9-baf7-4263-af43-9e500ef3cce4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/tutorial_ivf_pq.ipynb b/notebooks/tutorial_ivf_pq.ipynb index 6aa8cd6495..397e39bfba 100644 --- a/notebooks/tutorial_ivf_pq.ipynb +++ b/notebooks/tutorial_ivf_pq.ipynb @@ -79,6 +79,7 @@ "from pylibraft.common import DeviceResources\n", "from pylibraft.neighbors import ivf_pq, refine\n", "from adjustText import adjust_text\n", + "from utils import calc_recall, load_dataset\n", "\n", "%matplotlib inline" ] @@ -194,15 +195,18 @@ "cell_type": "code", "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The index and data will be saved in /tmp/raft_example\n" + ] + } + ], "source": [ "DATASET_URL = \"http://ann-benchmarks.com/sift-128-euclidean.hdf5\"\n", - "DATASET_FILENAME = DATASET_URL.split('/')[-1]\n", - "\n", - "## download the dataset\n", - "dataset_path = os.path.join(WORK_FOLDER, DATASET_FILENAME)\n", - "if not os.path.exists(dataset_path):\n", - " urllib.request.urlretrieve(DATASET_URL, dataset_path)" + "f = load_dataset(DATASET_URL)" ] }, { @@ -227,8 +231,6 @@ } ], "source": [ - "f = h5py.File(dataset_path, \"r\")\n", - "\n", "metric = f.attrs['distance']\n", "\n", "dataset = cp.array(f['train'])\n", @@ -456,28 +458,6 @@ } ], "source": [ - "## Check the quality of the prediction (recall)\n", - "def calc_recall(found_indices, ground_truth):\n", - " found_indices = cp.asarray(found_indices)\n", - " bs, k = found_indices.shape\n", - " if bs != ground_truth.shape[0]:\n", - " raise RuntimeError(\n", - " \"Batch sizes do not match {} vs {}\".format(\n", - " bs, ground_truth.shape[0])\n", - " )\n", - " if k > ground_truth.shape[1]:\n", - " raise RuntimeError(\n", - " \"Not enough indices in the ground truth ({} > {})\".format(\n", - " k, ground_truth.shape[1])\n", - " )\n", - " n = 0\n", - " # Go over the batch\n", - " for i in range(bs):\n", - " # Note, ivf-pq does not guarantee the ordered input, hence the use of intersect1d\n", - " n += cp.intersect1d(found_indices[i, :k], ground_truth[i, :k]).size\n", - " recall = n / found_indices.size\n", - " return recall\n", - "\n", "recall_first_try = calc_recall(neighbors, gt_neighbors)\n", "print(f\"Got recall = {recall_first_try} with the default parameters (k = {k}).\")" ] diff --git a/notebooks/utils.py b/notebooks/utils.py new file mode 100644 index 0000000000..1c2e44a6ae --- /dev/null +++ b/notebooks/utils.py @@ -0,0 +1,103 @@ +# +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import cupy as cp +import h5py +import os +import tempfile +import time +import urllib + +## Check the quality of the prediction (recall) +def calc_recall(found_indices, ground_truth): + found_indices = cp.asarray(found_indices) + bs, k = found_indices.shape + if bs != ground_truth.shape[0]: + raise RuntimeError( + "Batch sizes do not match {} vs {}".format( + bs, ground_truth.shape[0] + ) + ) + if k > ground_truth.shape[1]: + raise RuntimeError( + "Not enough indices in the ground truth ({} > {})".format( + k, ground_truth.shape[1] + ) + ) + n = 0 + # Go over the batch + for i in range(bs): + # Note, ivf-pq does not guarantee the ordered input, hence the use of intersect1d + n += cp.intersect1d(found_indices[i, :k], ground_truth[i, :k]).size + recall = n / found_indices.size + return recall + + +class BenchmarkTimer: + """Provides a context manager that runs a code block `reps` times + and records results to the instance variable `timings`. Use like: + .. code-block:: python + timer = BenchmarkTimer(rep=5) + for _ in timer.benchmark_runs(): + ... do something ... + print(np.min(timer.timings)) + + This class is borrowed from the rapids/cuml benchmark suite + """ + + def __init__(self, reps=1, warmup=0): + self.warmup = warmup + self.reps = reps + self.timings = [] + + def benchmark_runs(self): + for r in range(self.reps + self.warmup): + t0 = time.time() + yield r + t1 = time.time() + self.timings.append(t1 - t0) + if r >= self.warmup: + self.timings.append(t1 - t0) + + +def load_dataset(dataset_url, work_folder=None): + """Download dataset from url. It is expected that the dataset contains a hdf5 file in ann-benchmarks format + + Parameters + ---------- + dataset_url address of hdf5 file + work_folder name of the local folder to store the dataset + + """ + dataset_url = "http://ann-benchmarks.com/sift-128-euclidean.hdf5" + dataset_filename = dataset_url.split("/")[-1] + + # We'll need to load store some data in this tutorial + if work_folder is None: + work_folder = os.path.join(tempfile.gettempdir(), "raft_example") + + if not os.path.exists(work_folder): + os.makedirs(work_folder) + print("The index and data will be saved in", work_folder) + + ## download the dataset + dataset_path = os.path.join(work_folder, dataset_filename) + if not os.path.exists(dataset_path): + urllib.request.urlretrieve(dataset_url, dataset_path) + + f = h5py.File(dataset_path, "r") + + return f From 9785617acde3608ba67451c79b37369b59bf7927 Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Fri, 22 Sep 2023 10:55:25 -0400 Subject: [PATCH 07/12] Fix update-version.sh for all pyproject.toml files [skip ci] (#1839) `python/raft-ann-bench/pyproject.toml` was missed in `update-version.sh`. This PR refactors a bit to update all `pyproject.toml` files which will capture future ones as well. Authors: - Ray Douglass (https://github.com/raydouglass) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Jake Awe (https://github.com/AyodeAwe) --- ci/release/update-version.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 6a7e319f5d..7a69b95da1 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -47,10 +47,6 @@ sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cma sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/pylibraft/pylibraft/__init__.py sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/raft-dask/raft_dask/__init__.py -# Python pyproject.toml updates -sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/pylibraft/pyproject.toml -sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/raft-dask/pyproject.toml - # Wheel testing script sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_raft_dask.sh @@ -74,6 +70,7 @@ for FILE in python/*/pyproject.toml; do for DEP in "${DEPENDENCIES[@]}"; do sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*\"/g" ${FILE} done + sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" "${FILE}" sed_runner "/\"ucx-py==/ s/==.*\"/==${NEXT_UCX_PY_SHORT_TAG_PEP440}.*\"/g" ${FILE} done From d35a0a9e56fa12330dd8becde34b1e1c46c2b600 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 22 Sep 2023 12:45:35 -0500 Subject: [PATCH 08/12] Make RMM a run dependency of the raft-ann-bench conda package (#1838) RMM was listed as a `host` dependency but not `run`, made it so that installing `raft-ann-bench` does not automatically install `RMM` for end users. Authors: - Dante Gama Dessavre (https://github.com/dantegd) Approvers: - Ray Douglass (https://github.com/raydouglass) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1838 --- conda/recipes/raft-ann-bench/meta.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml index 91d0fdb729..a2ab0af643 100644 --- a/conda/recipes/raft-ann-bench/meta.yaml +++ b/conda/recipes/raft-ann-bench/meta.yaml @@ -78,11 +78,11 @@ requirements: - h5py {{ h5py_version }} - benchmark - matplotlib - # rmm is needed to determine if package is gpu-enabled - - rmm ={{ minor_version }} - python - pandas - pyyaml + # rmm is needed to determine if package is gpu-enabled + - rmm ={{ minor_version }} run: - python @@ -104,6 +104,8 @@ requirements: - python - pandas - pyyaml + # rmm is needed to determine if package is gpu-enabled + - rmm ={{ minor_version }} about: home: https://rapids.ai/ license: Apache-2.0 From ba923cc09175c793ada63aa930d8d7d486a98c69 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sun, 24 Sep 2023 22:29:08 -0500 Subject: [PATCH 09/12] raft-ann-bench package fixes for plotting and conf files (#1844) Number of improvements and fixes for different million scale datasets that are supported by default by the python benchmarking package. Authors: - Dante Gama Dessavre (https://github.com/dantegd) Approvers: - Divye Gala (https://github.com/divyegala) URL: https://github.com/rapidsai/raft/pull/1844 --- .../src/raft-ann-bench/plot/__main__.py | 41 +- .../run/conf/deep-image-96-inner.json | 1 + .../run/conf/fashion-mnist-784-euclidean.json | 1 + .../run/conf/glove-100-inner.json | 1766 +++++++++++------ .../run/conf/glove-50-inner.json | 1351 +++++++++++++ .../run/conf/mnist-784-euclidean.json | 1 + .../run/conf/nytimes-256-angular.json | 1 + .../run/conf/nytimes-256-inner.json | 1352 +++++++++++++ 8 files changed, 3923 insertions(+), 591 deletions(-) create mode 100644 python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-inner.json create mode 100644 python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-inner.json diff --git a/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py index 198d0a2b14..233607c281 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py @@ -254,18 +254,18 @@ def create_plot_build( xn = "k-nn" yn = "qps" - # recall_85 = [-1] * len(linestyles) qps_85 = [-1] * len(linestyles) bt_85 = [0] * len(linestyles) i_85 = [-1] * len(linestyles) - # recall_90 = [-1] * len(linestyles) + qps_90 = [-1] * len(linestyles) bt_90 = [0] * len(linestyles) i_90 = [-1] * len(linestyles) - # recall_95 = [-1] * len(linestyles) + qps_95 = [-1] * len(linestyles) bt_95 = [0] * len(linestyles) i_95 = [-1] * len(linestyles) + data = OrderedDict() colors = OrderedDict() @@ -303,7 +303,7 @@ def mean_y(algo): plt.figure(figsize=(12, 9)) ax = df.plot.bar(rot=0, color=colors) fig = ax.get_figure() - print(f"writing search output to {fn_out}") + print(f"writing build output to {fn_out}") plt.title("Build Time for Highest QPS") plt.suptitle(f"{dataset} k={k} batch_size={batch_size}") plt.ylabel("Build Time (s)") @@ -313,35 +313,22 @@ def mean_y(algo): def load_lines(results_path, result_files, method, index_key): results = dict() - linebreaker = "name,iterations" - for result_filename in result_files: if result_filename.endswith(".csv"): with open(os.path.join(results_path, result_filename), "r") as f: lines = f.readlines() lines = lines[:-1] if lines[-1] == "\n" else lines - idx = 0 - for pos, line in enumerate(lines): - if linebreaker in line: - idx = pos - break if method == "build": - if "hnswlib" in result_filename: - key_idx = [2] - else: - key_idx = [10] + key_idx = [2] elif method == "search": - if "hnswlib" in result_filename: - key_idx = [10, 6] - else: - key_idx = [12, 10] + key_idx = [2, 3] - for line in lines[idx + 1 :]: + for line in lines[1:]: split_lines = line.split(",") - algo_name = split_lines[0].split(".")[0].strip('"') - index_name = split_lines[0].split("/")[0].strip('"') + algo_name = split_lines[0] + index_name = split_lines[1] if index_key == "algo": dict_key = algo_name @@ -394,9 +381,7 @@ def main(): ) parser.add_argument( "--dataset-path", - help="path to dataset folder, by default will look in " - "RAPIDS_DATASET_ROOT_DIR if defined, otherwise a datasets " - "subdirectory from the calling directory", + help="path to dataset folder", default=default_dataset_path, ) parser.add_argument( @@ -460,10 +445,12 @@ def main(): search = args.search search_output_filepath = os.path.join( - args.output_filepath, f"search-{args.dataset}-{k}-{batch_size}.png" + args.output_filepath, + f"search-{args.dataset}-k{k}-batch_size{batch_size}.png", ) build_output_filepath = os.path.join( - args.output_filepath, f"build-{args.dataset}-{k}-{batch_size}.png" + args.output_filepath, + f"build-{args.dataset}-k{k}-batch_size{batch_size}.png", ) search_results = load_all_results( diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json index f1c033e415..ab82405439 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json @@ -3,6 +3,7 @@ "name": "deep-image-96-inner", "base_file": "deep-image-96-inner/base.fbin", "query_file": "deep-image-96-inner/query.fbin", + "groundtruth_neighbors_file": "deep-image-96-inner/groundtruth.neighbors.ibin", "distance": "euclidean" }, "search_basic_param": { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json index 65f28fc81a..0efe1fc498 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json @@ -3,6 +3,7 @@ "name": "fashion-mnist-784-euclidean", "base_file": "fashion-mnist-784-euclidean/base.fbin", "query_file": "fashion-mnist-784-euclidean/query.fbin", + "groundtruth_neighbors_file": "fashion-mnist-784-euclidean/groundtruth.neighbors.ibin", "distance": "euclidean" }, "search_basic_param": { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json index 7c95ceb439..54c8bf908c 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json @@ -3,711 +3,1349 @@ "name": "glove-100-inner", "base_file": "glove-100-inner/base.fbin", "query_file": "glove-100-inner/query.fbin", - "groundtruth_neighbors_file": "glove-100-inner/groundtruth.neighbors.ibin", - "distance": "inner_product" + "distance": "euclidean" }, - "search_basic_param": { - "batch_size": 1, - "k": 10 + "batch_size": 5000, + "k": 10, + "run_count": 3 }, - "index": [ { - "name": "hnswlib.M4", - "algo": "hnswlib", - "build_param": {"M":4, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M4", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M8", - "algo": "hnswlib", - "build_param": {"M":8, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M8", - "search_params": [ + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-100-inner/hnswlib/M12", + "search_params" : [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, {"ef":80, "numThreads":1}, {"ef":120, "numThreads":1}, {"ef":200, "numThreads":1}, {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ] + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M12" }, { - "name": "hnswlib.M12", - "algo": "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M12", - "search_params": [ + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-100-inner/hnswlib/M16", + "search_params" : [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, {"ef":80, "numThreads":1}, {"ef":120, "numThreads":1}, {"ef":200, "numThreads":1}, {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ] + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M16" }, { - "name": "hnswlib.M16", - "algo": "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M16", - "search_params": [ + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-100-inner/hnswlib/M24", + "search_params" : [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, {"ef":80, "numThreads":1}, {"ef":120, "numThreads":1}, {"ef":200, "numThreads":1}, {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ] + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M24" }, { - "name": "hnswlib.M24", - "algo": "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M24", - "search_params": [ + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-100-inner/hnswlib/M36", + "search_params" : [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, {"ef":80, "numThreads":1}, {"ef":120, "numThreads":1}, {"ef":200, "numThreads":1}, {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M36", - "algo": "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M36", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M48", - "algo": "hnswlib", - "build_param": {"M":48, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M48", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M64", - "algo": "hnswlib", - "build_param": {"M":64, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M64", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M36" }, + + + + { - "name": "hnswlib.M96", - "algo": "hnswlib", - "build_param": {"M":96, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M96", + "name": "raft_bfknn", + "algo": "raft_bfknn", + "build_param": {}, + "file": "index/glove-100-inner/raft_bfknn/bfknn", "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] + { + "probe": 1 + } + ], + "search_result_file": "result/glove-100-inner/raft_bfknn/bfknn" }, { "name": "faiss_ivf_flat.nlist1024", "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":1024}, - "file": "glove-100-inner/faiss_ivf_flat/nlist1024", + "build_param": { + "nlist": 1024 + }, + "file": "index/glove-100-inner/faiss_ivf_flat/nlist1024", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_flat/nlist1024" }, { "name": "faiss_ivf_flat.nlist2048", "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":2048}, - "file": "glove-100-inner/faiss_ivf_flat/nlist2048", + "build_param": { + "nlist": 2048 + }, + "file": "index/glove-100-inner/faiss_ivf_flat/nlist2048", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_flat/nlist2048" }, { "name": "faiss_ivf_flat.nlist4096", "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":4096}, - "file": "glove-100-inner/faiss_ivf_flat/nlist4096", + "build_param": { + "nlist": 4096 + }, + "file": "index/glove-100-inner/faiss_ivf_flat/nlist4096", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_flat/nlist4096" }, { "name": "faiss_ivf_flat.nlist8192", "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":8192}, - "file": "glove-100-inner/faiss_ivf_flat/nlist8192", + "build_param": { + "nlist": 8192 + }, + "file": "index/glove-100-inner/faiss_ivf_flat/nlist8192", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_flat/nlist8192" }, { "name": "faiss_ivf_flat.nlist16384", "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":16384}, - "file": "glove-100-inner/faiss_ivf_flat/nlist16384", + "build_param": { + "nlist": 16384 + }, + "file": "index/glove-100-inner/faiss_ivf_flat/nlist16384", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_flat/nlist16384" }, - - - { - "name": "faiss_ivf_pq.M2-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":1024, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist1024", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M2-nlist2048", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":2048, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist2048", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M2-nlist4096", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":4096, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist4096", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M2-nlist8192", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":8192, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist8192", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M2-nlist16384", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":16384, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist16384", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] - }, - { - "name": "faiss_ivf_pq.M4-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":1024, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist1024", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M4-nlist2048", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":2048, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist2048", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M4-nlist4096", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":4096, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist4096", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M4-nlist8192", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":8192, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist8192", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M4-nlist16384", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":16384, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist16384", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] - }, - { - "name": "faiss_ivf_pq.M20-nlist1024", + "name": "faiss_ivf_pq.M64-nlist1024", "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":1024, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist1024", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M20-nlist2048", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":2048, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist2048", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M20-nlist4096", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":4096, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist4096", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M20-nlist8192", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":8192, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist8192", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": true + }, + "file": "index/glove-100-inner/faiss_ivf_pq/M64-nlist1024", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M20-nlist16384", + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":16384, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist16384", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": false + }, + "file": "index/glove-100-inner/faiss_ivf_pq/M64-nlist1024.noprecomp", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_pq/M64-nlist1024" }, - - { "name": "faiss_ivf_sq.nlist1024-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":1024, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist1024-fp16", + "build_param": { + "nlist": 1024, + "quantizer_type": "fp16" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist1024-fp16", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist1024-fp16" }, { "name": "faiss_ivf_sq.nlist2048-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":2048, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist2048-fp16", + "build_param": { + "nlist": 2048, + "quantizer_type": "fp16" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist2048-fp16", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist2048-fp16" }, { "name": "faiss_ivf_sq.nlist4096-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":4096, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist4096-fp16", + "build_param": { + "nlist": 4096, + "quantizer_type": "fp16" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist4096-fp16", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist4096-fp16" }, { "name": "faiss_ivf_sq.nlist8192-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":8192, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist8192-fp16", + "build_param": { + "nlist": 8192, + "quantizer_type": "fp16" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist8192-fp16", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist8192-fp16" }, { "name": "faiss_ivf_sq.nlist16384-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":16384, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist16384-fp16", + "build_param": { + "nlist": 16384, + "quantizer_type": "fp16" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist16384-fp16", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist16384-fp16" }, { "name": "faiss_ivf_sq.nlist1024-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":1024, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist1024-int8", + "build_param": { + "nlist": 1024, + "quantizer_type": "int8" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist1024-int8", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist1024-int8" }, { "name": "faiss_ivf_sq.nlist2048-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":2048, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist2048-int8", + "build_param": { + "nlist": 2048, + "quantizer_type": "int8" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist2048-int8", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist2048-int8" }, { "name": "faiss_ivf_sq.nlist4096-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":4096, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist4096-int8", + "build_param": { + "nlist": 4096, + "quantizer_type": "int8" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist4096-int8", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist4096-int8" }, { "name": "faiss_ivf_sq.nlist8192-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":8192, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist8192-int8", + "build_param": { + "nlist": 8192, + "quantizer_type": "int8" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist8192-int8", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist8192-int8" }, { "name": "faiss_ivf_sq.nlist16384-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":16384, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist16384-int8", + "build_param": { + "nlist": 16384, + "quantizer_type": "int8" + }, + "file": "index/glove-100-inner/faiss_ivf_sq/nlist16384-int8", "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-100-inner/faiss_ivf_sq/nlist16384-int8" }, { "name": "faiss_flat", "algo": "faiss_gpu_flat", "build_param": {}, - "file": "glove-100-inner/faiss_flat/flat", - "search_params": [{}] + "file": "index/glove-100-inner/faiss_flat/flat", + "search_params": [ + {} + ], + "search_result_file": "result/glove-100-inner/faiss_flat/flat" + }, + + { + "name": "raft_ivf_pq.dimpq128-cluster1024", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 5, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-float" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-half" }, { - "name": "ggnn.kbuild96-segment64-refine2-k10", - "algo": "ggnn", + "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", + "algo": "raft_ivf_pq", "build_param": { - "k_build": 96, - "segment_size": 64, - "refine_iterations": 2, - "dataset_size": 1183514, - "k": 10 + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 }, - "file": "glove-100-inner/ggnn/kbuild96-segment64-refine2-k10", + "file": "index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8", "search_params": [ - {"tau":0.001, "block_dim":64, "sorted_size":32}, - {"tau":0.005, "block_dim":64, "sorted_size":32}, - {"tau":0.01, "block_dim":64, "sorted_size":32}, - {"tau":0.02, "block_dim":64, "sorted_size":32}, - {"tau":0.03, "block_dim":64, "sorted_size":32}, - {"tau":0.04, "block_dim":64, "sorted_size":32}, - {"tau":0.05, "block_dim":64, "sorted_size":32}, - {"tau":0.06, "block_dim":64, "sorted_size":32}, - {"tau":0.09, "block_dim":64, "sorted_size":32}, - {"tau":0.12, "block_dim":64, "sorted_size":32}, - {"tau":0.18, "block_dim":64, "sorted_size":32}, - {"tau":0.21, "block_dim":64, "sorted_size":32}, - {"tau":0.24, "block_dim":64, "sorted_size":32}, - {"tau":0.27, "block_dim":64, "sorted_size":32}, - {"tau":0.3, "block_dim":64, "sorted_size":32}, - {"tau":0.4, "block_dim":64, "sorted_size":32}, - {"tau":0.01, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.02, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.03, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.04, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.05, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.06, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.09, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.12, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.18, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.21, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.24, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.27, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.3, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.4, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.5, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32} - ] + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq64-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq64-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 32, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 16, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-half-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq128-cluster1024-half-float" + }, + { + "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 512, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_pq/dimpq512-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_pq/dimpq512-cluster1024-float-float" + }, + { + "name": "raft_ivf_flat.nlist1024", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 1024, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-inner/raft_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_flat/nlist1024" + }, + { + "name": "raft_ivf_flat.nlist16384", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 16384, + "ratio": 2, + "niter": 20 + }, + "file": "index/glove-100-inner/raft_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-100-inner/raft_ivf_flat/nlist16384" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + "build_param": { + "graph_degree" : 32 + }, + "file" : "index/glove-100-inner/raft_cagra/dim32", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/glove-100-inner/raft_cagra/dim32" + }, + + { + "name" : "raft_cagra.dim64", + "algo" : "raft_cagra", + "build_param": { + "graph_degree" : 64 + }, + "file" : "index/glove-100-inner/raft_cagra/dim64", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/glove-100-inner/raft_cagra/dim64" } ] } diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-inner.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-inner.json new file mode 100644 index 0000000000..0fe7ac24df --- /dev/null +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-inner.json @@ -0,0 +1,1351 @@ +{ + "dataset": { + "name": "glove-50-inner", + "base_file": "glove-50-inner/base.fbin", + "query_file": "glove-50-inner/query.fbin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 5000, + "k": 10, + "run_count": 3 + }, + "index": [ + { + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-50-inner/hnswlib/M12", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-50-inner/hnswlib/M12" + }, + { + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-50-inner/hnswlib/M16", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-50-inner/hnswlib/M16" + }, + { + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-50-inner/hnswlib/M24", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-50-inner/hnswlib/M24" + }, + { + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-50-inner/hnswlib/M36", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-50-inner/hnswlib/M36" + }, + + + + + { + "name": "raft_bfknn", + "algo": "raft_bfknn", + "build_param": {}, + "file": "index/glove-50-inner/raft_bfknn/bfknn", + "search_params": [ + { + "probe": 1 + } + ], + "search_result_file": "result/glove-50-inner/raft_bfknn/bfknn" + }, + { + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 1024 + }, + "file": "index/glove-50-inner/faiss_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_flat/nlist1024" + }, + { + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 2048 + }, + "file": "index/glove-50-inner/faiss_ivf_flat/nlist2048", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_flat/nlist2048" + }, + { + "name": "faiss_ivf_flat.nlist4096", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 4096 + }, + "file": "index/glove-50-inner/faiss_ivf_flat/nlist4096", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_flat/nlist4096" + }, + { + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 8192 + }, + "file": "index/glove-50-inner/faiss_ivf_flat/nlist8192", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_flat/nlist8192" + }, + { + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 16384 + }, + "file": "index/glove-50-inner/faiss_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_flat/nlist16384" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": true + }, + "file": "index/glove-50-inner/faiss_ivf_pq/M64-nlist1024", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": false + }, + "file": "index/glove-50-inner/faiss_ivf_pq/M64-nlist1024.noprecomp", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "fp16" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist1024-fp16" + }, + { + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "fp16" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist2048-fp16" + }, + { + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "fp16" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist4096-fp16" + }, + { + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "fp16" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist8192-fp16" + }, + { + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "fp16" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist16384-fp16" + }, + { + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "int8" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist1024-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist1024-int8" + }, + { + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "int8" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist2048-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist2048-int8" + }, + { + "name": "faiss_ivf_sq.nlist4096-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "int8" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist4096-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist4096-int8" + }, + { + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "int8" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist8192-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist8192-int8" + }, + { + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "int8" + }, + "file": "index/glove-50-inner/faiss_ivf_sq/nlist16384-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-50-inner/faiss_ivf_sq/nlist16384-int8" + }, + { + "name": "faiss_flat", + "algo": "faiss_gpu_flat", + "build_param": {}, + "file": "index/glove-50-inner/faiss_flat/flat", + "search_params": [ + {} + ], + "search_result_file": "result/glove-50-inner/faiss_flat/flat" + }, + + { + "name": "raft_ivf_pq.dimpq128-cluster1024", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 5, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-float" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq64-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq64-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 32, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 16, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-half-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq128-cluster1024-half-float" + }, + { + "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 512, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_pq/dimpq512-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_pq/dimpq512-cluster1024-float-float" + }, + { + "name": "raft_ivf_flat.nlist1024", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 1024, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-inner/raft_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_flat/nlist1024" + }, + { + "name": "raft_ivf_flat.nlist16384", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 16384, + "ratio": 2, + "niter": 20 + }, + "file": "index/glove-50-inner/raft_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-50-inner/raft_ivf_flat/nlist16384" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + "build_param": { + "graph_degree" : 32 + }, + "file" : "index/glove-50-inner/raft_cagra/dim32", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/glove-50-inner/raft_cagra/dim32" + }, + + { + "name" : "raft_cagra.dim64", + "algo" : "raft_cagra", + "build_param": { + "graph_degree" : 64 + }, + "file" : "index/glove-50-inner/raft_cagra/dim64", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/glove-50-inner/raft_cagra/dim64" + } + ] +} diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json index 2a493edeed..343deb8927 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json @@ -3,6 +3,7 @@ "name": "mnist-784-euclidean", "base_file": "mnist-784-euclidean/base.fbin", "query_file": "mnist-784-euclidean/query.fbin", + "groundtruth_neighbors_file": "mnist-784-euclidean/groundtruth.neighbors.ibin", "distance": "euclidean" }, "search_basic_param": { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json index 630b700ba5..e94a9969d9 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json @@ -3,6 +3,7 @@ "name": "nytimes-256-angular", "base_file": "nytimes-256-angular/base.fbin", "query_file": "nytimes-256-angular/query.fbin", + "groundtruth_neighbors_file": "nytimes-256-angular/groundtruth.neighbors.ibin", "distance": "euclidean" }, "search_basic_param": { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-inner.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-inner.json new file mode 100644 index 0000000000..f849abad35 --- /dev/null +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-inner.json @@ -0,0 +1,1352 @@ +{ + "dataset": { + "name": "nytimes-256-inner", + "base_file": "nytimes-256-inner/base.fbin", + "query_file": "nytimes-256-inner/query.fbin", + "groundtruth_neighbors_file": "nytimes-256-inner/groundtruth.neighbors.ibin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 5000, + "k": 10, + "run_count": 3 + }, + "index": [ + { + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "index/nytimes-256-inner/hnswlib/M12", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/nytimes-256-inner/hnswlib/M12" + }, + { + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/nytimes-256-inner/hnswlib/M16", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/nytimes-256-inner/hnswlib/M16" + }, + { + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file" : "index/nytimes-256-inner/hnswlib/M24", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/nytimes-256-inner/hnswlib/M24" + }, + { + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : "index/nytimes-256-inner/hnswlib/M36", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/nytimes-256-inner/hnswlib/M36" + }, + + + + + { + "name": "raft_bfknn", + "algo": "raft_bfknn", + "build_param": {}, + "file": "index/nytimes-256-inner/raft_bfknn/bfknn", + "search_params": [ + { + "probe": 1 + } + ], + "search_result_file": "result/nytimes-256-inner/raft_bfknn/bfknn" + }, + { + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 1024 + }, + "file": "index/nytimes-256-inner/faiss_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_flat/nlist1024" + }, + { + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 2048 + }, + "file": "index/nytimes-256-inner/faiss_ivf_flat/nlist2048", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_flat/nlist2048" + }, + { + "name": "faiss_ivf_flat.nlist4096", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 4096 + }, + "file": "index/nytimes-256-inner/faiss_ivf_flat/nlist4096", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_flat/nlist4096" + }, + { + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 8192 + }, + "file": "index/nytimes-256-inner/faiss_ivf_flat/nlist8192", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_flat/nlist8192" + }, + { + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 16384 + }, + "file": "index/nytimes-256-inner/faiss_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_flat/nlist16384" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": true + }, + "file": "index/nytimes-256-inner/faiss_ivf_pq/M64-nlist1024", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": false + }, + "file": "index/nytimes-256-inner/faiss_ivf_pq/M64-nlist1024.noprecomp", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "fp16" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist1024-fp16" + }, + { + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "fp16" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist2048-fp16" + }, + { + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "fp16" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist4096-fp16" + }, + { + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "fp16" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist8192-fp16" + }, + { + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "fp16" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist16384-fp16" + }, + { + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "int8" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist1024-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist1024-int8" + }, + { + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "int8" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist2048-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist2048-int8" + }, + { + "name": "faiss_ivf_sq.nlist4096-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "int8" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist4096-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist4096-int8" + }, + { + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "int8" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist8192-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist8192-int8" + }, + { + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "int8" + }, + "file": "index/nytimes-256-inner/faiss_ivf_sq/nlist16384-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/nytimes-256-inner/faiss_ivf_sq/nlist16384-int8" + }, + { + "name": "faiss_flat", + "algo": "faiss_gpu_flat", + "build_param": {}, + "file": "index/nytimes-256-inner/faiss_flat/flat", + "search_params": [ + {} + ], + "search_result_file": "result/nytimes-256-inner/faiss_flat/flat" + }, + + { + "name": "raft_ivf_pq.dimpq128-cluster1024", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 5, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-float" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq64-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq64-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 32, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 16, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-half-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq128-cluster1024-half-float" + }, + { + "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 512, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_pq/dimpq512-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_pq/dimpq512-cluster1024-float-float" + }, + { + "name": "raft_ivf_flat.nlist1024", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 1024, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-inner/raft_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_flat/nlist1024" + }, + { + "name": "raft_ivf_flat.nlist16384", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 16384, + "ratio": 2, + "niter": 20 + }, + "file": "index/nytimes-256-inner/raft_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/nytimes-256-inner/raft_ivf_flat/nlist16384" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + "build_param": { + "graph_degree" : 32 + }, + "file" : "index/nytimes-256-inner/raft_cagra/dim32", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/nytimes-256-inner/raft_cagra/dim32" + }, + + { + "name" : "raft_cagra.dim64", + "algo" : "raft_cagra", + "build_param": { + "graph_degree" : 64 + }, + "file" : "index/nytimes-256-inner/raft_cagra/dim64", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/nytimes-256-inner/raft_cagra/dim64" + } + ] +} From dfde3b499a403af433e1e27b9174461ec086adc9 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Mon, 25 Sep 2023 10:02:07 -0700 Subject: [PATCH 10/12] Fixes for OOM during CAGRA benchmarks (#1832) Running the CAGRA benchmarks and there could be OOM errors on GPU memory with large datasets. This is caused by holding multiple copies of the dataset in GPU memory. Fix by: * Free existing memory for the dataset/graph before allocating new memory during update_dataset/update_grph * On deserialize, if the serialized index doesn't contain the dataset - don't allocate GPU memory for it * Don't call update_dataset repeatedly in the benchmarking code with the same dataset Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1832 --- cpp/include/raft/neighbors/cagra_types.hpp | 19 +++++++++++++++---- .../detail/cagra/cagra_serialize.cuh | 19 +++++++++++++------ 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/cpp/include/raft/neighbors/cagra_types.hpp b/cpp/include/raft/neighbors/cagra_types.hpp index 02e3f5338e..4728178194 100644 --- a/cpp/include/raft/neighbors/cagra_types.hpp +++ b/cpp/include/raft/neighbors/cagra_types.hpp @@ -165,9 +165,10 @@ struct index : ann::index { ~index() = default; /** Construct an empty index. */ - index(raft::resources const& res) + index(raft::resources const& res, + raft::distance::DistanceType metric = raft::distance::DistanceType::L2Expanded) : ann::index(), - metric_(raft::distance::DistanceType::L2Expanded), + metric_(metric), dataset_(make_device_matrix(res, 0, 0)), graph_(make_device_matrix(res, 0, 0)) { @@ -296,7 +297,11 @@ struct index : ann::index { raft::host_matrix_view knn_graph) { RAFT_LOG_DEBUG("Copying CAGRA knn graph from host to device"); - graph_ = make_device_matrix(res, knn_graph.extent(0), knn_graph.extent(1)); + if ((graph_.extent(0) != knn_graph.extent(0)) || (graph_.extent(1) != knn_graph.extent(1))) { + // clear existing memory before allocating to prevent OOM errors on large graphs + if (graph_.size()) { graph_ = make_device_matrix(res, 0, 0); } + graph_ = make_device_matrix(res, knn_graph.extent(0), knn_graph.extent(1)); + } raft::copy(graph_.data_handle(), knn_graph.data_handle(), knn_graph.size(), @@ -311,7 +316,13 @@ struct index : ann::index { mdspan, row_major, data_accessor> dataset) { size_t padded_dim = round_up_safe(dataset.extent(1) * sizeof(T), 16) / sizeof(T); - dataset_ = make_device_matrix(res, dataset.extent(0), padded_dim); + + if ((dataset_.extent(0) != dataset.extent(0)) || + (static_cast(dataset_.extent(1)) != padded_dim)) { + // clear existing memory before allocating to prevent OOM errors on large datasets + if (dataset_.size()) { dataset_ = make_device_matrix(res, 0, 0); } + dataset_ = make_device_matrix(res, dataset.extent(0), padded_dim); + } if (dataset_.extent(1) == dataset.extent(1)) { raft::copy(dataset_.data_handle(), dataset.data_handle(), diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index 234911e15c..8261f637e1 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -130,15 +130,22 @@ auto deserialize(raft::resources const& res, std::istream& is) -> index auto graph_degree = deserialize_scalar(res, is); auto metric = deserialize_scalar(res, is); - auto dataset = raft::make_host_matrix(n_rows, dim); - auto graph = raft::make_host_matrix(n_rows, graph_degree); + auto graph = raft::make_host_matrix(n_rows, graph_degree); deserialize_mdspan(res, is, graph.view()); bool has_dataset = deserialize_scalar(res, is); - if (has_dataset) { deserialize_mdspan(res, is, dataset.view()); } - - return index( - res, metric, raft::make_const_mdspan(dataset.view()), raft::make_const_mdspan(graph.view())); + if (has_dataset) { + auto dataset = raft::make_host_matrix(n_rows, dim); + deserialize_mdspan(res, is, dataset.view()); + return index( + res, metric, raft::make_const_mdspan(dataset.view()), raft::make_const_mdspan(graph.view())); + } else { + // create a new index with no dataset - the user must supply via update_dataset themselves + // later (this avoids allocating GPU memory in the meantime) + index idx(res, metric); + idx.update_graph(res, raft::make_const_mdspan(graph.view())); + return idx; + } } template From 8522a143ba9fd25d1054b93edc308d219043751b Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 25 Sep 2023 14:25:23 -0500 Subject: [PATCH 11/12] Fix conf file for benchmarking glove datasets (#1846) Authors: - Dante Gama Dessavre (https://github.com/dantegd) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1846 --- .../run/conf/glove-100-angular.json | 126 +++++++++--------- .../run/conf/glove-100-inner.json | 126 +++++++++--------- .../run/conf/glove-50-angular.json | 126 +++++++++--------- .../run/conf/glove-50-inner.json | 126 +++++++++--------- 4 files changed, 252 insertions(+), 252 deletions(-) diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json index 526aef2db0..3595084d19 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json @@ -735,37 +735,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "half" } @@ -785,55 +785,55 @@ "search_params": [ { "k": 10, - "numProbes": 1, + "nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1, + "nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 5, + "nprobe": 5, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float" } @@ -853,37 +853,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half" } @@ -903,37 +903,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -953,37 +953,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1003,37 +1003,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half" } @@ -1053,37 +1053,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1103,37 +1103,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1153,37 +1153,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "float" } @@ -1203,37 +1203,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float" } diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json index 54c8bf908c..8b9f1cfb35 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json @@ -735,37 +735,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "half" } @@ -785,55 +785,55 @@ "search_params": [ { "k": 10, - "numProbes": 1, + "nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1, + "nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 5, + "nprobe": 5, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float" } @@ -853,37 +853,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half" } @@ -903,37 +903,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -953,37 +953,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1003,37 +1003,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half" } @@ -1053,37 +1053,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1103,37 +1103,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1153,37 +1153,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "float" } @@ -1203,37 +1203,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float" } diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json index 9b3f192c9f..0f02620cb2 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json @@ -735,37 +735,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "half" } @@ -785,55 +785,55 @@ "search_params": [ { "k": 10, - "numProbes": 1, + "nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1, + "nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 5, + "nprobe": 5, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float" } @@ -853,37 +853,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half" } @@ -903,37 +903,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -953,37 +953,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1003,37 +1003,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half" } @@ -1053,37 +1053,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1103,37 +1103,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1153,37 +1153,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "float" } @@ -1203,37 +1203,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float" } diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-inner.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-inner.json index 0fe7ac24df..41dec5adb3 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-inner.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-inner.json @@ -735,37 +735,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "half" } @@ -785,55 +785,55 @@ "search_params": [ { "k": 10, - "numProbes": 1, + "nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1, + "nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 5, + "nprobe": 5, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float" } @@ -853,37 +853,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half" } @@ -903,37 +903,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -953,37 +953,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1003,37 +1003,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half" } @@ -1053,37 +1053,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1103,37 +1103,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8" } @@ -1153,37 +1153,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "float" } @@ -1203,37 +1203,37 @@ "search_params": [ { "k": 10, - "numProbes": 10, + "nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 50, + "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 100, + "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 200, + "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 500, + "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "k": 10, - "numProbes": 1024, + "nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float" } From cb24d998771a72ea6bad12a65cfb4aaf6ab0122e Mon Sep 17 00:00:00 2001 From: tsuki <12711693+enp1s0@users.noreply.github.com> Date: Tue, 26 Sep 2023 04:09:43 +0800 Subject: [PATCH 12/12] [FEA] Add pre-filtering to CAGRA (#1811) This PR adds the pre-filtering feature to the CAGRA search implementations. Rel: taken over from https://github.com/rapidsai/raft/pull/1765 ## Algorithm The pre-filtering algorithm removes a node that should not be in the final result after it has behaved as a parent node. This way, the nodes that should not be in the final result are also used in the graph traversal, avoiding potential performance degradation. ## Changes - Add filtering operation on a parent node after internal top-M buffer candidate calculation. - Add filtering operation to result buffer before storing them in the device memory. Authors: - tsuki (https://github.com/enp1s0) - Micka (https://github.com/lowener) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Tamas Bela Feher (https://github.com/tfeher) - Micka (https://github.com/lowener) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/1811 --- cpp/include/raft/neighbors/cagra.cuh | 75 +++++- .../neighbors/detail/cagra/cagra_search.cuh | 60 ++++- .../detail/cagra/compute_distance.hpp | 13 +- .../raft/neighbors/detail/cagra/factory.cuh | 42 ++-- .../detail/cagra/search_multi_cta.cuh | 80 +++---- .../cagra/search_multi_cta_kernel-ext.cuh | 94 ++++---- .../cagra/search_multi_cta_kernel-inl.cuh | 89 ++++++-- .../detail/cagra/search_multi_kernel.cuh | 215 +++++++++++++----- .../neighbors/detail/cagra/search_plan.cuh | 9 +- .../detail/cagra/search_single_cta.cuh | 70 +++--- .../cagra/search_single_cta_kernel-ext.cuh | 96 ++++---- .../cagra/search_single_cta_kernel-inl.cuh | 214 ++++++++++++++--- cpp/include/raft/neighbors/ivf_flat-inl.cuh | 12 +- cpp/include/raft/neighbors/ivf_pq-inl.cuh | 12 +- .../raft/neighbors/sample_filter_types.hpp | 12 + .../cagra/search_multi_cta_00_generate.py | 58 ++--- ...arch_multi_cta_float_uint32_dim1024_t32.cu | 55 +++-- ...search_multi_cta_float_uint32_dim128_t8.cu | 55 +++-- ...earch_multi_cta_float_uint32_dim256_t16.cu | 55 +++-- ...earch_multi_cta_float_uint32_dim512_t32.cu | 55 +++-- ...arch_multi_cta_float_uint64_dim1024_t32.cu | 55 +++-- ...search_multi_cta_float_uint64_dim128_t8.cu | 55 +++-- ...earch_multi_cta_float_uint64_dim256_t16.cu | 55 +++-- ...earch_multi_cta_float_uint64_dim512_t32.cu | 55 +++-- ...earch_multi_cta_int8_uint32_dim1024_t32.cu | 55 +++-- .../search_multi_cta_int8_uint32_dim128_t8.cu | 55 +++-- ...search_multi_cta_int8_uint32_dim256_t16.cu | 55 +++-- ...search_multi_cta_int8_uint32_dim512_t32.cu | 55 +++-- ...arch_multi_cta_uint8_uint32_dim1024_t32.cu | 55 +++-- ...search_multi_cta_uint8_uint32_dim128_t8.cu | 55 +++-- ...earch_multi_cta_uint8_uint32_dim256_t16.cu | 55 +++-- ...earch_multi_cta_uint8_uint32_dim512_t32.cu | 55 +++-- .../cagra/search_single_cta_00_generate.py | 59 ++--- ...rch_single_cta_float_uint32_dim1024_t32.cu | 58 ++--- ...earch_single_cta_float_uint32_dim128_t8.cu | 58 ++--- ...arch_single_cta_float_uint32_dim256_t16.cu | 58 ++--- ...arch_single_cta_float_uint32_dim512_t32.cu | 58 ++--- ...rch_single_cta_float_uint64_dim1024_t32.cu | 58 ++--- ...earch_single_cta_float_uint64_dim128_t8.cu | 58 ++--- ...arch_single_cta_float_uint64_dim256_t16.cu | 58 ++--- ...arch_single_cta_float_uint64_dim512_t32.cu | 58 ++--- ...arch_single_cta_int8_uint32_dim1024_t32.cu | 58 ++--- ...search_single_cta_int8_uint32_dim128_t8.cu | 58 ++--- ...earch_single_cta_int8_uint32_dim256_t16.cu | 58 ++--- ...earch_single_cta_int8_uint32_dim512_t32.cu | 58 ++--- ...rch_single_cta_uint8_uint32_dim1024_t32.cu | 58 ++--- ...earch_single_cta_uint8_uint32_dim128_t8.cu | 58 ++--- ...arch_single_cta_uint8_uint32_dim256_t16.cu | 58 ++--- ...arch_single_cta_uint8_uint32_dim512_t32.cu | 58 ++--- cpp/test/neighbors/ann_cagra.cuh | 177 +++++++++++++- .../ann_cagra/search_kernel_uint64_t.cuh | 200 ++++++++-------- .../neighbors/ann_cagra/test_float_int64_t.cu | 4 +- .../ann_cagra/test_float_uint32_t.cu | 8 +- .../ann_cagra/test_int8_t_uint32_t.cu | 7 +- .../ann_cagra/test_uint8_t_uint32_t.cu | 8 +- 55 files changed, 2142 insertions(+), 1280 deletions(-) diff --git a/cpp/include/raft/neighbors/cagra.cuh b/cpp/include/raft/neighbors/cagra.cuh index 903d0571dc..1bd7010c83 100644 --- a/cpp/include/raft/neighbors/cagra.cuh +++ b/cpp/include/raft/neighbors/cagra.cuh @@ -54,14 +54,14 @@ namespace raft::neighbors::cagra { * // use default index parameters * cagra::index_params build_params; * cagra::search_params search_params - * auto knn_graph = raft::make_host_matrix(dataset.extent(0), 128); + * auto knn_graph = raft::make_host_matrix(dataset.extent(0), 128); * // create knn graph * cagra::build_knn_graph(res, dataset, knn_graph.view(), 2, build_params, search_params); - * auto optimized_gaph = raft::make_host_matrix(dataset.extent(0), 64); + * auto optimized_gaph = raft::make_host_matrix(dataset.extent(0), 64); * cagra::optimize(res, dataset, knn_graph.view(), optimized_graph.view()); * // Construct an index from dataset and optimized knn_graph * auto index = cagra::index(res, build_params.metric(), dataset, - * optimized_graph.view()); + * optimized_graph.view()); * @endcode * * @tparam DataT data element type @@ -106,7 +106,7 @@ void build_knn_graph(raft::resources const& res, * @code{.cpp} * using namespace raft::neighbors; * cagra::index_params build_params; - * auto knn_graph = raft::make_host_matrix(dataset.extent(0), 128); + * auto knn_graph = raft::make_host_matrix(dataset.extent(0), 128); * // build KNN graph not using `cagra::build_knn_graph` * // build(knn_graph, dataset, ...); * // sort graph index @@ -115,7 +115,7 @@ void build_knn_graph(raft::resources const& res, * cagra::optimize(res, dataset, knn_graph.view(), optimized_graph.view()); * // Construct an index from dataset and optimized knn_graph * auto index = cagra::index(res, build_params.metric(), dataset, - * optimized_graph.view()); + * optimized_graph.view()); * @endcode * * @tparam DataT type of the data in the source dataset @@ -316,9 +316,70 @@ void search(raft::resources const& res, auto distances_internal = raft::make_device_matrix_view( distances.data_handle(), distances.extent(0), distances.extent(1)); - cagra::detail::search_main( - res, params, idx, queries_internal, neighbors_internal, distances_internal); + cagra::detail::search_main(res, + params, + idx, + queries_internal, + neighbors_internal, + distances_internal, + raft::neighbors::filtering::none_cagra_sample_filter()); } + +/** + * @brief Search ANN using the constructed index with the given sample filter. + * + * See the [cagra::build](#cagra::build) documentation for a usage example. + * + * @tparam T data element type + * @tparam IdxT type of the indices + * @tparam CagraSampleFilterT Device filter function, with the signature + * `(uint32_t query ix, uint32_t sample_ix) -> bool` + * + * @param[in] res raft resources + * @param[in] params configure the search + * @param[in] idx cagra index + * @param[in] queries a device matrix view to a row-major matrix [n_queries, index->dim()] + * @param[out] neighbors a device matrix view to the indices of the neighbors in the source dataset + * [n_queries, k] + * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, + * k] + * @param[in] sample_filter a device filter function that greenlights samples for a given query + */ +template +void search_with_filtering(raft::resources const& res, + const search_params& params, + const index& idx, + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances, + CagraSampleFilterT sample_filter = CagraSampleFilterT()) +{ + RAFT_EXPECTS( + queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0), + "Number of rows in output neighbors and distances matrices must equal the number of queries."); + + RAFT_EXPECTS(neighbors.extent(1) == distances.extent(1), + "Number of columns in output neighbors and distances matrices must equal k"); + RAFT_EXPECTS(queries.extent(1) == idx.dim(), + "Number of query dimensions should equal number of dimensions in the index."); + + using internal_IdxT = typename std::make_unsigned::type; + auto queries_internal = raft::make_device_matrix_view( + queries.data_handle(), queries.extent(0), queries.extent(1)); + auto neighbors_internal = raft::make_device_matrix_view( + reinterpret_cast(neighbors.data_handle()), + neighbors.extent(0), + neighbors.extent(1)); + auto distances_internal = raft::make_device_matrix_view( + distances.data_handle(), distances.extent(0), distances.extent(1)); + + cagra::detail::search_main( + res, params, idx, queries_internal, neighbors_internal, distances_internal, sample_filter); +} + /** @} */ // end group cagra } // namespace raft::neighbors::cagra diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index b484fa55f9..81e714dc4e 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -34,6 +35,48 @@ namespace raft::neighbors::cagra::detail { +template +struct CagraSampleFilterWithQueryIdOffset { + const uint32_t offset; + CagraSampleFilterT filter; + + CagraSampleFilterWithQueryIdOffset(const uint32_t offset, const CagraSampleFilterT filter) + : offset(offset), filter(filter) + { + } + + _RAFT_DEVICE auto operator()(const uint32_t query_id, const uint32_t sample_id) + { + return filter(query_id + offset, sample_id); + } +}; + +template +struct CagraSampleFilterT_Selector { + using type = CagraSampleFilterWithQueryIdOffset; +}; +template <> +struct CagraSampleFilterT_Selector { + using type = raft::neighbors::filtering::none_cagra_sample_filter; +}; + +// A helper function to set a query id offset +template +inline typename CagraSampleFilterT_Selector::type set_offset( + CagraSampleFilterT filter, const uint32_t offset) +{ + typename CagraSampleFilterT_Selector::type new_filter(offset, filter); + return new_filter; +} +template <> +inline + typename CagraSampleFilterT_Selector::type + set_offset( + raft::neighbors::filtering::none_cagra_sample_filter filter, const uint32_t) +{ + return filter; +} + /** * @brief Search ANN using the constructed index. * @@ -54,13 +97,18 @@ namespace raft::neighbors::cagra::detail { * k] */ -template +template void search_main(raft::resources const& res, search_params params, const index& index, raft::device_matrix_view queries, raft::device_matrix_view neighbors, - raft::device_matrix_view distances) + raft::device_matrix_view distances, + CagraSampleFilterT sample_filter = CagraSampleFilterT()) { resource::detail::warn_non_pool_workspace(res, "raft::neighbors::cagra::search"); RAFT_LOG_DEBUG("# dataset size = %lu, dim = %lu\n", @@ -77,8 +125,9 @@ void search_main(raft::resources const& res, common::nvtx::range fun_scope( "cagra::search(max_queries = %u, k = %u, dim = %zu)", params.max_queries, topk, index.dim()); - std::unique_ptr> plan = - factory::create( + using CagraSampleFilterT_s = typename CagraSampleFilterT_Selector::type; + std::unique_ptr> plan = + factory::create( res, params, index.dim(), index.graph_degree(), topk); plan->check(neighbors.extent(1)); @@ -119,7 +168,8 @@ void search_main(raft::resources const& res, n_queries, _seed_ptr, _num_executed_iterations, - topk); + topk, + set_offset(sample_filter, qid)); } static_assert(std::is_same_v, diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp index 47e976e252..624c1a35d6 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp @@ -155,17 +155,20 @@ _RAFT_DEVICE void compute_distance_to_child_nodes(INDEX_T* const result_child_in INDEX_T* const visited_hashmap_ptr, const std::uint32_t hash_bitlen, const INDEX_T* const parent_indices, + const INDEX_T* const internal_topk_list, const std::uint32_t search_width) { - const INDEX_T invalid_index = utils::get_max_value(); + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const INDEX_T invalid_index = utils::get_max_value(); // Read child indices of parents from knn graph and check if the distance // computaiton is necessary. for (uint32_t i = threadIdx.x; i < knn_k * search_width; i += BLOCK_SIZE) { - const INDEX_T parent_id = parent_indices[i / knn_k]; - INDEX_T child_id = invalid_index; - if (parent_id != invalid_index) { - child_id = knn_graph[(i % knn_k) + ((uint64_t)knn_k * parent_id)]; + const INDEX_T smem_parent_id = parent_indices[i / knn_k]; + INDEX_T child_id = invalid_index; + if (smem_parent_id != invalid_index) { + const auto parent_id = internal_topk_list[smem_parent_id] & ~index_msb_1_mask; + child_id = knn_graph[(i % knn_k) + ((uint64_t)knn_k * parent_id)]; } if (child_id != invalid_index) { if (hashmap::insert(visited_hashmap_ptr, hash_bitlen, child_id) == 0) { diff --git a/cpp/include/raft/neighbors/detail/cagra/factory.cuh b/cpp/include/raft/neighbors/detail/cagra/factory.cuh index 625040194b..78111a9310 100644 --- a/cpp/include/raft/neighbors/detail/cagra/factory.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/factory.cuh @@ -20,20 +20,25 @@ #include "search_multi_kernel.cuh" #include "search_plan.cuh" #include "search_single_cta.cuh" +#include namespace raft::neighbors::cagra::detail { -template +template class factory { public: /** * Create a search structure for dataset with dim features. */ - static std::unique_ptr> create(raft::resources const& res, - search_params const& params, - int64_t dim, - int64_t graph_degree, - uint32_t topk) + static std::unique_ptr> create( + raft::resources const& res, + search_params const& params, + int64_t dim, + int64_t graph_degree, + uint32_t topk) { search_plan_impl_base plan(params, dim, graph_degree, topk); switch (plan.max_dim) { @@ -63,26 +68,29 @@ class factory { break; default: RAFT_LOG_DEBUG("Incorrect max_dim (%lu)\n", plan.max_dim); } - return std::unique_ptr>(); + return std::unique_ptr>(); } private: template - static std::unique_ptr> dispatch_kernel( + static std::unique_ptr> dispatch_kernel( raft::resources const& res, search_plan_impl_base& plan) { if (plan.algo == search_algo::SINGLE_CTA) { - return std::unique_ptr>( - new single_cta_search::search( - res, plan, plan.dim, plan.graph_degree, plan.topk)); + return std::unique_ptr>( + new single_cta_search:: + search( + res, plan, plan.dim, plan.graph_degree, plan.topk)); } else if (plan.algo == search_algo::MULTI_CTA) { - return std::unique_ptr>( - new multi_cta_search::search( - res, plan, plan.dim, plan.graph_degree, plan.topk)); + return std::unique_ptr>( + new multi_cta_search:: + search( + res, plan, plan.dim, plan.graph_degree, plan.topk)); } else { - return std::unique_ptr>( - new multi_kernel_search::search( - res, plan, plan.dim, plan.graph_degree, plan.topk)); + return std::unique_ptr>( + new multi_kernel_search:: + search( + res, plan, plan.dim, plan.graph_degree, plan.topk)); } } }; diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh index 6ea1e34032..9a722a6dfe 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh @@ -48,42 +48,43 @@ template - -struct search : public search_plan_impl { - using search_plan_impl::max_queries; - using search_plan_impl::itopk_size; - using search_plan_impl::algo; - using search_plan_impl::team_size; - using search_plan_impl::search_width; - using search_plan_impl::min_iterations; - using search_plan_impl::max_iterations; - using search_plan_impl::thread_block_size; - using search_plan_impl::hashmap_mode; - using search_plan_impl::hashmap_min_bitlen; - using search_plan_impl::hashmap_max_fill_rate; - using search_plan_impl::num_random_samplings; - using search_plan_impl::rand_xor_mask; - - using search_plan_impl::max_dim; - using search_plan_impl::dim; - using search_plan_impl::graph_degree; - using search_plan_impl::topk; - - using search_plan_impl::hash_bitlen; - - using search_plan_impl::small_hash_bitlen; - using search_plan_impl::small_hash_reset_interval; - using search_plan_impl::hashmap_size; - using search_plan_impl::dataset_size; - using search_plan_impl::result_buffer_size; - - using search_plan_impl::smem_size; - - using search_plan_impl::hashmap; - using search_plan_impl::num_executed_iterations; - using search_plan_impl::dev_seed; - using search_plan_impl::num_seeds; + typename DISTANCE_T, + typename SAMPLE_FILTER_T> + +struct search : public search_plan_impl { + using search_plan_impl::max_queries; + using search_plan_impl::itopk_size; + using search_plan_impl::algo; + using search_plan_impl::team_size; + using search_plan_impl::search_width; + using search_plan_impl::min_iterations; + using search_plan_impl::max_iterations; + using search_plan_impl::thread_block_size; + using search_plan_impl::hashmap_mode; + using search_plan_impl::hashmap_min_bitlen; + using search_plan_impl::hashmap_max_fill_rate; + using search_plan_impl::num_random_samplings; + using search_plan_impl::rand_xor_mask; + + using search_plan_impl::max_dim; + using search_plan_impl::dim; + using search_plan_impl::graph_degree; + using search_plan_impl::topk; + + using search_plan_impl::hash_bitlen; + + using search_plan_impl::small_hash_bitlen; + using search_plan_impl::small_hash_reset_interval; + using search_plan_impl::hashmap_size; + using search_plan_impl::dataset_size; + using search_plan_impl::result_buffer_size; + + using search_plan_impl::smem_size; + + using search_plan_impl::hashmap; + using search_plan_impl::num_executed_iterations; + using search_plan_impl::dev_seed; + using search_plan_impl::num_seeds; uint32_t num_cta_per_query; rmm::device_uvector intermediate_indices; @@ -96,7 +97,8 @@ struct search : public search_plan_impl { int64_t dim, int64_t graph_degree, uint32_t topk) - : search_plan_impl(res, params, dim, graph_degree, topk), + : search_plan_impl( + res, params, dim, graph_degree, topk), intermediate_indices(0, resource::get_cuda_stream(res)), intermediate_distances(0, resource::get_cuda_stream(res)), topk_workspace(0, resource::get_cuda_stream(res)) @@ -196,7 +198,8 @@ struct search : public search_plan_impl { const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] - uint32_t topk) + uint32_t topk, + SAMPLE_FILTER_T sample_filter) { cudaStream_t stream = resource::get_cuda_stream(res); @@ -223,6 +226,7 @@ struct search : public search_plan_impl { search_width, min_iterations, max_iterations, + sample_filter, stream); RAFT_CUDA_TRY(cudaPeekAtLastError()); diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh index de83acbb64..ee525587d7 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh @@ -15,7 +15,8 @@ */ #pragma once -#include // RAFT_EXPLICIT +#include // none_cagra_sample_filter +#include // RAFT_EXPLICIT namespace raft::neighbors::cagra::detail { namespace multi_cta_search { @@ -26,7 +27,8 @@ template + class DISTANCE_T, + class SAMPLE_FILTER_T> void select_and_run(raft::device_matrix_view dataset, raft::device_matrix_view graph, INDEX_T* const topk_indices_ptr, @@ -49,47 +51,63 @@ void select_and_run(raft::device_matrix_view( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 1024, float, uint32_t, float); -instantiate_kernel_selection(8, 128, float, uint32_t, float); -instantiate_kernel_selection(16, 256, float, uint32_t, float); -instantiate_kernel_selection(32, 512, float, uint32_t, float); -instantiate_kernel_selection(32, 1024, int8_t, uint32_t, float); -instantiate_kernel_selection(8, 128, int8_t, uint32_t, float); -instantiate_kernel_selection(16, 256, int8_t, uint32_t, float); -instantiate_kernel_selection(32, 512, int8_t, uint32_t, float); -instantiate_kernel_selection(32, 1024, uint8_t, uint32_t, float); -instantiate_kernel_selection(8, 128, uint8_t, uint32_t, float); -instantiate_kernel_selection(16, 256, uint8_t, uint32_t, float); -instantiate_kernel_selection(32, 512, uint8_t, uint32_t, float); +instantiate_kernel_selection( + 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 1024, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, 256, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection } // namespace multi_cta_search diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index 4fc051ac09..8bfbc48898 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -75,7 +76,7 @@ __device__ void pickup_next_parents(INDEX_T* const next_parent_indices, // [sea if (new_parent) { const auto i = __popc(ballot_mask & ((1 << lane_id) - 1)) + num_new_parents; if (i < search_width) { - next_parent_indices[i] = index; + next_parent_indices[i] = j; itopk_indices[j] |= index_msb_1_mask; // set most significant bit as used node } } @@ -131,7 +132,8 @@ template + class LOAD_T, + class SAMPLE_FILTER_T> __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel( INDEX_T* const result_indices_ptr, // [num_queries, num_cta_per_query, itopk_size] DISTANCE_T* const result_distances_ptr, // [num_queries, num_cta_per_query, itopk_size] @@ -152,8 +154,8 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel( const uint32_t search_width, const uint32_t min_iteration, const uint32_t max_iteration, - uint32_t* const num_executed_iterations /* stats */ -) + uint32_t* const num_executed_iterations, /* stats */ + SAMPLE_FILTER_T sample_filter) { assert(blockDim.x == BLOCK_SIZE); assert(dataset_dim <= MAX_DATASET_DIM); @@ -287,13 +289,57 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel( local_visited_hashmap_ptr, hash_bitlen, parent_indices_buffer, + result_indices_buffer, search_width); _CLK_REC(clk_compute_distance); __syncthreads(); + // Filtering + if constexpr (!std::is_same::value) { + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const INDEX_T invalid_index = utils::get_max_value(); + + for (unsigned p = threadIdx.x; p < search_width; p += blockDim.x) { + if (parent_indices_buffer[p] != invalid_index) { + const auto parent_id = + result_indices_buffer[parent_indices_buffer[p]] & ~index_msb_1_mask; + if (!sample_filter(query_id, parent_id)) { + // If the parent must not be in the resulting top-k list, remove from the parent list + result_distances_buffer[parent_indices_buffer[p]] = utils::get_max_value(); + result_indices_buffer[parent_indices_buffer[p]] = invalid_index; + } + } + } + __syncthreads(); + } + iter++; } + // Post process for filtering + if constexpr (!std::is_same::value) { + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const INDEX_T invalid_index = utils::get_max_value(); + + for (unsigned i = threadIdx.x; i < itopk_size + search_width * graph_degree; i += blockDim.x) { + const auto node_id = result_indices_buffer[i] & ~index_msb_1_mask; + if (node_id != (invalid_index & ~index_msb_1_mask) && !sample_filter(query_id, node_id)) { + // If the parent must not be in the resulting top-k list, remove from the parent list + result_distances_buffer[i] = utils::get_max_value(); + result_indices_buffer[i] = invalid_index; + } + } + + __syncthreads(); + topk_by_bitonic_sort(result_distances_buffer, + result_indices_buffer, + itopk_size + (search_width * graph_degree), + itopk_size); + __syncthreads(); + } + for (uint32_t i = threadIdx.x; i < itopk_size; i += BLOCK_SIZE) { uint32_t j = i + (itopk_size * (cta_id + (num_cta_per_query * query_id))); if (result_distances_ptr != nullptr) { result_distances_ptr[j] = result_distances_buffer[i]; } @@ -361,7 +407,8 @@ template + typename DISTANCE_T, + typename SAMPLE_FILTER_T> struct search_kernel_config { // Search kernel function type. Note that the actual values for the template value // parameters do not matter, because they are not part of the function signature. The @@ -374,7 +421,8 @@ struct search_kernel_config { DATA_T, DISTANCE_T, INDEX_T, - device::LOAD_128BIT_T>); + device::LOAD_128BIT_T, + SAMPLE_FILTER_T>); static auto choose_buffer_size(unsigned result_buffer_size, unsigned block_size) -> kernel_t { @@ -401,7 +449,8 @@ struct search_kernel_config { DATA_T, DISTANCE_T, INDEX_T, - device::LOAD_128BIT_T>; + device::LOAD_128BIT_T, + SAMPLE_FILTER_T>; } else if (block_size == 128) { return search_kernel; + device::LOAD_128BIT_T, + SAMPLE_FILTER_T>; } else if (block_size == 256) { return search_kernel; + device::LOAD_128BIT_T, + SAMPLE_FILTER_T>; } else if (block_size == 512) { return search_kernel; + device::LOAD_128BIT_T, + SAMPLE_FILTER_T>; } else { return search_kernel; + device::LOAD_128BIT_T, + SAMPLE_FILTER_T>; } } }; @@ -450,7 +503,8 @@ template + typename DISTANCE_T, + typename SAMPLE_FILTER_T> void select_and_run( // raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, @@ -475,10 +529,12 @@ void select_and_run( // raft::resources const& res, size_t search_width, size_t min_iterations, size_t max_iterations, + SAMPLE_FILTER_T sample_filter, cudaStream_t stream) { - auto kernel = search_kernel_config:: - choose_buffer_size(result_buffer_size, block_size); + auto kernel = + search_kernel_config:: + choose_buffer_size(result_buffer_size, block_size); RAFT_CUDA_TRY( cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); @@ -489,7 +545,7 @@ void select_and_run( // raft::resources const& res, dim3 block_dims(block_size, 1, 1); dim3 grid_dims(num_cta_per_query, num_queries, 1); - RAFT_LOG_DEBUG("Launching kernel with %u threads, (%u, %u) blocks %lu smem", + RAFT_LOG_DEBUG("Launching kernel with %u threads, (%u, %u) blocks %u smem", block_size, num_cta_per_query, num_queries, @@ -513,7 +569,8 @@ void select_and_run( // raft::resources const& res, search_width, min_iterations, max_iterations, - num_executed_iterations); + num_executed_iterations, + sample_filter); } } // namespace multi_cta_search diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index f312226f42..ff1bb969e7 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -242,7 +243,7 @@ __global__ void pickup_next_parents_kernel( if (new_parent) { const auto i = __popc(ballot_mask & ((1 << threadIdx.x) - 1)) + num_new_parents; if (i < parent_list_size) { - parent_list_ptr[i + (ldd * query_id)] = index; + parent_list_ptr[i + (ldd * query_id)] = j; parent_candidates_ptr[j + (lds * query_id)] |= index_msb_1_mask; // set most significant bit as used node } @@ -306,9 +307,13 @@ template + class DISTANCE_T, + class SAMPLE_FILTER_T> __global__ void compute_distance_to_child_nodes_kernel( const INDEX_T* const parent_node_list, // [num_queries, search_width] + INDEX_T* const parent_candidates_ptr, // [num_queries, search_width] + DISTANCE_T* const parent_distance_ptr, // [num_queries, search_width] + const std::size_t lds, const std::uint32_t search_width, const DATA_T* const dataset_ptr, // [dataset_size, data_dim] const std::uint32_t data_dim, @@ -321,16 +326,25 @@ __global__ void compute_distance_to_child_nodes_kernel( const std::uint32_t hash_bitlen, INDEX_T* const result_indices_ptr, // [num_queries, ldd] DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] - const std::uint32_t ldd // (*) ldd >= search_width * graph_degree -) + const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree + SAMPLE_FILTER_T sample_filter) { const uint32_t ldb = hashmap::get_size(hash_bitlen); const auto tid = threadIdx.x + blockDim.x * blockIdx.x; const auto global_team_id = tid / TEAM_SIZE; + const auto query_id = blockIdx.y; + if (global_team_id >= search_width * graph_degree) { return; } - const std::size_t parent_index = + const std::size_t parent_list_index = parent_node_list[global_team_id / graph_degree + (search_width * blockIdx.y)]; + + if (parent_list_index == utils::get_max_value()) { return; } + + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const auto parent_index = + parent_candidates_ptr[parent_list_index + (lds * query_id)] & ~index_msb_1_mask; + if (parent_index == utils::get_max_value()) { result_distances_ptr[ldd * blockIdx.y + global_team_id] = utils::get_max_value(); return; @@ -361,15 +375,28 @@ __global__ void compute_distance_to_child_nodes_kernel( result_distances_ptr[ldd * blockIdx.y + global_team_id] = utils::get_max_value(); } } + + if constexpr (!std::is_same::value) { + if (!sample_filter(query_id, parent_index)) { + parent_candidates_ptr[parent_list_index + (lds * query_id)] = utils::get_max_value(); + parent_distance_ptr[parent_list_index + (lds * query_id)] = + utils::get_max_value(); + } + } } template + class DISTANCE_T, + class SAMPLE_FILTER_T> void compute_distance_to_child_nodes( const INDEX_T* const parent_node_list, // [num_queries, search_width] + INDEX_T* const parent_candidates_ptr, // [num_queries, search_width] + DISTANCE_T* const parent_distance_ptr, // [num_queries, search_width] + const std::size_t lds, const uint32_t search_width, const DATA_T* const dataset_ptr, // [dataset_size, data_dim] const std::uint32_t data_dim, @@ -384,6 +411,7 @@ void compute_distance_to_child_nodes( INDEX_T* const result_indices_ptr, // [num_queries, ldd] DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree + SAMPLE_FILTER_T sample_filter, cudaStream_t cuda_stream = 0) { const auto block_size = 128; @@ -392,6 +420,9 @@ void compute_distance_to_child_nodes( num_queries); compute_distance_to_child_nodes_kernel <<>>(parent_node_list, + parent_candidates_ptr, + parent_distance_ptr, + lds, search_width, dataset_ptr, data_dim, @@ -404,7 +435,8 @@ void compute_distance_to_child_nodes( hash_bitlen, result_indices_ptr, result_distances_ptr, - ldd); + ldd, + sample_filter); } template @@ -436,6 +468,50 @@ void remove_parent_bit(const std::uint32_t num_queries, num_queries, num_topk, topk_indices_ptr, ld); } +// This function called after the `remove_parent_bit` function +template +__global__ void apply_filter_kernel(INDEX_T* const result_indices_ptr, + DISTANCE_T* const result_distances_ptr, + const std::size_t lds, + const std::uint32_t result_buffer_size, + const std::uint32_t num_queries, + const INDEX_T query_id_offset, + SAMPLE_FILTER_T sample_filter) +{ + const auto tid = threadIdx.x + blockIdx.x * blockDim.x; + if (tid >= result_buffer_size * num_queries) { return; } + const auto i = tid % result_buffer_size; + const auto j = tid / result_buffer_size; + const auto index = i + j * lds; + + if (!sample_filter(query_id_offset + j, result_indices_ptr[index])) { + result_indices_ptr[index] = utils::get_max_value(); + result_distances_ptr[index] = utils::get_max_value(); + } +} + +template +void apply_filter(INDEX_T* const result_indices_ptr, + DISTANCE_T* const result_distances_ptr, + const std::size_t lds, + const std::uint32_t result_buffer_size, + const std::uint32_t num_queries, + const INDEX_T query_id_offset, + SAMPLE_FILTER_T sample_filter, + cudaStream_t cuda_stream) +{ + const std::uint32_t block_size = 256; + const std::uint32_t grid_size = ceildiv(num_queries * result_buffer_size, block_size); + + apply_filter_kernel<<>>(result_indices_ptr, + result_distances_ptr, + lds, + result_buffer_size, + num_queries, + query_id_offset, + sample_filter); +} + template __global__ void batched_memcpy_kernel(T* const dst, // [batch_size, ld_dst] const uint64_t ld_dst, @@ -508,41 +584,42 @@ template -struct search : search_plan_impl { - using search_plan_impl::max_queries; - using search_plan_impl::itopk_size; - using search_plan_impl::algo; - using search_plan_impl::team_size; - using search_plan_impl::search_width; - using search_plan_impl::min_iterations; - using search_plan_impl::max_iterations; - using search_plan_impl::thread_block_size; - using search_plan_impl::hashmap_mode; - using search_plan_impl::hashmap_min_bitlen; - using search_plan_impl::hashmap_max_fill_rate; - using search_plan_impl::num_random_samplings; - using search_plan_impl::rand_xor_mask; - - using search_plan_impl::max_dim; - using search_plan_impl::dim; - using search_plan_impl::graph_degree; - using search_plan_impl::topk; - - using search_plan_impl::hash_bitlen; - - using search_plan_impl::small_hash_bitlen; - using search_plan_impl::small_hash_reset_interval; - using search_plan_impl::hashmap_size; - using search_plan_impl::dataset_size; - using search_plan_impl::result_buffer_size; - - using search_plan_impl::smem_size; - - using search_plan_impl::hashmap; - using search_plan_impl::num_executed_iterations; - using search_plan_impl::dev_seed; - using search_plan_impl::num_seeds; + typename DISTANCE_T, + typename SAMPLE_FILTER_T> +struct search : search_plan_impl { + using search_plan_impl::max_queries; + using search_plan_impl::itopk_size; + using search_plan_impl::algo; + using search_plan_impl::team_size; + using search_plan_impl::search_width; + using search_plan_impl::min_iterations; + using search_plan_impl::max_iterations; + using search_plan_impl::thread_block_size; + using search_plan_impl::hashmap_mode; + using search_plan_impl::hashmap_min_bitlen; + using search_plan_impl::hashmap_max_fill_rate; + using search_plan_impl::num_random_samplings; + using search_plan_impl::rand_xor_mask; + + using search_plan_impl::max_dim; + using search_plan_impl::dim; + using search_plan_impl::graph_degree; + using search_plan_impl::topk; + + using search_plan_impl::hash_bitlen; + + using search_plan_impl::small_hash_bitlen; + using search_plan_impl::small_hash_reset_interval; + using search_plan_impl::hashmap_size; + using search_plan_impl::dataset_size; + using search_plan_impl::result_buffer_size; + + using search_plan_impl::smem_size; + + using search_plan_impl::hashmap; + using search_plan_impl::num_executed_iterations; + using search_plan_impl::dev_seed; + using search_plan_impl::num_seeds; size_t result_buffer_allocation_size; rmm::device_uvector result_indices; // results_indices_buffer @@ -557,7 +634,8 @@ struct search : search_plan_impl { int64_t dim, int64_t graph_degree, uint32_t topk) - : search_plan_impl(res, params, dim, graph_degree, topk), + : search_plan_impl( + res, params, dim, graph_degree, topk), result_indices(0, resource::get_cuda_stream(res)), result_distances(0, resource::get_cuda_stream(res)), parent_node_list(0, resource::get_cuda_stream(res)), @@ -602,7 +680,8 @@ struct search : search_plan_impl { const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] - uint32_t topk) + uint32_t topk, + SAMPLE_FILTER_T sample_filter) { // Init hashmap cudaStream_t stream = resource::get_cuda_stream(res); @@ -684,6 +763,9 @@ struct search : search_plan_impl { // Compute distance to child nodes that are adjacent to the parent node compute_distance_to_child_nodes( parent_node_list.data(), + result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size, + result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size, + result_buffer_allocation_size, search_width, dataset.data_handle(), dataset.extent(1), @@ -698,22 +780,53 @@ struct search : search_plan_impl { result_indices.data() + itopk_size, result_distances.data() + itopk_size, result_buffer_allocation_size, + sample_filter, stream); iter++; } // while ( 1 ) + auto result_indices_ptr = result_indices.data() + (iter & 0x1) * result_buffer_size; + auto result_distances_ptr = result_distances.data() + (iter & 0x1) * result_buffer_size; // Remove parent bit in search results - remove_parent_bit(num_queries, - itopk_size, - result_indices.data() + (iter & 0x1) * result_buffer_size, - result_buffer_allocation_size, - stream); + remove_parent_bit( + num_queries, itopk_size, result_indices_ptr, result_buffer_allocation_size, stream); + + if (!std::is_same::value) { + apply_filter( + result_indices.data() + (iter & 0x1) * itopk_size, + result_distances.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_buffer_size, + num_queries, + 0, + sample_filter, + stream); + + result_indices_ptr = result_indices.data() + (1 - (iter & 0x1)) * result_buffer_size; + result_distances_ptr = result_distances.data() + (1 - (iter & 0x1)) * result_buffer_size; + _cuann_find_topk(itopk_size, + num_queries, + result_buffer_size, + result_distances.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_indices.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + result_distances_ptr, + result_buffer_allocation_size, + result_indices_ptr, + result_buffer_allocation_size, + topk_workspace.data(), + true, + topk_hint.data(), + stream); + } // Copy results from working buffer to final buffer batched_memcpy(topk_indices_ptr, topk, - result_indices.data() + (iter & 0x1) * result_buffer_size, + result_indices_ptr, result_buffer_allocation_size, topk, num_queries, @@ -721,7 +834,7 @@ struct search : search_plan_impl { if (topk_distances_ptr) { batched_memcpy(topk_distances_ptr, topk, - result_distances.data() + (iter & 0x1) * result_buffer_size, + result_distances_ptr, result_buffer_allocation_size, topk, num_queries, diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh index 33c77db61e..9419385836 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh @@ -65,7 +65,7 @@ struct search_plan_impl_base : public search_params { } }; -template +template struct search_plan_impl : public search_plan_impl_base { int64_t hash_bitlen; @@ -113,7 +113,8 @@ struct search_plan_impl : public search_plan_impl_base { const std::uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] std::uint32_t* const num_executed_iterations, // [num_queries] - uint32_t topk){}; + uint32_t topk, + SAMPLE_FILTER_T sample_filter){}; void adjust_search_params() { @@ -129,13 +130,13 @@ struct search_plan_impl : public search_plan_impl_base { if (max_iterations < min_iterations) { _max_iterations = min_iterations; } if (max_iterations < _max_iterations) { RAFT_LOG_DEBUG( - "# max_iterations is increased from %u to %u.", max_iterations, _max_iterations); + "# max_iterations is increased from %lu to %u.", max_iterations, _max_iterations); max_iterations = _max_iterations; } if (itopk_size % 32) { uint32_t itopk32 = itopk_size; itopk32 += 32 - (itopk_size % 32); - RAFT_LOG_DEBUG("# internal_topk is increased from %u to %u, as it must be multiple of 32.", + RAFT_LOG_DEBUG("# internal_topk is increased from %lu to %u, as it must be multiple of 32.", itopk_size, itopk32); itopk_size = itopk32; diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh index 45dd535e1d..27d54f72cb 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh @@ -49,41 +49,42 @@ template -struct search : search_plan_impl { - using search_plan_impl::max_queries; - using search_plan_impl::itopk_size; - using search_plan_impl::algo; - using search_plan_impl::team_size; - using search_plan_impl::search_width; - using search_plan_impl::min_iterations; - using search_plan_impl::max_iterations; - using search_plan_impl::thread_block_size; - using search_plan_impl::hashmap_mode; - using search_plan_impl::hashmap_min_bitlen; - using search_plan_impl::hashmap_max_fill_rate; - using search_plan_impl::num_random_samplings; - using search_plan_impl::rand_xor_mask; + typename DISTANCE_T, + typename SAMPLE_FILTER_T> +struct search : search_plan_impl { + using search_plan_impl::max_queries; + using search_plan_impl::itopk_size; + using search_plan_impl::algo; + using search_plan_impl::team_size; + using search_plan_impl::search_width; + using search_plan_impl::min_iterations; + using search_plan_impl::max_iterations; + using search_plan_impl::thread_block_size; + using search_plan_impl::hashmap_mode; + using search_plan_impl::hashmap_min_bitlen; + using search_plan_impl::hashmap_max_fill_rate; + using search_plan_impl::num_random_samplings; + using search_plan_impl::rand_xor_mask; - using search_plan_impl::max_dim; - using search_plan_impl::dim; - using search_plan_impl::graph_degree; - using search_plan_impl::topk; + using search_plan_impl::max_dim; + using search_plan_impl::dim; + using search_plan_impl::graph_degree; + using search_plan_impl::topk; - using search_plan_impl::hash_bitlen; + using search_plan_impl::hash_bitlen; - using search_plan_impl::small_hash_bitlen; - using search_plan_impl::small_hash_reset_interval; - using search_plan_impl::hashmap_size; - using search_plan_impl::dataset_size; - using search_plan_impl::result_buffer_size; + using search_plan_impl::small_hash_bitlen; + using search_plan_impl::small_hash_reset_interval; + using search_plan_impl::hashmap_size; + using search_plan_impl::dataset_size; + using search_plan_impl::result_buffer_size; - using search_plan_impl::smem_size; + using search_plan_impl::smem_size; - using search_plan_impl::hashmap; - using search_plan_impl::num_executed_iterations; - using search_plan_impl::dev_seed; - using search_plan_impl::num_seeds; + using search_plan_impl::hashmap; + using search_plan_impl::num_executed_iterations; + using search_plan_impl::dev_seed; + using search_plan_impl::num_seeds; uint32_t num_itopk_candidates; @@ -92,7 +93,8 @@ struct search : search_plan_impl { int64_t dim, int64_t graph_degree, uint32_t topk) - : search_plan_impl(res, params, dim, graph_degree, topk) + : search_plan_impl( + res, params, dim, graph_degree, topk) { set_params(res); } @@ -111,7 +113,7 @@ struct search : search_plan_impl { RAFT_EXPECTS(itopk_size <= max_itopk, "itopk_size cannot be larger than %u", max_itopk); RAFT_LOG_DEBUG("# num_itopk_candidates: %u", num_itopk_candidates); - RAFT_LOG_DEBUG("# num_itopk: %u", itopk_size); + RAFT_LOG_DEBUG("# num_itopk: %lu", itopk_size); // // Determine the thread block size // @@ -234,7 +236,8 @@ struct search : search_plan_impl { const std::uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] std::uint32_t* const num_executed_iterations, // [num_queries] - uint32_t topk) + uint32_t topk, + SAMPLE_FILTER_T sample_filter) { cudaStream_t stream = resource::get_cuda_stream(res); select_and_run( @@ -261,6 +264,7 @@ struct search : search_plan_impl { search_width, min_iterations, max_iterations, + sample_filter, stream); } }; diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh index 5f5df1a818..35d239563a 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh @@ -15,7 +15,9 @@ */ #pragma once +#include #include // RAFT_EXPLICIT + namespace raft::neighbors::cagra::detail { namespace single_cta_search { @@ -25,7 +27,8 @@ template + typename DISTANCE_T, + typename SAMPLE_FILTER_T> void select_and_run( // raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, @@ -50,50 +53,65 @@ void select_and_run( // raft::resources const& res, size_t search_width, size_t min_iterations, size_t max_iterations, + SAMPLE_FILTER_T sample_filter, cudaStream_t stream) RAFT_EXPLICIT; #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - extern template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 1024, float, uint32_t, float); -instantiate_single_cta_select_and_run(8, 128, float, uint32_t, float); -instantiate_single_cta_select_and_run(16, 256, float, uint32_t, float); -instantiate_single_cta_select_and_run(32, 512, float, uint32_t, float); -instantiate_single_cta_select_and_run(32, 1024, int8_t, uint32_t, float); -instantiate_single_cta_select_and_run(8, 128, int8_t, uint32_t, float); -instantiate_single_cta_select_and_run(16, 256, int8_t, uint32_t, float); -instantiate_single_cta_select_and_run(32, 512, int8_t, uint32_t, float); -instantiate_single_cta_select_and_run(32, 1024, uint8_t, uint32_t, float); -instantiate_single_cta_select_and_run(8, 128, uint8_t, uint32_t, float); -instantiate_single_cta_select_and_run(16, 256, uint8_t, uint32_t, float); -instantiate_single_cta_select_and_run(32, 512, uint8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 32, 1024, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 16, 256, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_select_and_run diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index 81325fd5da..128dc8d116 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -78,7 +79,7 @@ __device__ void pickup_next_parents(std::uint32_t* const terminate_flag, if (new_parent) { const auto i = __popc(ballot_mask & ((1 << threadIdx.x) - 1)) + num_new_parents; if (i < search_width) { - next_parent_indices[i] = index; + next_parent_indices[i] = jj; // set most significant bit as used node internal_topk_indices[jj] |= index_msb_1_mask; } @@ -458,7 +459,8 @@ template + class INDEX_T, + class SAMPLE_FILTER_T> __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel(INDEX_T* const result_indices_ptr, // [num_queries, top_k] DISTANCE_T* const result_distances_ptr, // [num_queries, top_k] @@ -482,7 +484,8 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ std::uint32_t* const num_executed_iterations, // [num_queries] const std::uint32_t hash_bitlen, const std::uint32_t small_hash_bitlen, - const std::uint32_t small_hash_reset_interval) + const std::uint32_t small_hash_reset_interval, + SAMPLE_FILTER_T sample_filter) { using LOAD_T = device::LOAD_128BIT_T; const auto query_id = blockIdx.y; @@ -527,6 +530,9 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ auto terminate_flag = reinterpret_cast(topk_ws + 3); auto smem_working_ptr = reinterpret_cast(terminate_flag + 1); + // A flag for filtering. + auto filter_flag = terminate_flag; + const DATA_T* const query_ptr = queries_ptr + query_id * dataset_dim; for (unsigned i = threadIdx.x; i < MAX_DATASET_DIM; i += BLOCK_SIZE) { unsigned j = device::swizzling(i); @@ -576,7 +582,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ std::uint32_t iter = 0; while (1) { // sort - if (TOPK_BY_BITONIC_SORT) { + if constexpr (TOPK_BY_BITONIC_SORT) { // [Notice] // It is good to use multiple warps in topk_by_bitonic_sort() when // batch size is small (short-latency), but it might not be always good @@ -614,17 +620,28 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ // topk with bitonic sort _CLK_START(); - topk_by_bitonic_sort( - result_distances_buffer, - result_indices_buffer, - internal_topk, - result_distances_buffer + internal_topk, - result_indices_buffer + internal_topk, - search_width * graph_degree, - topk_ws, - (iter == 0)); + if (std::is_same::value || + *filter_flag == 0) { + topk_by_bitonic_sort( + result_distances_buffer, + result_indices_buffer, + internal_topk, + result_distances_buffer + internal_topk, + result_indices_buffer + internal_topk, + search_width * graph_degree, + topk_ws, + (iter == 0)); + __syncthreads(); + } else { + topk_by_bitonic_sort_1st( + result_distances_buffer, + result_indices_buffer, + internal_topk + search_width * graph_degree, + internal_topk); + if (threadIdx.x == 0) { *terminate_flag = 0; } + } _CLK_REC(clk_topk); - } else { _CLK_START(); // topk with radix block sort @@ -693,12 +710,61 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ local_visited_hashmap_ptr, hash_bitlen, parent_list_buffer, + result_indices_buffer, search_width); __syncthreads(); _CLK_REC(clk_compute_distance); + // Filtering + if constexpr (!std::is_same::value) { + if (threadIdx.x == 0) { *filter_flag = 0; } + __syncthreads(); + + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const INDEX_T invalid_index = utils::get_max_value(); + + for (unsigned p = threadIdx.x; p < search_width; p += blockDim.x) { + if (parent_list_buffer[p] != invalid_index) { + const auto parent_id = result_indices_buffer[parent_list_buffer[p]] & ~index_msb_1_mask; + if (!sample_filter(query_id, parent_id)) { + // If the parent must not be in the resulting top-k list, remove from the parent list + result_distances_buffer[parent_list_buffer[p]] = utils::get_max_value(); + result_indices_buffer[parent_list_buffer[p]] = invalid_index; + *filter_flag = 1; + } + } + } + __syncthreads(); + } + iter++; } + + // Post process for filtering + if constexpr (!std::is_same::value) { + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const INDEX_T invalid_index = utils::get_max_value(); + + for (unsigned i = threadIdx.x; i < internal_topk + search_width * graph_degree; + i += blockDim.x) { + const auto node_id = result_indices_buffer[i] & ~index_msb_1_mask; + if (node_id != (invalid_index & ~index_msb_1_mask) && !sample_filter(query_id, node_id)) { + result_distances_buffer[i] = utils::get_max_value(); + result_indices_buffer[i] = invalid_index; + } + } + + __syncthreads(); + topk_by_bitonic_sort_1st( + result_distances_buffer, + result_indices_buffer, + internal_topk + search_width * graph_degree, + top_k); + __syncthreads(); + } + for (std::uint32_t i = threadIdx.x; i < top_k; i += BLOCK_SIZE) { unsigned j = i + (top_k * query_id); unsigned ii = i; @@ -737,9 +803,15 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ #endif } -template +template struct search_kernel_config { - using kernel_t = decltype(&search_kernel); + using kernel_t = + decltype(&search_kernel); template static auto choose_block_size(unsigned block_size) -> kernel_t @@ -747,24 +819,104 @@ struct search_kernel_config { constexpr unsigned BS = USE_BITONIC_SORT; if constexpr (BS) { if (block_size == 64) { - return search_kernel; + return search_kernel; } else if (block_size == 128) { - return search_kernel; + return search_kernel; } else if (block_size == 256) { - return search_kernel; + return search_kernel; } else if (block_size == 512) { - return search_kernel; + return search_kernel; } else { - return search_kernel; + return search_kernel; } } else { if (block_size == 256) { - return search_kernel; + return search_kernel; } else if (block_size == 512) { - return search_kernel; + return search_kernel; } else { - return search_kernel; + return search_kernel; } } } @@ -826,7 +978,8 @@ template + typename DISTANCE_T, + typename SAMPLE_FILTER_T> void select_and_run( // raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, @@ -851,16 +1004,18 @@ void select_and_run( // raft::resources const& res, size_t search_width, size_t min_iterations, size_t max_iterations, + SAMPLE_FILTER_T sample_filter, cudaStream_t stream) { - auto kernel = search_kernel_config:: - choose_itopk_and_mx_candidates(itopk_size, num_itopk_candidates, block_size); + auto kernel = + search_kernel_config:: + choose_itopk_and_mx_candidates(itopk_size, num_itopk_candidates, block_size); RAFT_CUDA_TRY( cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size)); dim3 thread_dims(block_size, 1, 1); dim3 block_dims(1, num_queries, 1); RAFT_LOG_DEBUG( - "Launching kernel with %u threads, %u block %lu smem", block_size, num_queries, smem_size); + "Launching kernel with %u threads, %u block %u smem", block_size, num_queries, smem_size); kernel<<>>(topk_indices_ptr, topk_distances_ptr, topk, @@ -883,7 +1038,8 @@ void select_and_run( // raft::resources const& res, num_executed_iterations, hash_bitlen, small_hash_bitlen, - small_hash_reset_interval); + small_hash_reset_interval, + sample_filter); RAFT_CUDA_TRY(cudaPeekAtLastError()); } } // namespace single_cta_search diff --git a/cpp/include/raft/neighbors/ivf_flat-inl.cuh b/cpp/include/raft/neighbors/ivf_flat-inl.cuh index a18ee065bf..6641346a67 100644 --- a/cpp/include/raft/neighbors/ivf_flat-inl.cuh +++ b/cpp/include/raft/neighbors/ivf_flat-inl.cuh @@ -342,7 +342,7 @@ void extend(raft::resources const& handle, /** @} */ /** - * @brief Search ANN using the constructed index. + * @brief Search ANN using the constructed index with the given filter. * * See the [ivf_flat::build](#ivf_flat::build) documentation for a usage example. * @@ -374,6 +374,8 @@ void extend(raft::resources const& handle, * * @tparam T data element type * @tparam IdxT type of the indices + * @tparam IvfSampleFilterT Device filter function, with the signature + * `(uint32_t query_ix, uint32 cluster_ix, uint32_t sample_ix) -> bool` * * @param[in] handle * @param[in] params configure the search @@ -386,7 +388,7 @@ void extend(raft::resources const& handle, * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k] * @param[in] mr an optional memory resource to use across the searches (you can provide a large * enough memory pool here to avoid memory allocations within search). - * @param[in] sample_filter a filter the greenlights samples for a given query + * @param[in] sample_filter a device filter function that greenlights samples for a given query */ template void search_with_filtering(raft::resources const& handle, @@ -475,7 +477,7 @@ void search(raft::resources const& handle, */ /** - * @brief Search ANN using the constructed index using the given filter. + * @brief Search ANN using the constructed index with the given filter. * * See the [ivf_flat::build](#ivf_flat::build) documentation for a usage example. * @@ -501,6 +503,8 @@ void search(raft::resources const& handle, * * @tparam T data element type * @tparam IdxT type of the indices + * @tparam IvfSampleFilterT Device filter function, with the signature + * `(uint32_t query_ix, uint32 cluster_ix, uint32_t sample_ix) -> bool` * * @param[in] handle * @param[in] params configure the search @@ -509,7 +513,7 @@ void search(raft::resources const& handle, * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset * [n_queries, k] * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k] - * @param[in] sample_filter a filter the greenlights samples for a given query + * @param[in] sample_filter a device filter function that greenlights samples for a given query */ template void search_with_filtering(raft::resources const& handle, diff --git a/cpp/include/raft/neighbors/ivf_pq-inl.cuh b/cpp/include/raft/neighbors/ivf_pq-inl.cuh index ccf8717486..9f203d92fb 100644 --- a/cpp/include/raft/neighbors/ivf_pq-inl.cuh +++ b/cpp/include/raft/neighbors/ivf_pq-inl.cuh @@ -134,7 +134,7 @@ void extend(raft::resources const& handle, } /** - * @brief Search ANN using the constructed index using the given filter. + * @brief Search ANN using the constructed index with the given filter. * * See the [ivf_pq::build](#ivf_pq::build) documentation for a usage example. * @@ -148,6 +148,8 @@ void extend(raft::resources const& handle, * * @tparam T data element type * @tparam IdxT type of the indices + * @tparam IvfSampleFilterT Device filter function, with the signature + * `(uint32_t query_ix, uint32 cluster_ix, uint32_t sample_ix) -> bool` * * @param[in] handle * @param[in] params configure the search @@ -157,7 +159,7 @@ void extend(raft::resources const& handle, * [n_queries, k] * @param[out] distances a device matrix view to the distances to the selected neighbors [n_queries, * k] - * @param[in] sample_filter a filter the greenlights samples for a given query. + * @param[in] sample_filter a device filter function that greenlights samples for a given query. */ template void search_with_filtering(raft::resources const& handle, @@ -343,7 +345,7 @@ void extend(raft::resources const& handle, } /** - * @brief Search ANN using the constructed index using the given filter. + * @brief Search ANN using the constructed index with the given filter. * * See the [ivf_pq::build](#ivf_pq::build) documentation for a usage example. * @@ -372,6 +374,8 @@ void extend(raft::resources const& handle, * * @tparam T data element type * @tparam IdxT type of the indices + * @tparam IvfSampleFilterT Device filter function, with the signature + * `(uint32_t query_ix, uint32 cluster_ix, uint32_t sample_ix) -> bool` * * @param[in] handle * @param[in] params configure the search @@ -382,7 +386,7 @@ void extend(raft::resources const& handle, * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset * [n_queries, k] * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k] - * @param[in] sample_filter a filter the greenlights samples for a given query + * @param[in] sample_filter a device filter function that greenlights samples for a given query */ template void search_with_filtering(raft::resources const& handle, diff --git a/cpp/include/raft/neighbors/sample_filter_types.hpp b/cpp/include/raft/neighbors/sample_filter_types.hpp index 5a301e9d2f..10c5e99372 100644 --- a/cpp/include/raft/neighbors/sample_filter_types.hpp +++ b/cpp/include/raft/neighbors/sample_filter_types.hpp @@ -37,6 +37,18 @@ struct none_ivf_sample_filter { } }; +/* A filter that filters nothing. This is the default behavior. */ +struct none_cagra_sample_filter { + inline _RAFT_HOST_DEVICE bool operator()( + // query index + const uint32_t query_ix, + // the index of the current sample + const uint32_t sample_ix) const + { + return true; + } +}; + /** * If the filtering depends on the index of a sample, then the following * filter template can be used: diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py index 784d116503..15eb0a9e65 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py @@ -39,41 +39,45 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \\ - template void select_and_run( \\ - raft::device_matrix_view dataset, \\ - raft::device_matrix_view graph, \\ - INDEX_T* const topk_indices_ptr, \\ - DISTANCE_T* const topk_distances_ptr, \\ - const DATA_T* const queries_ptr, \\ - const uint32_t num_queries, \\ - const INDEX_T* dev_seed_ptr, \\ - uint32_t* const num_executed_iterations, \\ - uint32_t topk, \\ - uint32_t block_size, \\ - uint32_t result_buffer_size, \\ - uint32_t smem_size, \\ - int64_t hash_bitlen, \\ - INDEX_T* hashmap_ptr, \\ - uint32_t num_cta_per_query, \\ - uint32_t num_random_samplings, \\ - uint64_t rand_xor_mask, \\ - uint32_t num_seeds, \\ - size_t itopk_size, \\ - size_t search_width, \\ - size_t min_iterations, \\ - size_t max_iterations, \\ - cudaStream_t stream); +#define instantiate_kernel_selection( \\ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \\ + template void \\ + select_and_run( \\ + raft::device_matrix_view dataset, \\ + raft::device_matrix_view graph, \\ + INDEX_T* const topk_indices_ptr, \\ + DISTANCE_T* const topk_distances_ptr, \\ + const DATA_T* const queries_ptr, \\ + const uint32_t num_queries, \\ + const INDEX_T* dev_seed_ptr, \\ + uint32_t* const num_executed_iterations, \\ + uint32_t topk, \\ + uint32_t block_size, \\ + uint32_t result_buffer_size, \\ + uint32_t smem_size, \\ + int64_t hash_bitlen, \\ + INDEX_T* hashmap_ptr, \\ + uint32_t num_cta_per_query, \\ + uint32_t num_random_samplings, \\ + uint64_t rand_xor_mask, \\ + uint32_t num_seeds, \\ + size_t itopk_size, \\ + size_t search_width, \\ + size_t min_iterations, \\ + size_t max_iterations, \\ + SAMPLE_FILTER_T sample_filter, \\ + cudaStream_t stream); """ trailer = """ #undef instantiate_kernel_selection -} // namespace raft::neighbors::cagra::detail::namespace multi_cta_search +} // namespace raft::neighbors::cagra::detail::multi_cta_search """ mxdim_team = [(128, 8), (256, 16), (512, 32), (1024, 32)] @@ -97,7 +101,7 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_kernel_selection({team}, {mxdim}, {data_t}, {idx_t}, {distance_t});\n" + f"instantiate_kernel_selection(\n {team}, {mxdim}, {data_t}, {idx_t}, {distance_t}, raft::neighbors::filtering::none_cagra_sample_filter);\n" ) f.write(trailer) # For pasting into CMakeLists.txt diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu index 2a4e7ac607..1a3b2284bd 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_widthhhhhhhhh, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 1024, float, uint32_t, float); +instantiate_kernel_selection( + 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu index 115ce3b48b..36e86d9ed6 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(8, 128, float, uint32_t, float); +instantiate_kernel_selection( + 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu index c5e704a85f..6f1af2d93f 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(16, 256, float, uint32_t, float); +instantiate_kernel_selection( + 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu index 3469facf39..1279f8e415 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 512, float, uint32_t, float); +instantiate_kernel_selection( + 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu index 327bfc73b4..0dabff0df5 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 1024, float, uint64_t, float); +instantiate_kernel_selection( + 32, 1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu index 1abe0cd8af..72bb74cdb8 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(8, 128, float, uint64_t, float); +instantiate_kernel_selection( + 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu index dd61810d06..dceea10b5d 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(16, 256, float, uint64_t, float); +instantiate_kernel_selection( + 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu index 8e12bab514..acb8bd6a12 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 512, float, uint64_t, float); +instantiate_kernel_selection( + 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu index d946ac9c79..0254f09ff0 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 1024, int8_t, uint32_t, float); +instantiate_kernel_selection( + 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu index e4d7b44d1e..2b67e7e968 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(8, 128, int8_t, uint32_t, float); +instantiate_kernel_selection( + 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu index b8dc3b38a8..17d6722e58 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(16, 256, int8_t, uint32_t, float); +instantiate_kernel_selection( + 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu index 749b35bad6..38f02812e2 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 512, int8_t, uint32_t, float); +instantiate_kernel_selection( + 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu index 428d460ba8..fa111196c6 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_widthh, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 1024, uint8_t, uint32_t, float); +instantiate_kernel_selection( + 32, 1024, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu index 28a20b865e..1ef3c28aa3 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(8, 128, uint8_t, uint32_t, float); +instantiate_kernel_selection( + 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu index e85a84ae8e..d26cb44843 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(16, 256, uint8_t, uint32_t, float); +instantiate_kernel_selection( + 16, 256, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu index 232b62ebcd..4d4322f261 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu @@ -25,36 +25,41 @@ */ #include +#include namespace raft::neighbors::cagra::detail::multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_kernel_selection(32, 512, uint8_t, uint32_t, float); +instantiate_kernel_selection( + 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_kernel_selection diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py index cf61a45b4a..249555082e 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py @@ -39,35 +39,38 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \\ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \\ - template void select_and_run( \\ - raft::device_matrix_view dataset, \\ - raft::device_matrix_view graph, \\ - INDEX_T* const topk_indices_ptr, \\ - DISTANCE_T* const topk_distances_ptr, \\ - const DATA_T* const queries_ptr, \\ - const uint32_t num_queries, \\ - const INDEX_T* dev_seed_ptr, \\ - uint32_t* const num_executed_iterations, \\ - uint32_t topk, \\ - uint32_t num_itopk_candidates, \\ - uint32_t block_size, \\ - uint32_t smem_size, \\ - int64_t hash_bitlen, \\ - INDEX_T* hashmap_ptr, \\ - size_t small_hash_bitlen, \\ - size_t small_hash_reset_interval, \\ - uint32_t num_random_samplings, \\ - uint64_t rand_xor_mask, \\ - uint32_t num_seeds, \\ - size_t itopk_size, \\ - size_t search_width, \\ - size_t min_iterations, \\ - size_t max_iterations, \\ +#define instantiate_single_cta_select_and_run( \\ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \\ + template void \\ + select_and_run( \\ + raft::device_matrix_view dataset, \\ + raft::device_matrix_view graph, \\ + INDEX_T* const topk_indices_ptr, \\ + DISTANCE_T* const topk_distances_ptr, \\ + const DATA_T* const queries_ptr, \\ + const uint32_t num_queries, \\ + const INDEX_T* dev_seed_ptr, \\ + uint32_t* const num_executed_iterations, \\ + uint32_t topk, \\ + uint32_t num_itopk_candidates, \\ + uint32_t block_size, \\ + uint32_t smem_size, \\ + int64_t hash_bitlen, \\ + INDEX_T* hashmap_ptr, \\ + size_t small_hash_bitlen, \\ + size_t small_hash_reset_interval, \\ + uint32_t num_random_samplings, \\ + uint64_t rand_xor_mask, \\ + uint32_t num_seeds, \\ + size_t itopk_size, \\ + size_t search_width, \\ + size_t min_iterations, \\ + size_t max_iterations, \\ + SAMPLE_FILTER_T sample_filter, \\ cudaStream_t stream); """ @@ -75,7 +78,7 @@ trailer = """ #undef instantiate_single_cta_search_kernel -} // namespace raft::neighbors::cagra::detail::single_cta_search +} // namespace raft::neighbors::cagra::detail::single_cta_search """ mxdim_team = [(128, 8), (256, 16), (512, 32), (1024, 32)] @@ -102,7 +105,7 @@ with open(path, "w") as f: f.write(header) f.write( - f"instantiate_single_cta_select_and_run({team}, {mxdim},{data_t}, {idx_t}, {distance_t});\n" + f"instantiate_single_cta_select_and_run(\n {team}, {mxdim}, {data_t}, {idx_t}, {distance_t}, raft::neighbors::filtering::none_cagra_sample_filter);\n" ) f.write(trailer) diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu index eb45d4ff08..b8c23103ba 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 1024, float, uint32_t, float); +instantiate_single_cta_select_and_run( + 32, 1024, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu index 049715aa20..8ab1897119 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(8, 128, float, uint32_t, float); +instantiate_single_cta_select_and_run( + 8, 128, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu index 6028c283db..9fd36b4cb9 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(16, 256, float, uint32_t, float); +instantiate_single_cta_select_and_run( + 16, 256, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu index 2566e9cbd9..a9ee2c864b 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 512, float, uint32_t, float); +instantiate_single_cta_select_and_run( + 32, 512, float, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu index 4cd96ad9c0..dadc574b65 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 1024, float, uint64_t, float); +instantiate_single_cta_select_and_run( + 32, 1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu index 822a2efb2f..30e043f47e 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(8, 128, float, uint64_t, float); +instantiate_single_cta_select_and_run( + 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu index 80d1f76b9b..089e4c930f 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(16, 256, float, uint64_t, float); +instantiate_single_cta_select_and_run( + 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu index 06c3eaf10b..3e8ffb8bf8 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 512, float, uint64_t, float); +instantiate_single_cta_select_and_run( + 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu index b4c30ac943..279587738e 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 1024, int8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 32, 1024, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu index c8d0df3ac4..ef127d3f7d 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(8, 128, int8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 8, 128, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu index 19ecee91af..7fcfdcc28e 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(16, 256, int8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 16, 256, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu index 52c4eb7d6b..a6c606d99b 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 512, int8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 32, 512, int8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu index 4675e17084..0b8be56614 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 1024, uint8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 32, 1024, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu index e73e1071ee..4c193b9408 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(8, 128, uint8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 8, 128, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu index 01e26b5f29..bdf16d2f03 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(16, 256, uint8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 16, 256, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu index b0534b555f..93624df4aa 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu @@ -25,38 +25,42 @@ */ #include +#include namespace raft::neighbors::cagra::detail::single_cta_search { -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ cudaStream_t stream); -instantiate_single_cta_select_and_run(32, 512, uint8_t, uint32_t, float); +instantiate_single_cta_select_and_run( + 32, 512, uint8_t, uint32_t, float, raft::neighbors::filtering::none_cagra_sample_filter); #undef instantiate_single_cta_search_kernel diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index eadc88085f..90f271e3ee 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -15,6 +15,8 @@ */ #pragma once +#undef RAFT_EXPLICIT_INSTANTIATE_ONLY // Search with filter instantiation + #include "../test_utils.cuh" #include "ann_utils.cuh" #include @@ -25,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -41,8 +44,22 @@ #include #include -namespace raft::neighbors::experimental::cagra { +namespace raft::neighbors::cagra { namespace { + +/* A filter that excludes all indices below `offset`. */ +struct test_cagra_sample_filter { + static constexpr unsigned offset = 400; + inline _RAFT_HOST_DEVICE auto operator()( + // query index + const uint32_t query_ix, + // the index of the current sample inside the current inverted list + const uint32_t sample_ix) const + { + return sample_ix >= offset; + } +}; + // For sort_knn_graph test template void RandomSuffle(raft::host_matrix_view index) @@ -365,6 +382,162 @@ class AnnCagraSortTest : public ::testing::TestWithParam { rmm::device_uvector database; }; +template +class AnnCagraFilterTest : public ::testing::TestWithParam { + public: + AnnCagraFilterTest() + : stream_(resource::get_cuda_stream(handle_)), + ps(::testing::TestWithParam::GetParam()), + database(0, stream_), + search_queries(0, stream_) + { + } + + protected: + void testCagraFilter() + { + size_t queries_size = ps.n_queries * ps.k; + std::vector indices_Cagra(queries_size); + std::vector indices_naive(queries_size); + std::vector distances_Cagra(queries_size); + std::vector distances_naive(queries_size); + + { + rmm::device_uvector distances_naive_dev(queries_size, stream_); + rmm::device_uvector indices_naive_dev(queries_size, stream_); + auto* database_filtered_ptr = database.data() + test_cagra_sample_filter::offset * ps.dim; + naive_knn(handle_, + distances_naive_dev.data(), + indices_naive_dev.data(), + search_queries.data(), + database_filtered_ptr, + ps.n_queries, + ps.n_rows - test_cagra_sample_filter::offset, + ps.dim, + ps.k, + ps.metric); + raft::linalg::addScalar(indices_naive_dev.data(), + indices_naive_dev.data(), + IdxT(test_cagra_sample_filter::offset), + queries_size, + stream_); + update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); + update_host(indices_naive.data(), indices_naive_dev.data(), queries_size, stream_); + resource::sync_stream(handle_); + } + + { + rmm::device_uvector distances_dev(queries_size, stream_); + rmm::device_uvector indices_dev(queries_size, stream_); + + { + cagra::index_params index_params; + index_params.metric = ps.metric; // Note: currently ony the cagra::index_params metric is + // not used for knn_graph building. + cagra::search_params search_params; + search_params.algo = ps.algo; + search_params.max_queries = ps.max_queries; + search_params.team_size = ps.team_size; + search_params.hashmap_mode = cagra::hash_mode::HASH; + + auto database_view = raft::make_device_matrix_view( + (const DataT*)database.data(), ps.n_rows, ps.dim); + + cagra::index index(handle_); + if (ps.host_dataset) { + auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); + auto database_host_view = raft::make_host_matrix_view( + (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); + index = cagra::build(handle_, index_params, database_host_view); + } else { + index = cagra::build(handle_, index_params, database_view); + } + + if (!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); } + + auto search_queries_view = raft::make_device_matrix_view( + search_queries.data(), ps.n_queries, ps.dim); + auto indices_out_view = + raft::make_device_matrix_view(indices_dev.data(), ps.n_queries, ps.k); + auto dists_out_view = raft::make_device_matrix_view( + distances_dev.data(), ps.n_queries, ps.k); + + cagra::search_with_filtering(handle_, + search_params, + index, + search_queries_view, + indices_out_view, + dists_out_view, + test_cagra_sample_filter()); + update_host(distances_Cagra.data(), distances_dev.data(), queries_size, stream_); + update_host(indices_Cagra.data(), indices_dev.data(), queries_size, stream_); + resource::sync_stream(handle_); + } + + // Test filter + bool unacceptable_node = false; + for (int q = 0; q < ps.n_queries; q++) { + for (int i = 0; i < ps.k; i++) { + const auto n = indices_Cagra[q * ps.k + i]; + unacceptable_node = unacceptable_node | !test_cagra_sample_filter()(q, n); + } + } + EXPECT_FALSE(unacceptable_node); + + double min_recall = ps.min_recall; + EXPECT_TRUE(eval_neighbours(indices_naive, + indices_Cagra, + distances_naive, + distances_Cagra, + ps.n_queries, + ps.k, + 0.001, + min_recall)); + EXPECT_TRUE(eval_distances(handle_, + database.data(), + search_queries.data(), + indices_dev.data(), + distances_dev.data(), + ps.n_rows, + ps.dim, + ps.n_queries, + ps.k, + ps.metric, + 1.0e-4)); + } + } + + void SetUp() override + { + database.resize(((size_t)ps.n_rows) * ps.dim, stream_); + search_queries.resize(ps.n_queries * ps.dim, stream_); + raft::random::Rng r(1234ULL); + if constexpr (std::is_same{}) { + r.normal(database.data(), ps.n_rows * ps.dim, DataT(0.1), DataT(2.0), stream_); + r.normal(search_queries.data(), ps.n_queries * ps.dim, DataT(0.1), DataT(2.0), stream_); + } else { + r.uniformInt(database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20), stream_); + r.uniformInt(search_queries.data(), ps.n_queries * ps.dim, DataT(1), DataT(20), stream_); + } + resource::sync_stream(handle_); + } + + void TearDown() override + { + resource::sync_stream(handle_); + database.resize(0, stream_); + search_queries.resize(0, stream_); + } + + private: + raft::resources handle_; + rmm::cuda_stream_view stream_; + AnnCagraInputs ps; + rmm::device_uvector database; + rmm::device_uvector search_queries; +}; + inline std::vector generate_inputs() { // TODO(tfeher): test MULTI_CTA kernel with search_width > 1 to allow multiple CTA per queries @@ -467,4 +640,4 @@ inline std::vector generate_inputs() const std::vector inputs = generate_inputs(); -} // namespace raft::neighbors::experimental::cagra +} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh index f61e476652..175e4ef483 100644 --- a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh +++ b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh @@ -1,93 +1,107 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include // RAFT_EXPLICIT - -namespace raft::neighbors::cagra::detail { - -namespace multi_cta_search { -#define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - extern template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t block_size, \ - uint32_t result_buffer_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - cudaStream_t stream); - -instantiate_kernel_selection(32, 1024, float, uint64_t, float); -instantiate_kernel_selection(8, 128, float, uint64_t, float); -instantiate_kernel_selection(16, 256, float, uint64_t, float); -instantiate_kernel_selection(32, 512, float, uint64_t, float); - -#undef instantiate_kernel_selection -} // namespace multi_cta_search - -namespace single_cta_search { - -#define instantiate_single_cta_select_and_run( \ - TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ - extern template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ - INDEX_T* const topk_indices_ptr, \ - DISTANCE_T* const topk_distances_ptr, \ - const DATA_T* const queries_ptr, \ - const uint32_t num_queries, \ - const INDEX_T* dev_seed_ptr, \ - uint32_t* const num_executed_iterations, \ - uint32_t topk, \ - uint32_t num_itopk_candidates, \ - uint32_t block_size, \ - uint32_t smem_size, \ - int64_t hash_bitlen, \ - INDEX_T* hashmap_ptr, \ - size_t small_hash_bitlen, \ - size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ - uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ - cudaStream_t stream); - -instantiate_single_cta_select_and_run(32, 1024, float, uint64_t, float); -instantiate_single_cta_select_and_run(8, 128, float, uint64_t, float); -instantiate_single_cta_select_and_run(16, 256, float, uint64_t, float); -instantiate_single_cta_select_and_run(32, 512, float, uint64_t, float); - -} // namespace single_cta_search -} // namespace raft::neighbors::cagra::detail +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include // none_cagra_sample_filter +#include // RAFT_EXPLICIT + +namespace raft::neighbors::cagra::detail { + +namespace multi_cta_search { +#define instantiate_kernel_selection( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t block_size, \ + uint32_t result_buffer_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + uint32_t num_cta_per_query, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_kernel_selection( + 32, 1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_kernel_selection( + 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); + +#undef instantiate_kernel_selection +} // namespace multi_cta_search + +namespace single_cta_search { + +#define instantiate_single_cta_select_and_run( \ + TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T, SAMPLE_FILTER_T) \ + extern template void \ + select_and_run( \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ + INDEX_T* const topk_indices_ptr, \ + DISTANCE_T* const topk_distances_ptr, \ + const DATA_T* const queries_ptr, \ + const uint32_t num_queries, \ + const INDEX_T* dev_seed_ptr, \ + uint32_t* const num_executed_iterations, \ + uint32_t topk, \ + uint32_t num_itopk_candidates, \ + uint32_t block_size, \ + uint32_t smem_size, \ + int64_t hash_bitlen, \ + INDEX_T* hashmap_ptr, \ + size_t small_hash_bitlen, \ + size_t small_hash_reset_interval, \ + uint32_t num_random_samplings, \ + uint64_t rand_xor_mask, \ + uint32_t num_seeds, \ + size_t itopk_size, \ + size_t search_width, \ + size_t min_iterations, \ + size_t max_iterations, \ + SAMPLE_FILTER_T sample_filter, \ + cudaStream_t stream); + +instantiate_single_cta_select_and_run( + 32, 1024, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 8, 128, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 16, 256, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); +instantiate_single_cta_select_and_run( + 32, 512, float, uint64_t, float, raft::neighbors::filtering::none_cagra_sample_filter); + +} // namespace single_cta_search +} // namespace raft::neighbors::cagra::detail \ No newline at end of file diff --git a/cpp/test/neighbors/ann_cagra/test_float_int64_t.cu b/cpp/test/neighbors/ann_cagra/test_float_int64_t.cu index fa3d76d066..6f9e8dbd43 100644 --- a/cpp/test/neighbors/ann_cagra/test_float_int64_t.cu +++ b/cpp/test/neighbors/ann_cagra/test_float_int64_t.cu @@ -19,11 +19,11 @@ #include "../ann_cagra.cuh" #include "search_kernel_uint64_t.cuh" -namespace raft::neighbors::experimental::cagra { +namespace raft::neighbors::cagra { typedef AnnCagraTest AnnCagraTestF_I64; TEST_P(AnnCagraTestF_I64, AnnCagra) { this->testCagra(); } INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF_I64, ::testing::ValuesIn(inputs)); -} // namespace raft::neighbors::experimental::cagra +} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu index dbaf4dedd9..01d7e1e1ea 100644 --- a/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu +++ b/cpp/test/neighbors/ann_cagra/test_float_uint32_t.cu @@ -18,7 +18,7 @@ #include "../ann_cagra.cuh" -namespace raft::neighbors::experimental::cagra { +namespace raft::neighbors::cagra { typedef AnnCagraTest AnnCagraTestF_U32; TEST_P(AnnCagraTestF_U32, AnnCagra) { this->testCagra(); } @@ -26,7 +26,11 @@ TEST_P(AnnCagraTestF_U32, AnnCagra) { this->testCagra(); } typedef AnnCagraSortTest AnnCagraSortTestF_U32; TEST_P(AnnCagraSortTestF_U32, AnnCagraSort) { this->testCagraSort(); } +typedef AnnCagraFilterTest AnnCagraFilterTestF_U32; +TEST_P(AnnCagraFilterTestF_U32, AnnCagraFilter) { this->testCagraFilter(); } + INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestF_U32, ::testing::ValuesIn(inputs)); INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestF_U32, ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestF_U32, ::testing::ValuesIn(inputs)); -} // namespace raft::neighbors::experimental::cagra +} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu index ba60131677..ee06d369fa 100644 --- a/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu +++ b/cpp/test/neighbors/ann_cagra/test_int8_t_uint32_t.cu @@ -18,14 +18,17 @@ #include "../ann_cagra.cuh" -namespace raft::neighbors::experimental::cagra { +namespace raft::neighbors::cagra { typedef AnnCagraTest AnnCagraTestI8_U32; TEST_P(AnnCagraTestI8_U32, AnnCagra) { this->testCagra(); } typedef AnnCagraSortTest AnnCagraSortTestI8_U32; TEST_P(AnnCagraSortTestI8_U32, AnnCagraSort) { this->testCagraSort(); } +typedef AnnCagraFilterTest AnnCagraFilterTestI8_U32; +TEST_P(AnnCagraFilterTestI8_U32, AnnCagraFilter) { this->testCagraFilter(); } INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestI8_U32, ::testing::ValuesIn(inputs)); INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestI8_U32, ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestI8_U32, ::testing::ValuesIn(inputs)); -} // namespace raft::neighbors::experimental::cagra +} // namespace raft::neighbors::cagra diff --git a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu index cc172e4833..3243e73ccd 100644 --- a/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu +++ b/cpp/test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu @@ -18,7 +18,7 @@ #include "../ann_cagra.cuh" -namespace raft::neighbors::experimental::cagra { +namespace raft::neighbors::cagra { typedef AnnCagraTest AnnCagraTestU8_U32; TEST_P(AnnCagraTestU8_U32, AnnCagra) { this->testCagra(); } @@ -26,7 +26,11 @@ TEST_P(AnnCagraTestU8_U32, AnnCagra) { this->testCagra(); } typedef AnnCagraSortTest AnnCagraSortTestU8_U32; TEST_P(AnnCagraSortTestU8_U32, AnnCagraSort) { this->testCagraSort(); } +typedef AnnCagraFilterTest AnnCagraFilterTestU8_U32; +TEST_P(AnnCagraFilterTestU8_U32, AnnCagraSort) { this->testCagraFilter(); } + INSTANTIATE_TEST_CASE_P(AnnCagraTest, AnnCagraTestU8_U32, ::testing::ValuesIn(inputs)); INSTANTIATE_TEST_CASE_P(AnnCagraSortTest, AnnCagraSortTestU8_U32, ::testing::ValuesIn(inputs)); +INSTANTIATE_TEST_CASE_P(AnnCagraFilterTest, AnnCagraFilterTestU8_U32, ::testing::ValuesIn(inputs)); -} // namespace raft::neighbors::experimental::cagra +} // namespace raft::neighbors::cagra