diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2a70632497..66862ada5e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,7 +43,7 @@ repos: additional_dependencies: [toml] args: ["--config=pyproject.toml"] - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v16.0.1 + rev: v16.0.6 hooks: - id: clang-format types_or: [c, c++, cuda] diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 7e921decd5..65b4232d83 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - breathe - c-compiler -- clang-tools=16.0.1 -- clang=16.0.1 +- clang-tools=16.0.6 +- clang=16.0.6 - cmake>=3.26.4 - cuda-profiler-api=11.8.86 - cuda-python>=11.7.1,<12.0a0 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 2ea685b529..9db38ed1de 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - breathe - c-compiler -- clang-tools=16.0.1 -- clang=16.0.1 +- clang-tools=16.0.6 +- clang=16.0.6 - cmake>=3.26.4 - cuda-cudart-dev - cuda-profiler-api diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 742040ad50..5a9ef5bd32 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - benchmark>=1.8.2 - c-compiler -- clang-tools=16.0.1 -- clang=16.0.1 +- clang-tools=16.0.6 +- clang=16.0.6 - cmake>=3.26.4 - cuda-profiler-api=11.8.86 - cuda-version=11.8 diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml index b817968379..91d0fdb729 100644 --- a/conda/recipes/raft-ann-bench/meta.yaml +++ b/conda/recipes/raft-ann-bench/meta.yaml @@ -78,6 +78,8 @@ requirements: - h5py {{ h5py_version }} - benchmark - matplotlib + # rmm is needed to determine if package is gpu-enabled + - rmm ={{ minor_version }} - python - pandas - pyyaml diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 4e91ee0690..4ec977700d 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -211,9 +211,10 @@ void bench_search(::benchmark::State& state, try { algo->set_search_dataset(dataset->base_set(algo_property.dataset_memory_type), dataset->base_set_size()); - } catch (const std::exception&) { + } catch (const std::exception& ex) { state.SkipWithError("The algorithm '" + index.name + - "' requires the base set, but it's not available."); + "' requires the base set, but it's not available. " + + "Exception: " + std::string(ex.what())); return; } } diff --git a/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu b/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu index 99481c2921..3b2e97062f 100644 --- a/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu +++ b/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu @@ -33,8 +33,7 @@ template void parse_build_param(const nlohmann::json& conf, typename raft::bench::ann::Ggnn::BuildParam& param) { - param.dataset_size = conf.at("dataset_size"); - param.k = conf.at("k"); + param.k = conf.at("k"); if (conf.contains("k_build")) { param.k_build = conf.at("k_build"); } if (conf.contains("segment_size")) { param.segment_size = conf.at("segment_size"); } diff --git a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh index 74c7cddc3c..664ec511dd 100644 --- a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh +++ b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh @@ -38,8 +38,6 @@ class Ggnn : public ANN { int num_layers{4}; // L float tau{0.5}; int refine_iterations{2}; - - size_t dataset_size; int k; // GGNN requires to know k during building }; @@ -182,12 +180,6 @@ GgnnImpl::GgnnImpl(Metric metric, } if (dim != D) { throw std::runtime_error("mis-matched dim"); } - - int device; - RAFT_CUDA_TRY(cudaGetDevice(&device)); - - ggnn_ = std::make_unique( - device, build_param_.dataset_size, build_param_.num_layers, true, build_param_.tau); } template @@ -195,11 +187,10 @@ void GgnnImpl::build(const T* dataset, size_t nrow, cudaStream_t stream) { - if (nrow != build_param_.dataset_size) { - throw std::runtime_error( - "build_param_.dataset_size = " + std::to_string(build_param_.dataset_size) + - " , but nrow = " + std::to_string(nrow)); - } + int device; + RAFT_CUDA_TRY(cudaGetDevice(&device)); + ggnn_ = std::make_unique( + device, nrow, build_param_.num_layers, true, build_param_.tau); ggnn_->set_base_data(dataset); ggnn_->set_stream(stream); @@ -212,11 +203,6 @@ void GgnnImpl::build(const T* dataset, template void GgnnImpl::set_search_dataset(const T* dataset, size_t nrow) { - if (nrow != build_param_.dataset_size) { - throw std::runtime_error( - "build_param_.dataset_size = " + std::to_string(build_param_.dataset_size) + - " , but nrow = " + std::to_string(nrow)); - } ggnn_->set_base_data(dataset); } diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h index 5cd33ef94d..4d7b993aa1 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h +++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h @@ -31,6 +31,8 @@ #include #include +#include + #include "../common/ann_types.hpp" #include @@ -164,13 +166,13 @@ class HnswLib : public ANN { struct BuildParam { int M; int ef_construction; - int num_threads{1}; + int num_threads = omp_get_num_procs(); }; using typename ANN::AnnSearchParam; struct SearchParam : public AnnSearchParam { int ef; - int num_threads{1}; + int num_threads = omp_get_num_procs(); }; HnswLib(Metric metric, int dim, const BuildParam& param); diff --git a/cpp/bench/prims/matrix/select_k.cu b/cpp/bench/prims/matrix/select_k.cu index 1bff66cac4..d3994e59c5 100644 --- a/cpp/bench/prims/matrix/select_k.cu +++ b/cpp/bench/prims/matrix/select_k.cu @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -38,6 +39,19 @@ namespace raft::matrix { using namespace raft::bench; // NOLINT +template +struct replace_with_mask { + KeyT replacement; + int64_t line_length; + int64_t spared_inputs; + constexpr auto inline operator()(int64_t offset, KeyT x, uint8_t mask) -> KeyT + { + auto i = offset % line_length; + // don't replace all the inputs, spare a few elements at the beginning of the input + return (mask && i >= spared_inputs) ? replacement : x; + } +}; + template struct selection : public fixture { explicit selection(const select::params& p) @@ -67,6 +81,21 @@ struct selection : public fixture { } } raft::random::uniform(handle, state, in_dists_.data(), in_dists_.size(), min_value, max_value); + if (p.frac_infinities > 0.0) { + rmm::device_uvector mask_buf(p.batch_size * p.len, stream); + auto mask = make_device_vector_view(mask_buf.data(), mask_buf.size()); + raft::random::bernoulli(handle, state, mask, p.frac_infinities); + KeyT bound = p.select_min ? raft::upper_bound() : raft::lower_bound(); + auto mask_in = + make_device_vector_view(mask_buf.data(), mask_buf.size()); + auto dists_in = make_device_vector_view(in_dists_.data(), in_dists_.size()); + auto dists_out = make_device_vector_view(in_dists_.data(), in_dists_.size()); + raft::linalg::map_offset(handle, + dists_out, + replace_with_mask{bound, int64_t(p.len), int64_t(p.k / 2)}, + dists_in, + mask_in); + } } void run_benchmark(::benchmark::State& state) override // NOLINT @@ -75,8 +104,12 @@ struct selection : public fixture { std::ostringstream label_stream; label_stream << params_.batch_size << "#" << params_.len << "#" << params_.k; if (params_.use_same_leading_bits) { label_stream << "#same-leading-bits"; } + if (params_.frac_infinities > 0) { label_stream << "#infs-" << params_.frac_infinities; } state.SetLabel(label_stream.str()); - loop_on_state(state, [this]() { + common::nvtx::range case_scope("%s - %s", state.name().c_str(), label_stream.str().c_str()); + int iter = 0; + loop_on_state(state, [&iter, this]() { + common::nvtx::range lap_scope("lap-", iter++); select::select_k_impl(handle, Algo, in_dists_.data(), @@ -149,6 +182,35 @@ const std::vector kInputs{ {10, 1000000, 64, true, false, true}, {10, 1000000, 128, true, false, true}, {10, 1000000, 256, true, false, true}, + + {10, 1000000, 1, true, false, false, true, 0.1}, + {10, 1000000, 16, true, false, false, true, 0.1}, + {10, 1000000, 64, true, false, false, true, 0.1}, + {10, 1000000, 128, true, false, false, true, 0.1}, + {10, 1000000, 256, true, false, false, true, 0.1}, + + {10, 1000000, 1, true, false, false, true, 0.9}, + {10, 1000000, 16, true, false, false, true, 0.9}, + {10, 1000000, 64, true, false, false, true, 0.9}, + {10, 1000000, 128, true, false, false, true, 0.9}, + {10, 1000000, 256, true, false, false, true, 0.9}, + {1000, 10000, 1, true, false, false, true, 0.9}, + {1000, 10000, 16, true, false, false, true, 0.9}, + {1000, 10000, 64, true, false, false, true, 0.9}, + {1000, 10000, 128, true, false, false, true, 0.9}, + {1000, 10000, 256, true, false, false, true, 0.9}, + + {10, 1000000, 1, true, false, false, true, 1.0}, + {10, 1000000, 16, true, false, false, true, 1.0}, + {10, 1000000, 64, true, false, false, true, 1.0}, + {10, 1000000, 128, true, false, false, true, 1.0}, + {10, 1000000, 256, true, false, false, true, 1.0}, + {1000, 10000, 1, true, false, false, true, 1.0}, + {1000, 10000, 16, true, false, false, true, 1.0}, + {1000, 10000, 64, true, false, false, true, 1.0}, + {1000, 10000, 128, true, false, false, true, 1.0}, + {1000, 10000, 256, true, false, false, true, 1.0}, + {1000, 10000, 256, true, false, false, true, 0.999}, }; #define SELECTION_REGISTER(KeyT, IdxT, A) \ @@ -157,28 +219,28 @@ const std::vector kInputs{ RAFT_BENCH_REGISTER(SelectK, #KeyT "/" #IdxT "/" #A, kInputs); \ } -SELECTION_REGISTER(float, uint32_t, kPublicApi); // NOLINT -SELECTION_REGISTER(float, uint32_t, kRadix8bits); // NOLINT -SELECTION_REGISTER(float, uint32_t, kRadix11bits); // NOLINT -SELECTION_REGISTER(float, uint32_t, kRadix11bitsExtraPass); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpAuto); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpImmediate); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpFiltered); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpDistributed); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpDistributedShm); // NOLINT +SELECTION_REGISTER(float, uint32_t, kPublicApi); // NOLINT +SELECTION_REGISTER(float, uint32_t, kRadix8bits); // NOLINT +SELECTION_REGISTER(float, uint32_t, kRadix11bits); // NOLINT +SELECTION_REGISTER(float, uint32_t, kRadix11bitsExtraPass); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpAuto); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpImmediate); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpFiltered); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpDistributed); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpDistributedShm); // NOLINT SELECTION_REGISTER(double, uint32_t, kRadix8bits); // NOLINT SELECTION_REGISTER(double, uint32_t, kRadix11bits); // NOLINT SELECTION_REGISTER(double, uint32_t, kRadix11bitsExtraPass); // NOLINT SELECTION_REGISTER(double, uint32_t, kWarpAuto); // NOLINT -SELECTION_REGISTER(double, int64_t, kRadix8bits); // NOLINT -SELECTION_REGISTER(double, int64_t, kRadix11bits); // NOLINT -SELECTION_REGISTER(double, int64_t, kRadix11bitsExtraPass); // NOLINT -SELECTION_REGISTER(double, int64_t, kWarpImmediate); // NOLINT -SELECTION_REGISTER(double, int64_t, kWarpFiltered); // NOLINT -SELECTION_REGISTER(double, int64_t, kWarpDistributed); // NOLINT -SELECTION_REGISTER(double, int64_t, kWarpDistributedShm); // NOLINT +SELECTION_REGISTER(double, int64_t, kRadix8bits); // NOLINT +SELECTION_REGISTER(double, int64_t, kRadix11bits); // NOLINT +SELECTION_REGISTER(double, int64_t, kRadix11bitsExtraPass); // NOLINT +SELECTION_REGISTER(double, int64_t, kWarpImmediate); // NOLINT +SELECTION_REGISTER(double, int64_t, kWarpFiltered); // NOLINT +SELECTION_REGISTER(double, int64_t, kWarpDistributed); // NOLINT +SELECTION_REGISTER(double, int64_t, kWarpDistributedShm); // NOLINT // For learning a heuristic of which selection algorithm to use, we // have a couple of additional constraints when generating the dataset: diff --git a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh index 866a0ebdfa..ade3a6e348 100644 --- a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh +++ b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh @@ -438,7 +438,7 @@ __global__ void __launch_bounds__((WarpSize * BlockDimY)) adjust_centers_kernel(MathT* centers, // [n_clusters, dim] IdxT n_clusters, IdxT dim, - const T* dataset, // [n_rows, dim] + const T* dataset, // [n_rows, dim] IdxT n_rows, const LabelT* labels, // [n_rows] const CounterT* cluster_sizes, // [n_clusters] diff --git a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index 328080da1f..8e41aa96f3 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -75,7 +75,7 @@ namespace numpy_serializer { #if RAFT_SYSTEM_LITTLE_ENDIAN == 1 #define RAFT_NUMPY_HOST_ENDIAN_CHAR RAFT_NUMPY_LITTLE_ENDIAN_CHAR -#else // RAFT_SYSTEM_LITTLE_ENDIAN == 1 +#else // RAFT_SYSTEM_LITTLE_ENDIAN == 1 #define RAFT_NUMPY_HOST_ENDIAN_CHAR RAFT_NUMPY_BIG_ENDIAN_CHAR #endif // RAFT_SYSTEM_LITTLE_ENDIAN == 1 diff --git a/cpp/include/raft/core/detail/nvtx.hpp b/cpp/include/raft/core/detail/nvtx.hpp index e734c99029..f077a49b77 100644 --- a/cpp/include/raft/core/detail/nvtx.hpp +++ b/cpp/include/raft/core/detail/nvtx.hpp @@ -193,7 +193,7 @@ inline void pop_range() } // namespace raft::common::nvtx::detail -#else // NVTX_ENABLED +#else // NVTX_ENABLED namespace raft::common::nvtx::detail { diff --git a/cpp/include/raft/core/kvp.hpp b/cpp/include/raft/core/kvp.hpp index 2e0d1117a1..192d160d45 100644 --- a/cpp/include/raft/core/kvp.hpp +++ b/cpp/include/raft/core/kvp.hpp @@ -32,8 +32,8 @@ struct KeyValuePair { typedef _Key Key; ///< Key data type typedef _Value Value; ///< Value data type - Key key; ///< Item key - Value value; ///< Item value + Key key; ///< Item key + Value value; ///< Item value /// Constructor RAFT_INLINE_FUNCTION KeyValuePair() {} diff --git a/cpp/include/raft/core/resource/resource_types.hpp b/cpp/include/raft/core/resource/resource_types.hpp index 2dc4eb1f9d..8e331293bf 100644 --- a/cpp/include/raft/core/resource/resource_types.hpp +++ b/cpp/include/raft/core/resource/resource_types.hpp @@ -42,7 +42,7 @@ enum resource_type { THRUST_POLICY, // thrust execution policy WORKSPACE_RESOURCE, // rmm device memory resource - LAST_KEY // reserved for the last key + LAST_KEY // reserved for the last key }; /** diff --git a/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h b/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h index 10827a8778..f659ed256d 100644 --- a/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h +++ b/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h @@ -397,7 +397,7 @@ class EpilogueWithBroadcastCustom : public EpilogueBase diff --git a/cpp/include/raft/distance/detail/fused_l2_nn.cuh b/cpp/include/raft/distance/detail/fused_l2_nn.cuh index 68922943f4..f0f12acdb1 100644 --- a/cpp/include/raft/distance/detail/fused_l2_nn.cuh +++ b/cpp/include/raft/distance/detail/fused_l2_nn.cuh @@ -16,11 +16,11 @@ #pragma once -#include // size_t -#include // std::numeric_limits -#include // raft::KeyValuePair -#include // raft::identity_op -#include // ops::l2_exp_distance_op +#include // size_t +#include // std::numeric_limits +#include // raft::KeyValuePair +#include // raft::identity_op +#include // ops::l2_exp_distance_op #include #include // PairwiseDistances #include // Policy diff --git a/cpp/include/raft/distance/detail/masked_distance_base.cuh b/cpp/include/raft/distance/detail/masked_distance_base.cuh index 5a33c9ce4a..55da634145 100644 --- a/cpp/include/raft/distance/detail/masked_distance_base.cuh +++ b/cpp/include/raft/distance/detail/masked_distance_base.cuh @@ -217,7 +217,7 @@ struct MaskedDistances : public BaseClass { } // tile_idx_n } // idx_g rowEpilog_op(tile_idx_m); - } // tile_idx_m + } // tile_idx_m } private: diff --git a/cpp/include/raft/distance/detail/pairwise_distance_base.cuh b/cpp/include/raft/distance/detail/pairwise_distance_base.cuh index 58b5daa8ca..c6b09be31e 100644 --- a/cpp/include/raft/distance/detail/pairwise_distance_base.cuh +++ b/cpp/include/raft/distance/detail/pairwise_distance_base.cuh @@ -18,7 +18,7 @@ #include // ceildiv #include // RAFT_CUDA_TRY -#include // size_t +#include // size_t namespace raft { namespace distance { diff --git a/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh b/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh index dd58ab4328..e1dc6f9b37 100644 --- a/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh +++ b/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh @@ -45,7 +45,7 @@ void pairwise_matrix_dispatch(OpT distance_op, cudaStream_t stream, bool is_row_major) RAFT_EXPLICIT; -}; // namespace raft::distance::detail +}; // namespace raft::distance::detail #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY diff --git a/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h b/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h index cd748b9e6b..951f8a0132 100644 --- a/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h +++ b/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h @@ -57,8 +57,8 @@ namespace threadblock { /// /// Satisfies: ReadableTileIterator | PredicatedTileIterator | ForwardTileIterator /// -template diff --git a/cpp/include/raft/distance/distance-ext.cuh b/cpp/include/raft/distance/distance-ext.cuh index 3f7f2b0a23..7171ba605f 100644 --- a/cpp/include/raft/distance/distance-ext.cuh +++ b/cpp/include/raft/distance/distance-ext.cuh @@ -140,8 +140,8 @@ void pairwise_distance(raft::resources const& handle, raft::distance::DistanceType metric, Type metric_arg = 2.0f) RAFT_EXPLICIT; -}; // namespace distance -}; // namespace raft +}; // namespace distance +}; // namespace raft #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY diff --git a/cpp/include/raft/linalg/add.cuh b/cpp/include/raft/linalg/add.cuh index 30f4a2d167..b2cd736c57 100644 --- a/cpp/include/raft/linalg/add.cuh +++ b/cpp/include/raft/linalg/add.cuh @@ -217,7 +217,7 @@ void add_scalar(raft::resources const& handle, /** @} */ // end of group add -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/binary_op.cuh b/cpp/include/raft/linalg/binary_op.cuh index f6889e959b..03beb1d1d1 100644 --- a/cpp/include/raft/linalg/binary_op.cuh +++ b/cpp/include/raft/linalg/binary_op.cuh @@ -82,7 +82,7 @@ void binary_op(raft::resources const& handle, InType in1, InType in2, OutType ou /** @} */ // end of group binary_op -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/coalesced_reduction.cuh b/cpp/include/raft/linalg/coalesced_reduction.cuh index 5609656234..afa58d73fc 100644 --- a/cpp/include/raft/linalg/coalesced_reduction.cuh +++ b/cpp/include/raft/linalg/coalesced_reduction.cuh @@ -160,7 +160,7 @@ void coalesced_reduction(raft::resources const& handle, /** @} */ // end of group coalesced_reduction -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/contractions.cuh b/cpp/include/raft/linalg/contractions.cuh index 3b1e8c41c4..cb6488bedf 100644 --- a/cpp/include/raft/linalg/contractions.cuh +++ b/cpp/include/raft/linalg/contractions.cuh @@ -100,7 +100,7 @@ struct KernelPolicy { SmemSize = 2 * SmemPage * sizeof(DataT), }; // enum -}; // struct KernelPolicy +}; // struct KernelPolicy template struct ColKernelPolicy { diff --git a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp index 5a7356a4c2..d15e343c9a 100644 --- a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp +++ b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp @@ -550,7 +550,7 @@ cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, template <> inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, // NOLINT int n, - float* const A[], // NOLINT + float* const A[], // NOLINT int lda, int* P, int* info, @@ -564,7 +564,7 @@ inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, // NOLINT template <> inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, // NOLINT int n, - double* const A[], // NOLINT + double* const A[], // NOLINT int lda, int* P, int* info, diff --git a/cpp/include/raft/linalg/divide.cuh b/cpp/include/raft/linalg/divide.cuh index d617b065da..17ec5c3136 100644 --- a/cpp/include/raft/linalg/divide.cuh +++ b/cpp/include/raft/linalg/divide.cuh @@ -96,7 +96,7 @@ void divide_scalar(raft::resources const& handle, /** @} */ // end of group add -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh index 954bf19334..57f3b61388 100644 --- a/cpp/include/raft/linalg/eig.cuh +++ b/cpp/include/raft/linalg/eig.cuh @@ -220,7 +220,7 @@ void eig_jacobi(raft::resources const& handle, /** @} */ // end of eig -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/gemv.cuh b/cpp/include/raft/linalg/gemv.cuh index 640964d018..610ea07f96 100644 --- a/cpp/include/raft/linalg/gemv.cuh +++ b/cpp/include/raft/linalg/gemv.cuh @@ -305,6 +305,6 @@ void gemv(raft::resources const& handle, } /** @} */ // end of gemv -}; // namespace linalg -}; // namespace raft +}; // namespace linalg +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/lstsq.cuh b/cpp/include/raft/linalg/lstsq.cuh index 20588cbe17..21575d7806 100644 --- a/cpp/include/raft/linalg/lstsq.cuh +++ b/cpp/include/raft/linalg/lstsq.cuh @@ -245,7 +245,7 @@ void lstsq_qr(raft::resources const& handle, /** @} */ // end of lstsq -}; // namespace linalg -}; // namespace raft +}; // namespace linalg +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/matrix_vector_op.cuh b/cpp/include/raft/linalg/matrix_vector_op.cuh index e620d227eb..a65f6ed390 100644 --- a/cpp/include/raft/linalg/matrix_vector_op.cuh +++ b/cpp/include/raft/linalg/matrix_vector_op.cuh @@ -240,7 +240,7 @@ void matrix_vector_op(raft::resources const& handle, /** @} */ // end of group matrix_vector_op -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/mean_squared_error.cuh b/cpp/include/raft/linalg/mean_squared_error.cuh index d45f11524d..b59a0fcef7 100644 --- a/cpp/include/raft/linalg/mean_squared_error.cuh +++ b/cpp/include/raft/linalg/mean_squared_error.cuh @@ -79,7 +79,7 @@ void mean_squared_error(raft::resources const& handle, /** @} */ // end of group mean_squared_error -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/multiply.cuh b/cpp/include/raft/linalg/multiply.cuh index 3ade108235..9973a3cc6c 100644 --- a/cpp/include/raft/linalg/multiply.cuh +++ b/cpp/include/raft/linalg/multiply.cuh @@ -98,7 +98,7 @@ void multiply_scalar( /** @} */ // end of group multiply -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/power.cuh b/cpp/include/raft/linalg/power.cuh index 26ac1035ca..5c7dcbd5cf 100644 --- a/cpp/include/raft/linalg/power.cuh +++ b/cpp/include/raft/linalg/power.cuh @@ -154,7 +154,7 @@ void power_scalar( /** @} */ // end of group add -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/reduce.cuh b/cpp/include/raft/linalg/reduce.cuh index a3d0ef71d0..3181dd0224 100644 --- a/cpp/include/raft/linalg/reduce.cuh +++ b/cpp/include/raft/linalg/reduce.cuh @@ -162,7 +162,7 @@ void reduce(raft::resources const& handle, /** @} */ // end of group reduction -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/reduce_cols_by_key.cuh b/cpp/include/raft/linalg/reduce_cols_by_key.cuh index 6eaf1e2ba7..5ed0fb7407 100644 --- a/cpp/include/raft/linalg/reduce_cols_by_key.cuh +++ b/cpp/include/raft/linalg/reduce_cols_by_key.cuh @@ -113,7 +113,7 @@ void reduce_cols_by_key( /** @} */ // end of group reduce_cols_by_key -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/reduce_rows_by_key.cuh b/cpp/include/raft/linalg/reduce_rows_by_key.cuh index fa624b2191..7d93c3946f 100644 --- a/cpp/include/raft/linalg/reduce_rows_by_key.cuh +++ b/cpp/include/raft/linalg/reduce_rows_by_key.cuh @@ -192,7 +192,7 @@ void reduce_rows_by_key( /** @} */ // end of group reduce_rows_by_key -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/rsvd.cuh b/cpp/include/raft/linalg/rsvd.cuh index 2dece5b957..163f360481 100644 --- a/cpp/include/raft/linalg/rsvd.cuh +++ b/cpp/include/raft/linalg/rsvd.cuh @@ -876,7 +876,7 @@ void randomized_svd(const raft::resources& handle, /** @} */ // end of group rsvd -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/sqrt.cuh b/cpp/include/raft/linalg/sqrt.cuh index 99754c4eb2..81b7ab7dec 100644 --- a/cpp/include/raft/linalg/sqrt.cuh +++ b/cpp/include/raft/linalg/sqrt.cuh @@ -84,7 +84,7 @@ void sqrt(raft::resources const& handle, InType in, OutType out) /** @} */ // end of group add -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/strided_reduction.cuh b/cpp/include/raft/linalg/strided_reduction.cuh index f971d0e40b..c7ff000e00 100644 --- a/cpp/include/raft/linalg/strided_reduction.cuh +++ b/cpp/include/raft/linalg/strided_reduction.cuh @@ -171,7 +171,7 @@ void strided_reduction(raft::resources const& handle, /** @} */ // end of group strided_reduction -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/subtract.cuh b/cpp/include/raft/linalg/subtract.cuh index 688e60a806..f4243f9dc1 100644 --- a/cpp/include/raft/linalg/subtract.cuh +++ b/cpp/include/raft/linalg/subtract.cuh @@ -223,7 +223,7 @@ void subtract_scalar( /** @} */ // end of group subtract -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/svd.cuh b/cpp/include/raft/linalg/svd.cuh index 08f9462ba9..f7071de75b 100644 --- a/cpp/include/raft/linalg/svd.cuh +++ b/cpp/include/raft/linalg/svd.cuh @@ -416,7 +416,7 @@ void svd_reconstruction(raft::resources const& handle, /** @} */ // end of group svd -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/ternary_op.cuh b/cpp/include/raft/linalg/ternary_op.cuh index f46133abd9..67b04c6791 100644 --- a/cpp/include/raft/linalg/ternary_op.cuh +++ b/cpp/include/raft/linalg/ternary_op.cuh @@ -83,7 +83,7 @@ void ternary_op( /** @} */ // end of group ternary_op -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh index afe1962223..1b46082fbe 100644 --- a/cpp/include/raft/linalg/transpose.cuh +++ b/cpp/include/raft/linalg/transpose.cuh @@ -103,7 +103,7 @@ auto transpose(raft::resources const& handle, /** @} */ // end of group transpose -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/unary_op.cuh b/cpp/include/raft/linalg/unary_op.cuh index 47a432f415..5ebe27923a 100644 --- a/cpp/include/raft/linalg/unary_op.cuh +++ b/cpp/include/raft/linalg/unary_op.cuh @@ -125,7 +125,7 @@ void write_only_unary_op(const raft::resources& handle, OutType out, Lambda op) /** @} */ // end of group unary_op -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/matrix/col_wise_sort.cuh b/cpp/include/raft/matrix/col_wise_sort.cuh index 887741ad71..c94b2506d3 100644 --- a/cpp/include/raft/matrix/col_wise_sort.cuh +++ b/cpp/include/raft/matrix/col_wise_sort.cuh @@ -134,6 +134,6 @@ void sort_cols_per_row(Args... args) /** @} */ // end of group col_wise_sort -}; // end namespace raft::matrix +}; // end namespace raft::matrix #endif \ No newline at end of file diff --git a/cpp/include/raft/matrix/detail/select_k-ext.cuh b/cpp/include/raft/matrix/detail/select_k-ext.cuh index f934d7e3b4..870f0c3240 100644 --- a/cpp/include/raft/matrix/detail/select_k-ext.cuh +++ b/cpp/include/raft/matrix/detail/select_k-ext.cuh @@ -16,8 +16,8 @@ #pragma once -#include // uint32_t -#include // __half +#include // uint32_t +#include // __half #include #include // RAFT_EXPLICIT #include // rmm:cuda_stream_view diff --git a/cpp/include/raft/matrix/detail/select_warpsort.cuh b/cpp/include/raft/matrix/detail/select_warpsort.cuh index dc86a04733..2927604e7d 100644 --- a/cpp/include/raft/matrix/detail/select_warpsort.cuh +++ b/cpp/include/raft/matrix/detail/select_warpsort.cuh @@ -959,7 +959,7 @@ void calc_launch_parameter( if (batch_size >= size_t(another_min_grid_size) // still have enough work && another_block_size < block_size // protect against an infinite loop && another_min_grid_size * another_block_size > - min_grid_size * block_size // improve occupancy + min_grid_size * block_size // improve occupancy ) { block_size = another_block_size; min_grid_size = another_min_grid_size; diff --git a/cpp/include/raft/neighbors/ann_types.hpp b/cpp/include/raft/neighbors/ann_types.hpp index 469d3c09d4..c17be4a8ff 100644 --- a/cpp/include/raft/neighbors/ann_types.hpp +++ b/cpp/include/raft/neighbors/ann_types.hpp @@ -49,4 +49,4 @@ struct search_params {}; /** @} */ // end group ann_types -}; // namespace raft::neighbors::ann +}; // namespace raft::neighbors::ann diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp index fe2cc5c52f..602db5d5ba 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp @@ -53,7 +53,7 @@ _RAFT_DEVICE void compute_distance_to_random_nodes( INDEX_T* const result_indices_ptr, // [num_pickup] DISTANCE_T* const result_distances_ptr, // [num_pickup] const float* const query_buffer, - const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] + const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] const std::size_t dataset_dim, const std::size_t dataset_size, const std::size_t dataset_ld, diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh index 314ab6e6a6..6ea1e34032 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh @@ -190,9 +190,9 @@ struct search : public search_plan_impl { void operator()(raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index 5284e2d2cd..d462439f23 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -48,7 +48,7 @@ namespace multi_cta_search { template __device__ void pickup_next_parents(INDEX_T* const next_parent_indices, // [search_width] const uint32_t search_width, - INDEX_T* const itopk_indices, // [num_itopk] + INDEX_T* const itopk_indices, // [num_itopk] const size_t num_itopk, uint32_t* const terminate_flag) { @@ -86,8 +86,8 @@ __device__ void pickup_next_parents(INDEX_T* const next_parent_indices, // [sea } template -__device__ inline void topk_by_bitonic_sort(float* distances, // [num_elements] - INDEX_T* indices, // [num_elements] +__device__ inline void topk_by_bitonic_sort(float* distances, // [num_elements] + INDEX_T* indices, // [num_elements] const uint32_t num_elements, const uint32_t num_itopk // num_itopk <= num_elements ) @@ -142,7 +142,7 @@ __launch_bounds__(1024, 1) __global__ void search_kernel( const uint32_t graph_degree, const unsigned num_distilation, const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] + const INDEX_T* seed_ptr, // [num_queries, num_seeds] const uint32_t num_seeds, INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const uint32_t hash_bitlen, @@ -373,27 +373,32 @@ struct search_kernel_config { static auto choose_buffer_size(unsigned result_buffer_size, unsigned block_size) -> kernel_t { if (result_buffer_size <= 64) { - return choose_max_elements<64>(block_size); + return search_kernel; } else if (result_buffer_size <= 128) { - return choose_max_elements<128>(block_size); + return search_kernel; } else if (result_buffer_size <= 256) { - return choose_max_elements<256>(block_size); + return search_kernel; } THROW("Result buffer size %u larger than max buffer size %u", result_buffer_size, 256); } - - template - // Todo: rename this to choose block_size - static auto choose_max_elements(unsigned block_size) -> kernel_t - { - return search_kernel; - } }; template dataset, raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index d52e066669..f2c793789e 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -98,7 +98,7 @@ __global__ void random_pickup_kernel( const std::size_t num_pickup, const unsigned num_distilation, const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] + const INDEX_T* seed_ptr, // [num_queries, num_seeds] const uint32_t num_seeds, INDEX_T* const result_indices_ptr, // [num_queries, ldr] DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] @@ -170,7 +170,7 @@ void random_pickup(const DATA_T* const dataset_ptr, // [dataset_size, dataset_d const std::size_t num_pickup, const unsigned num_distilation, const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] + const INDEX_T* seed_ptr, // [num_queries, num_seeds] const uint32_t num_seeds, INDEX_T* const result_indices_ptr, // [num_queries, ldr] DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] @@ -310,18 +310,18 @@ template = search_width * graph_degree + INDEX_T* const result_indices_ptr, // [num_queries, ldd] + DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] + const std::uint32_t ldd // (*) ldd >= search_width * graph_degree ) { const uint32_t ldb = hashmap::get_size(hash_bitlen); @@ -371,19 +371,19 @@ template = search_width * graph_degree + INDEX_T* const result_indices_ptr, // [num_queries, ldd] + DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] + const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree cudaStream_t cuda_stream = 0) { const auto block_size = 128; @@ -437,7 +437,7 @@ void remove_parent_bit(const std::uint32_t num_queries, } template -__global__ void batched_memcpy_kernel(T* const dst, // [batch_size, ld_dst] +__global__ void batched_memcpy_kernel(T* const dst, // [batch_size, ld_dst] const uint64_t ld_dst, const T* const src, // [batch_size, ld_src] const uint64_t ld_src, @@ -452,7 +452,7 @@ __global__ void batched_memcpy_kernel(T* const dst, // [batch_size, ld_ds } template -void batched_memcpy(T* const dst, // [batch_size, ld_dst] +void batched_memcpy(T* const dst, // [batch_size, ld_dst] const uint64_t ld_dst, const T* const src, // [batch_size, ld_src] const uint64_t ld_src, @@ -596,9 +596,9 @@ struct search : search_plan_impl { void operator()(raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh index e6966987c8..33c77db61e 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh @@ -111,7 +111,7 @@ struct search_plan_impl : public search_plan_impl_base { DISTANCE_T* const result_distances_ptr, // [num_queries, topk] const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const std::uint32_t num_queries, - const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] std::uint32_t* const num_executed_iterations, // [num_queries] uint32_t topk){}; diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh index e4903f9a4f..8ecf7fbdf5 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh @@ -202,9 +202,9 @@ struct search : search_plan_impl { void operator()(raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, - INDEX_T* const result_indices_ptr, // [num_queries, topk] - DISTANCE_T* const result_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const result_indices_ptr, // [num_queries, topk] + DISTANCE_T* const result_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const std::uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] std::uint32_t* const num_executed_iterations, // [num_queries] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh index f7c43fe11c..5f5df1a818 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh @@ -29,9 +29,9 @@ template dataset, raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index 999e8ddf84..8cb0f7085d 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -401,8 +401,8 @@ __device__ inline void topk_by_bitonic_sort_2nd(float* itopk_distances, // template -__device__ void topk_by_bitonic_sort(float* itopk_distances, // [num_itopk] - IdxT* itopk_indices, // [num_itopk] +__device__ void topk_by_bitonic_sort(float* itopk_distances, // [num_itopk] + IdxT* itopk_indices, // [num_itopk] const std::uint32_t num_itopk, float* candidate_distances, // [num_candidates] IdxT* candidate_indices, // [num_candidates] @@ -465,7 +465,7 @@ __launch_bounds__(1024, 1) __global__ void search_kernel(INDEX_T* const result_indices_ptr, // [num_queries, top_k] DISTANCE_T* const result_distances_ptr, // [num_queries, top_k] const std::uint32_t top_k, - const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] + const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] const std::size_t dataset_dim, const std::size_t dataset_size, const std::size_t dataset_ld, // stride of dataset @@ -474,7 +474,7 @@ __launch_bounds__(1024, 1) __global__ const std::uint32_t graph_degree, const unsigned num_distilation, const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] + const INDEX_T* seed_ptr, // [num_queries, num_seeds] const uint32_t num_seeds, INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const std::uint32_t internal_topk, @@ -745,11 +745,40 @@ template ); - template - static auto choose_block_size(unsigned block_size) -> kernel_t + template + static auto choose_search_kernel(unsigned itopk_size) -> kernel_t { - constexpr unsigned BS = USE_BITONIC_SORT; - return search_kernel; + if (itopk_size <= 64) { + return search_kernel; + } else if (itopk_size <= 128) { + return search_kernel; + } else if (itopk_size <= 256) { + return search_kernel; + } else if (itopk_size <= 512) { + return search_kernel; + } + THROW("No kernel for parametels itopk_size %u, max_candidates %u", itopk_size, MAX_CANDIDATES); } static auto choose_itopk_and_mx_candidates(unsigned itopk_size, @@ -758,45 +787,18 @@ struct search_kernel_config { { if (num_itopk_candidates <= 64) { // use bitonic sort based topk - constexpr unsigned max_candidates = 64; - if (itopk_size <= 64) { - return choose_block_size<64, max_candidates, 1>(block_size); - } else if (itopk_size <= 128) { - return choose_block_size<128, max_candidates, 1>(block_size); - } else if (itopk_size <= 256) { - return choose_block_size<256, max_candidates, 1>(block_size); - } else if (itopk_size <= 512) { - return choose_block_size<512, max_candidates, 1>(block_size); - } + return choose_search_kernel<64, 1>(itopk_size); } else if (num_itopk_candidates <= 128) { - constexpr unsigned max_candidates = 128; - if (itopk_size <= 64) { - return choose_block_size<64, max_candidates, 1>(block_size); - } else if (itopk_size <= 128) { - return choose_block_size<128, max_candidates, 1>(block_size); - } else if (itopk_size <= 256) { - return choose_block_size<256, max_candidates, 1>(block_size); - } else if (itopk_size <= 512) { - return choose_block_size<512, max_candidates, 1>(block_size); - } + return choose_search_kernel<128, 1>(itopk_size); } else if (num_itopk_candidates <= 256) { - constexpr unsigned max_candidates = 256; - if (itopk_size <= 64) { - return choose_block_size<64, max_candidates, 1>(block_size); - } else if (itopk_size <= 128) { - return choose_block_size<128, max_candidates, 1>(block_size); - } else if (itopk_size <= 256) { - return choose_block_size<256, max_candidates, 1>(block_size); - } else if (itopk_size <= 512) { - return choose_block_size<512, max_candidates, 1>(block_size); - } + return choose_search_kernel<526, 1>(itopk_size); } else { // Radix-based topk is used constexpr unsigned max_candidates = 32; // to avoid build failure if (itopk_size <= 256) { - return choose_block_size<256, max_candidates, 0>(block_size); + return search_kernel; } else if (itopk_size <= 512) { - return choose_block_size<512, max_candidates, 0>(block_size); + return search_kernel; } } THROW("No kernel for parametels itopk_size %u, num_itopk_candidates %u", @@ -813,9 +815,9 @@ template dataset, raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] diff --git a/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh b/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh index 638cb4fe91..dc13a7d290 100644 --- a/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh @@ -246,9 +246,9 @@ __device__ inline void update_histogram(int itr, uint32_t threshold, uint32_t& num_bins, uint32_t& shift, - const T* x, // [nx,] + const T* x, // [nx,] uint32_t nx, - uint32_t* hist, // [num_bins] + uint32_t* hist, // [num_bins] uint8_t* state, uint32_t* output, // [topk] uint32_t* output_count) @@ -830,16 +830,16 @@ __launch_bounds__(1024, 1) __global__ void kern_topk_cta_11(uint32_t topk, uint32_t size_batch, uint32_t len_x, - const uint32_t* _x, // [size_batch, ld_x,] + const uint32_t* _x, // [size_batch, ld_x,] uint32_t ld_x, const ValT* _in_vals, // [size_batch, ld_iv,] uint32_t ld_iv, - uint32_t* _y, // [size_batch, ld_y,] + uint32_t* _y, // [size_batch, ld_y,] uint32_t ld_y, - ValT* _out_vals, // [size_batch, ld_ov,] + ValT* _out_vals, // [size_batch, ld_ov,] uint32_t ld_ov, - uint8_t* _state, // [size_batch, ...,] - uint32_t* _hints, // [size_batch,] + uint8_t* _state, // [size_batch, ...,] + uint32_t* _hints, // [size_batch,] bool sort) { const uint32_t i_batch = blockIdx.x; diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh index 93eeb0dead..c0f856103a 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh @@ -16,7 +16,7 @@ #pragma once -#include // RAFT_LOG_TRACE +#include // RAFT_LOG_TRACE #include #include // raft::resources #include // is_min_close, DistanceType diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index 199cb74fbe..47c10de200 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -220,7 +220,7 @@ void select_residuals(raft::resources const& handle, template void flat_compute_residuals( raft::resources const& handle, - float* residuals, // [n_rows, rot_dim] + float* residuals, // [n_rows, rot_dim] IdxT n_rows, device_matrix_view rotation_matrix, // [rot_dim, dim] device_matrix_view centers, // [n_lists, dim_ext] diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh index 2ab216b13b..7c5b523a8b 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh @@ -898,7 +898,7 @@ auto compute_similarity_select(const cudaDeviceProp& dev_props, } { - if (selected_perf.occupancy <= 0.0 // no candidate yet + if (selected_perf.occupancy <= 0.0 // no candidate yet || (selected_perf.occupancy < cur.occupancy * kTargetOccupancy && selected_perf.shmem_use >= cur.shmem_use) // much improved occupancy ) { diff --git a/cpp/include/raft/neighbors/detail/refine_host-ext.hpp b/cpp/include/raft/neighbors/detail/refine_host-ext.hpp index 3ce2dc3eb5..2a863b47b3 100644 --- a/cpp/include/raft/neighbors/detail/refine_host-ext.hpp +++ b/cpp/include/raft/neighbors/detail/refine_host-ext.hpp @@ -16,7 +16,7 @@ #pragma once -#include // int64_t +#include // int64_t #include // raft::host_matrix_view #include // raft::distance::DistanceType diff --git a/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh b/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh index c000a4810b..a6ed17e251 100644 --- a/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh +++ b/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh @@ -36,7 +36,7 @@ void select_k(const key_t* inK, bool select_min, int k, cudaStream_t stream) RAFT_EXPLICIT; -}; // namespace raft::neighbors::detail +}; // namespace raft::neighbors::detail #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY diff --git a/cpp/include/raft/neighbors/ivf_flat-ext.cuh b/cpp/include/raft/neighbors/ivf_flat-ext.cuh index 848703c9b5..8dbe7587ff 100644 --- a/cpp/include/raft/neighbors/ivf_flat-ext.cuh +++ b/cpp/include/raft/neighbors/ivf_flat-ext.cuh @@ -16,10 +16,10 @@ #pragma once -#include // int64_t +#include // int64_t -#include // raft::device_matrix_view -#include // raft::resources +#include // raft::device_matrix_view +#include // raft::resources #include #include // raft::neighbors::ivf_flat::index #include // RAFT_EXPLICIT diff --git a/cpp/include/raft/neighbors/ivf_pq-ext.cuh b/cpp/include/raft/neighbors/ivf_pq-ext.cuh index fcfe837e2d..4a60cfc09d 100644 --- a/cpp/include/raft/neighbors/ivf_pq-ext.cuh +++ b/cpp/include/raft/neighbors/ivf_pq-ext.cuh @@ -16,7 +16,7 @@ #pragma once -#include // int64_t +#include // int64_t #include // raft::device_matrix_view #include // raft::resources diff --git a/cpp/include/raft/neighbors/refine-ext.cuh b/cpp/include/raft/neighbors/refine-ext.cuh index c1fd4676dc..3ccd3891b7 100644 --- a/cpp/include/raft/neighbors/refine-ext.cuh +++ b/cpp/include/raft/neighbors/refine-ext.cuh @@ -16,7 +16,7 @@ #pragma once -#include // int64_t +#include // int64_t #include // raft::device_matrix_view #include // // raft::host_matrix_view diff --git a/cpp/include/raft/sparse/distance/distance.cuh b/cpp/include/raft/sparse/distance/distance.cuh index b60940341a..702846f586 100644 --- a/cpp/include/raft/sparse/distance/distance.cuh +++ b/cpp/include/raft/sparse/distance/distance.cuh @@ -218,8 +218,8 @@ void pairwise_distance(raft::resources const& handle, /** @} */ // end of sparse_distance -}; // namespace distance -}; // namespace sparse -}; // namespace raft +}; // namespace distance +}; // namespace sparse +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh index 3cb4a3e353..56ca2ebfa7 100644 --- a/cpp/include/raft/sparse/linalg/detail/norm.cuh +++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh @@ -49,10 +49,10 @@ __global__ void csr_row_normalize_l1_kernel( // over each row and then divide the values in parallel. const int* ia, // csr row ex_scan (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num rows in csr + int nnz, // array of values and number of non-zeros + int m, // num rows in csr T* result) -{ // output array +{ // output array // row-based matrix 1 thread per row int row = (blockIdx.x * TPB_X) + threadIdx.x; @@ -95,8 +95,8 @@ __global__ void csr_row_normalize_l1_kernel( template void csr_row_normalize_l1(const int* ia, // csr row ex_scan (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num rows in csr + int nnz, // array of values and number of non-zeros + int m, // num rows in csr T* result, cudaStream_t stream) { // output array @@ -115,10 +115,10 @@ __global__ void csr_row_normalize_max_kernel( // over each row and then divide the values in parallel. const int* ia, // csr row ind array (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num total rows in csr + int nnz, // array of values and number of non-zeros + int m, // num total rows in csr T* result) -{ // output array +{ // output array // row-based matrix 1 thread per row int row = (blockIdx.x * TPB_X) + threadIdx.x; @@ -163,8 +163,8 @@ __global__ void csr_row_normalize_max_kernel( template void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num total rows in csr + int nnz, // array of values and number of non-zeros + int m, // num total rows in csr T* result, cudaStream_t stream) { diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh index 545f218e63..51836ca9aa 100644 --- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh @@ -69,7 +69,7 @@ void fit_embedding(raft::resources const& handle, handle, ro, ci, vs, n, nnz}; index_type neigvs = n_components + 1; - index_type maxiter = 4000; // default reset value (when set to 0); + index_type maxiter = 4000; // default reset value (when set to 0); value_type tol = 0.01; index_type restart_iter = 15 + neigvs; // what cugraph is using diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh index f7ebf50db0..43dd182fe5 100644 --- a/cpp/include/raft/sparse/linalg/norm.cuh +++ b/cpp/include/raft/sparse/linalg/norm.cuh @@ -39,8 +39,8 @@ namespace linalg { template void csr_row_normalize_l1(const int* ia, // csr row ex_scan (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num rows in csr + int nnz, // array of values and number of non-zeros + int m, // num rows in csr T* result, cudaStream_t stream) { // output array @@ -60,8 +60,8 @@ void csr_row_normalize_l1(const int* ia, // csr row ex_scan (sorted by row) template void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num total rows in csr + int nnz, // array of values and number of non-zeros + int m, // num total rows in csr T* result, cudaStream_t stream) { diff --git a/cpp/include/raft/sparse/neighbors/detail/knn.cuh b/cpp/include/raft/sparse/neighbors/detail/knn.cuh index f2be427367..ff644c000e 100644 --- a/cpp/include/raft/sparse/neighbors/detail/knn.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/knn.cuh @@ -58,7 +58,7 @@ struct csr_batcher_t { void set_batch(int batch_num) { batch_start_ = batch_num * batch_size_; - batch_stop_ = batch_start_ + batch_size_ - 1; // zero-based indexing + batch_stop_ = batch_start_ + batch_size_ - 1; // zero-based indexing if (batch_stop_ >= total_rows_) batch_stop_ = total_rows_ - 1; // zero-based indexing diff --git a/cpp/include/raft/sparse/solver/mst_solver.cuh b/cpp/include/raft/sparse/solver/mst_solver.cuh index bfedb9ce2a..193431251f 100644 --- a/cpp/include/raft/sparse/solver/mst_solver.cuh +++ b/cpp/include/raft/sparse/solver/mst_solver.cuh @@ -78,10 +78,10 @@ class MST_solver { rmm::device_uvector altered_weights; // weights to be used for mst rmm::device_scalar mst_edge_count; // total number of edges added after every iteration rmm::device_scalar - prev_mst_edge_count; // total number of edges up to the previous iteration - rmm::device_uvector mst_edge; // mst output - true if the edge belongs in mst - rmm::device_uvector next_color; // next iteration color - rmm::device_uvector color; // index of color that vertex points to + prev_mst_edge_count; // total number of edges up to the previous iteration + rmm::device_uvector mst_edge; // mst output - true if the edge belongs in mst + rmm::device_uvector next_color; // next iteration color + rmm::device_uvector color; // index of color that vertex points to // new src-dst pairs found per iteration rmm::device_uvector temp_src; diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh index 95aeca64e5..70c5cec23f 100644 --- a/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh +++ b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh @@ -61,7 +61,7 @@ void rbc_low_dim_pass_two(raft::resources const& handle, float weight, value_int* post_dists_counter) RAFT_EXPLICIT; -}; // namespace raft::spatial::knn::detail +}; // namespace raft::spatial::knn::detail #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index 7daa1792b1..1f97cd5f76 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -83,7 +83,7 @@ double adjusted_rand_index(raft::resources const& handle, /** @} */ // end group stats_adj_rand_index -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index 07fd61411d..b669e0de32 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -85,7 +85,7 @@ double completeness_score(raft::resources const& handle, /** @} */ // end group stats_completeness -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index 67f44b0fde..ad5d233c0e 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -116,7 +116,7 @@ void cov(raft::resources const& handle, /** @} */ // end group stats_cov -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index fcc49fefd2..fe432569ee 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -80,7 +80,7 @@ double entropy(raft::resources const& handle, /** @} */ // end group stats_entropy -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh index c374251359..8480d16316 100644 --- a/cpp/include/raft/stats/histogram.cuh +++ b/cpp/include/raft/stats/histogram.cuh @@ -115,7 +115,7 @@ void histogram(raft::resources const& handle, /** @} */ // end group stats_histogram -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh index ce7872d55d..311cd599f8 100644 --- a/cpp/include/raft/stats/homogeneity_score.cuh +++ b/cpp/include/raft/stats/homogeneity_score.cuh @@ -88,7 +88,7 @@ double homogeneity_score(raft::resources const& handle, /** @} */ // end group stats_homogeneity_score -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh index 2e01918d2a..b417cbd509 100644 --- a/cpp/include/raft/stats/kl_divergence.cuh +++ b/cpp/include/raft/stats/kl_divergence.cuh @@ -76,7 +76,7 @@ value_t kl_divergence(raft::resources const& handle, /** @} */ // end group kl_divergence -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index 96c9ca3b5c..43d39cfd6c 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -93,7 +93,7 @@ void mean(raft::resources const& handle, /** @} */ // end group stats_mean -}; // namespace stats -}; // namespace raft +}; // namespace stats +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 48f5eb667f..83f9a8a941 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -160,7 +160,7 @@ void mean_add(raft::resources const& handle, /** @} */ // end group stats_mean_center -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh index f6127df701..5c27a6caf6 100644 --- a/cpp/include/raft/stats/meanvar.cuh +++ b/cpp/include/raft/stats/meanvar.cuh @@ -107,6 +107,6 @@ void meanvar(raft::resources const& handle, /** @} */ // end group stats_mean_var -}; // namespace raft::stats +}; // namespace raft::stats #endif diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh index 0c5a62257d..5c5ff346a4 100644 --- a/cpp/include/raft/stats/minmax.cuh +++ b/cpp/include/raft/stats/minmax.cuh @@ -139,6 +139,6 @@ void minmax(raft::resources const& handle, /** @} */ // end group stats_minmax -}; // namespace stats -}; // namespace raft +}; // namespace stats +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/mutual_info_score.cuh b/cpp/include/raft/stats/mutual_info_score.cuh index 5c4ae43e09..5a334e9280 100644 --- a/cpp/include/raft/stats/mutual_info_score.cuh +++ b/cpp/include/raft/stats/mutual_info_score.cuh @@ -86,7 +86,7 @@ double mutual_info_score(raft::resources const& handle, /** @} */ // end group stats_mutual_info -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/rand_index.cuh b/cpp/include/raft/stats/rand_index.cuh index 6a208c5492..a21a0c0dc5 100644 --- a/cpp/include/raft/stats/rand_index.cuh +++ b/cpp/include/raft/stats/rand_index.cuh @@ -72,7 +72,7 @@ double rand_index(raft::resources const& handle, /** @} */ // end group stats_rand_index -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/stddev.cuh b/cpp/include/raft/stats/stddev.cuh index 6349f8fd11..0a67bd2325 100644 --- a/cpp/include/raft/stats/stddev.cuh +++ b/cpp/include/raft/stats/stddev.cuh @@ -182,7 +182,7 @@ void vars(raft::resources const& handle, /** @} */ // end group stats_variance -}; // namespace stats -}; // namespace raft +}; // namespace stats +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/sum.cuh b/cpp/include/raft/stats/sum.cuh index 2ac9cd9eb5..2c3ed1b83e 100644 --- a/cpp/include/raft/stats/sum.cuh +++ b/cpp/include/raft/stats/sum.cuh @@ -85,7 +85,7 @@ void sum(raft::resources const& handle, /** @} */ // end group stats_sum -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/v_measure.cuh b/cpp/include/raft/stats/v_measure.cuh index 8ea5c65600..041adb5e38 100644 --- a/cpp/include/raft/stats/v_measure.cuh +++ b/cpp/include/raft/stats/v_measure.cuh @@ -92,7 +92,7 @@ double v_measure(raft::resources const& handle, /** @} */ // end group stats_vmeasure -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/weighted_mean.cuh b/cpp/include/raft/stats/weighted_mean.cuh index 7d06d5dff1..da22f0163c 100644 --- a/cpp/include/raft/stats/weighted_mean.cuh +++ b/cpp/include/raft/stats/weighted_mean.cuh @@ -186,7 +186,7 @@ void col_weighted_mean(raft::resources const& handle, /** @} */ // end group stats_weighted_mean -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/util/cache.cuh b/cpp/include/raft/util/cache.cuh index a9cfe64568..255dea16bb 100644 --- a/cpp/include/raft/util/cache.cuh +++ b/cpp/include/raft/util/cache.cuh @@ -362,9 +362,9 @@ class Cache { int GetSize() const { return cached_keys.size(); } protected: - int n_vec; //!< Number of elements in a cached vector - float cache_size; //!< in MiB - int n_cache_sets; //!< number of cache sets + int n_vec; //!< Number of elements in a cached vector + float cache_size; //!< in MiB + int n_cache_sets; //!< number of cache sets const int TPB = 256; //!< threads per block for kernel launch int n_iter = 0; //!< Counter for time stamping cache operation diff --git a/cpp/include/raft/util/cache_util.cuh b/cpp/include/raft/util/cache_util.cuh index a7dcc22b02..79a94d9563 100644 --- a/cpp/include/raft/util/cache_util.cuh +++ b/cpp/include/raft/util/cache_util.cuh @@ -46,7 +46,7 @@ __global__ void get_vecs( const math_t* cache, int_t n_vec, const idx_t* cache_idx, int_t n, math_t* out) { int tid = threadIdx.x + blockIdx.x * blockDim.x; - int row = tid % n_vec; // row idx + int row = tid % n_vec; // row idx if (tid < n_vec * n) { size_t out_col = tid / n_vec; // col idx size_t cache_col = cache_idx[out_col]; @@ -93,7 +93,7 @@ __global__ void store_vecs(const math_t* tile, int n_cache_vecs) { int tid = threadIdx.x + blockIdx.x * blockDim.x; - int row = tid % n_vec; // row idx + int row = tid % n_vec; // row idx if (tid < n_vec * n) { int tile_col = tid / n_vec; // col idx int data_col = tile_idx ? tile_idx[tile_col] : tile_col; @@ -357,7 +357,7 @@ __global__ void get_cache_idx(int* keys, cache_time[cidx] = time; // update time stamp cache_idx[tid] = cidx; // exact cache idx } else { - cache_idx[tid] = sidx; // assign cache set + cache_idx[tid] = sidx; // assign cache set } } } diff --git a/cpp/include/raft/util/device_loads_stores.cuh b/cpp/include/raft/util/device_loads_stores.cuh index e3d54c51f5..65936b2f66 100644 --- a/cpp/include/raft/util/device_loads_stores.cuh +++ b/cpp/include/raft/util/device_loads_stores.cuh @@ -16,7 +16,7 @@ #pragma once -#include // uintX_t +#include // uintX_t #include #include // DI diff --git a/cpp/internal/raft_internal/matrix/select_k.cuh b/cpp/internal/raft_internal/matrix/select_k.cuh index b72e67580a..1d15c5fc03 100644 --- a/cpp/internal/raft_internal/matrix/select_k.cuh +++ b/cpp/internal/raft_internal/matrix/select_k.cuh @@ -33,6 +33,7 @@ struct params { bool use_index_input = true; bool use_same_leading_bits = false; bool use_memory_pool = true; + double frac_infinities = 0.0; }; inline auto operator<<(std::ostream& os, const params& ss) -> std::ostream& @@ -41,8 +42,10 @@ inline auto operator<<(std::ostream& os, const params& ss) -> std::ostream& os << ", len: " << ss.len; os << ", k: " << ss.k; os << (ss.select_min ? ", asc" : ", dsc"); - os << (ss.use_index_input ? "" : ", no-input-index"); - os << (ss.use_same_leading_bits ? ", same-leading-bits}" : "}"); + if (!ss.use_index_input) { os << ", no-input-index"; } + if (ss.use_same_leading_bits) { os << ", same-leading-bits"; } + if (ss.frac_infinities > 0) { os << ", infs: " << ss.frac_infinities; } + os << "}"; return os; } diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py index 49f96aa18b..3d8bbcec4a 100644 --- a/cpp/scripts/run-clang-tidy.py +++ b/cpp/scripts/run-clang-tidy.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -28,7 +28,7 @@ import subprocess -EXPECTED_VERSIONS = ("11.1.0",) +EXPECTED_VERSIONS = ("16.0.6",) VERSION_REGEX = re.compile(r"clang version ([0-9.]+)") CMAKE_COMPILER_REGEX = re.compile( r"^\s*CMAKE_CXX_COMPILER:FILEPATH=(.+)\s*$", re.MULTILINE) diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh index 20d78c7bb5..8f616ada98 100644 --- a/cpp/test/distance/distance_base.cuh +++ b/cpp/test/distance/distance_base.cuh @@ -19,13 +19,13 @@ #include // common::nvtx::range #include -#include // make_device_matrix_view -#include // raft::sqrt -#include // raft::resources +#include // make_device_matrix_view +#include // raft::sqrt +#include // raft::resources #include #include // raft::distance::DistanceType #include -#include // rmm::device_uvector +#include // rmm::device_uvector namespace raft { namespace distance { diff --git a/cpp/test/linalg/rsvd.cu b/cpp/test/linalg/rsvd.cu index 0c66f47c7f..b5d10d215c 100644 --- a/cpp/test/linalg/rsvd.cu +++ b/cpp/test/linalg/rsvd.cu @@ -160,24 +160,24 @@ class RsvdTest : public ::testing::TestWithParam> { const std::vector> inputs_fx = { // Test with ratios - {0.20f, 256, 256, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Square + BBT - {0.20f, 2048, 256, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Tall + BBT + {0.20f, 256, 256, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Square + BBT + {0.20f, 2048, 256, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Tall + BBT - {0.20f, 256, 256, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Square + non-BBT - {0.20f, 2048, 256, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Tall + non-BBT + {0.20f, 256, 256, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Square + non-BBT + {0.20f, 2048, 256, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Tall + non-BBT - {0.20f, 2048, 2048, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Square + BBT - {0.60f, 16384, 2048, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Tall + BBT + {0.20f, 2048, 2048, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Square + BBT + {0.60f, 16384, 2048, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Tall + BBT - {0.20f, 2048, 2048, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Square + non-BBT - {0.60f, 16384, 2048, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL} // Tall + non-BBT + {0.20f, 2048, 2048, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Square + non-BBT + {0.60f, 16384, 2048, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL} // Tall + non-BBT - , // Test with fixed ranks - {0.10f, 256, 256, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Square + BBT - {0.12f, 2048, 256, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Tall + BBT + , // Test with fixed ranks + {0.10f, 256, 256, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Square + BBT + {0.12f, 2048, 256, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Tall + BBT - {0.10f, 256, 256, 0.25f, 0.0f, 0.0f, 100, 5, false, 4321ULL}, // Square + non-BBT - {0.12f, 2048, 256, 0.25f, 0.0f, 0.0f, 100, 5, false, 4321ULL}, // Tall + non-BBT + {0.10f, 256, 256, 0.25f, 0.0f, 0.0f, 100, 5, false, 4321ULL}, // Square + non-BBT + {0.12f, 2048, 256, 0.25f, 0.0f, 0.0f, 100, 5, false, 4321ULL}, // Tall + non-BBT {0.60f, 2048, 2048, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Square + BBT {1.00f, 16384, 2048, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Tall + BBT @@ -188,14 +188,14 @@ const std::vector> inputs_fx = { const std::vector> inputs_dx = { // Test with ratios - {0.20, 256, 256, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Square + BBT - {0.20, 2048, 256, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Tall + BBT - {0.20, 256, 256, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Square + non-BBT - {0.20, 2048, 256, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Tall + non-BBT - {0.20, 2048, 2048, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Square + BBT - {0.60, 16384, 2048, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Tall + BBT - {0.20, 2048, 2048, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Square + non-BBT - {0.60, 16384, 2048, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL} // Tall + non-BBT + {0.20, 256, 256, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Square + BBT + {0.20, 2048, 256, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Tall + BBT + {0.20, 256, 256, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Square + non-BBT + {0.20, 2048, 256, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Tall + non-BBT + {0.20, 2048, 2048, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Square + BBT + {0.60, 16384, 2048, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Tall + BBT + {0.20, 2048, 2048, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Square + non-BBT + {0.60, 16384, 2048, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL} // Tall + non-BBT , // Test with fixed ranks {0.10, 256, 256, 0.25f, 0.0, 0.0, 100, 5, true, 4321ULL}, // Square + BBT diff --git a/cpp/test/matrix/select_k.cu b/cpp/test/matrix/select_k.cu index 63f020b420..ce4e3e867e 100644 --- a/cpp/test/matrix/select_k.cu +++ b/cpp/test/matrix/select_k.cu @@ -70,6 +70,28 @@ auto inputs_random_largek = testing::Values(select::params{100, 100000, 1000, tr select::params{100, 100000, 2048, false}, select::params{100, 100000, 1237, true}); +auto inputs_random_many_infs = + testing::Values(select::params{10, 100000, 1, true, false, false, true, 0.9}, + select::params{10, 100000, 16, true, false, false, true, 0.9}, + select::params{10, 100000, 64, true, false, false, true, 0.9}, + select::params{10, 100000, 128, true, false, false, true, 0.9}, + select::params{10, 100000, 256, true, false, false, true, 0.9}, + select::params{1000, 10000, 1, true, false, false, true, 0.9}, + select::params{1000, 10000, 16, true, false, false, true, 0.9}, + select::params{1000, 10000, 64, true, false, false, true, 0.9}, + select::params{1000, 10000, 128, true, false, false, true, 0.9}, + select::params{1000, 10000, 256, true, false, false, true, 0.9}, + select::params{10, 100000, 1, true, false, false, true, 0.999}, + select::params{10, 100000, 16, true, false, false, true, 0.999}, + select::params{10, 100000, 64, true, false, false, true, 0.999}, + select::params{10, 100000, 128, true, false, false, true, 0.999}, + select::params{10, 100000, 256, true, false, false, true, 0.999}, + select::params{1000, 10000, 1, true, false, false, true, 0.999}, + select::params{1000, 10000, 16, true, false, false, true, 0.999}, + select::params{1000, 10000, 64, true, false, false, true, 0.999}, + select::params{1000, 10000, 128, true, false, false, true, 0.999}, + select::params{1000, 10000, 256, true, false, false, true, 0.999}); + using ReferencedRandomFloatInt = SelectK::params_random>; TEST_P(ReferencedRandomFloatInt, Run) { run(); } // NOLINT @@ -111,4 +133,16 @@ INSTANTIATE_TEST_CASE_P( // NOLINT select::Algo::kRadix8bits, select::Algo::kRadix11bits, select::Algo::kRadix11bitsExtraPass))); + +using ReferencedRandomFloatIntkWarpsortAsGT = + SelectK::params_random>; +TEST_P(ReferencedRandomFloatIntkWarpsortAsGT, Run) { run(); } // NOLINT +INSTANTIATE_TEST_CASE_P( // NOLINT + SelectK, + ReferencedRandomFloatIntkWarpsortAsGT, + testing::Combine(inputs_random_many_infs, + testing::Values(select::Algo::kRadix8bits, + select::Algo::kRadix11bits, + select::Algo::kRadix11bitsExtraPass))); + } // namespace raft::matrix diff --git a/cpp/test/matrix/select_k.cuh b/cpp/test/matrix/select_k.cuh index e0e0cad225..e94a6d029e 100644 --- a/cpp/test/matrix/select_k.cuh +++ b/cpp/test/matrix/select_k.cuh @@ -49,14 +49,16 @@ auto gen_simple_ids(uint32_t batch_size, uint32_t len) -> std::vector template struct io_simple { public: - bool not_supported = false; + bool not_supported = false; + std::optional algo = std::nullopt; io_simple(const select::params& spec, const std::vector& in_dists, + const std::optional>& in_ids, const std::vector& out_dists, const std::vector& out_ids) : in_dists_(in_dists), - in_ids_(gen_simple_ids(spec.batch_size, spec.len)), + in_ids_(in_ids.value_or(gen_simple_ids(spec.batch_size, spec.len))), out_dists_(out_dists), out_ids_(out_ids) { @@ -78,12 +80,14 @@ template struct io_computed { public: bool not_supported = false; + select::Algo algo; io_computed(const select::params& spec, const select::Algo& algo, const std::vector& in_dists, const std::optional>& in_ids = std::nullopt) - : in_dists_(in_dists), + : algo(algo), + in_dists_(in_dists), in_ids_(in_ids.value_or(gen_simple_ids(spec.batch_size, spec.len))), out_dists_(spec.batch_size * spec.k), out_ids_(spec.batch_size * spec.k) @@ -223,32 +227,62 @@ struct SelectK // NOLINT if (ref.not_supported || res.not_supported) { GTEST_SKIP(); } ASSERT_TRUE(hostVecMatch(ref.get_out_dists(), res.get_out_dists(), Compare())); - // If the dists (keys) are the same, different corresponding ids may end up in the selection due - // to non-deterministic nature of some implementations. - auto& in_ids = ref.get_in_ids(); - auto& in_dists = ref.get_in_dists(); - auto compare_ids = [&in_ids, &in_dists](const IdxT& i, const IdxT& j) { + // If the dists (keys) are the same, different corresponding ids may end up in the selection + // due to non-deterministic nature of some implementations. + auto compare_ids = [this](const IdxT& i, const IdxT& j) { if (i == j) return true; + auto& in_ids = ref.get_in_ids(); + auto& in_dists = ref.get_in_dists(); auto ix_i = static_cast(std::find(in_ids.begin(), in_ids.end(), i) - in_ids.begin()); auto ix_j = static_cast(std::find(in_ids.begin(), in_ids.end(), j) - in_ids.begin()); - if (static_cast(ix_i) >= in_ids.size() || static_cast(ix_j) >= in_ids.size()) - return false; + auto forgive_i = forgive_algo(ref.algo, i); + auto forgive_j = forgive_algo(res.algo, j); + // Some algorithms return invalid indices in special cases. + // TODO: https://github.com/rapidsai/raft/issues/1822 + if (static_cast(ix_i) >= in_ids.size()) return forgive_i; + if (static_cast(ix_j) >= in_ids.size()) return forgive_j; auto dist_i = in_dists[ix_i]; auto dist_j = in_dists[ix_j]; if (dist_i == dist_j) return true; + const auto bound = spec.select_min ? raft::upper_bound() : raft::lower_bound(); + if (forgive_i && dist_i == bound) return true; + if (forgive_j && dist_j == bound) return true; + // Otherwise really fail std::cout << "ERROR: ref[" << ix_i << "] = " << dist_i << " != " << "res[" << ix_j << "] = " << dist_j << std::endl; return false; }; ASSERT_TRUE(hostVecMatch(ref.get_out_ids(), res.get_out_ids(), compare_ids)); } + + auto forgive_algo(const std::optional& algo, IdxT ix) const -> bool + { + if (!algo.has_value()) { return false; } + switch (algo.value()) { + // not sure which algo this is. + case select::Algo::kPublicApi: return true; + // warp-sort-based algos currently return zero index for inf distances. + case select::Algo::kWarpAuto: + case select::Algo::kWarpImmediate: + case select::Algo::kWarpFiltered: + case select::Algo::kWarpDistributed: + case select::Algo::kWarpDistributedShm: return ix == 0; + // FAISS version returns a special invalid value: + case select::Algo::kFaissBlockSelect: return ix == std::numeric_limits::max(); + // Do not forgive by default + default: return false; + } + } }; template struct params_simple { - using io_t = io_simple; - using input_t = - std::tuple, std::vector, std::vector>; + using io_t = io_simple; + using input_t = std::tuple, + std::optional>, + std::vector, + std::vector>; using params_t = std::tuple; static auto read(params_t ps) -> Params @@ -259,15 +293,17 @@ struct params_simple { std::get<0>(ins), algo, io_simple( - std::get<0>(ins), std::get<1>(ins), std::get<2>(ins), std::get<3>(ins))); + std::get<0>(ins), std::get<1>(ins), std::get<2>(ins), std::get<3>(ins), std::get<4>(ins))); } }; +auto inf_f = std::numeric_limits::max(); auto inputs_simple_f = testing::Values( params_simple::input_t( {5, 5, 5, true, true}, {5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 5.0, 1.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0}, + std::nullopt, {1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0}, {4, 3, 2, 1, 0, 0, 1, 2, 3, 4, 3, 0, 1, 4, 2, 4, 2, 1, 3, 0, 0, 2, 1, 4, 3}), @@ -275,12 +311,14 @@ auto inputs_simple_f = testing::Values( {5, 5, 3, true, true}, {5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 5.0, 1.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0}, + std::nullopt, {1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, {4, 3, 2, 0, 1, 2, 3, 0, 1, 4, 2, 1, 0, 2, 1}), params_simple::input_t( {5, 5, 5, true, false}, {5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 5.0, 1.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0}, + std::nullopt, {1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0}, {4, 3, 2, 1, 0, 0, 1, 2, 3, 4, 3, 0, 1, 4, 2, 4, 2, 1, 3, 0, 0, 2, 1, 4, 3}), @@ -288,20 +326,31 @@ auto inputs_simple_f = testing::Values( {5, 5, 3, true, false}, {5.0, 4.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 5.0, 1.0, 4.0, 5.0, 3.0, 2.0, 4.0, 1.0, 1.0, 3.0, 2.0, 5.0, 4.0}, + std::nullopt, {1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0}, {4, 3, 2, 0, 1, 2, 3, 0, 1, 4, 2, 1, 0, 2, 1}), params_simple::input_t( {5, 7, 3, true, true}, {5.0, 4.0, 3.0, 2.0, 1.3, 7.5, 19.0, 9.0, 2.0, 3.0, 3.0, 5.0, 6.0, 4.0, 2.0, 3.0, 5.0, 1.0, 4.0, 1.0, 1.0, 5.0, 7.0, 2.5, 4.0, 7.0, 8.0, 8.0, 1.0, 3.0, 2.0, 5.0, 4.0, 1.1, 1.2}, + std::nullopt, {1.3, 2.0, 3.0, 2.0, 3.0, 3.0, 1.0, 1.0, 1.0, 2.5, 4.0, 5.0, 1.0, 1.1, 1.2}, {4, 3, 2, 1, 2, 3, 3, 5, 6, 2, 3, 0, 0, 5, 6}), - params_simple::input_t( - {1, 7, 3, true, true}, {2.0, 3.0, 5.0, 1.0, 4.0, 1.0, 1.0}, {1.0, 1.0, 1.0}, {3, 5, 6}), - params_simple::input_t( - {1, 7, 3, false, false}, {2.0, 3.0, 5.0, 1.0, 4.0, 1.0, 1.0}, {5.0, 4.0, 3.0}, {2, 4, 1}), - params_simple::input_t( - {1, 7, 3, false, true}, {2.0, 3.0, 5.0, 9.0, 4.0, 9.0, 9.0}, {9.0, 9.0, 9.0}, {3, 5, 6}), + params_simple::input_t({1, 7, 3, true, true}, + {2.0, 3.0, 5.0, 1.0, 4.0, 1.0, 1.0}, + std::nullopt, + {1.0, 1.0, 1.0}, + {3, 5, 6}), + params_simple::input_t({1, 7, 3, false, false}, + {2.0, 3.0, 5.0, 1.0, 4.0, 1.0, 1.0}, + std::nullopt, + {5.0, 4.0, 3.0}, + {2, 4, 1}), + params_simple::input_t({1, 7, 3, false, true}, + {2.0, 3.0, 5.0, 9.0, 4.0, 9.0, 9.0}, + std::nullopt, + {9.0, 9.0, 9.0}, + {3, 5, 6}), params_simple::input_t( {1, 130, 5, false, true}, {19, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, @@ -309,6 +358,7 @@ auto inputs_simple_f = testing::Values( 0, 1, 0, 1, 0, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 4, 4, 2, 3, 2, 3, 2, 3, 2, 3, 2, 20}, + std::nullopt, {20, 19, 18, 17, 16}, {129, 0, 117, 116, 115}), params_simple::input_t( @@ -318,8 +368,20 @@ auto inputs_simple_f = testing::Values( 0, 1, 0, 1, 0, 1, 0, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 4, 4, 2, 3, 2, 3, 2, 3, 2, 3, 2, 20}, + std::nullopt, {20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6}, - {129, 0, 117, 116, 115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105})); + {129, 0, 117, 116, 115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105}), + params_simple::input_t( + select::params{1, 32, 31, true, true}, + {0, 1, 2, 3, inf_f, inf_f, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, + std::optional{std::vector{31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, + 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, + 9, 8, 7, 6, 75, 74, 3, 2, 1, 0}}, + {0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, inf_f}, + {31, 30, 29, 28, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, + 13, 12, 11, 10, 9, 8, 7, 6, 75, 74, 3, 2, 1, 0, 27})); using SimpleFloatInt = SelectK; TEST_P(SimpleFloatInt, Run) { run(); } // NOLINT @@ -335,6 +397,12 @@ INSTANTIATE_TEST_CASE_P( // NOLINT select::Algo::kWarpFiltered, select::Algo::kWarpDistributed))); +template +struct replace_with_mask { + KeyT replacement; + constexpr auto inline operator()(KeyT x, uint8_t mask) -> KeyT { return mask ? replacement : x; } +}; + template struct with_ref { template @@ -354,6 +422,19 @@ struct with_ref { rmm::device_uvector dists_d(spec.len * spec.batch_size, s); raft::random::RngState r(42); normal(handle, r, dists_d.data(), dists_d.size(), KeyT(10.0), KeyT(100.0)); + + if (spec.frac_infinities > 0.0) { + rmm::device_uvector mask_buf(dists_d.size(), s); + auto mask = make_device_vector_view(mask_buf.data(), mask_buf.size()); + raft::random::bernoulli(handle, r, mask, spec.frac_infinities); + KeyT bound = spec.select_min ? raft::upper_bound() : raft::lower_bound(); + auto mask_in = + make_device_vector_view(mask_buf.data(), mask_buf.size()); + auto dists_in = make_device_vector_view(dists_d.data(), dists_d.size()); + auto dists_out = make_device_vector_view(dists_d.data(), dists_d.size()); + raft::linalg::map(handle, dists_out, replace_with_mask{bound}, dists_in, mask_in); + } + update_host(dists.data(), dists_d.data(), dists_d.size(), s); s.synchronize(); } diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index ea905d2089..eadc88085f 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -372,7 +372,7 @@ inline std::vector generate_inputs() {100}, {1000}, {1, 8, 17}, - {1, 16}, // k + {1, 16}, // k {search_algo::SINGLE_CTA, search_algo::MULTI_CTA, search_algo::MULTI_KERNEL}, {0, 1, 10, 100}, // query size {0}, diff --git a/cpp/test/util/bitonic_sort.cu b/cpp/test/util/bitonic_sort.cu index d1f03f78b5..2cf5420334 100644 --- a/cpp/test/util/bitonic_sort.cu +++ b/cpp/test/util/bitonic_sort.cu @@ -103,12 +103,12 @@ struct bitonic_launch { }; template -class BitonicTest : public testing::TestWithParam { // NOLINT +class BitonicTest : public testing::TestWithParam { // NOLINT protected: - const test_spec spec; // NOLINT - std::vector in; // NOLINT - std::vector out; // NOLINT - std::vector ref; // NOLINT + const test_spec spec; // NOLINT + std::vector in; // NOLINT + std::vector out; // NOLINT + std::vector ref; // NOLINT void segmented_sort(std::vector& vec, int k, bool ascending) // NOLINT { @@ -184,13 +184,13 @@ auto inputs = ::testing::Values(test_spec{1, 1, 1, true}, test_spec{70, 1, 64, true}, test_spec{70, 2, 128, false}); -using Floats = BitonicTest; // NOLINT -TEST_P(Floats, Run) { run(); } // NOLINT -INSTANTIATE_TEST_CASE_P(BitonicTest, Floats, inputs); // NOLINT +using Floats = BitonicTest; // NOLINT +TEST_P(Floats, Run) { run(); } // NOLINT +INSTANTIATE_TEST_CASE_P(BitonicTest, Floats, inputs); // NOLINT -using Ints = BitonicTest; // NOLINT -TEST_P(Ints, Run) { run(); } // NOLINT -INSTANTIATE_TEST_CASE_P(BitonicTest, Ints, inputs); // NOLINT +using Ints = BitonicTest; // NOLINT +TEST_P(Ints, Run) { run(); } // NOLINT +INSTANTIATE_TEST_CASE_P(BitonicTest, Ints, inputs); // NOLINT using Doubles = BitonicTest; // NOLINT TEST_P(Doubles, Run) { run(); } // NOLINT diff --git a/cpp/test/util/reduction.cu b/cpp/test/util/reduction.cu index 17deaf99eb..548d3b9d53 100644 --- a/cpp/test/util/reduction.cu +++ b/cpp/test/util/reduction.cu @@ -147,9 +147,9 @@ struct reduction_launch { template class ReductionTest : public testing::TestWithParam> { // NOLINT protected: - const std::vector input; // NOLINT - rmm::cuda_stream_view stream; // NOLINT - rmm::device_uvector arr_d; // NOLINT + const std::vector input; // NOLINT + rmm::cuda_stream_view stream; // NOLINT + rmm::device_uvector arr_d; // NOLINT public: explicit ReductionTest() @@ -184,8 +184,8 @@ const std::vector binary_test_vector{ auto reduction_input = ::testing::Values(test_vector); auto binary_reduction_input = ::testing::Values(binary_test_vector); -using ReductionTestInt = ReductionTest; // NOLINT -using BinaryReductionTestInt = ReductionTest; // NOLINT +using ReductionTestInt = ReductionTest; // NOLINT +using BinaryReductionTestInt = ReductionTest; // NOLINT TEST_P(ReductionTestInt, REDUCTIONS) { run_reduction(); } INSTANTIATE_TEST_CASE_P(ReductionTest, ReductionTestInt, reduction_input); // NOLINT TEST_P(BinaryReductionTestInt, BINARY_REDUCTION) { run_binary_reduction(); } // NOLINT diff --git a/dependencies.yaml b/dependencies.yaml index 6f64287f54..700a6db1bf 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -169,10 +169,10 @@ dependencies: common: - output_types: [conda, requirements] packages: - - clang=16.0.1 + - clang=16.0.6 - output_types: [conda] packages: - - clang-tools=16.0.1 + - clang-tools=16.0.6 nn_bench: common: - output_types: [conda, pyproject, requirements] diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index 712d22f0aa..dd6090c5e2 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -99,4 +99,4 @@ IVF-pq is an inverted-file index, which partitions the vectors into a series of | `ef` | `search_param` | Y | Positive Integer >0 | | Size of the dynamic list for the nearest neighbors used for search. Higher value leads to more accurate but slower search. Cannot be lower than `k`. | | `numThreads` | `search_params` | N | Positive Integer >0 | 1 | Number of threads to use for queries. | -Please refer to [HNSW algorithm parameters guide](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md) from `hnswlib` to learn more about these arguments. +Please refer to [HNSW algorithm parameters guide] from `hnswlib` to learn more about these arguments. \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json index 6bef94c070..bc77b522a8 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json @@ -248,184 +248,111 @@ {"max_batch":10000, "max_k":10, "nprobe":1000} ] }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": {"nlist": 1024, "pq_dim": 128, "ratio": 1, "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "half"}, - {"nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half"}, - {"nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half"}, - {"nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half"}, - {"nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half"}, - {"nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "half"} - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - {"nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 5, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float"}, - {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float"} - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half"} - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half"}, - {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half"} - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", +{ + "name": "raft_ivf_pq.d96b5n50K", "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "build_param": {"nlist": 50000, "pq_dim": 96, "pq_bits": 5, "ratio": 10, "niter": 25}, + "file": "deep-100M/raft_ivf_pq/d96b5n50K", "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, - {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, + { "nprobe": 2000, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, + { "nprobe": 5000, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 2 }, + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 2000, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 5000, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 2000, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 5000, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 2000, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 5000, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 2 }, + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 2000, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 }, + { "nprobe": 5000, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 2 } + ] }, { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "name": "raft_ivf_pq.d64b5n50K", "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", + "build_param": {"nlist": 50000, "pq_dim": 64, "pq_bits": 5, "ratio": 10, "niter": 25}, + "file": "deep-100M/raft_ivf_pq/d64b5n50K", "search_params": [ - {"nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "float"}, - {"nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "float"}, - {"nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "float"}, - {"nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "float"}, - {"nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "float"}, - {"nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "float"} - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, + { "nprobe": 2000, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, + { "nprobe": 5000, "internalDistanceDtype": "float", "smemLutDtype": "float", "refine_ratio": 4 }, + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 2000, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 5000, "internalDistanceDtype": "float", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 2000, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 5000, "internalDistanceDtype": "float", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 2000, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 5000, "internalDistanceDtype": "half", "smemLutDtype": "half", "refine_ratio": 4 }, + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 2000, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 }, + { "nprobe": 5000, "internalDistanceDtype": "half", "smemLutDtype": "fp8", "refine_ratio": 4 } + ] }, { "name": "raft_ivf_pq.dimpq512-cluster1024-float-float",