diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 00004c4e4d..107823d5ee 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -62,7 +62,7 @@ jobs: arch: "amd64" branch: ${{ inputs.branch }} build_type: ${{ inputs.build_type || 'branch' }} - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci-conda:latest" date: ${{ inputs.date }} node_type: "gpu-v100-latest-1" run_script: "ci/build_docs.sh" diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 4437e0dc85..4fa3c5df86 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -62,7 +62,7 @@ jobs: build_type: pull-request node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" wheel-build-pylibraft: needs: checks diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2a70632497..66862ada5e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,7 +43,7 @@ repos: additional_dependencies: [toml] args: ["--config=pyproject.toml"] - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v16.0.1 + rev: v16.0.6 hooks: - id: clang-format types_or: [c, c++, cuda] diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 6a7e319f5d..7a69b95da1 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -47,10 +47,6 @@ sed_runner 's/'"branch-.*\/RAPIDS.cmake"'/'"branch-${NEXT_SHORT_TAG}\/RAPIDS.cma sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/pylibraft/pylibraft/__init__.py sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/raft-dask/raft_dask/__init__.py -# Python pyproject.toml updates -sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/pylibraft/pyproject.toml -sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/raft-dask/pyproject.toml - # 
Wheel testing script sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_raft_dask.sh @@ -74,6 +70,7 @@ for FILE in python/*/pyproject.toml; do for DEP in "${DEPENDENCIES[@]}"; do sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*\"/g" ${FILE} done + sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" "${FILE}" sed_runner "/\"ucx-py==/ s/==.*\"/==${NEXT_UCX_PY_SHORT_TAG_PEP440}.*\"/g" ${FILE} done diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 7e921decd5..65b4232d83 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - breathe - c-compiler -- clang-tools=16.0.1 -- clang=16.0.1 +- clang-tools=16.0.6 +- clang=16.0.6 - cmake>=3.26.4 - cuda-profiler-api=11.8.86 - cuda-python>=11.7.1,<12.0a0 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 2ea685b529..9db38ed1de 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - breathe - c-compiler -- clang-tools=16.0.1 -- clang=16.0.1 +- clang-tools=16.0.6 +- clang=16.0.6 - cmake>=3.26.4 - cuda-cudart-dev - cuda-profiler-api diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 742040ad50..5a9ef5bd32 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -9,8 +9,8 @@ channels: dependencies: - benchmark>=1.8.2 - c-compiler -- clang-tools=16.0.1 -- clang=16.0.1 +- clang-tools=16.0.6 +- clang=16.0.6 - cmake>=3.26.4 - cuda-profiler-api=11.8.86 - cuda-version=11.8 diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml index 91d0fdb729..a2ab0af643 100644 --- 
a/conda/recipes/raft-ann-bench/meta.yaml +++ b/conda/recipes/raft-ann-bench/meta.yaml @@ -78,11 +78,11 @@ requirements: - h5py {{ h5py_version }} - benchmark - matplotlib - # rmm is needed to determine if package is gpu-enabled - - rmm ={{ minor_version }} - python - pandas - pyyaml + # rmm is needed to determine if package is gpu-enabled + - rmm ={{ minor_version }} run: - python @@ -104,6 +104,8 @@ requirements: - python - pandas - pyyaml + # rmm is needed to determine if package is gpu-enabled + - rmm ={{ minor_version }} about: home: https://rapids.ai/ license: Apache-2.0 diff --git a/cpp/bench/prims/matrix/select_k.cu b/cpp/bench/prims/matrix/select_k.cu index 992fda8a38..d3994e59c5 100644 --- a/cpp/bench/prims/matrix/select_k.cu +++ b/cpp/bench/prims/matrix/select_k.cu @@ -219,28 +219,28 @@ const std::vector kInputs{ RAFT_BENCH_REGISTER(SelectK, #KeyT "/" #IdxT "/" #A, kInputs); \ } -SELECTION_REGISTER(float, uint32_t, kPublicApi); // NOLINT -SELECTION_REGISTER(float, uint32_t, kRadix8bits); // NOLINT -SELECTION_REGISTER(float, uint32_t, kRadix11bits); // NOLINT -SELECTION_REGISTER(float, uint32_t, kRadix11bitsExtraPass); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpAuto); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpImmediate); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpFiltered); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpDistributed); // NOLINT -SELECTION_REGISTER(float, uint32_t, kWarpDistributedShm); // NOLINT +SELECTION_REGISTER(float, uint32_t, kPublicApi); // NOLINT +SELECTION_REGISTER(float, uint32_t, kRadix8bits); // NOLINT +SELECTION_REGISTER(float, uint32_t, kRadix11bits); // NOLINT +SELECTION_REGISTER(float, uint32_t, kRadix11bitsExtraPass); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpAuto); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpImmediate); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpFiltered); // NOLINT +SELECTION_REGISTER(float, uint32_t, kWarpDistributed); // NOLINT 
+SELECTION_REGISTER(float, uint32_t, kWarpDistributedShm); // NOLINT SELECTION_REGISTER(double, uint32_t, kRadix8bits); // NOLINT SELECTION_REGISTER(double, uint32_t, kRadix11bits); // NOLINT SELECTION_REGISTER(double, uint32_t, kRadix11bitsExtraPass); // NOLINT SELECTION_REGISTER(double, uint32_t, kWarpAuto); // NOLINT -SELECTION_REGISTER(double, int64_t, kRadix8bits); // NOLINT -SELECTION_REGISTER(double, int64_t, kRadix11bits); // NOLINT -SELECTION_REGISTER(double, int64_t, kRadix11bitsExtraPass); // NOLINT -SELECTION_REGISTER(double, int64_t, kWarpImmediate); // NOLINT -SELECTION_REGISTER(double, int64_t, kWarpFiltered); // NOLINT -SELECTION_REGISTER(double, int64_t, kWarpDistributed); // NOLINT -SELECTION_REGISTER(double, int64_t, kWarpDistributedShm); // NOLINT +SELECTION_REGISTER(double, int64_t, kRadix8bits); // NOLINT +SELECTION_REGISTER(double, int64_t, kRadix11bits); // NOLINT +SELECTION_REGISTER(double, int64_t, kRadix11bitsExtraPass); // NOLINT +SELECTION_REGISTER(double, int64_t, kWarpImmediate); // NOLINT +SELECTION_REGISTER(double, int64_t, kWarpFiltered); // NOLINT +SELECTION_REGISTER(double, int64_t, kWarpDistributed); // NOLINT +SELECTION_REGISTER(double, int64_t, kWarpDistributedShm); // NOLINT // For learning a heuristic of which selection algorithm to use, we // have a couple of additional constraints when generating the dataset: diff --git a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh index 866a0ebdfa..ade3a6e348 100644 --- a/cpp/include/raft/cluster/detail/kmeans_balanced.cuh +++ b/cpp/include/raft/cluster/detail/kmeans_balanced.cuh @@ -438,7 +438,7 @@ __global__ void __launch_bounds__((WarpSize * BlockDimY)) adjust_centers_kernel(MathT* centers, // [n_clusters, dim] IdxT n_clusters, IdxT dim, - const T* dataset, // [n_rows, dim] + const T* dataset, // [n_rows, dim] IdxT n_rows, const LabelT* labels, // [n_rows] const CounterT* cluster_sizes, // [n_clusters] diff --git 
a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp index 328080da1f..8e41aa96f3 100644 --- a/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp +++ b/cpp/include/raft/core/detail/mdspan_numpy_serializer.hpp @@ -75,7 +75,7 @@ namespace numpy_serializer { #if RAFT_SYSTEM_LITTLE_ENDIAN == 1 #define RAFT_NUMPY_HOST_ENDIAN_CHAR RAFT_NUMPY_LITTLE_ENDIAN_CHAR -#else // RAFT_SYSTEM_LITTLE_ENDIAN == 1 +#else // RAFT_SYSTEM_LITTLE_ENDIAN == 1 #define RAFT_NUMPY_HOST_ENDIAN_CHAR RAFT_NUMPY_BIG_ENDIAN_CHAR #endif // RAFT_SYSTEM_LITTLE_ENDIAN == 1 diff --git a/cpp/include/raft/core/detail/nvtx.hpp b/cpp/include/raft/core/detail/nvtx.hpp index e734c99029..f077a49b77 100644 --- a/cpp/include/raft/core/detail/nvtx.hpp +++ b/cpp/include/raft/core/detail/nvtx.hpp @@ -193,7 +193,7 @@ inline void pop_range() } // namespace raft::common::nvtx::detail -#else // NVTX_ENABLED +#else // NVTX_ENABLED namespace raft::common::nvtx::detail { diff --git a/cpp/include/raft/core/kvp.hpp b/cpp/include/raft/core/kvp.hpp index 2e0d1117a1..192d160d45 100644 --- a/cpp/include/raft/core/kvp.hpp +++ b/cpp/include/raft/core/kvp.hpp @@ -32,8 +32,8 @@ struct KeyValuePair { typedef _Key Key; ///< Key data type typedef _Value Value; ///< Value data type - Key key; ///< Item key - Value value; ///< Item value + Key key; ///< Item key + Value value; ///< Item value /// Constructor RAFT_INLINE_FUNCTION KeyValuePair() {} diff --git a/cpp/include/raft/core/resource/resource_types.hpp b/cpp/include/raft/core/resource/resource_types.hpp index 2dc4eb1f9d..8e331293bf 100644 --- a/cpp/include/raft/core/resource/resource_types.hpp +++ b/cpp/include/raft/core/resource/resource_types.hpp @@ -42,7 +42,7 @@ enum resource_type { THRUST_POLICY, // thrust execution policy WORKSPACE_RESOURCE, // rmm device memory resource - LAST_KEY // reserved for the last key + LAST_KEY // reserved for the last key }; /** diff --git 
a/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h b/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h index 10827a8778..f659ed256d 100644 --- a/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h +++ b/cpp/include/raft/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast.h @@ -397,7 +397,7 @@ class EpilogueWithBroadcastCustom : public EpilogueBase diff --git a/cpp/include/raft/distance/detail/fused_l2_nn.cuh b/cpp/include/raft/distance/detail/fused_l2_nn.cuh index 68922943f4..f0f12acdb1 100644 --- a/cpp/include/raft/distance/detail/fused_l2_nn.cuh +++ b/cpp/include/raft/distance/detail/fused_l2_nn.cuh @@ -16,11 +16,11 @@ #pragma once -#include // size_t -#include // std::numeric_limits -#include // raft::KeyValuePair -#include // raft::identity_op -#include // ops::l2_exp_distance_op +#include // size_t +#include // std::numeric_limits +#include // raft::KeyValuePair +#include // raft::identity_op +#include // ops::l2_exp_distance_op #include #include // PairwiseDistances #include // Policy diff --git a/cpp/include/raft/distance/detail/masked_distance_base.cuh b/cpp/include/raft/distance/detail/masked_distance_base.cuh index 5a33c9ce4a..55da634145 100644 --- a/cpp/include/raft/distance/detail/masked_distance_base.cuh +++ b/cpp/include/raft/distance/detail/masked_distance_base.cuh @@ -217,7 +217,7 @@ struct MaskedDistances : public BaseClass { } // tile_idx_n } // idx_g rowEpilog_op(tile_idx_m); - } // tile_idx_m + } // tile_idx_m } private: diff --git a/cpp/include/raft/distance/detail/pairwise_distance_base.cuh b/cpp/include/raft/distance/detail/pairwise_distance_base.cuh index 58b5daa8ca..c6b09be31e 100644 --- a/cpp/include/raft/distance/detail/pairwise_distance_base.cuh +++ b/cpp/include/raft/distance/detail/pairwise_distance_base.cuh @@ -18,7 +18,7 @@ #include // ceildiv #include // RAFT_CUDA_TRY -#include // size_t +#include // size_t namespace 
raft { namespace distance { diff --git a/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh b/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh index dd58ab4328..e1dc6f9b37 100644 --- a/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh +++ b/cpp/include/raft/distance/detail/pairwise_matrix/dispatch-ext.cuh @@ -45,7 +45,7 @@ void pairwise_matrix_dispatch(OpT distance_op, cudaStream_t stream, bool is_row_major) RAFT_EXPLICIT; -}; // namespace raft::distance::detail +}; // namespace raft::distance::detail #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY diff --git a/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h b/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h index cd748b9e6b..951f8a0132 100644 --- a/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h +++ b/cpp/include/raft/distance/detail/predicated_tile_iterator_normvec.h @@ -57,8 +57,8 @@ namespace threadblock { /// /// Satisfies: ReadableTileIterator | PredicatedTileIterator | ForwardTileIterator /// -template diff --git a/cpp/include/raft/distance/distance-ext.cuh b/cpp/include/raft/distance/distance-ext.cuh index 3f7f2b0a23..7171ba605f 100644 --- a/cpp/include/raft/distance/distance-ext.cuh +++ b/cpp/include/raft/distance/distance-ext.cuh @@ -140,8 +140,8 @@ void pairwise_distance(raft::resources const& handle, raft::distance::DistanceType metric, Type metric_arg = 2.0f) RAFT_EXPLICIT; -}; // namespace distance -}; // namespace raft +}; // namespace distance +}; // namespace raft #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY diff --git a/cpp/include/raft/linalg/add.cuh b/cpp/include/raft/linalg/add.cuh index 30f4a2d167..b2cd736c57 100644 --- a/cpp/include/raft/linalg/add.cuh +++ b/cpp/include/raft/linalg/add.cuh @@ -217,7 +217,7 @@ void add_scalar(raft::resources const& handle, /** @} */ // end of group add -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif 
\ No newline at end of file diff --git a/cpp/include/raft/linalg/binary_op.cuh b/cpp/include/raft/linalg/binary_op.cuh index f6889e959b..03beb1d1d1 100644 --- a/cpp/include/raft/linalg/binary_op.cuh +++ b/cpp/include/raft/linalg/binary_op.cuh @@ -82,7 +82,7 @@ void binary_op(raft::resources const& handle, InType in1, InType in2, OutType ou /** @} */ // end of group binary_op -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/coalesced_reduction.cuh b/cpp/include/raft/linalg/coalesced_reduction.cuh index 5609656234..afa58d73fc 100644 --- a/cpp/include/raft/linalg/coalesced_reduction.cuh +++ b/cpp/include/raft/linalg/coalesced_reduction.cuh @@ -160,7 +160,7 @@ void coalesced_reduction(raft::resources const& handle, /** @} */ // end of group coalesced_reduction -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/contractions.cuh b/cpp/include/raft/linalg/contractions.cuh index 3b1e8c41c4..cb6488bedf 100644 --- a/cpp/include/raft/linalg/contractions.cuh +++ b/cpp/include/raft/linalg/contractions.cuh @@ -100,7 +100,7 @@ struct KernelPolicy { SmemSize = 2 * SmemPage * sizeof(DataT), }; // enum -}; // struct KernelPolicy +}; // struct KernelPolicy template struct ColKernelPolicy { diff --git a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp index 5a7356a4c2..d15e343c9a 100644 --- a/cpp/include/raft/linalg/detail/cublas_wrappers.hpp +++ b/cpp/include/raft/linalg/detail/cublas_wrappers.hpp @@ -550,7 +550,7 @@ cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, template <> inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, // NOLINT int n, - float* const A[], // NOLINT + float* const A[], // NOLINT int lda, int* P, int* info, @@ -564,7 +564,7 @@ inline cublasStatus_t 
cublasgetrfBatched(cublasHandle_t handle, // NOLINT template <> inline cublasStatus_t cublasgetrfBatched(cublasHandle_t handle, // NOLINT int n, - double* const A[], // NOLINT + double* const A[], // NOLINT int lda, int* P, int* info, diff --git a/cpp/include/raft/linalg/divide.cuh b/cpp/include/raft/linalg/divide.cuh index d617b065da..17ec5c3136 100644 --- a/cpp/include/raft/linalg/divide.cuh +++ b/cpp/include/raft/linalg/divide.cuh @@ -96,7 +96,7 @@ void divide_scalar(raft::resources const& handle, /** @} */ // end of group add -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/eig.cuh b/cpp/include/raft/linalg/eig.cuh index 954bf19334..57f3b61388 100644 --- a/cpp/include/raft/linalg/eig.cuh +++ b/cpp/include/raft/linalg/eig.cuh @@ -220,7 +220,7 @@ void eig_jacobi(raft::resources const& handle, /** @} */ // end of eig -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/gemv.cuh b/cpp/include/raft/linalg/gemv.cuh index 640964d018..610ea07f96 100644 --- a/cpp/include/raft/linalg/gemv.cuh +++ b/cpp/include/raft/linalg/gemv.cuh @@ -305,6 +305,6 @@ void gemv(raft::resources const& handle, } /** @} */ // end of gemv -}; // namespace linalg -}; // namespace raft +}; // namespace linalg +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/lstsq.cuh b/cpp/include/raft/linalg/lstsq.cuh index 20588cbe17..21575d7806 100644 --- a/cpp/include/raft/linalg/lstsq.cuh +++ b/cpp/include/raft/linalg/lstsq.cuh @@ -245,7 +245,7 @@ void lstsq_qr(raft::resources const& handle, /** @} */ // end of lstsq -}; // namespace linalg -}; // namespace raft +}; // namespace linalg +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/matrix_vector_op.cuh 
b/cpp/include/raft/linalg/matrix_vector_op.cuh index e620d227eb..a65f6ed390 100644 --- a/cpp/include/raft/linalg/matrix_vector_op.cuh +++ b/cpp/include/raft/linalg/matrix_vector_op.cuh @@ -240,7 +240,7 @@ void matrix_vector_op(raft::resources const& handle, /** @} */ // end of group matrix_vector_op -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/mean_squared_error.cuh b/cpp/include/raft/linalg/mean_squared_error.cuh index d45f11524d..b59a0fcef7 100644 --- a/cpp/include/raft/linalg/mean_squared_error.cuh +++ b/cpp/include/raft/linalg/mean_squared_error.cuh @@ -79,7 +79,7 @@ void mean_squared_error(raft::resources const& handle, /** @} */ // end of group mean_squared_error -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/multiply.cuh b/cpp/include/raft/linalg/multiply.cuh index 3ade108235..9973a3cc6c 100644 --- a/cpp/include/raft/linalg/multiply.cuh +++ b/cpp/include/raft/linalg/multiply.cuh @@ -98,7 +98,7 @@ void multiply_scalar( /** @} */ // end of group multiply -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/power.cuh b/cpp/include/raft/linalg/power.cuh index 26ac1035ca..5c7dcbd5cf 100644 --- a/cpp/include/raft/linalg/power.cuh +++ b/cpp/include/raft/linalg/power.cuh @@ -154,7 +154,7 @@ void power_scalar( /** @} */ // end of group add -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/reduce.cuh b/cpp/include/raft/linalg/reduce.cuh index a3d0ef71d0..3181dd0224 100644 --- a/cpp/include/raft/linalg/reduce.cuh +++ b/cpp/include/raft/linalg/reduce.cuh @@ -162,7 +162,7 @@ void reduce(raft::resources const& 
handle, /** @} */ // end of group reduction -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/reduce_cols_by_key.cuh b/cpp/include/raft/linalg/reduce_cols_by_key.cuh index 6eaf1e2ba7..5ed0fb7407 100644 --- a/cpp/include/raft/linalg/reduce_cols_by_key.cuh +++ b/cpp/include/raft/linalg/reduce_cols_by_key.cuh @@ -113,7 +113,7 @@ void reduce_cols_by_key( /** @} */ // end of group reduce_cols_by_key -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/reduce_rows_by_key.cuh b/cpp/include/raft/linalg/reduce_rows_by_key.cuh index fa624b2191..7d93c3946f 100644 --- a/cpp/include/raft/linalg/reduce_rows_by_key.cuh +++ b/cpp/include/raft/linalg/reduce_rows_by_key.cuh @@ -192,7 +192,7 @@ void reduce_rows_by_key( /** @} */ // end of group reduce_rows_by_key -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/rsvd.cuh b/cpp/include/raft/linalg/rsvd.cuh index 2dece5b957..163f360481 100644 --- a/cpp/include/raft/linalg/rsvd.cuh +++ b/cpp/include/raft/linalg/rsvd.cuh @@ -876,7 +876,7 @@ void randomized_svd(const raft::resources& handle, /** @} */ // end of group rsvd -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/sqrt.cuh b/cpp/include/raft/linalg/sqrt.cuh index 99754c4eb2..81b7ab7dec 100644 --- a/cpp/include/raft/linalg/sqrt.cuh +++ b/cpp/include/raft/linalg/sqrt.cuh @@ -84,7 +84,7 @@ void sqrt(raft::resources const& handle, InType in, OutType out) /** @} */ // end of group add -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end 
namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/strided_reduction.cuh b/cpp/include/raft/linalg/strided_reduction.cuh index f971d0e40b..c7ff000e00 100644 --- a/cpp/include/raft/linalg/strided_reduction.cuh +++ b/cpp/include/raft/linalg/strided_reduction.cuh @@ -171,7 +171,7 @@ void strided_reduction(raft::resources const& handle, /** @} */ // end of group strided_reduction -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/subtract.cuh b/cpp/include/raft/linalg/subtract.cuh index 688e60a806..f4243f9dc1 100644 --- a/cpp/include/raft/linalg/subtract.cuh +++ b/cpp/include/raft/linalg/subtract.cuh @@ -223,7 +223,7 @@ void subtract_scalar( /** @} */ // end of group subtract -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/svd.cuh b/cpp/include/raft/linalg/svd.cuh index 08f9462ba9..f7071de75b 100644 --- a/cpp/include/raft/linalg/svd.cuh +++ b/cpp/include/raft/linalg/svd.cuh @@ -416,7 +416,7 @@ void svd_reconstruction(raft::resources const& handle, /** @} */ // end of group svd -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/linalg/ternary_op.cuh b/cpp/include/raft/linalg/ternary_op.cuh index f46133abd9..67b04c6791 100644 --- a/cpp/include/raft/linalg/ternary_op.cuh +++ b/cpp/include/raft/linalg/ternary_op.cuh @@ -83,7 +83,7 @@ void ternary_op( /** @} */ // end of group ternary_op -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/transpose.cuh b/cpp/include/raft/linalg/transpose.cuh index afe1962223..1b46082fbe 100644 --- 
a/cpp/include/raft/linalg/transpose.cuh +++ b/cpp/include/raft/linalg/transpose.cuh @@ -103,7 +103,7 @@ auto transpose(raft::resources const& handle, /** @} */ // end of group transpose -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/linalg/unary_op.cuh b/cpp/include/raft/linalg/unary_op.cuh index 47a432f415..5ebe27923a 100644 --- a/cpp/include/raft/linalg/unary_op.cuh +++ b/cpp/include/raft/linalg/unary_op.cuh @@ -125,7 +125,7 @@ void write_only_unary_op(const raft::resources& handle, OutType out, Lambda op) /** @} */ // end of group unary_op -}; // end namespace linalg -}; // end namespace raft +}; // end namespace linalg +}; // end namespace raft #endif diff --git a/cpp/include/raft/matrix/col_wise_sort.cuh b/cpp/include/raft/matrix/col_wise_sort.cuh index 887741ad71..c94b2506d3 100644 --- a/cpp/include/raft/matrix/col_wise_sort.cuh +++ b/cpp/include/raft/matrix/col_wise_sort.cuh @@ -134,6 +134,6 @@ void sort_cols_per_row(Args... 
args) /** @} */ // end of group col_wise_sort -}; // end namespace raft::matrix +}; // end namespace raft::matrix #endif \ No newline at end of file diff --git a/cpp/include/raft/matrix/detail/select_k-ext.cuh b/cpp/include/raft/matrix/detail/select_k-ext.cuh index f934d7e3b4..870f0c3240 100644 --- a/cpp/include/raft/matrix/detail/select_k-ext.cuh +++ b/cpp/include/raft/matrix/detail/select_k-ext.cuh @@ -16,8 +16,8 @@ #pragma once -#include // uint32_t -#include // __half +#include // uint32_t +#include // __half #include #include // RAFT_EXPLICIT #include // rmm:cuda_stream_view diff --git a/cpp/include/raft/matrix/detail/select_warpsort.cuh b/cpp/include/raft/matrix/detail/select_warpsort.cuh index dc86a04733..2927604e7d 100644 --- a/cpp/include/raft/matrix/detail/select_warpsort.cuh +++ b/cpp/include/raft/matrix/detail/select_warpsort.cuh @@ -959,7 +959,7 @@ void calc_launch_parameter( if (batch_size >= size_t(another_min_grid_size) // still have enough work && another_block_size < block_size // protect against an infinite loop && another_min_grid_size * another_block_size > - min_grid_size * block_size // improve occupancy + min_grid_size * block_size // improve occupancy ) { block_size = another_block_size; min_grid_size = another_min_grid_size; diff --git a/cpp/include/raft/neighbors/ann_types.hpp b/cpp/include/raft/neighbors/ann_types.hpp index 469d3c09d4..c17be4a8ff 100644 --- a/cpp/include/raft/neighbors/ann_types.hpp +++ b/cpp/include/raft/neighbors/ann_types.hpp @@ -49,4 +49,4 @@ struct search_params {}; /** @} */ // end group ann_types -}; // namespace raft::neighbors::ann +}; // namespace raft::neighbors::ann diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh index d19d7e7904..80e964df57 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh @@ -28,6 +28,7 @@ #include #include #include +#include 
#include #include @@ -46,6 +47,7 @@ void build_knn_graph(raft::resources const& res, std::optional build_params = std::nullopt, std::optional search_params = std::nullopt) { + resource::detail::warn_non_pool_workspace(res, "raft::neighbors::cagra::build"); RAFT_EXPECTS(!build_params || build_params->metric == distance::DistanceType::L2Expanded, "Currently only L2Expanded metric is supported"); diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 8190817b5b..b484fa55f9 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -22,6 +22,8 @@ #include #include +#include +#include #include #include #include @@ -60,17 +62,21 @@ void search_main(raft::resources const& res, raft::device_matrix_view neighbors, raft::device_matrix_view distances) { + resource::detail::warn_non_pool_workspace(res, "raft::neighbors::cagra::search"); RAFT_LOG_DEBUG("# dataset size = %lu, dim = %lu\n", static_cast(index.dataset().extent(0)), static_cast(index.dataset().extent(1))); RAFT_LOG_DEBUG("# query size = %lu, dim = %lu\n", static_cast(queries.extent(0)), static_cast(queries.extent(1))); - RAFT_EXPECTS(queries.extent(1) == index.dim(), "Querise and index dim must match"); + RAFT_EXPECTS(queries.extent(1) == index.dim(), "Queries and index dim must match"); const uint32_t topk = neighbors.extent(1); if (params.max_queries == 0) { params.max_queries = queries.extent(0); } + common::nvtx::range fun_scope( + "cagra::search(max_queries = %u, k = %u, dim = %zu)", params.max_queries, topk, index.dim()); + std::unique_ptr> plan = factory::create( res, params, index.dim(), index.graph_degree(), topk); diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index 7ad44482e7..8261f637e1 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ 
b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -54,6 +55,8 @@ void serialize(raft::resources const& res, const index& index_, bool include_dataset) { + common::nvtx::range fun_scope("cagra::serialize"); + RAFT_LOG_DEBUG( "Saving CAGRA index, size %zu, dim %u", static_cast(index_.size()), index_.dim()); @@ -113,6 +116,8 @@ void serialize(raft::resources const& res, template auto deserialize(raft::resources const& res, std::istream& is) -> index { + common::nvtx::range fun_scope("cagra::deserialize"); + char dtype_string[4]; is.read(dtype_string, 4); diff --git a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp index 2758148942..47e976e252 100644 --- a/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/compute_distance.hpp @@ -53,7 +53,7 @@ _RAFT_DEVICE void compute_distance_to_random_nodes( INDEX_T* const result_indices_ptr, // [num_pickup] DISTANCE_T* const result_distances_ptr, // [num_pickup] const float* const query_buffer, - const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] + const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] const std::size_t dataset_dim, const std::size_t dataset_size, const std::size_t dataset_ld, diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh index 314ab6e6a6..6ea1e34032 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh @@ -190,9 +190,9 @@ struct search : public search_plan_impl { void operator()(raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* 
const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index 0015b4a791..4fc051ac09 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -48,7 +48,7 @@ namespace multi_cta_search { template __device__ void pickup_next_parents(INDEX_T* const next_parent_indices, // [search_width] const uint32_t search_width, - INDEX_T* const itopk_indices, // [num_itopk] + INDEX_T* const itopk_indices, // [num_itopk] const size_t num_itopk, uint32_t* const terminate_flag) { @@ -86,8 +86,8 @@ __device__ void pickup_next_parents(INDEX_T* const next_parent_indices, // [sea } template -__device__ inline void topk_by_bitonic_sort(float* distances, // [num_elements] - INDEX_T* indices, // [num_elements] +__device__ inline void topk_by_bitonic_sort(float* distances, // [num_elements] + INDEX_T* indices, // [num_elements] const uint32_t num_elements, const uint32_t num_itopk // num_itopk <= num_elements ) @@ -144,7 +144,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel( const uint32_t graph_degree, const unsigned num_distilation, const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] + const INDEX_T* seed_ptr, // [num_queries, num_seeds] const uint32_t num_seeds, INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const uint32_t hash_bitlen, @@ -454,9 +454,9 @@ template dataset, raft::device_matrix_view graph, - INDEX_T* const 
topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index e664764721..f312226f42 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -98,7 +98,7 @@ __global__ void random_pickup_kernel( const std::size_t num_pickup, const unsigned num_distilation, const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] + const INDEX_T* seed_ptr, // [num_queries, num_seeds] const uint32_t num_seeds, INDEX_T* const result_indices_ptr, // [num_queries, ldr] DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] @@ -170,7 +170,7 @@ void random_pickup(const DATA_T* const dataset_ptr, // [dataset_size, dataset_d const std::size_t num_pickup, const unsigned num_distilation, const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] + const INDEX_T* seed_ptr, // [num_queries, num_seeds] const uint32_t num_seeds, INDEX_T* const result_indices_ptr, // [num_queries, ldr] DISTANCE_T* const result_distances_ptr, // [num_queries, ldr] @@ -310,18 +310,18 @@ template = search_width * graph_degree + INDEX_T* const result_indices_ptr, // [num_queries, ldd] + DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] + const std::uint32_t ldd // (*) ldd >= search_width * graph_degree ) { const uint32_t ldb = hashmap::get_size(hash_bitlen); @@ -371,19 +371,19 @@ 
template = search_width * graph_degree + INDEX_T* const result_indices_ptr, // [num_queries, ldd] + DISTANCE_T* const result_distances_ptr, // [num_queries, ldd] + const std::uint32_t ldd, // (*) ldd >= search_width * graph_degree cudaStream_t cuda_stream = 0) { const auto block_size = 128; @@ -437,7 +437,7 @@ void remove_parent_bit(const std::uint32_t num_queries, } template -__global__ void batched_memcpy_kernel(T* const dst, // [batch_size, ld_dst] +__global__ void batched_memcpy_kernel(T* const dst, // [batch_size, ld_dst] const uint64_t ld_dst, const T* const src, // [batch_size, ld_src] const uint64_t ld_src, @@ -452,7 +452,7 @@ __global__ void batched_memcpy_kernel(T* const dst, // [batch_size, ld_ds } template -void batched_memcpy(T* const dst, // [batch_size, ld_dst] +void batched_memcpy(T* const dst, // [batch_size, ld_dst] const uint64_t ld_dst, const T* const src, // [batch_size, ld_src] const uint64_t ld_src, @@ -596,9 +596,9 @@ struct search : search_plan_impl { void operator()(raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh index e6966987c8..33c77db61e 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh @@ -111,7 +111,7 @@ struct search_plan_impl : public search_plan_impl_base { DISTANCE_T* const 
result_distances_ptr, // [num_queries, topk] const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const std::uint32_t num_queries, - const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] std::uint32_t* const num_executed_iterations, // [num_queries] uint32_t topk){}; diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh index 96de83369d..45dd535e1d 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh @@ -228,9 +228,9 @@ struct search : search_plan_impl { void operator()(raft::resources const& res, raft::device_matrix_view dataset, raft::device_matrix_view graph, - INDEX_T* const result_indices_ptr, // [num_queries, topk] - DISTANCE_T* const result_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const result_indices_ptr, // [num_queries, topk] + DISTANCE_T* const result_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const std::uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] std::uint32_t* const num_executed_iterations, // [num_queries] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh index f7c43fe11c..5f5df1a818 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh @@ -29,9 +29,9 @@ template dataset, raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const topk_indices_ptr, // 
[num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index 31d9c9fffa..81325fd5da 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -192,8 +192,8 @@ __device__ inline void topk_by_bitonic_sort_1st(float* candidate_distances, // } template -__device__ inline void topk_by_bitonic_sort_2nd(float* itopk_distances, // [num_itopk] - IdxT* itopk_indices, // [num_itopk] +__device__ inline void topk_by_bitonic_sort_2nd(float* itopk_distances, // [num_itopk] + IdxT* itopk_indices, // [num_itopk] const std::uint32_t num_itopk, float* candidate_distances, // [num_candidates] IdxT* candidate_indices, // [num_candidates] @@ -401,8 +401,8 @@ template -__device__ void topk_by_bitonic_sort(float* itopk_distances, // [num_itopk] - IdxT* itopk_indices, // [num_itopk] +__device__ void topk_by_bitonic_sort(float* itopk_distances, // [num_itopk] + IdxT* itopk_indices, // [num_itopk] const std::uint32_t num_itopk, float* candidate_distances, // [num_candidates] IdxT* candidate_indices, // [num_candidates] @@ -463,7 +463,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ void search_kernel(INDEX_T* const result_indices_ptr, // [num_queries, top_k] DISTANCE_T* const result_distances_ptr, // [num_queries, top_k] const std::uint32_t top_k, - const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] + const DATA_T* const dataset_ptr, // [dataset_size, dataset_dim] const std::size_t dataset_dim, const std::size_t dataset_size, const std::size_t dataset_ld, // stride of 
dataset @@ -472,7 +472,7 @@ __launch_bounds__(BLOCK_SIZE, BLOCK_COUNT) __global__ const std::uint32_t graph_degree, const unsigned num_distilation, const uint64_t rand_xor_mask, - const INDEX_T* seed_ptr, // [num_queries, num_seeds] + const INDEX_T* seed_ptr, // [num_queries, num_seeds] const uint32_t num_seeds, INDEX_T* const visited_hashmap_ptr, // [num_queries, 1 << hash_bitlen] const std::uint32_t internal_topk, @@ -830,9 +830,9 @@ template dataset, raft::device_matrix_view graph, - INDEX_T* const topk_indices_ptr, // [num_queries, topk] - DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] - const DATA_T* const queries_ptr, // [num_queries, dataset_dim] + INDEX_T* const topk_indices_ptr, // [num_queries, topk] + DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, const INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] diff --git a/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh b/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh index dd73558f86..0fcfe2cc16 100644 --- a/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/topk_for_cagra/topk_core.cuh @@ -183,9 +183,9 @@ __device__ inline void update_histogram(int itr, uint32_t threshold, uint32_t& num_bins, uint32_t& shift, - const T* x, // [nx,] + const T* x, // [nx,] uint32_t nx, - uint32_t* hist, // [num_bins] + uint32_t* hist, // [num_bins] uint8_t* state, uint32_t* output, // [topk] uint32_t* output_count) @@ -761,16 +761,16 @@ __launch_bounds__(1024, 1) __global__ void kern_topk_cta_11(uint32_t topk, uint32_t size_batch, uint32_t len_x, - const uint32_t* _x, // [size_batch, ld_x,] + const uint32_t* _x, // [size_batch, ld_x,] uint32_t ld_x, const ValT* _in_vals, // [size_batch, ld_iv,] uint32_t ld_iv, - uint32_t* _y, // [size_batch, 
ld_y,] + uint32_t* _y, // [size_batch, ld_y,] uint32_t ld_y, - ValT* _out_vals, // [size_batch, ld_ov,] + ValT* _out_vals, // [size_batch, ld_ov,] uint32_t ld_ov, - uint8_t* _state, // [size_batch, ...,] - uint32_t* _hints, // [size_batch,] + uint8_t* _state, // [size_batch, ...,] + uint32_t* _hints, // [size_batch,] bool sort) { const uint32_t i_batch = blockIdx.x; diff --git a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh index 93eeb0dead..c0f856103a 100644 --- a/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_flat_search-inl.cuh @@ -16,7 +16,7 @@ #pragma once -#include // RAFT_LOG_TRACE +#include // RAFT_LOG_TRACE #include #include // raft::resources #include // is_min_close, DistanceType diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh index 199cb74fbe..47c10de200 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_build.cuh @@ -220,7 +220,7 @@ void select_residuals(raft::resources const& handle, template void flat_compute_residuals( raft::resources const& handle, - float* residuals, // [n_rows, rot_dim] + float* residuals, // [n_rows, rot_dim] IdxT n_rows, device_matrix_view rotation_matrix, // [rot_dim, dim] device_matrix_view centers, // [n_lists, dim_ext] diff --git a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh index 2ab216b13b..7c5b523a8b 100644 --- a/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh +++ b/cpp/include/raft/neighbors/detail/ivf_pq_compute_similarity-inl.cuh @@ -898,7 +898,7 @@ auto compute_similarity_select(const cudaDeviceProp& dev_props, } { - if (selected_perf.occupancy <= 0.0 // no candidate yet + if (selected_perf.occupancy <= 0.0 // no candidate yet || (selected_perf.occupancy 
< cur.occupancy * kTargetOccupancy && selected_perf.shmem_use >= cur.shmem_use) // much improved occupancy ) { diff --git a/cpp/include/raft/neighbors/detail/refine_host-ext.hpp b/cpp/include/raft/neighbors/detail/refine_host-ext.hpp index 3ce2dc3eb5..2a863b47b3 100644 --- a/cpp/include/raft/neighbors/detail/refine_host-ext.hpp +++ b/cpp/include/raft/neighbors/detail/refine_host-ext.hpp @@ -16,7 +16,7 @@ #pragma once -#include // int64_t +#include // int64_t #include // raft::host_matrix_view #include // raft::distance::DistanceType diff --git a/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh b/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh index c000a4810b..a6ed17e251 100644 --- a/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh +++ b/cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh @@ -36,7 +36,7 @@ void select_k(const key_t* inK, bool select_min, int k, cudaStream_t stream) RAFT_EXPLICIT; -}; // namespace raft::neighbors::detail +}; // namespace raft::neighbors::detail #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY diff --git a/cpp/include/raft/neighbors/ivf_flat-ext.cuh b/cpp/include/raft/neighbors/ivf_flat-ext.cuh index 848703c9b5..8dbe7587ff 100644 --- a/cpp/include/raft/neighbors/ivf_flat-ext.cuh +++ b/cpp/include/raft/neighbors/ivf_flat-ext.cuh @@ -16,10 +16,10 @@ #pragma once -#include // int64_t +#include // int64_t -#include // raft::device_matrix_view -#include // raft::resources +#include // raft::device_matrix_view +#include // raft::resources #include #include // raft::neighbors::ivf_flat::index #include // RAFT_EXPLICIT diff --git a/cpp/include/raft/neighbors/ivf_pq-ext.cuh b/cpp/include/raft/neighbors/ivf_pq-ext.cuh index fcfe837e2d..4a60cfc09d 100644 --- a/cpp/include/raft/neighbors/ivf_pq-ext.cuh +++ b/cpp/include/raft/neighbors/ivf_pq-ext.cuh @@ -16,7 +16,7 @@ #pragma once -#include // int64_t +#include // int64_t #include // raft::device_matrix_view #include // raft::resources diff --git 
a/cpp/include/raft/neighbors/refine-ext.cuh b/cpp/include/raft/neighbors/refine-ext.cuh index c1fd4676dc..3ccd3891b7 100644 --- a/cpp/include/raft/neighbors/refine-ext.cuh +++ b/cpp/include/raft/neighbors/refine-ext.cuh @@ -16,7 +16,7 @@ #pragma once -#include // int64_t +#include // int64_t #include // raft::device_matrix_view #include // // raft::host_matrix_view diff --git a/cpp/include/raft/sparse/distance/distance.cuh b/cpp/include/raft/sparse/distance/distance.cuh index b60940341a..702846f586 100644 --- a/cpp/include/raft/sparse/distance/distance.cuh +++ b/cpp/include/raft/sparse/distance/distance.cuh @@ -218,8 +218,8 @@ void pairwise_distance(raft::resources const& handle, /** @} */ // end of sparse_distance -}; // namespace distance -}; // namespace sparse -}; // namespace raft +}; // namespace distance +}; // namespace sparse +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/sparse/linalg/detail/norm.cuh b/cpp/include/raft/sparse/linalg/detail/norm.cuh index 3cb4a3e353..56ca2ebfa7 100644 --- a/cpp/include/raft/sparse/linalg/detail/norm.cuh +++ b/cpp/include/raft/sparse/linalg/detail/norm.cuh @@ -49,10 +49,10 @@ __global__ void csr_row_normalize_l1_kernel( // over each row and then divide the values in parallel. 
const int* ia, // csr row ex_scan (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num rows in csr + int nnz, // array of values and number of non-zeros + int m, // num rows in csr T* result) -{ // output array +{ // output array // row-based matrix 1 thread per row int row = (blockIdx.x * TPB_X) + threadIdx.x; @@ -95,8 +95,8 @@ __global__ void csr_row_normalize_l1_kernel( template void csr_row_normalize_l1(const int* ia, // csr row ex_scan (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num rows in csr + int nnz, // array of values and number of non-zeros + int m, // num rows in csr T* result, cudaStream_t stream) { // output array @@ -115,10 +115,10 @@ __global__ void csr_row_normalize_max_kernel( // over each row and then divide the values in parallel. const int* ia, // csr row ind array (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num total rows in csr + int nnz, // array of values and number of non-zeros + int m, // num total rows in csr T* result) -{ // output array +{ // output array // row-based matrix 1 thread per row int row = (blockIdx.x * TPB_X) + threadIdx.x; @@ -163,8 +163,8 @@ __global__ void csr_row_normalize_max_kernel( template void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num total rows in csr + int nnz, // array of values and number of non-zeros + int m, // num total rows in csr T* result, cudaStream_t stream) { diff --git a/cpp/include/raft/sparse/linalg/detail/spectral.cuh b/cpp/include/raft/sparse/linalg/detail/spectral.cuh index 545f218e63..51836ca9aa 100644 --- a/cpp/include/raft/sparse/linalg/detail/spectral.cuh +++ b/cpp/include/raft/sparse/linalg/detail/spectral.cuh @@ -69,7 +69,7 @@ void fit_embedding(raft::resources const& handle, handle, ro, ci, vs, n, nnz}; index_type neigvs 
= n_components + 1; - index_type maxiter = 4000; // default reset value (when set to 0); + index_type maxiter = 4000; // default reset value (when set to 0); value_type tol = 0.01; index_type restart_iter = 15 + neigvs; // what cugraph is using diff --git a/cpp/include/raft/sparse/linalg/norm.cuh b/cpp/include/raft/sparse/linalg/norm.cuh index f7ebf50db0..43dd182fe5 100644 --- a/cpp/include/raft/sparse/linalg/norm.cuh +++ b/cpp/include/raft/sparse/linalg/norm.cuh @@ -39,8 +39,8 @@ namespace linalg { template void csr_row_normalize_l1(const int* ia, // csr row ex_scan (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num rows in csr + int nnz, // array of values and number of non-zeros + int m, // num rows in csr T* result, cudaStream_t stream) { // output array @@ -60,8 +60,8 @@ void csr_row_normalize_l1(const int* ia, // csr row ex_scan (sorted by row) template void csr_row_normalize_max(const int* ia, // csr row ind array (sorted by row) const T* vals, - int nnz, // array of values and number of non-zeros - int m, // num total rows in csr + int nnz, // array of values and number of non-zeros + int m, // num total rows in csr T* result, cudaStream_t stream) { diff --git a/cpp/include/raft/sparse/neighbors/detail/knn.cuh b/cpp/include/raft/sparse/neighbors/detail/knn.cuh index f2be427367..ff644c000e 100644 --- a/cpp/include/raft/sparse/neighbors/detail/knn.cuh +++ b/cpp/include/raft/sparse/neighbors/detail/knn.cuh @@ -58,7 +58,7 @@ struct csr_batcher_t { void set_batch(int batch_num) { batch_start_ = batch_num * batch_size_; - batch_stop_ = batch_start_ + batch_size_ - 1; // zero-based indexing + batch_stop_ = batch_start_ + batch_size_ - 1; // zero-based indexing if (batch_stop_ >= total_rows_) batch_stop_ = total_rows_ - 1; // zero-based indexing diff --git a/cpp/include/raft/sparse/solver/mst_solver.cuh b/cpp/include/raft/sparse/solver/mst_solver.cuh index bfedb9ce2a..193431251f 100644 --- 
a/cpp/include/raft/sparse/solver/mst_solver.cuh +++ b/cpp/include/raft/sparse/solver/mst_solver.cuh @@ -78,10 +78,10 @@ class MST_solver { rmm::device_uvector altered_weights; // weights to be used for mst rmm::device_scalar mst_edge_count; // total number of edges added after every iteration rmm::device_scalar - prev_mst_edge_count; // total number of edges up to the previous iteration - rmm::device_uvector mst_edge; // mst output - true if the edge belongs in mst - rmm::device_uvector next_color; // next iteration color - rmm::device_uvector color; // index of color that vertex points to + prev_mst_edge_count; // total number of edges up to the previous iteration + rmm::device_uvector mst_edge; // mst output - true if the edge belongs in mst + rmm::device_uvector next_color; // next iteration color + rmm::device_uvector color; // index of color that vertex points to // new src-dst pairs found per iteration rmm::device_uvector temp_src; diff --git a/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh index 95aeca64e5..70c5cec23f 100644 --- a/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh +++ b/cpp/include/raft/spatial/knn/detail/ball_cover/registers-ext.cuh @@ -61,7 +61,7 @@ void rbc_low_dim_pass_two(raft::resources const& handle, float weight, value_int* post_dists_counter) RAFT_EXPLICIT; -}; // namespace raft::spatial::knn::detail +}; // namespace raft::spatial::knn::detail #endif // RAFT_EXPLICIT_INSTANTIATE_ONLY diff --git a/cpp/include/raft/stats/adjusted_rand_index.cuh b/cpp/include/raft/stats/adjusted_rand_index.cuh index 7daa1792b1..1f97cd5f76 100644 --- a/cpp/include/raft/stats/adjusted_rand_index.cuh +++ b/cpp/include/raft/stats/adjusted_rand_index.cuh @@ -83,7 +83,7 @@ double adjusted_rand_index(raft::resources const& handle, /** @} */ // end group stats_adj_rand_index -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // 
end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/completeness_score.cuh b/cpp/include/raft/stats/completeness_score.cuh index 07fd61411d..b669e0de32 100644 --- a/cpp/include/raft/stats/completeness_score.cuh +++ b/cpp/include/raft/stats/completeness_score.cuh @@ -85,7 +85,7 @@ double completeness_score(raft::resources const& handle, /** @} */ // end group stats_completeness -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/cov.cuh b/cpp/include/raft/stats/cov.cuh index 67f44b0fde..ad5d233c0e 100644 --- a/cpp/include/raft/stats/cov.cuh +++ b/cpp/include/raft/stats/cov.cuh @@ -116,7 +116,7 @@ void cov(raft::resources const& handle, /** @} */ // end group stats_cov -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/entropy.cuh b/cpp/include/raft/stats/entropy.cuh index fcc49fefd2..fe432569ee 100644 --- a/cpp/include/raft/stats/entropy.cuh +++ b/cpp/include/raft/stats/entropy.cuh @@ -80,7 +80,7 @@ double entropy(raft::resources const& handle, /** @} */ // end group stats_entropy -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/histogram.cuh b/cpp/include/raft/stats/histogram.cuh index c374251359..8480d16316 100644 --- a/cpp/include/raft/stats/histogram.cuh +++ b/cpp/include/raft/stats/histogram.cuh @@ -115,7 +115,7 @@ void histogram(raft::resources const& handle, /** @} */ // end group stats_histogram -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif diff --git a/cpp/include/raft/stats/homogeneity_score.cuh b/cpp/include/raft/stats/homogeneity_score.cuh index ce7872d55d..311cd599f8 100644 --- 
a/cpp/include/raft/stats/homogeneity_score.cuh +++ b/cpp/include/raft/stats/homogeneity_score.cuh @@ -88,7 +88,7 @@ double homogeneity_score(raft::resources const& handle, /** @} */ // end group stats_homogeneity_score -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/kl_divergence.cuh b/cpp/include/raft/stats/kl_divergence.cuh index 2e01918d2a..b417cbd509 100644 --- a/cpp/include/raft/stats/kl_divergence.cuh +++ b/cpp/include/raft/stats/kl_divergence.cuh @@ -76,7 +76,7 @@ value_t kl_divergence(raft::resources const& handle, /** @} */ // end group kl_divergence -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif diff --git a/cpp/include/raft/stats/mean.cuh b/cpp/include/raft/stats/mean.cuh index 96c9ca3b5c..43d39cfd6c 100644 --- a/cpp/include/raft/stats/mean.cuh +++ b/cpp/include/raft/stats/mean.cuh @@ -93,7 +93,7 @@ void mean(raft::resources const& handle, /** @} */ // end group stats_mean -}; // namespace stats -}; // namespace raft +}; // namespace stats +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/mean_center.cuh b/cpp/include/raft/stats/mean_center.cuh index 48f5eb667f..83f9a8a941 100644 --- a/cpp/include/raft/stats/mean_center.cuh +++ b/cpp/include/raft/stats/mean_center.cuh @@ -160,7 +160,7 @@ void mean_add(raft::resources const& handle, /** @} */ // end group stats_mean_center -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/meanvar.cuh b/cpp/include/raft/stats/meanvar.cuh index f6127df701..5c27a6caf6 100644 --- a/cpp/include/raft/stats/meanvar.cuh +++ b/cpp/include/raft/stats/meanvar.cuh @@ -107,6 +107,6 @@ void meanvar(raft::resources const& handle, /** @} */ // end group stats_mean_var -}; // 
namespace raft::stats +}; // namespace raft::stats #endif diff --git a/cpp/include/raft/stats/minmax.cuh b/cpp/include/raft/stats/minmax.cuh index 0c5a62257d..5c5ff346a4 100644 --- a/cpp/include/raft/stats/minmax.cuh +++ b/cpp/include/raft/stats/minmax.cuh @@ -139,6 +139,6 @@ void minmax(raft::resources const& handle, /** @} */ // end group stats_minmax -}; // namespace stats -}; // namespace raft +}; // namespace stats +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/mutual_info_score.cuh b/cpp/include/raft/stats/mutual_info_score.cuh index 5c4ae43e09..5a334e9280 100644 --- a/cpp/include/raft/stats/mutual_info_score.cuh +++ b/cpp/include/raft/stats/mutual_info_score.cuh @@ -86,7 +86,7 @@ double mutual_info_score(raft::resources const& handle, /** @} */ // end group stats_mutual_info -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/rand_index.cuh b/cpp/include/raft/stats/rand_index.cuh index 6a208c5492..a21a0c0dc5 100644 --- a/cpp/include/raft/stats/rand_index.cuh +++ b/cpp/include/raft/stats/rand_index.cuh @@ -72,7 +72,7 @@ double rand_index(raft::resources const& handle, /** @} */ // end group stats_rand_index -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/stddev.cuh b/cpp/include/raft/stats/stddev.cuh index 6349f8fd11..0a67bd2325 100644 --- a/cpp/include/raft/stats/stddev.cuh +++ b/cpp/include/raft/stats/stddev.cuh @@ -182,7 +182,7 @@ void vars(raft::resources const& handle, /** @} */ // end group stats_variance -}; // namespace stats -}; // namespace raft +}; // namespace stats +}; // namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/sum.cuh b/cpp/include/raft/stats/sum.cuh index 2ac9cd9eb5..2c3ed1b83e 100644 --- 
a/cpp/include/raft/stats/sum.cuh +++ b/cpp/include/raft/stats/sum.cuh @@ -85,7 +85,7 @@ void sum(raft::resources const& handle, /** @} */ // end group stats_sum -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/v_measure.cuh b/cpp/include/raft/stats/v_measure.cuh index 8ea5c65600..041adb5e38 100644 --- a/cpp/include/raft/stats/v_measure.cuh +++ b/cpp/include/raft/stats/v_measure.cuh @@ -92,7 +92,7 @@ double v_measure(raft::resources const& handle, /** @} */ // end group stats_vmeasure -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/stats/weighted_mean.cuh b/cpp/include/raft/stats/weighted_mean.cuh index 7d06d5dff1..da22f0163c 100644 --- a/cpp/include/raft/stats/weighted_mean.cuh +++ b/cpp/include/raft/stats/weighted_mean.cuh @@ -186,7 +186,7 @@ void col_weighted_mean(raft::resources const& handle, /** @} */ // end group stats_weighted_mean -}; // end namespace stats -}; // end namespace raft +}; // end namespace stats +}; // end namespace raft #endif \ No newline at end of file diff --git a/cpp/include/raft/util/cache.cuh b/cpp/include/raft/util/cache.cuh index a9cfe64568..255dea16bb 100644 --- a/cpp/include/raft/util/cache.cuh +++ b/cpp/include/raft/util/cache.cuh @@ -362,9 +362,9 @@ class Cache { int GetSize() const { return cached_keys.size(); } protected: - int n_vec; //!< Number of elements in a cached vector - float cache_size; //!< in MiB - int n_cache_sets; //!< number of cache sets + int n_vec; //!< Number of elements in a cached vector + float cache_size; //!< in MiB + int n_cache_sets; //!< number of cache sets const int TPB = 256; //!< threads per block for kernel launch int n_iter = 0; //!< Counter for time stamping cache operation diff --git a/cpp/include/raft/util/cache_util.cuh 
b/cpp/include/raft/util/cache_util.cuh index a7dcc22b02..79a94d9563 100644 --- a/cpp/include/raft/util/cache_util.cuh +++ b/cpp/include/raft/util/cache_util.cuh @@ -46,7 +46,7 @@ __global__ void get_vecs( const math_t* cache, int_t n_vec, const idx_t* cache_idx, int_t n, math_t* out) { int tid = threadIdx.x + blockIdx.x * blockDim.x; - int row = tid % n_vec; // row idx + int row = tid % n_vec; // row idx if (tid < n_vec * n) { size_t out_col = tid / n_vec; // col idx size_t cache_col = cache_idx[out_col]; @@ -93,7 +93,7 @@ __global__ void store_vecs(const math_t* tile, int n_cache_vecs) { int tid = threadIdx.x + blockIdx.x * blockDim.x; - int row = tid % n_vec; // row idx + int row = tid % n_vec; // row idx if (tid < n_vec * n) { int tile_col = tid / n_vec; // col idx int data_col = tile_idx ? tile_idx[tile_col] : tile_col; @@ -357,7 +357,7 @@ __global__ void get_cache_idx(int* keys, cache_time[cidx] = time; // update time stamp cache_idx[tid] = cidx; // exact cache idx } else { - cache_idx[tid] = sidx; // assign cache set + cache_idx[tid] = sidx; // assign cache set } } } diff --git a/cpp/include/raft/util/device_loads_stores.cuh b/cpp/include/raft/util/device_loads_stores.cuh index e3d54c51f5..65936b2f66 100644 --- a/cpp/include/raft/util/device_loads_stores.cuh +++ b/cpp/include/raft/util/device_loads_stores.cuh @@ -16,7 +16,7 @@ #pragma once -#include // uintX_t +#include // uintX_t #include #include // DI diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py index 49f96aa18b..3d8bbcec4a 100644 --- a/cpp/scripts/run-clang-tidy.py +++ b/cpp/scripts/run-clang-tidy.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -28,7 +28,7 @@ import subprocess -EXPECTED_VERSIONS = ("11.1.0",) +EXPECTED_VERSIONS = ("16.0.6",) VERSION_REGEX = re.compile(r"clang version ([0-9.]+)") CMAKE_COMPILER_REGEX = re.compile( r"^\s*CMAKE_CXX_COMPILER:FILEPATH=(.+)\s*$", re.MULTILINE) diff --git a/cpp/test/distance/distance_base.cuh b/cpp/test/distance/distance_base.cuh index 20d78c7bb5..8f616ada98 100644 --- a/cpp/test/distance/distance_base.cuh +++ b/cpp/test/distance/distance_base.cuh @@ -19,13 +19,13 @@ #include // common::nvtx::range #include -#include // make_device_matrix_view -#include // raft::sqrt -#include // raft::resources +#include // make_device_matrix_view +#include // raft::sqrt +#include // raft::resources #include #include // raft::distance::DistanceType #include -#include // rmm::device_uvector +#include // rmm::device_uvector namespace raft { namespace distance { diff --git a/cpp/test/linalg/rsvd.cu b/cpp/test/linalg/rsvd.cu index 0c66f47c7f..b5d10d215c 100644 --- a/cpp/test/linalg/rsvd.cu +++ b/cpp/test/linalg/rsvd.cu @@ -160,24 +160,24 @@ class RsvdTest : public ::testing::TestWithParam> { const std::vector> inputs_fx = { // Test with ratios - {0.20f, 256, 256, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Square + BBT - {0.20f, 2048, 256, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Tall + BBT + {0.20f, 256, 256, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Square + BBT + {0.20f, 2048, 256, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Tall + BBT - {0.20f, 256, 256, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Square + non-BBT - {0.20f, 2048, 256, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Tall + non-BBT + {0.20f, 256, 256, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Square + non-BBT + {0.20f, 2048, 256, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Tall + non-BBT - {0.20f, 2048, 2048, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Square + BBT - {0.60f, 16384, 2048, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Tall + BBT + {0.20f, 2048, 2048, 0.25f, 0.2f, 0.05f, 0, 0, 
true, 4321ULL}, // Square + BBT + {0.60f, 16384, 2048, 0.25f, 0.2f, 0.05f, 0, 0, true, 4321ULL}, // Tall + BBT - {0.20f, 2048, 2048, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Square + non-BBT - {0.60f, 16384, 2048, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL} // Tall + non-BBT + {0.20f, 2048, 2048, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL}, // Square + non-BBT + {0.60f, 16384, 2048, 0.25f, 0.2f, 0.05f, 0, 0, false, 4321ULL} // Tall + non-BBT - , // Test with fixed ranks - {0.10f, 256, 256, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Square + BBT - {0.12f, 2048, 256, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Tall + BBT + , // Test with fixed ranks + {0.10f, 256, 256, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Square + BBT + {0.12f, 2048, 256, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Tall + BBT - {0.10f, 256, 256, 0.25f, 0.0f, 0.0f, 100, 5, false, 4321ULL}, // Square + non-BBT - {0.12f, 2048, 256, 0.25f, 0.0f, 0.0f, 100, 5, false, 4321ULL}, // Tall + non-BBT + {0.10f, 256, 256, 0.25f, 0.0f, 0.0f, 100, 5, false, 4321ULL}, // Square + non-BBT + {0.12f, 2048, 256, 0.25f, 0.0f, 0.0f, 100, 5, false, 4321ULL}, // Tall + non-BBT {0.60f, 2048, 2048, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Square + BBT {1.00f, 16384, 2048, 0.25f, 0.0f, 0.0f, 100, 5, true, 4321ULL}, // Tall + BBT @@ -188,14 +188,14 @@ const std::vector> inputs_fx = { const std::vector> inputs_dx = { // Test with ratios - {0.20, 256, 256, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Square + BBT - {0.20, 2048, 256, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Tall + BBT - {0.20, 256, 256, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Square + non-BBT - {0.20, 2048, 256, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Tall + non-BBT - {0.20, 2048, 2048, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Square + BBT - {0.60, 16384, 2048, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Tall + BBT - {0.20, 2048, 2048, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Square + non-BBT - {0.60, 16384, 2048, 0.25f, 0.2, 
0.05, 0, 0, false, 4321ULL} // Tall + non-BBT + {0.20, 256, 256, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Square + BBT + {0.20, 2048, 256, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Tall + BBT + {0.20, 256, 256, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Square + non-BBT + {0.20, 2048, 256, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Tall + non-BBT + {0.20, 2048, 2048, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Square + BBT + {0.60, 16384, 2048, 0.25f, 0.2, 0.05, 0, 0, true, 4321ULL}, // Tall + BBT + {0.20, 2048, 2048, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL}, // Square + non-BBT + {0.60, 16384, 2048, 0.25f, 0.2, 0.05, 0, 0, false, 4321ULL} // Tall + non-BBT , // Test with fixed ranks {0.10, 256, 256, 0.25f, 0.0, 0.0, 100, 5, true, 4321ULL}, // Square + BBT diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index ea905d2089..eadc88085f 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -372,7 +372,7 @@ inline std::vector generate_inputs() {100}, {1000}, {1, 8, 17}, - {1, 16}, // k + {1, 16}, // k {search_algo::SINGLE_CTA, search_algo::MULTI_CTA, search_algo::MULTI_KERNEL}, {0, 1, 10, 100}, // query size {0}, diff --git a/cpp/test/util/bitonic_sort.cu b/cpp/test/util/bitonic_sort.cu index d1f03f78b5..2cf5420334 100644 --- a/cpp/test/util/bitonic_sort.cu +++ b/cpp/test/util/bitonic_sort.cu @@ -103,12 +103,12 @@ struct bitonic_launch { }; template -class BitonicTest : public testing::TestWithParam { // NOLINT +class BitonicTest : public testing::TestWithParam { // NOLINT protected: - const test_spec spec; // NOLINT - std::vector in; // NOLINT - std::vector out; // NOLINT - std::vector ref; // NOLINT + const test_spec spec; // NOLINT + std::vector in; // NOLINT + std::vector out; // NOLINT + std::vector ref; // NOLINT void segmented_sort(std::vector& vec, int k, bool ascending) // NOLINT { @@ -184,13 +184,13 @@ auto inputs = ::testing::Values(test_spec{1, 1, 1, true}, test_spec{70, 1, 64, true}, 
test_spec{70, 2, 128, false}); -using Floats = BitonicTest; // NOLINT -TEST_P(Floats, Run) { run(); } // NOLINT -INSTANTIATE_TEST_CASE_P(BitonicTest, Floats, inputs); // NOLINT +using Floats = BitonicTest; // NOLINT +TEST_P(Floats, Run) { run(); } // NOLINT +INSTANTIATE_TEST_CASE_P(BitonicTest, Floats, inputs); // NOLINT -using Ints = BitonicTest; // NOLINT -TEST_P(Ints, Run) { run(); } // NOLINT -INSTANTIATE_TEST_CASE_P(BitonicTest, Ints, inputs); // NOLINT +using Ints = BitonicTest; // NOLINT +TEST_P(Ints, Run) { run(); } // NOLINT +INSTANTIATE_TEST_CASE_P(BitonicTest, Ints, inputs); // NOLINT using Doubles = BitonicTest; // NOLINT TEST_P(Doubles, Run) { run(); } // NOLINT diff --git a/cpp/test/util/reduction.cu b/cpp/test/util/reduction.cu index 17deaf99eb..548d3b9d53 100644 --- a/cpp/test/util/reduction.cu +++ b/cpp/test/util/reduction.cu @@ -147,9 +147,9 @@ struct reduction_launch { template class ReductionTest : public testing::TestWithParam> { // NOLINT protected: - const std::vector input; // NOLINT - rmm::cuda_stream_view stream; // NOLINT - rmm::device_uvector arr_d; // NOLINT + const std::vector input; // NOLINT + rmm::cuda_stream_view stream; // NOLINT + rmm::device_uvector arr_d; // NOLINT public: explicit ReductionTest() @@ -184,8 +184,8 @@ const std::vector binary_test_vector{ auto reduction_input = ::testing::Values(test_vector); auto binary_reduction_input = ::testing::Values(binary_test_vector); -using ReductionTestInt = ReductionTest; // NOLINT -using BinaryReductionTestInt = ReductionTest; // NOLINT +using ReductionTestInt = ReductionTest; // NOLINT +using BinaryReductionTestInt = ReductionTest; // NOLINT TEST_P(ReductionTestInt, REDUCTIONS) { run_reduction(); } INSTANTIATE_TEST_CASE_P(ReductionTest, ReductionTestInt, reduction_input); // NOLINT TEST_P(BinaryReductionTestInt, BINARY_REDUCTION) { run_binary_reduction(); } // NOLINT diff --git a/dependencies.yaml b/dependencies.yaml index 6f64287f54..700a6db1bf 100644 --- a/dependencies.yaml +++ 
b/dependencies.yaml @@ -169,10 +169,10 @@ dependencies: common: - output_types: [conda, requirements] packages: - - clang=16.0.1 + - clang=16.0.6 - output_types: [conda] packages: - - clang-tools=16.0.1 + - clang-tools=16.0.6 nn_bench: common: - output_types: [conda, pyproject, requirements] diff --git a/notebooks/ivf_flat_example.ipynb b/notebooks/ivf_flat_example.ipynb new file mode 100644 index 0000000000..08b9d78169 --- /dev/null +++ b/notebooks/ivf_flat_example.ipynb @@ -0,0 +1,674 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4f49c5c4-1170-42a7-9d6a-b90acd00c3c3", + "metadata": {}, + "source": [ + "# RAFT IVF Flat Example Notebook" + ] + }, + { + "cell_type": "markdown", + "id": "4bcfe810-f120-422c-b2bb-72cc43d0c4ca", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "This notebook demonstrates how to run approximate nearest neighbor search using RAFT IVF-Flat algorithm.\n", + "It builds and searches an index using a dataset from the ann-benchmarks million-scale datasets, saves/loads the index to disk, and explores important parameters for fine-tuning the search performance and accuracy of the index." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "fe73ada7-7b7f-4005-9440-85428194311b", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import cupy as cp\n", + "import numpy as np\n", + "from pylibraft.common import DeviceResources\n", + "from pylibraft.neighbors import ivf_flat\n", + "import matplotlib.pyplot as plt\n", + "import tempfile\n", + "from utils import BenchmarkTimer, calc_recall, load_dataset" + ] + }, + { + "cell_type": "markdown", + "id": "da9e8615-ea9f-4735-b70f-15ccab36c0d9", + "metadata": {}, + "source": [ + "For best performance it is recommended to use an RMM pooling allocator, to minimize the overheads of repeated CUDA allocations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5350e4d9-0993-406a-80af-29538b5677c2", + "metadata": {}, + "outputs": [], + "source": [ + "import rmm\n", + "from rmm.allocators.cupy import rmm_cupy_allocator\n", + "mr = rmm.mr.PoolMemoryResource(\n", + " rmm.mr.CudaMemoryResource(),\n", + " initial_pool_size=2**30\n", + ")\n", + "rmm.mr.set_current_device_resource(mr)\n", + "cp.cuda.set_allocator(rmm_cupy_allocator)" + ] + }, + { + "cell_type": "markdown", + "id": "b0d935f2-ba24-44fc-bdfe-a769b7fcd8e6", + "metadata": {}, + "source": [ + "The following GPU is used for this notebook" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a5daa4b4-96de-4e74-bfd6-505b13595f62", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Thu Sep 21 02:30:53 2023 \n", + "+---------------------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |\n", + "|-----------------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n", + "| | | MIG M. 
|\n", + "|=========================================+======================+======================|\n", + "| 0 NVIDIA H100 PCIe On | 00000000:41:00.0 Off | 0 |\n", + "| N/A 35C P0 69W / 350W | 1487MiB / 81559MiB | 0% Default |\n", + "| | | Disabled |\n", + "+-----------------------------------------+----------------------+----------------------+\n", + " \n", + "+---------------------------------------------------------------------------------------+\n", + "| Processes: |\n", + "| GPU GI CI PID Type Process name GPU Memory |\n", + "| ID ID Usage |\n", + "|=======================================================================================|\n", + "| 0 N/A N/A 3940 C /opt/conda/envs/rapids/bin/python 1474MiB |\n", + "+---------------------------------------------------------------------------------------+\n" + ] + } + ], + "source": [ + "# Report the GPU in use\n", + "!nvidia-smi" + ] + }, + { + "cell_type": "markdown", + "id": "88a654cc-6389-4526-a3e6-826de5606a09", + "metadata": {}, + "source": [ + "## Load dataset\n", + "\n", + "The ANN benchmarks website provides the datasets in HDF5 format.\n", + "\n", + "The list of prepared datasets can be found at https://github.com/erikbern/ann-benchmarks/#data-sets" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "5f529ad6-b0bd-495c-bf7c-43f10fb6aa14", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The index and data will be saved in /tmp/raft_example\n" + ] + } + ], + "source": [ + "WORK_FOLDER = os.path.join(tempfile.gettempdir(), \"raft_example\")\n", + "f = load_dataset(\"http://ann-benchmarks.com/sift-128-euclidean.hdf5\", work_folder=WORK_FOLDER)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3d68a7db-bcf4-449c-96c3-1e8ab146c84d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded dataset of size (1000000, 128), 0.5 GiB; metric: 'euclidean'.\n", + "Number of test 
queries: 10000\n" + ] + } + ], + "source": [ + "metric = f.attrs['distance']\n", + "\n", + "dataset = cp.array(f['train'])\n", + "queries = cp.array(f['test'])\n", + "gt_neighbors = cp.array(f['neighbors'])\n", + "gt_distances = cp.array(f['distances'])\n", + "\n", + "itemsize = dataset.dtype.itemsize \n", + "\n", + "print(f\"Loaded dataset of size {dataset.shape}, {dataset.size*itemsize/(1<<30):4.1f} GiB; metric: '{metric}'.\")\n", + "print(f\"Number of test queries: {queries.shape[0]}\")" + ] + }, + { + "cell_type": "markdown", + "id": "9f463c50-d1d3-49be-bcfe-952602efa603", + "metadata": {}, + "source": [ + "## Build index\n", + "We set [IndexParams](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#pylibraft.neighbors.ivf_flat.IndexParams) and build the index. The index parameters will be discussed in more detail in later sections of this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "737f8841-93f9-4c8e-b2e1-787d4474ef94", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 120 ms, sys: 5.33 ms, total: 125 ms\n", + "Wall time: 124 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "build_params = ivf_flat.IndexParams(\n", + " n_lists=1024,\n", + " metric=\"euclidean\",\n", + " kmeans_trainset_fraction=0.1,\n", + " kmeans_n_iters=20,\n", + " add_data_on_build=True\n", + " )\n", + "\n", + "index = ivf_flat.build(build_params, dataset)" + ] + }, + { + "cell_type": "markdown", + "id": "a16a0cf6-3b05-4afd-9bb8-54431e0d7439", + "metadata": {}, + "source": [ + "The index is built. 
We can print some basic information of the index" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1aec7024-6e5d-4d2c-82e6-7b5734aec958", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(type=IVF-FLAT, metric=euclidean, size=1000000, dim=128, n_lists=1024, adaptive_centers=False)\n" + ] + } + ], + "source": [ + "print(index)" + ] + }, + { + "cell_type": "markdown", + "id": "df7d4958-56a3-48ea-bd64-3486fdb57fb7", + "metadata": {}, + "source": [ + "## Search neighbors" + ] + }, + { + "cell_type": "markdown", + "id": "89ba2eaa-4c85-4e1c-b07c-920394e55dce", + "metadata": {}, + "source": [ + "It is recommended to reuse [device resources](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/common/#pylibraft.common.DeviceResources) across multiple invocations of search, since constructing these can be time consuming. We will reuse the resources by passing the same handle to each RAFT API call." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "46e0421b-9335-47a2-8451-a91f56c2f086", + "metadata": {}, + "outputs": [], + "source": [ + "handle = DeviceResources()" + ] + }, + { + "cell_type": "markdown", + "id": "a6365229-18fd-468f-af30-e24b950cbd6e", + "metadata": {}, + "source": [ + "After setting [SearchParams](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#pylibraft.neighbors.ivf_flat.SearchParams) we search for `k=10` neighbors." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "595454e1-7240-4b43-9a73-963d5670b00c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 171 ms, sys: 52.6 ms, total: 224 ms\n", + "Wall time: 236 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "n_queries=10000\n", + "# n_probes is the number of clusters we select in the first (coarse) search step. 
This is the only hyper parameter for search.\n", + "search_params = ivf_flat.SearchParams(n_probes=30)\n", + "\n", + "# Search 10 nearest neighbors.\n", + "distances, indices = ivf_flat.search(search_params, index, cp.asarray(queries[:n_queries,:]), k=10, handle=handle)\n", + " \n", + "# RAFT calls are asynchronous (when handle arg is provided), we need to sync before accessing the results.\n", + "handle.sync()\n", + "distances, neighbors = cp.asnumpy(distances), cp.asnumpy(indices)" + ] + }, + { + "cell_type": "markdown", + "id": "43d20ca7-7b9e-4046-bb52-640a2744db75", + "metadata": {}, + "source": [ + "The returned arrays have shape [n_queries x 10] and store the distance values and the indices of the searched vectors. We check how accurate the search is. The accuracy of the search is quantified as `recall`, which is a value between 0 and 1 and tells us what fraction of the returned neighbors are actual k nearest neighbors. " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "8cd9cd20-ca00-4a35-a0a0-86636521b31a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.97406" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "calc_recall(neighbors, gt_neighbors)" + ] + }, + { + "cell_type": "markdown", + "id": "cde5079c-9777-45a1-9545-cffbcc59988f", + "metadata": {}, + "source": [ + "## Save and load the index\n", + "You can serialize the index to file using [save](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#pylibraft.neighbors.ivf_flat.save), and [load](https://docs.rapids.ai/api/raft/nightly/pylibraft_api/neighbors/#pylibraft.neighbors.ivf_flat.load) it later." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "bf94e45c-e7fb-4aa3-a611-ddaee7ac41ae", + "metadata": {}, + "outputs": [], + "source": [ + "index_file = os.path.join(WORK_FOLDER, \"my_ivf_flat_index.bin\")\n", + "ivf_flat.save(index_file, index)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "1622d9be-be41-4d25-be99-d348c5e54957", + "metadata": {}, + "outputs": [], + "source": [ + "index = ivf_flat.load(index_file)" + ] + }, + { + "cell_type": "markdown", + "id": "15d503e5-05e8-47ce-8501-e13fc512099c", + "metadata": {}, + "source": [ + "## Tune search parameters\n", + "Search has a single hyper parameter: `n_probes`, which describes how many neighboring clusters are searched (probed) for each query. Within a probed cluster, the distance is computed between all the vectors in the cluster and the query point, and the top-k neighbors are selected. Finally, the top-k neighbors are selected from all the neighbor candidates from the probed clusters.\n", + "\n", + "Let's see how search accuracy and latency change when we change the `n_probes` parameter." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "ace0c31f-af75-4352-a438-123a9a03612c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Benchmarking search with n_probes = 10\n", + "recall 0.86625\n", + "Average search time: 0.026 +/- 0.000259 s\n", + "Queries per second (QPS): 384968\n", + "\n", + "Benchmarking search with n_probes = 20\n", + "recall 0.94705\n", + "Average search time: 0.050 +/- 5.43e-05 s\n", + "Queries per second (QPS): 198880\n", + "\n", + "Benchmarking search with n_probes = 30\n", + "recall 0.97406\n", + "Average search time: 0.075 +/- 8.59e-05 s\n", + "Queries per second (QPS): 133954\n", + "\n", + "Benchmarking search with n_probes = 50\n", + "recall 0.99169\n", + "Average search time: 0.123 +/- 4.78e-05 s\n", + "Queries per second (QPS): 80997\n", + "\n", + "Benchmarking search with n_probes = 100\n", + "recall 0.99844\n", + "Average search time: 0.244 +/- 0.000249 s\n", + "Queries per second (QPS): 40934\n", + "\n", + "Benchmarking search with n_probes = 200\n", + "recall 0.99932\n", + "Average search time: 0.468 +/- 0.000367 s\n", + "Queries per second (QPS): 21382\n", + "\n", + "Benchmarking search with n_probes = 500\n", + "recall 0.99933\n", + "Average search time: 1.039 +/- 0.000209 s\n", + "Queries per second (QPS): 9625\n", + "\n", + "Benchmarking search with n_probes = 1024\n", + "recall 0.99935\n", + "Average search time: 0.701 +/- 0.00579 s\n", + "Queries per second (QPS): 14273\n" + ] + } + ], + "source": [ + "n_probes = np.asarray([10, 20, 30, 50, 100, 200, 500, 1024]);\n", + "qps = np.zeros(n_probes.shape);\n", + "recall = np.zeros(n_probes.shape);\n", + "\n", + "for i in range(len(n_probes)):\n", + " print(\"\\nBenchmarking search with n_probes =\", n_probes[i])\n", + " timer = BenchmarkTimer(reps=1, warmup=1)\n", + " for rep in timer.benchmark_runs():\n", + " distances, neighbors = ivf_flat.search(\n", + " 
ivf_flat.SearchParams(n_probes=n_probes[i]),\n", + " index,\n", + " cp.asarray(queries),\n", + " k=10,\n", + " handle=handle,\n", + " )\n", + " handle.sync()\n", + " \n", + " recall[i] = calc_recall(cp.asnumpy(neighbors), gt_neighbors)\n", + " print(\"recall\", recall[i])\n", + "\n", + " timings = np.asarray(timer.timings)\n", + " avg_time = timings.mean()\n", + " std_time = timings.std()\n", + " qps[i] = queries.shape[0] / avg_time\n", + " print(\"Average search time: {0:7.3f} +/- {1:7.3} s\".format(avg_time, std_time))\n", + " print(\"Queries per second (QPS): {0:8.0f}\".format(qps[i]))" + ] + }, + { + "cell_type": "markdown", + "id": "20b2498c-7231-4211-990e-600d5c26a9a1", + "metadata": {}, + "source": [ + "The plots below illustrate how the accuracy (recall) and the throughput (queries per second) depends on the `n_probes` parameter." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1ac370f-91c8-4054-95c7-a749df5f16d2", + "metadata": {}, + "outputs": [], + "source": [ + "fig = plt.figure(figsize=(12,3))\n", + "ax = fig.add_subplot(131)\n", + "ax.plot(n_probes, recall,'o-')\n", + "#ax.set_xticks(bench_k, bench_k)\n", + "ax.set_xlabel('n_probes')\n", + "ax.grid()\n", + "ax.set_ylabel('recall (@k=10)')\n", + "\n", + "ax = fig.add_subplot(132)\n", + "ax.plot(n_probes, qps,'o-')\n", + "#ax.set_xticks(bench_k, bench_k)\n", + "ax.set_xlabel('n_probes')\n", + "ax.grid()\n", + "ax.set_ylabel('queries per second');\n", + "\n", + "ax = fig.add_subplot(133)\n", + "ax.plot(recall, qps,'o-')\n", + "#ax.set_xticks(bench_k, bench_k)\n", + "ax.set_xlabel('recall')\n", + "ax.grid()\n", + "ax.set_ylabel('queries per second');\n", + "#ax.set_yscale('log')" + ] + }, + { + "cell_type": "markdown", + "id": "81e7ad6a-bddc-45de-9cce-0fb913f91efe", + "metadata": {}, + "source": [ + "## Adjust build parameters\n", + "### n_lists\n", + "The number of clusters (or lists) is set by the n_list parameter. Let's change it to 100 clusters." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "addbfff3-7773-4290-9608-5489edf4886d", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "build_params = ivf_flat.IndexParams(\n", + " n_lists=100,\n", + " metric=\"euclidean\",\n", + " kmeans_trainset_fraction=1,\n", + " kmeans_n_iters=20,\n", + " add_data_on_build=True\n", + " )\n", + "\n", + "index = ivf_flat.build(build_params, dataset, handle=handle)" + ] + }, + { + "cell_type": "markdown", + "id": "48db27f9-54c8-4dac-839b-af94ada8885f", + "metadata": {}, + "source": [ + "The ratio of n_probes / n_list will determine how large fraction of the dataset is searched for each query. The right combination depends on the use case. Here we will search 10 of the clusters for each query." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a0149ad-de38-4195-97a5-ce5d5d877036", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "n_queries=10000\n", + "\n", + "search_params = ivf_flat.SearchParams(n_probes=10)\n", + "\n", + "# Search 10 nearest neighbors.\n", + "distances, indices = ivf_flat.search(search_params, index, cp.asarray(queries[:n_queries,:]), k=10, handle=handle)\n", + " \n", + "handle.sync()\n", + "distances, neighbors = cp.asnumpy(distances), cp.asnumpy(indices)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eedc3ec4-06af-42c5-8cdf-490a5c2bc49a", + "metadata": {}, + "outputs": [], + "source": [ + "calc_recall(neighbors, gt_neighbors)" + ] + }, + { + "cell_type": "markdown", + "id": "0c44800f-1e9e-4f7b-87fe-0f25e6590faa", + "metadata": {}, + "source": [ + "### trainset_fraction\n", + "During clustering we can sub-sample the dataset. The parameter `trainset_fraction` determines what fraction to use. Often we get good results by using only 1/10th of the dataset for clustering. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a54d190-64d4-4cd4-a497-365cbffda871", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "build_params = ivf_flat.IndexParams( \n", + " n_lists=100, \n", + " metric=\"sqeuclidean\", \n", + " kmeans_trainset_fraction=0.1, \n", + " kmeans_n_iters=20 \n", + " ) \n", + "index = ivf_flat.build(build_params, dataset, handle=handle)" + ] + }, + { + "cell_type": "markdown", + "id": "9d86a213-d6ae-4fca-9082-cb5a4d1dab36", + "metadata": {}, + "source": [ + "We see only a minimal change in the recall" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4cc992e8-a5e5-4508-b790-0e934160b660", + "metadata": {}, + "outputs": [], + "source": [ + "search_params = ivf_flat.SearchParams(n_probes=10)\n", + "\n", + "distances, indices = ivf_flat.search(search_params, index, cp.asarray(queries[:n_queries,:]), k=10, handle=handle)\n", + " \n", + "handle.sync()\n", + "distances, neighbors = cp.asnumpy(distances), cp.asnumpy(indices)\n", + "calc_recall(neighbors, gt_neighbors)" + ] + }, + { + "cell_type": "markdown", + "id": "25289ebc-7d89-4fa6-bc62-e25b6e77750c", + "metadata": {}, + "source": [ + "### Add vectors on build\n", + "Currently you cannot configure how RAFT sub-samples the input. If you want to have a fine control on how the training set is selected, then create the index in two steps:\n", + "1. Define cluster centers on a training set, but do not add any vector to the index\n", + "2. Add vectors to the index (extend)\n", + "\n", + "This workflow shall be familiar to FAISS users. 
Note that raft does not require adding the data in batches, internal batching is used when necessary.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ebcf970-94ed-4825-9885-277bd984b90c", + "metadata": {}, + "outputs": [], + "source": [ + "# subsample the dataset\n", + "n_train = 10000\n", + "train_set = dataset[cp.random.choice(dataset.shape[0], n_train, replace=False),:]\n", + "\n", + "# build using training set\n", + "build_params = ivf_flat.IndexParams(\n", + " n_lists=1024,\n", + " metric=\"sqeuclidean\",\n", + " kmeans_trainset_fraction=1,\n", + " kmeans_n_iters=20,\n", + " add_data_on_build=False\n", + " )\n", + "index = ivf_flat.build(build_params, train_set)\n", + "\n", + "print(\"Index before adding vectors\", index)\n", + "\n", + "ivf_flat.extend(index, dataset, cp.arange(dataset.shape[0], dtype=cp.int64))\n", + "\n", + "print(\"Index after adding vectors\", index)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "029d48a9-baf7-4263-af43-9e500ef3cce4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/tutorial_ivf_pq.ipynb b/notebooks/tutorial_ivf_pq.ipynb index 6aa8cd6495..397e39bfba 100644 --- a/notebooks/tutorial_ivf_pq.ipynb +++ b/notebooks/tutorial_ivf_pq.ipynb @@ -79,6 +79,7 @@ "from pylibraft.common import DeviceResources\n", "from pylibraft.neighbors import ivf_pq, refine\n", "from adjustText import adjust_text\n", + "from utils import calc_recall, load_dataset\n", "\n", "%matplotlib inline" ] @@ -194,15 +195,18 @@ "cell_type": "code", 
"execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The index and data will be saved in /tmp/raft_example\n" + ] + } + ], "source": [ "DATASET_URL = \"http://ann-benchmarks.com/sift-128-euclidean.hdf5\"\n", - "DATASET_FILENAME = DATASET_URL.split('/')[-1]\n", - "\n", - "## download the dataset\n", - "dataset_path = os.path.join(WORK_FOLDER, DATASET_FILENAME)\n", - "if not os.path.exists(dataset_path):\n", - " urllib.request.urlretrieve(DATASET_URL, dataset_path)" + "f = load_dataset(DATASET_URL)" ] }, { @@ -227,8 +231,6 @@ } ], "source": [ - "f = h5py.File(dataset_path, \"r\")\n", - "\n", "metric = f.attrs['distance']\n", "\n", "dataset = cp.array(f['train'])\n", @@ -456,28 +458,6 @@ } ], "source": [ - "## Check the quality of the prediction (recall)\n", - "def calc_recall(found_indices, ground_truth):\n", - " found_indices = cp.asarray(found_indices)\n", - " bs, k = found_indices.shape\n", - " if bs != ground_truth.shape[0]:\n", - " raise RuntimeError(\n", - " \"Batch sizes do not match {} vs {}\".format(\n", - " bs, ground_truth.shape[0])\n", - " )\n", - " if k > ground_truth.shape[1]:\n", - " raise RuntimeError(\n", - " \"Not enough indices in the ground truth ({} > {})\".format(\n", - " k, ground_truth.shape[1])\n", - " )\n", - " n = 0\n", - " # Go over the batch\n", - " for i in range(bs):\n", - " # Note, ivf-pq does not guarantee the ordered input, hence the use of intersect1d\n", - " n += cp.intersect1d(found_indices[i, :k], ground_truth[i, :k]).size\n", - " recall = n / found_indices.size\n", - " return recall\n", - "\n", "recall_first_try = calc_recall(neighbors, gt_neighbors)\n", "print(f\"Got recall = {recall_first_try} with the default parameters (k = {k}).\")" ] diff --git a/notebooks/utils.py b/notebooks/utils.py new file mode 100644 index 0000000000..1c2e44a6ae --- /dev/null +++ b/notebooks/utils.py @@ -0,0 +1,103 @@ +# +# Copyright (c) 2023, NVIDIA CORPORATION. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import cupy as cp
import h5py
import os
import tempfile
import time

# `import urllib` alone does not load the `request` submodule; import it
# explicitly so that `urllib.request.urlretrieve` is always available.
import urllib.request


def calc_recall(found_indices, ground_truth):
    """Check the quality of the prediction (recall).

    For every row, the first ``k`` entries of ``found_indices`` are compared
    with the first ``k`` entries of the matching row of ``ground_truth``,
    where ``k`` is the number of columns of ``found_indices``.

    Parameters
    ----------
    found_indices
        2-D array of neighbor indices, one row per query; it is converted
        with ``cp.asarray``.
    ground_truth
        2-D array of reference neighbor indices with the same number of rows
        and at least as many columns as ``found_indices``.

    Returns
    -------
    float
        Number of matching indices divided by ``found_indices.size``.

    Raises
    ------
    RuntimeError
        If the row counts differ, or if ``ground_truth`` has fewer columns
        than ``found_indices``.
    """
    found_indices = cp.asarray(found_indices)
    bs, k = found_indices.shape
    if bs != ground_truth.shape[0]:
        raise RuntimeError(
            "Batch sizes do not match {} vs {}".format(
                bs, ground_truth.shape[0]
            )
        )
    if k > ground_truth.shape[1]:
        raise RuntimeError(
            "Not enough indices in the ground truth ({} > {})".format(
                k, ground_truth.shape[1]
            )
        )
    n = 0
    # Go over the batch
    for i in range(bs):
        # Note, ivf-pq does not guarantee the ordered input, hence the use of intersect1d
        n += cp.intersect1d(found_indices[i, :k], ground_truth[i, :k]).size
    recall = n / found_indices.size
    return recall


class BenchmarkTimer:
    """Provides a generator that runs a loop body ``warmup + reps`` times
    and records the wall-clock time of the last ``reps`` iterations in the
    instance variable ``timings``. Use like:

    .. code-block:: python

        timer = BenchmarkTimer(reps=5)
        for _ in timer.benchmark_runs():
            ... do something ...
        print(np.min(timer.timings))

    This class is borrowed from the rapids/cuml benchmark suite
    """

    def __init__(self, reps=1, warmup=0):
        """Store the repetition counts and start with an empty timing list.

        Parameters
        ----------
        reps
            Number of timed iterations.
        warmup
            Number of leading iterations that are run but not recorded.
        """
        self.warmup = warmup
        self.reps = reps
        self.timings = []

    def benchmark_runs(self):
        """Yield the iteration index ``warmup + reps`` times.

        The time spent by the caller between two consecutive resumptions of
        the generator (i.e. the body of the ``for`` loop) is measured with
        ``time.time()``. Exactly one entry per non-warm-up iteration is
        appended to ``self.timings``.
        """
        for r in range(self.reps + self.warmup):
            t0 = time.time()
            yield r
            t1 = time.time()
            # Warm-up iterations are executed but deliberately not recorded,
            # and every measured iteration is recorded exactly once.
            if r >= self.warmup:
                self.timings.append(t1 - t0)


def load_dataset(dataset_url, work_folder=None):
    """Download dataset from url. It is expected that the dataset contains a hdf5 file in ann-benchmarks format

    The file is stored in ``work_folder`` under the last path component of
    ``dataset_url`` and is only downloaded when no file with that name
    exists there yet.

    Parameters
    ----------
    dataset_url
        address of hdf5 file
    work_folder
        name of the local folder to store the dataset; when ``None``, the
        folder ``raft_example`` inside the system temporary directory is used

    Returns
    -------
    h5py.File
        The dataset file opened in read-only mode.
    """
    dataset_filename = dataset_url.split("/")[-1]

    # We'll need to load store some data in this tutorial
    if work_folder is None:
        work_folder = os.path.join(tempfile.gettempdir(), "raft_example")

    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(work_folder, exist_ok=True)
    print("The index and data will be saved in", work_folder)

    ## download the dataset
    dataset_path = os.path.join(work_folder, dataset_filename)
    if not os.path.exists(dataset_path):
        urllib.request.urlretrieve(dataset_url, dataset_path)

    f = h5py.File(dataset_path, "r")

    return f