From e618fb07b49bf750357d4fc3c619385ad86e0a87 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Fri, 29 Sep 2023 09:20:59 -0500 Subject: [PATCH 1/3] Pin dask and distributed for 23.10 release (#1864) This PR pins `dask` and `distributed` to `2023.9.2` for `23.10` release. xref: https://github.com/rapidsai/cudf/pull/14225 Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Ray Douglass (https://github.com/raydouglass) - Peter Andreas Entschev (https://github.com/pentschev) - Ben Frederickson (https://github.com/benfred) --- ci/test_wheel_raft_dask.sh | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 6 +++--- conda/environments/all_cuda-120_arch-x86_64.yaml | 6 +++--- conda/recipes/raft-dask/meta.yaml | 6 +++--- dependencies.yaml | 6 +++--- python/raft-dask/pyproject.toml | 4 ++-- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/ci/test_wheel_raft_dask.sh b/ci/test_wheel_raft_dask.sh index 676d642de9..fd9668e968 100755 --- a/ci/test_wheel_raft_dask.sh +++ b/ci/test_wheel_raft_dask.sh @@ -12,7 +12,7 @@ RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels python -m pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl # Always install latest dask for testing -python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.10 +python -m pip install git+https://github.com/dask/dask.git@2023.9.2 git+https://github.com/dask/distributed.git@2023.9.2 git+https://github.com/rapidsai/dask-cuda.git@branch-23.10 # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/raft_dask*.whl)[test] diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 93e8821575..739e1e9785 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -19,10 +19,10 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core>=2023.7.1 +- dask-core==2023.9.2 - dask-cuda==23.10.* -- dask>=2023.7.1 -- distributed>=2023.7.1 +- dask==2023.9.2 +- distributed==2023.9.2 - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 01d9bca5c2..321c17bf4f 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -20,10 +20,10 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core>=2023.7.1 +- dask-core==2023.9.2 - dask-cuda==23.10.* -- dask>=2023.7.1 -- distributed>=2023.7.1 +- dask==2023.9.2 +- distributed==2023.9.2 - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml index c9caa4dd9b..04dfef5063 100644 --- a/conda/recipes/raft-dask/meta.yaml +++ b/conda/recipes/raft-dask/meta.yaml @@ -60,10 +60,10 @@ requirements: - cudatoolkit {% endif %} - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - - dask >=2023.7.1 - - dask-core >=2023.7.1 + - dask ==2023.9.2 + - dask-core ==2023.9.2 - dask-cuda ={{ minor_version }} - - distributed >=2023.7.1 + - distributed ==2023.9.2 - joblib >=0.11 - nccl >=2.9.9 - pylibraft {{ version }} diff --git a/dependencies.yaml b/dependencies.yaml index b827f11228..3ad51a6377 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -430,15 +430,15 @@ dependencies: common: - output_types: [conda, pyproject] packages: - - dask>=2023.7.1 + - dask==2023.9.2 - dask-cuda==23.10.* - - distributed>=2023.7.1 + - distributed==2023.9.2 - joblib>=0.11 - numba>=0.57 - *numpy - output_types: conda packages: - - dask-core>=2023.7.1 + - dask-core==2023.9.2 - ucx>=1.13.0 - ucx-proc=*=gpu - &ucx_py_conda ucx-py==0.34.* diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index bdbcf61e0f..3e0ffc2848 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -35,8 +35,8 @@ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ "dask-cuda==23.10.*", - "dask>=2023.7.1", - "distributed>=2023.7.1", + "dask==2023.9.2", + "distributed==2023.9.2", "joblib>=0.11", "numba>=0.57", "numpy>=1.21", From 1ee423b451cee386302f6d0c893f1feb902a840a Mon Sep 17 00:00:00 2001 From: tsuki <12711693+enp1s0@users.noreply.github.com> Date: Tue, 3 Oct 2023 00:39:45 +0800 Subject: [PATCH 2/3] [BUG] Fix a bug in the filtering operation in CAGRA multi-kernel (#1862) This PR fixes a bug in the filtering operations in the CAGRA multi-kernel search implementation. This bug caused the test of https://github.com/rapidsai/raft/pull/1837 to fail. Authors: - tsuki (https://github.com/enp1s0) Approvers: - Micka (https://github.com/lowener) - Corey J. Nolet (https://github.com/cjnolet) --- .../detail/cagra/search_multi_kernel.cuh | 23 +++++++++++++------ .../neighbors/detail/cagra/search_plan.cuh | 8 +++++++ .../raft/neighbors/detail/nn_descent.cuh | 3 +-- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index 5dcfcb3929..9392bde440 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -478,13 +478,15 @@ __global__ void apply_filter_kernel(INDEX_T* const result_indices_ptr, const INDEX_T query_id_offset, SAMPLE_FILTER_T sample_filter) { - const auto tid = threadIdx.x + blockIdx.x * blockDim.x; + constexpr INDEX_T index_msb_1_mask = utils::gen_index_msb_1_mask::value; + const auto tid = threadIdx.x + blockIdx.x * blockDim.x; if (tid >= result_buffer_size * num_queries) { return; } const auto i = tid % result_buffer_size; const auto j = tid / result_buffer_size; const auto index = i + j * lds; - if (!sample_filter(query_id_offset + j, result_indices_ptr[index])) { + if (result_indices_ptr[index] != ~index_msb_1_mask && + !sample_filter(query_id_offset + j, result_indices_ptr[index])) { result_indices_ptr[index] = utils::get_max_value(); result_distances_ptr[index] = utils::get_max_value(); } @@ -788,12 +790,15 @@ struct search : search_plan_impl { auto result_indices_ptr = result_indices.data() + (iter & 0x1) * result_buffer_size; auto result_distances_ptr = result_distances.data() + (iter & 0x1) * result_buffer_size; - // Remove parent bit in search results - remove_parent_bit( - num_queries, itopk_size, result_indices_ptr, result_buffer_allocation_size, stream); + if constexpr (!std::is_same::value) { + // Remove parent bit in search results + remove_parent_bit(num_queries, + result_buffer_size, + result_indices.data() + (iter & 0x1) * itopk_size, + result_buffer_allocation_size, + stream); - if (!std::is_same::value) { apply_filter( result_indices.data() + (iter & 0x1) * itopk_size, result_distances.data() + (iter & 0x1) * itopk_size, @@ -821,6 +826,10 @@ struct search : search_plan_impl { true, topk_hint.data(), stream); + } else { + // Remove parent bit in search results + remove_parent_bit( + num_queries, itopk_size, result_indices_ptr, result_buffer_allocation_size, stream); } // Copy results from working buffer to final buffer diff --git a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh index a0f346ab51..147b8b753d 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh @@ -291,6 +291,14 @@ struct search_plan_impl : public search_plan_impl_base { "`hashmap_max_fill_rate` must be equal to or greater than 0.1 and smaller than 0.9. " + std::to_string(hashmap_max_fill_rate) + " has been given."; } + if constexpr (!std::is_same::value) { + if (hashmap_mode == hash_mode::SMALL) { + error_message += "`SMALL` hash is not available when filtering"; + } else { + hashmap_mode = hash_mode::HASH; + } + } if (algo == search_algo::MULTI_CTA) { if (hashmap_mode == hash_mode::SMALL) { error_message += "`small_hash` is not available when 'search_mode' is \"multi-cta\""; diff --git a/cpp/include/raft/neighbors/detail/nn_descent.cuh b/cpp/include/raft/neighbors/detail/nn_descent.cuh index 3e4d0409bd..009ffd4684 100644 --- a/cpp/include/raft/neighbors/detail/nn_descent.cuh +++ b/cpp/include/raft/neighbors/detail/nn_descent.cuh @@ -1278,8 +1278,7 @@ void GNND::build(Data_t* data, const Index_t nrow, Index_t* out std::thread update_and_sample_thread(update_and_sample, it); - std::cout << "# GNND iteraton: " << it + 1 << "/" << build_config_.max_iterations << "\r"; - std::fflush(stdout); + RAFT_LOG_DEBUG("# GNND iteraton: %lu / %lu", it + 1, build_config_.max_iterations); // Reuse dists_buffer_ to save GPU memory. graph_buffer_ cannot be reused, because it // contains some information for local_join. From 120cff4bee8be9adbb46d09dfae23c3ad5b46870 Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Mon, 2 Oct 2023 11:08:48 -0700 Subject: [PATCH 3/3] Add option to brute_force index to maintain reference to non-owning norms (#1865) This makes the faiss integration substantially easier, since we can just use the existing norms that have already been calculated in GpuDistanceParams::vectorNorms - rather than require an owned copy that lives in the brute force index. Authors: - Ben Frederickson (https://github.com/benfred) Approvers: - Corey J. Nolet (https://github.com/cjnolet) --- .../raft/neighbors/brute_force_types.hpp | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/cpp/include/raft/neighbors/brute_force_types.hpp b/cpp/include/raft/neighbors/brute_force_types.hpp index cc934b7a98..19dd6b8350 100644 --- a/cpp/include/raft/neighbors/brute_force_types.hpp +++ b/cpp/include/raft/neighbors/brute_force_types.hpp @@ -66,11 +66,11 @@ struct index : ann::index { /** Dataset norms */ [[nodiscard]] inline auto norms() const -> device_vector_view { - return make_const_mdspan(norms_.value().view()); + return norms_view_.value(); } /** Whether ot not this index has dataset norms */ - [[nodiscard]] inline bool has_norms() const noexcept { return norms_.has_value(); } + [[nodiscard]] inline bool has_norms() const noexcept { return norms_view_.has_value(); } [[nodiscard]] inline T metric_arg() const noexcept { return metric_arg_; } @@ -102,10 +102,30 @@ struct index : ann::index { norms_(std::move(norms)), metric_arg_(metric_arg) { + if (norms_) { norms_view_ = make_const_mdspan(norms_.value().view()); } update_dataset(res, dataset); resource::sync_stream(res); } + /** Construct a brute force index from dataset + * + * This class stores a non-owning reference to the dataset and norms here. + * Having precomputed norms gives us a performance advantage at query time. + */ + index(raft::resources const& res, + raft::device_matrix_view dataset_view, + std::optional> norms_view, + raft::distance::DistanceType metric, + T metric_arg = 0.0) + : ann::index(), + metric_(metric), + dataset_(make_device_matrix(res, 0, 0)), + dataset_view_(dataset_view), + norms_view_(norms_view), + metric_arg_(metric_arg) + { + } + private: /** * Replace the dataset with a new dataset. @@ -135,6 +155,7 @@ struct index : ann::index { raft::distance::DistanceType metric_; raft::device_matrix dataset_; std::optional> norms_; + std::optional> norms_view_; raft::device_matrix_view dataset_view_; T metric_arg_; };