From fdb118002a482e878ec48fcaa7f11a15efd59140 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 13 Nov 2024 21:32:29 -0600 Subject: [PATCH 1/4] enforce wheel size limits, README formatting in CI (#464) Contributes to https://github.com/rapidsai/build-planning/issues/110 Proposes adding 2 types of validation on wheels in CI, to ensure we continue to produce wheels that are suitable for PyPI. * checks on wheel size (compressed), - *to be sure they're under PyPI limits* - *and to prompt discussion on PRs that significantly increase wheel sizes* * checks on README formatting - *to ensure they'll render properly as the PyPI project homepages* - *e.g. like how https://github.com/scikit-learn/scikit-learn/blob/main/README.rst becomes https://pypi.org/project/scikit-learn/* Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cuvs/pull/464 --- ci/build_wheel_cuvs.sh | 5 ++++- ci/validate_wheel.sh | 21 +++++++++++++++++++++ python/cuvs/pyproject.toml | 8 ++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100755 ci/validate_wheel.sh diff --git a/ci/build_wheel_cuvs.sh b/ci/build_wheel_cuvs.sh index e03da9f19..444657cc0 100755 --- a/ci/build_wheel_cuvs.sh +++ b/ci/build_wheel_cuvs.sh @@ -3,6 +3,8 @@ set -euo pipefail +package_dir="python/cuvs" + case "${RAPIDS_CUDA_VERSION}" in 12.*) EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON" @@ -15,4 +17,5 @@ esac # Set up skbuild options. Enable sccache in skbuild config options export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUVS_CPP=OFF${EXTRA_CMAKE_ARGS}" -ci/build_wheel.sh cuvs python/cuvs +ci/build_wheel.sh cuvs ${package_dir} +ci/validate_wheel.sh ${package_dir} final_dist diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh new file mode 100755 index 000000000..5910a5c59 --- /dev/null +++ b/ci/validate_wheel.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +package_dir=$1 +wheel_dir_relative_path=$2 + +cd "${package_dir}" + +rapids-logger "validate packages with 'pydistcheck'" + +pydistcheck \ + --inspect \ + "$(echo ${wheel_dir_relative_path}/*.whl)" + +rapids-logger "validate packages with 'twine'" + +twine check \ + --strict \ + "$(echo ${wheel_dir_relative_path}/*.whl)" diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index 30d784c67..d40026776 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -133,6 +133,14 @@ build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" +[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# detect when package size grows significantly +max_allowed_size_compressed = '1.4G' + [tool.pytest.ini_options] filterwarnings = [ "error", From bb9c669500cf0401114f4a5810d0f3a0ea1db6b3 Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Thu, 14 Nov 2024 21:25:58 +0100 Subject: [PATCH 2/4] Fix include errors, header, and unsafe locks in iface.hpp (#467) Fix a few issues with the internal header `neighbors/iface/iface.hpp` leading to compile time errors and dangerous runtime behavior: - Add missing includes - Use `std::lock_guard` to avoid a deadlock on exception - Add NVIDIA header - Avoid an extra stream sync during search. Authors: - Artem M. Chirkin (https://github.com/achirkin) Approvers: - Victor Lafargue (https://github.com/viclafargue) - Corey J. 
Nolet (https://github.com/cjnolet) - Ben Frederickson (https://github.com/benfred) URL: https://github.com/rapidsai/cuvs/pull/467 --- cpp/src/neighbors/cagra_c.cpp | 2 ++ cpp/src/neighbors/iface/iface.hpp | 53 +++++++++++++++++-------------- cpp/src/neighbors/ivf_flat_c.cpp | 2 ++ cpp/src/neighbors/mg/mg.cuh | 2 ++ examples/cpp/src/common.cuh | 4 +++ 5 files changed, 39 insertions(+), 24 deletions(-) mode change 100755 => 100644 cpp/src/neighbors/ivf_flat_c.cpp diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp index 6985ff094..326a89665 100644 --- a/cpp/src/neighbors/cagra_c.cpp +++ b/cpp/src/neighbors/cagra_c.cpp @@ -29,6 +29,8 @@ #include #include +#include + namespace { template diff --git a/cpp/src/neighbors/iface/iface.hpp b/cpp/src/neighbors/iface/iface.hpp index a329db429..9b3da75a4 100644 --- a/cpp/src/neighbors/iface/iface.hpp +++ b/cpp/src/neighbors/iface/iface.hpp @@ -1,4 +1,20 @@ -#include +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once #include #include @@ -6,6 +22,9 @@ #include #include +#include +#include + namespace cuvs::neighbors { using namespace raft; @@ -16,7 +35,7 @@ void build(const raft::device_resources& handle, const cuvs::neighbors::index_params* index_params, raft::mdspan, row_major, Accessor> index_dataset) { - interface.mutex_->lock(); + std::lock_guard(*interface.mutex_); if constexpr (std::is_same>::value) { auto idx = cuvs::neighbors::ivf_flat::build( @@ -32,8 +51,6 @@ void build(const raft::device_resources& handle, interface.index_.emplace(std::move(idx)); } resource::sync_stream(handle); - - interface.mutex_->unlock(); } template @@ -44,7 +61,7 @@ void extend( std::optional, layout_c_contiguous, Accessor2>> new_indices) { - interface.mutex_->lock(); + std::lock_guard(*interface.mutex_); if constexpr (std::is_same>::value) { auto idx = @@ -58,8 +75,6 @@ void extend( RAFT_FAIL("CAGRA does not implement the extend method"); } resource::sync_stream(handle); - - interface.mutex_->unlock(); } template @@ -70,7 +85,7 @@ void search(const raft::device_resources& handle, raft::device_matrix_view neighbors, raft::device_matrix_view distances) { - // interface.mutex_->lock(); + // std::lock_guard(*interface.mutex_); if constexpr (std::is_same>::value) { cuvs::neighbors::ivf_flat::search( handle, @@ -94,9 +109,7 @@ void search(const raft::device_resources& handle, neighbors, distances); } - resource::sync_stream(handle); - - // interface.mutex_->unlock(); + // resource::sync_stream(handle); } // for MG ANN only @@ -108,7 +121,7 @@ void search(const raft::device_resources& handle, raft::device_matrix_view d_neighbors, raft::device_matrix_view d_distances) { - // interface.mutex_->lock(); + // std::lock_guard(*interface.mutex_); int64_t n_rows = h_queries.extent(0); int64_t n_dims = h_queries.extent(1); @@ -120,8 +133,6 @@ void search(const raft::device_resources& handle, auto d_query_view = raft::make_const_mdspan(d_queries.view()); search(handle, interface, 
search_params, d_query_view, d_neighbors, d_distances); - - // interface.mutex_->unlock(); } template @@ -129,7 +140,7 @@ void serialize(const raft::device_resources& handle, const cuvs::neighbors::iface& interface, std::ostream& os) { - interface.mutex_->lock(); + std::lock_guard(*interface.mutex_); if constexpr (std::is_same>::value) { ivf_flat::serialize(handle, os, interface.index_.value()); @@ -138,8 +149,6 @@ void serialize(const raft::device_resources& handle, } else if constexpr (std::is_same>::value) { cagra::serialize(handle, os, interface.index_.value(), true); } - - interface.mutex_->unlock(); } template @@ -147,7 +156,7 @@ void deserialize(const raft::device_resources& handle, cuvs::neighbors::iface& interface, std::istream& is) { - interface.mutex_->lock(); + std::lock_guard(*interface.mutex_); if constexpr (std::is_same>::value) { ivf_flat::index idx(handle); @@ -162,8 +171,6 @@ void deserialize(const raft::device_resources& handle, cagra::deserialize(handle, is, &idx); interface.index_.emplace(std::move(idx)); } - - interface.mutex_->unlock(); } template @@ -171,7 +178,7 @@ void deserialize(const raft::device_resources& handle, cuvs::neighbors::iface& interface, const std::string& filename) { - interface.mutex_->lock(); + std::lock_guard(*interface.mutex_); std::ifstream is(filename, std::ios::in | std::ios::binary); if (!is) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } @@ -191,8 +198,6 @@ void deserialize(const raft::device_resources& handle, } is.close(); - - interface.mutex_->unlock(); } -}; // namespace cuvs::neighbors \ No newline at end of file +}; // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/ivf_flat_c.cpp b/cpp/src/neighbors/ivf_flat_c.cpp old mode 100755 new mode 100644 index c14c1edc0..2acc6b678 --- a/cpp/src/neighbors/ivf_flat_c.cpp +++ b/cpp/src/neighbors/ivf_flat_c.cpp @@ -29,6 +29,8 @@ #include #include +#include + namespace { template diff --git a/cpp/src/neighbors/mg/mg.cuh b/cpp/src/neighbors/mg/mg.cuh index d3f635bc4..e9cdc30f6 100644 --- a/cpp/src/neighbors/mg/mg.cuh +++ b/cpp/src/neighbors/mg/mg.cuh @@ -25,6 +25,8 @@ #include #include +#include + namespace cuvs::neighbors { using namespace raft; diff --git a/examples/cpp/src/common.cuh b/examples/cpp/src/common.cuh index 1c93dec0e..8e109a764 100644 --- a/examples/cpp/src/common.cuh +++ b/examples/cpp/src/common.cuh @@ -14,6 +14,8 @@ * limitations under the License. */ +#pragma once + #include #include #include @@ -28,6 +30,8 @@ #include #include +#include + // Fill dataset and queries with synthetic data. void generate_dataset(raft::device_resources const &dev_resources, raft::device_matrix_view dataset, From 7ab2bfdd250613137a5622471212dab528319306 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Fri, 15 Nov 2024 12:16:17 -0500 Subject: [PATCH 3/4] Add `InnerProduct` and `CosineExpanded` metric support in NN Descent (#177) Closes #171 Authors: - Divye Gala (https://github.com/divyegala) Approvers: - Corey J. 
Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/177 --- cpp/CMakeLists.txt | 1 + cpp/include/cuvs/neighbors/nn_descent.hpp | 24 ++--- .../neighbors/detail/cagra/cagra_build.cuh | 12 ++- cpp/src/neighbors/detail/nn_descent.cuh | 87 +++++++++++++------ cpp/src/neighbors/nn_descent_index.cpp | 29 +++++++ cpp/test/neighbors/ann_cagra.cuh | 10 +-- cpp/test/neighbors/ann_nn_descent.cuh | 32 ++++--- python/cuvs/cuvs/test/test_cagra.py | 4 +- python/cuvs/cuvs/test/test_hnsw.py | 4 +- 9 files changed, 139 insertions(+), 64 deletions(-) create mode 100644 cpp/src/neighbors/nn_descent_index.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c493af488..81b82aa7b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -436,6 +436,7 @@ if(BUILD_SHARED_LIBS) src/neighbors/nn_descent.cu src/neighbors/nn_descent_float.cu src/neighbors/nn_descent_half.cu + src/neighbors/nn_descent_index.cpp src/neighbors/nn_descent_int8.cu src/neighbors/nn_descent_uint8.cu src/neighbors/reachability.cu diff --git a/cpp/include/cuvs/neighbors/nn_descent.hpp b/cpp/include/cuvs/neighbors/nn_descent.hpp index bd41d1ff7..9cd8192b5 100644 --- a/cpp/include/cuvs/neighbors/nn_descent.hpp +++ b/cpp/include/cuvs/neighbors/nn_descent.hpp @@ -61,11 +61,10 @@ struct index_params : cuvs::neighbors::index_params { /** @brief Construct NN descent parameters for a specific kNN graph degree * * @param graph_degree output graph degree + * @param metric distance metric to use */ - index_params(size_t graph_degree = 64) - : graph_degree(graph_degree), intermediate_graph_degree(1.5 * graph_degree) - { - } + index_params(size_t graph_degree = 64, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded); }; /** @@ -103,11 +102,16 @@ struct index : cuvs::neighbors::index { * @param n_rows number of rows in knn-graph * @param n_cols number of cols in knn-graph * @param return_distances whether to return distances + * @param metric distance metric to use */ - index(raft::resources const& res, int64_t n_rows, int64_t n_cols, bool return_distances = false) + index(raft::resources const& res, + int64_t n_rows, + int64_t n_cols, + bool return_distances = false, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded) : cuvs::neighbors::index(), res_{res}, - metric_{cuvs::distance::DistanceType::L2Expanded}, + metric_{metric}, graph_{raft::make_host_matrix(n_rows, n_cols)}, graph_view_{graph_.view()}, return_distances_{return_distances} @@ -129,14 +133,16 @@ struct index : cuvs::neighbors::index { * @param graph_view raft::host_matrix_view for storing knn-graph * @param distances_view optional raft::device_matrix_view for storing * distances + * @param metric distance metric to use */ index(raft::resources const& res, raft::host_matrix_view graph_view, std::optional> distances_view = - std::nullopt) + std::nullopt, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded) : cuvs::neighbors::index(), res_{res}, - metric_{cuvs::distance::DistanceType::L2Expanded}, + metric_{metric}, graph_{raft::make_host_matrix(0, 0)}, graph_view_{graph_view}, distances_view_{distances_view}, @@ -473,8 +479,6 @@ auto build(raft::resources const& res, std::optional> graph = std::nullopt) -> cuvs::neighbors::nn_descent::index; -/** @} */ - /** * @brief Test if we have enough GPU memory to run NN descent algorithm. 
* diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh index 6209ff819..b7fec724b 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh @@ -436,11 +436,11 @@ index build( auto knn_build_params = params.graph_build_params; if (std::holds_alternative(params.graph_build_params)) { // Heuristic to decide default build algo and its params. - if (params.metric == cuvs::distance::DistanceType::L2Expanded && - cuvs::neighbors::nn_descent::has_enough_device_memory( + if (cuvs::neighbors::nn_descent::has_enough_device_memory( res, dataset.extents(), sizeof(IdxT))) { RAFT_LOG_DEBUG("NN descent solver"); - knn_build_params = cagra::graph_build_params::nn_descent_params(intermediate_degree); + knn_build_params = + cagra::graph_build_params::nn_descent_params(intermediate_degree, params.metric); } else { RAFT_LOG_DEBUG("Selecting IVF-PQ solver"); knn_build_params = cagra::graph_build_params::ivf_pq_params(dataset.extents(), params.metric); @@ -453,9 +453,6 @@ index build( std::get(knn_build_params); build_knn_graph(res, dataset, knn_graph->view(), ivf_pq_params); } else { - RAFT_EXPECTS( - params.metric == cuvs::distance::DistanceType::L2Expanded, - "L2Expanded is the only distance metrics supported for CAGRA build with nn_descent"); auto nn_descent_params = std::get(knn_build_params); @@ -466,7 +463,8 @@ index build( "nn-descent graph_degree.", nn_descent_params.graph_degree, intermediate_degree); - nn_descent_params = cagra::graph_build_params::nn_descent_params(intermediate_degree); + nn_descent_params = + cagra::graph_build_params::nn_descent_params(intermediate_degree, params.metric); } // Use nn-descent to build CAGRA knn graph diff --git a/cpp/src/neighbors/detail/nn_descent.cuh b/cpp/src/neighbors/detail/nn_descent.cuh index 883d82d76..c62a52540 100644 --- a/cpp/src/neighbors/detail/nn_descent.cuh +++ b/cpp/src/neighbors/detail/nn_descent.cuh @@ -19,6 +19,7 @@ #include "ann_utils.cuh" #include "cagra/device_common.hpp" +#include #include #include @@ -216,6 +217,7 @@ struct BuildConfig { size_t max_iterations{50}; float termination_threshold{0.0001}; size_t output_graph_degree{32}; + cuvs::distance::DistanceType metric{cuvs::distance::DistanceType::L2Expanded}; }; template @@ -454,11 +456,13 @@ __device__ __forceinline__ void load_vec(Data_t* vec_buffer, // TODO: Replace with RAFT utilities https://github.com/rapidsai/raft/issues/1827 /** Calculate L2 norm, and cast data to __half */ template -RAFT_KERNEL preprocess_data_kernel(const Data_t* input_data, - __half* output_data, - int dim, - DistData_t* l2_norms, - size_t list_offset = 0) +RAFT_KERNEL preprocess_data_kernel( + const Data_t* input_data, + __half* output_data, + int dim, + DistData_t* l2_norms, + size_t list_offset = 0, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded) { extern __shared__ char buffer[]; __shared__ float l2_norm; @@ -468,26 +472,32 @@ RAFT_KERNEL preprocess_data_kernel(const Data_t* input_data, load_vec(s_vec, input_data + blockIdx.x * dim, dim, dim, threadIdx.x % raft::warp_size()); if (threadIdx.x == 0) { l2_norm = 0; } __syncthreads(); - int lane_id = threadIdx.x % raft::warp_size(); - for (int step = 0; step < raft::ceildiv(dim, raft::warp_size()); step++) { - int idx = step * raft::warp_size() + lane_id; - float part_dist = 0; - if (idx < dim) { - part_dist = s_vec[idx]; - part_dist = part_dist * part_dist; - } - __syncwarp(); - for (int offset = raft::warp_size() >> 1; 
offset >= 1; offset >>= 1) { - part_dist += __shfl_down_sync(raft::warp_full_mask(), part_dist, offset); + + if (metric == cuvs::distance::DistanceType::L2Expanded || + metric == cuvs::distance::DistanceType::CosineExpanded) { + int lane_id = threadIdx.x % raft::warp_size(); + for (int step = 0; step < raft::ceildiv(dim, raft::warp_size()); step++) { + int idx = step * raft::warp_size() + lane_id; + float part_dist = 0; + if (idx < dim) { + part_dist = s_vec[idx]; + part_dist = part_dist * part_dist; + } + __syncwarp(); + for (int offset = raft::warp_size() >> 1; offset >= 1; offset >>= 1) { + part_dist += __shfl_down_sync(raft::warp_full_mask(), part_dist, offset); + } + if (lane_id == 0) { l2_norm += part_dist; } + __syncwarp(); } - if (lane_id == 0) { l2_norm += part_dist; } - __syncwarp(); } for (int step = 0; step < raft::ceildiv(dim, raft::warp_size()); step++) { int idx = step * raft::warp_size() + threadIdx.x; if (idx < dim) { - if (l2_norms == nullptr) { + if (metric == cuvs::distance::DistanceType::InnerProduct) { + output_data[list_id * dim + idx] = input_data[(size_t)blockIdx.x * dim + idx]; + } else if (metric == cuvs::distance::DistanceType::CosineExpanded) { output_data[list_id * dim + idx] = (float)input_data[(size_t)blockIdx.x * dim + idx] / sqrt(l2_norm); } else { @@ -715,7 +725,8 @@ __launch_bounds__(BLOCK_SIZE, 4) DistData_t* dists, int graph_width, int* locks, - DistData_t* l2_norms) + DistData_t* l2_norms, + cuvs::distance::DistanceType metric) { #if (__CUDA_ARCH__ >= 700) using namespace nvcuda; @@ -827,8 +838,10 @@ __launch_bounds__(BLOCK_SIZE, 4) for (int i = threadIdx.x; i < MAX_NUM_BI_SAMPLES * SKEWED_MAX_NUM_BI_SAMPLES; i += blockDim.x) { if (i % SKEWED_MAX_NUM_BI_SAMPLES < list_new_size && i / SKEWED_MAX_NUM_BI_SAMPLES < list_new_size) { - if (l2_norms == nullptr) { + if (metric == cuvs::distance::DistanceType::InnerProduct) { s_distances[i] = -s_distances[i]; + } else if (metric == cuvs::distance::DistanceType::CosineExpanded) { + s_distances[i] = 1.0 - s_distances[i]; } else { s_distances[i] = l2_norms[new_neighbors[i % SKEWED_MAX_NUM_BI_SAMPLES]] + l2_norms[new_neighbors[i / SKEWED_MAX_NUM_BI_SAMPLES]] - @@ -906,8 +919,10 @@ __launch_bounds__(BLOCK_SIZE, 4) for (int i = threadIdx.x; i < MAX_NUM_BI_SAMPLES * SKEWED_MAX_NUM_BI_SAMPLES; i += blockDim.x) { if (i % SKEWED_MAX_NUM_BI_SAMPLES < list_old_size && i / SKEWED_MAX_NUM_BI_SAMPLES < list_new_size) { - if (l2_norms == nullptr) { + if (metric == cuvs::distance::DistanceType::InnerProduct) { s_distances[i] = -s_distances[i]; + } else if (metric == cuvs::distance::DistanceType::CosineExpanded) { + s_distances[i] = 1.0 - s_distances[i]; } else { s_distances[i] = l2_norms[old_neighbors[i % SKEWED_MAX_NUM_BI_SAMPLES]] + l2_norms[new_neighbors[i / SKEWED_MAX_NUM_BI_SAMPLES]] - @@ -1161,7 +1176,7 @@ GNND::GNND(raft::resources const& res, const BuildConfig& build ndim_(build_config.dataset_dim), d_data_{raft::make_device_matrix<__half, size_t, raft::row_major>( res, nrow_, build_config.dataset_dim)}, - l2_norms_{raft::make_device_vector(res, nrow_)}, + l2_norms_{raft::make_device_vector(res, 0)}, graph_buffer_{ raft::make_device_matrix(res, nrow_, DEGREE_ON_DEVICE)}, dists_buffer_{ @@ -1181,11 +1196,16 @@ GNND::GNND(raft::resources const& res, const BuildConfig& build d_list_sizes_old_{raft::make_device_vector(res, nrow_)} { static_assert(NUM_SAMPLES <= 32); + raft::matrix::fill(res, dists_buffer_.view(), std::numeric_limits::max()); auto graph_buffer_view = raft::make_device_matrix_view( 
reinterpret_cast(graph_buffer_.data_handle()), nrow_, DEGREE_ON_DEVICE); raft::matrix::fill(res, graph_buffer_view, std::numeric_limits::max()); raft::matrix::fill(res, d_locks_.view(), 0); + + if (build_config.metric == cuvs::distance::DistanceType::L2Expanded) { + l2_norms_ = raft::make_device_vector(res, nrow_); + } }; template @@ -1228,7 +1248,8 @@ void GNND::local_join(cudaStream_t stream) dists_buffer_.data_handle(), DEGREE_ON_DEVICE, d_locks_.data_handle(), - l2_norms_.data_handle()); + l2_norms_.data_handle(), + build_config_.metric); } template @@ -1261,7 +1282,8 @@ void GNND::build(Data_t* data, d_data_.data_handle(), build_config_.dataset_dim, l2_norms_.data_handle(), - batch.offset()); + batch.offset(), + build_config_.metric); } graph_.clear(); @@ -1417,6 +1439,11 @@ void build(raft::resources const& res, RAFT_EXPECTS(dataset.extent(0) < std::numeric_limits::max() - 1, "The dataset size for GNND should be less than %d", std::numeric_limits::max() - 1); + auto allowed_metrics = params.metric == cuvs::distance::DistanceType::L2Expanded || + params.metric == cuvs::distance::DistanceType::CosineExpanded || + params.metric == cuvs::distance::DistanceType::InnerProduct; + RAFT_EXPECTS(allowed_metrics && idx.metric() == params.metric, + "The metric for NN Descent should be L2Expanded, CosineExpanded or InnerProduct"); size_t intermediate_degree = params.intermediate_graph_degree; size_t graph_degree = params.graph_degree; @@ -1452,7 +1479,8 @@ void build(raft::resources const& res, .internal_node_degree = extended_intermediate_degree, .max_iterations = params.max_iterations, .termination_threshold = params.termination_threshold, - .output_graph_degree = params.graph_degree}; + .output_graph_degree = params.graph_degree, + .metric = params.metric}; GNND nnd(res, build_config); @@ -1500,8 +1528,11 @@ index build( graph_degree = intermediate_degree; } - index idx{ - res, dataset.extent(0), static_cast(graph_degree), params.return_distances}; + index idx{res, + dataset.extent(0), + static_cast(graph_degree), + params.return_distances, + params.metric}; build(res, params, dataset, idx); diff --git a/cpp/src/neighbors/nn_descent_index.cpp b/cpp/src/neighbors/nn_descent_index.cpp new file mode 100644 index 000000000..25d5b6af8 --- /dev/null +++ b/cpp/src/neighbors/nn_descent_index.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +namespace cuvs::neighbors::nn_descent { + +index_params::index_params(size_t graph_degree, cuvs::distance::DistanceType metric) +{ + this->graph_degree = graph_degree; + this->intermediate_graph_degree = 1.5 * graph_degree; + this->metric = metric; +} +} // namespace cuvs::neighbors::nn_descent \ No newline at end of file diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index 37d42dd1d..660246c67 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -361,8 +361,8 @@ class AnnCagraTest : public ::testing::TestWithParam { // not used for knn_graph building. switch (ps.build_algo) { case graph_build_algo::IVF_PQ: - index_params.graph_build_params = - graph_build_params::ivf_pq_params(raft::matrix_extent(ps.n_rows, ps.dim)); + index_params.graph_build_params = graph_build_params::ivf_pq_params( + raft::matrix_extent(ps.n_rows, ps.dim), index_params.metric); if (ps.ivf_pq_search_refine_ratio) { std::get( index_params.graph_build_params) @@ -370,8 +370,8 @@ class AnnCagraTest : public ::testing::TestWithParam { } break; case graph_build_algo::NN_DESCENT: { - index_params.graph_build_params = - graph_build_params::nn_descent_params(index_params.intermediate_graph_degree); + index_params.graph_build_params = graph_build_params::nn_descent_params( + index_params.intermediate_graph_degree, index_params.metric); break; } case graph_build_algo::AUTO: @@ -389,7 +389,7 @@ class AnnCagraTest : public ::testing::TestWithParam { (const DataT*)database.data(), ps.n_rows, ps.dim); { - cagra::index index(handle_); + cagra::index index(handle_, index_params.metric); if (ps.host_dataset) { auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); diff --git a/cpp/test/neighbors/ann_nn_descent.cuh b/cpp/test/neighbors/ann_nn_descent.cuh index 7d2575c2b..09861a219 100644 --- a/cpp/test/neighbors/ann_nn_descent.cuh +++ b/cpp/test/neighbors/ann_nn_descent.cuh @@ -27,6 +27,7 @@ #include #include "naive_knn.cuh" +#include #include @@ -107,7 +108,6 @@ class AnnNNDescentTest : public ::testing::TestWithParam { raft::update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); raft::resource::sync_stream(handle_); } - { { nn_descent::index_params index_params; @@ -124,6 +124,7 @@ class AnnNNDescentTest : public ::testing::TestWithParam { if (ps.host_dataset) { auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); + raft::resource::sync_stream(handle_); auto database_host_view = raft::make_host_matrix_view( (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); auto index = nn_descent::build(handle_, index_params, database_host_view); @@ -151,6 +152,13 @@ class AnnNNDescentTest : public ::testing::TestWithParam { raft::resource::sync_stream(handle_); } + if (ps.metric == cuvs::distance::DistanceType::InnerProduct) { + std::transform( + distances_naive.begin(), distances_naive.end(), distances_naive.begin(), [](auto x) { + return -x; + }); + } + double min_recall = ps.min_recall; EXPECT_TRUE(eval_neighbours(indices_naive, indices_NNDescent, @@ -169,9 +177,11 @@ class AnnNNDescentTest : public ::testing::TestWithParam { raft::random::RngState r(1234ULL); if constexpr (std::is_same{}) { raft::random::normal(handle_, r, database.data(), ps.n_rows * ps.dim, DataT(0.1), DataT(2.0)); - } else { + } else if 
constexpr (std::is_same{}) { raft::random::uniformInt( - handle_, r, database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20)); + handle_, r, database.data(), ps.n_rows * ps.dim, DataT(-5), DataT(5)); + } else { + raft::random::uniformInt(handle_, r, database.data(), ps.n_rows * ps.dim, DataT(0), DataT(5)); } raft::resource::sync_stream(handle_); } @@ -308,13 +318,15 @@ class AnnNNDescentBatchTest : public ::testing::TestWithParam database; }; -const std::vector inputs = raft::util::itertools::product( - {1000, 2000}, // n_rows - {3, 5, 7, 8, 17, 64, 128, 137, 192, 256, 512, 619, 1024}, // dim - {32, 64}, // graph_degree - {cuvs::distance::DistanceType::L2Expanded}, - {false, true}, - {0.90}); +const std::vector inputs = + raft::util::itertools::product({2000, 4000}, // n_rows + {4, 16, 64, 256, 1024}, // dim + {32, 64}, // graph_degree + {cuvs::distance::DistanceType::L2Expanded, + cuvs::distance::DistanceType::InnerProduct, + cuvs::distance::DistanceType::CosineExpanded}, + {false, true}, + {0.90}); // TODO : Investigate why this test is failing Reference issue https // : // github.com/rapidsai/raft/issues/2450 diff --git a/python/cuvs/cuvs/test/test_cagra.py b/python/cuvs/cuvs/test/test_cagra.py index 92b88f013..56e132c23 100644 --- a/python/cuvs/cuvs/test/test_cagra.py +++ b/python/cuvs/cuvs/test/test_cagra.py @@ -122,8 +122,9 @@ def run_cagra_build_search_test( @pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) @pytest.mark.parametrize("array_type", ["device", "host"]) @pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"]) +@pytest.mark.parametrize("metric", ["euclidean"]) def test_cagra_dataset_dtype_host_device( - dtype, array_type, inplace, build_algo + dtype, array_type, inplace, build_algo, metric ): # Note that inner_product tests use normalized input which we cannot # represent in int8, therefore we test only sqeuclidean metric here. @@ -132,6 +133,7 @@ def test_cagra_dataset_dtype_host_device( inplace=inplace, array_type=array_type, build_algo=build_algo, + metric=metric, ) diff --git a/python/cuvs/cuvs/test/test_hnsw.py b/python/cuvs/cuvs/test/test_hnsw.py index 8bd2e8b76..20a35401e 100644 --- a/python/cuvs/cuvs/test/test_hnsw.py +++ b/python/cuvs/cuvs/test/test_hnsw.py @@ -41,8 +41,6 @@ def run_hnsw_build_search_test( pytest.skip( "inner_product metric is not supported for int8/uint8 data" ) - if build_algo == "nn_descent": - pytest.skip("inner_product metric is not supported for nn_descent") build_params = cagra.IndexParams( metric=metric, @@ -83,7 +81,7 @@ def run_hnsw_build_search_test( @pytest.mark.parametrize("k", [10, 20]) @pytest.mark.parametrize("ef", [30, 40]) @pytest.mark.parametrize("num_threads", [2, 4]) -@pytest.mark.parametrize("metric", ["sqeuclidean"]) +@pytest.mark.parametrize("metric", ["sqeuclidean", "inner_product"]) @pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"]) def test_hnsw(dtype, k, ef, num_threads, metric, build_algo): # Note that inner_product tests use normalized input which we cannot From 7b879116684501f36ca5a19a74c01fcecb52e962 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 15 Nov 2024 16:12:42 -0600 Subject: [PATCH 4/4] use different wheel-size thresholds based on CUDA version (#469) `cuvs-cu11` wheels are significantly larger than `cuvs-cu12` wheels, because (among other reasons) they are not able to dynamically link to CUDA math library wheels. In #464, I proposed a size limit for CI checks of "max CUDA 11 wheel size + a buffer". 
This PR proposes using different thresholds based on CUDA major version, following these discussions: * https://github.com/rapidsai/cugraph/pull/4754#discussion_r1842526907 * https://github.com/rapidsai/cuml/pull/6136#discussion_r1841774811 Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Mike Sarahan (https://github.com/msarahan) URL: https://github.com/rapidsai/cuvs/pull/469 --- ci/validate_wheel.sh | 14 ++++++++++++++ python/cuvs/pyproject.toml | 4 +--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh index 5910a5c59..f2b235765 100755 --- a/ci/validate_wheel.sh +++ b/ci/validate_wheel.sh @@ -6,12 +6,26 @@ set -euo pipefail package_dir=$1 wheel_dir_relative_path=$2 +RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" + +# some packages are much larger on CUDA 11 than on CUDA 12 +if [[ "${RAPIDS_CUDA_MAJOR}" == "11" ]]; then + PYDISTCHECK_ARGS=( + --max-allowed-size-compressed '1.4G' + ) +else + PYDISTCHECK_ARGS=( + --max-allowed-size-compressed '950M' + ) +fi + cd "${package_dir}" rapids-logger "validate packages with 'pydistcheck'" pydistcheck \ --inspect \ + "${PYDISTCHECK_ARGS[@]}" \ "$(echo ${wheel_dir_relative_path}/*.whl)" rapids-logger "validate packages with 'twine'" diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index d40026776..92e4993c7 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -135,12 +135,10 @@ matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" [tool.pydistcheck] select = [ + # NOTE: size threshold is managed via CLI args in CI scripts "distro-too-large-compressed", ] -# detect when package size grows significantly -max_allowed_size_compressed = '1.4G' - [tool.pytest.ini_options] filterwarnings = [ "error",
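For reference, a minimal local invocation of the validation step these patches add, assuming a wheel has already been built into python/cuvs/final_dist (the path ci/build_wheel_cuvs.sh passes) and that RAPIDS_CUDA_VERSION is exported as it is in CI; the version shown is only an assumed example value:

    export RAPIDS_CUDA_VERSION="12.5.1"   # assumed example value; CI provides this variable
    ci/validate_wheel.sh python/cuvs final_dist

With that environment set, validate_wheel.sh derives the CUDA major version, picks the matching --max-allowed-size-compressed threshold, and runs pydistcheck and twine against the wheel in final_dist, just as the CI job does after the build step.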