diff --git a/ci/build_wheel_cuvs.sh b/ci/build_wheel_cuvs.sh index e03da9f19..444657cc0 100755 --- a/ci/build_wheel_cuvs.sh +++ b/ci/build_wheel_cuvs.sh @@ -3,6 +3,8 @@ set -euo pipefail +package_dir="python/cuvs" + case "${RAPIDS_CUDA_VERSION}" in 12.*) EXTRA_CMAKE_ARGS=";-DUSE_CUDA_MATH_WHEELS=ON" @@ -15,4 +17,5 @@ esac # Set up skbuild options. Enable sccache in skbuild config options export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUVS_CPP=OFF${EXTRA_CMAKE_ARGS}" -ci/build_wheel.sh cuvs python/cuvs +ci/build_wheel.sh cuvs ${package_dir} +ci/validate_wheel.sh ${package_dir} final_dist diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh new file mode 100755 index 000000000..f2b235765 --- /dev/null +++ b/ci/validate_wheel.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +package_dir=$1 +wheel_dir_relative_path=$2 + +RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" + +# some packages are much larger on CUDA 11 than on CUDA 12 +if [[ "${RAPIDS_CUDA_MAJOR}" == "11" ]]; then + PYDISTCHECK_ARGS=( + --max-allowed-size-compressed '1.4G' + ) +else + PYDISTCHECK_ARGS=( + --max-allowed-size-compressed '950M' + ) +fi + +cd "${package_dir}" + +rapids-logger "validate packages with 'pydistcheck'" + +pydistcheck \ + --inspect \ + "${PYDISTCHECK_ARGS[@]}" \ + "$(echo ${wheel_dir_relative_path}/*.whl)" + +rapids-logger "validate packages with 'twine'" + +twine check \ + --strict \ + "$(echo ${wheel_dir_relative_path}/*.whl)" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c493af488..81b82aa7b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -436,6 +436,7 @@ if(BUILD_SHARED_LIBS) src/neighbors/nn_descent.cu src/neighbors/nn_descent_float.cu src/neighbors/nn_descent_half.cu + src/neighbors/nn_descent_index.cpp src/neighbors/nn_descent_int8.cu src/neighbors/nn_descent_uint8.cu src/neighbors/reachability.cu diff --git a/cpp/include/cuvs/neighbors/nn_descent.hpp b/cpp/include/cuvs/neighbors/nn_descent.hpp index bd41d1ff7..9cd8192b5 100644 --- a/cpp/include/cuvs/neighbors/nn_descent.hpp +++ b/cpp/include/cuvs/neighbors/nn_descent.hpp @@ -61,11 +61,10 @@ struct index_params : cuvs::neighbors::index_params { /** @brief Construct NN descent parameters for a specific kNN graph degree * * @param graph_degree output graph degree + * @param metric distance metric to use */ - index_params(size_t graph_degree = 64) - : graph_degree(graph_degree), intermediate_graph_degree(1.5 * graph_degree) - { - } + index_params(size_t graph_degree = 64, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded); }; /** @@ -103,11 +102,16 @@ struct index : cuvs::neighbors::index { * @param n_rows number of rows in knn-graph * @param n_cols number of cols in knn-graph * @param return_distances whether to return distances + * @param metric distance metric to use */ - index(raft::resources const& res, int64_t n_rows, int64_t n_cols, bool return_distances = false) + index(raft::resources const& res, + int64_t n_rows, + int64_t n_cols, + bool return_distances = false, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded) : cuvs::neighbors::index(), res_{res}, - metric_{cuvs::distance::DistanceType::L2Expanded}, + metric_{metric}, graph_{raft::make_host_matrix(n_rows, n_cols)}, graph_view_{graph_.view()}, return_distances_{return_distances} @@ -129,14 +133,16 @@ struct index : cuvs::neighbors::index { * @param graph_view raft::host_matrix_view for storing knn-graph * @param distances_view optional raft::device_matrix_view for storing * distances + * @param metric distance metric to use */ index(raft::resources const& res, raft::host_matrix_view graph_view, std::optional> distances_view = - std::nullopt) + std::nullopt, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded) : cuvs::neighbors::index(), res_{res}, - metric_{cuvs::distance::DistanceType::L2Expanded}, + metric_{metric}, graph_{raft::make_host_matrix(0, 0)}, graph_view_{graph_view}, distances_view_{distances_view}, @@ -473,8 +479,6 @@ auto build(raft::resources const& res, std::optional> graph = std::nullopt) -> cuvs::neighbors::nn_descent::index; -/** @} */ - /** * @brief Test if we have enough GPU memory to run NN descent algorithm. * diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp index 6985ff094..326a89665 100644 --- a/cpp/src/neighbors/cagra_c.cpp +++ b/cpp/src/neighbors/cagra_c.cpp @@ -29,6 +29,8 @@ #include #include +#include + namespace { template diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh index 6209ff819..b7fec724b 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh @@ -436,11 +436,11 @@ index build( auto knn_build_params = params.graph_build_params; if (std::holds_alternative(params.graph_build_params)) { // Heuristic to decide default build algo and its params. - if (params.metric == cuvs::distance::DistanceType::L2Expanded && - cuvs::neighbors::nn_descent::has_enough_device_memory( + if (cuvs::neighbors::nn_descent::has_enough_device_memory( res, dataset.extents(), sizeof(IdxT))) { RAFT_LOG_DEBUG("NN descent solver"); - knn_build_params = cagra::graph_build_params::nn_descent_params(intermediate_degree); + knn_build_params = + cagra::graph_build_params::nn_descent_params(intermediate_degree, params.metric); } else { RAFT_LOG_DEBUG("Selecting IVF-PQ solver"); knn_build_params = cagra::graph_build_params::ivf_pq_params(dataset.extents(), params.metric); @@ -453,9 +453,6 @@ index build( std::get(knn_build_params); build_knn_graph(res, dataset, knn_graph->view(), ivf_pq_params); } else { - RAFT_EXPECTS( - params.metric == cuvs::distance::DistanceType::L2Expanded, - "L2Expanded is the only distance metrics supported for CAGRA build with nn_descent"); auto nn_descent_params = std::get(knn_build_params); @@ -466,7 +463,8 @@ index build( "nn-descent graph_degree.", nn_descent_params.graph_degree, intermediate_degree); - nn_descent_params = cagra::graph_build_params::nn_descent_params(intermediate_degree); + nn_descent_params = + cagra::graph_build_params::nn_descent_params(intermediate_degree, params.metric); } // Use nn-descent to build CAGRA knn graph diff --git a/cpp/src/neighbors/detail/cagra/graph_core.cuh b/cpp/src/neighbors/detail/cagra/graph_core.cuh index 4253cb781..daeac82b9 100644 --- a/cpp/src/neighbors/detail/cagra/graph_core.cuh +++ b/cpp/src/neighbors/detail/cagra/graph_core.cuh @@ -156,6 +156,7 @@ __global__ void kern_prune(const IdxT* const knn_graph, // [graph_chunk_size, g // count number of detours (A->D->B) for (uint32_t kAD = 0; kAD < graph_degree - 1; kAD++) { const uint64_t iD = knn_graph[kAD + (graph_degree * iA)]; + if (iD >= graph_size) { continue; } for (uint32_t kDB = threadIdx.x; kDB < graph_degree; kDB += blockDim.x) { const uint64_t iB_candidate = knn_graph[kDB + ((uint64_t)graph_degree * iD)]; for (uint32_t kAB = kAD + 1; kAB < graph_degree; kAB++) { diff --git a/cpp/src/neighbors/detail/nn_descent.cuh b/cpp/src/neighbors/detail/nn_descent.cuh index 883d82d76..c62a52540 100644 --- a/cpp/src/neighbors/detail/nn_descent.cuh +++ b/cpp/src/neighbors/detail/nn_descent.cuh @@ -19,6 +19,7 @@ #include "ann_utils.cuh" #include "cagra/device_common.hpp" +#include #include #include @@ -216,6 +217,7 @@ struct BuildConfig { size_t max_iterations{50}; float termination_threshold{0.0001}; size_t output_graph_degree{32}; + cuvs::distance::DistanceType metric{cuvs::distance::DistanceType::L2Expanded}; }; template @@ -454,11 +456,13 @@ __device__ __forceinline__ void load_vec(Data_t* vec_buffer, // TODO: Replace with RAFT utilities https://github.com/rapidsai/raft/issues/1827 /** Calculate L2 norm, and cast data to __half */ template -RAFT_KERNEL preprocess_data_kernel(const Data_t* input_data, - __half* output_data, - int dim, - DistData_t* l2_norms, - size_t list_offset = 0) +RAFT_KERNEL preprocess_data_kernel( + const Data_t* input_data, + __half* output_data, + int dim, + DistData_t* l2_norms, + size_t list_offset = 0, + cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded) { extern __shared__ char buffer[]; __shared__ float l2_norm; @@ -468,26 +472,32 @@ RAFT_KERNEL preprocess_data_kernel(const Data_t* input_data, load_vec(s_vec, input_data + blockIdx.x * dim, dim, dim, threadIdx.x % raft::warp_size()); if (threadIdx.x == 0) { l2_norm = 0; } __syncthreads(); - int lane_id = threadIdx.x % raft::warp_size(); - for (int step = 0; step < raft::ceildiv(dim, raft::warp_size()); step++) { - int idx = step * raft::warp_size() + lane_id; - float part_dist = 0; - if (idx < dim) { - part_dist = s_vec[idx]; - part_dist = part_dist * part_dist; - } - __syncwarp(); - for (int offset = raft::warp_size() >> 1; offset >= 1; offset >>= 1) { - part_dist += __shfl_down_sync(raft::warp_full_mask(), part_dist, offset); + + if (metric == cuvs::distance::DistanceType::L2Expanded || + metric == cuvs::distance::DistanceType::CosineExpanded) { + int lane_id = threadIdx.x % raft::warp_size(); + for (int step = 0; step < raft::ceildiv(dim, raft::warp_size()); step++) { + int idx = step * raft::warp_size() + lane_id; + float part_dist = 0; + if (idx < dim) { + part_dist = s_vec[idx]; + part_dist = part_dist * part_dist; + } + __syncwarp(); + for (int offset = raft::warp_size() >> 1; offset >= 1; offset >>= 1) { + part_dist += __shfl_down_sync(raft::warp_full_mask(), part_dist, offset); + } + if (lane_id == 0) { l2_norm += part_dist; } + __syncwarp(); } - if (lane_id == 0) { l2_norm += part_dist; } - __syncwarp(); } for (int step = 0; step < raft::ceildiv(dim, raft::warp_size()); step++) { int idx = step * raft::warp_size() + threadIdx.x; if (idx < dim) { - if (l2_norms == nullptr) { + if (metric == cuvs::distance::DistanceType::InnerProduct) { + output_data[list_id * dim + idx] = input_data[(size_t)blockIdx.x * dim + idx]; + } else if (metric == cuvs::distance::DistanceType::CosineExpanded) { output_data[list_id * dim + idx] = (float)input_data[(size_t)blockIdx.x * dim + idx] / sqrt(l2_norm); } else { @@ -715,7 +725,8 @@ __launch_bounds__(BLOCK_SIZE, 4) DistData_t* dists, int graph_width, int* locks, - DistData_t* l2_norms) + DistData_t* l2_norms, + cuvs::distance::DistanceType metric) { #if (__CUDA_ARCH__ >= 700) using namespace nvcuda; @@ -827,8 +838,10 @@ __launch_bounds__(BLOCK_SIZE, 4) for (int i = threadIdx.x; i < MAX_NUM_BI_SAMPLES * SKEWED_MAX_NUM_BI_SAMPLES; i += blockDim.x) { if (i % SKEWED_MAX_NUM_BI_SAMPLES < list_new_size && i / SKEWED_MAX_NUM_BI_SAMPLES < list_new_size) { - if (l2_norms == nullptr) { + if (metric == cuvs::distance::DistanceType::InnerProduct) { s_distances[i] = -s_distances[i]; + } else if (metric == cuvs::distance::DistanceType::CosineExpanded) { + s_distances[i] = 1.0 - s_distances[i]; } else { s_distances[i] = l2_norms[new_neighbors[i % SKEWED_MAX_NUM_BI_SAMPLES]] + l2_norms[new_neighbors[i / SKEWED_MAX_NUM_BI_SAMPLES]] - @@ -906,8 +919,10 @@ __launch_bounds__(BLOCK_SIZE, 4) for (int i = threadIdx.x; i < MAX_NUM_BI_SAMPLES * SKEWED_MAX_NUM_BI_SAMPLES; i += blockDim.x) { if (i % SKEWED_MAX_NUM_BI_SAMPLES < list_old_size && i / SKEWED_MAX_NUM_BI_SAMPLES < list_new_size) { - if (l2_norms == nullptr) { + if (metric == cuvs::distance::DistanceType::InnerProduct) { s_distances[i] = -s_distances[i]; + } else if (metric == cuvs::distance::DistanceType::CosineExpanded) { + s_distances[i] = 1.0 - s_distances[i]; } else { s_distances[i] = l2_norms[old_neighbors[i % SKEWED_MAX_NUM_BI_SAMPLES]] + l2_norms[new_neighbors[i / SKEWED_MAX_NUM_BI_SAMPLES]] - @@ -1161,7 +1176,7 @@ GNND::GNND(raft::resources const& res, const BuildConfig& build ndim_(build_config.dataset_dim), d_data_{raft::make_device_matrix<__half, size_t, raft::row_major>( res, nrow_, build_config.dataset_dim)}, - l2_norms_{raft::make_device_vector(res, nrow_)}, + l2_norms_{raft::make_device_vector(res, 0)}, graph_buffer_{ raft::make_device_matrix(res, nrow_, DEGREE_ON_DEVICE)}, dists_buffer_{ @@ -1181,11 +1196,16 @@ GNND::GNND(raft::resources const& res, const BuildConfig& build d_list_sizes_old_{raft::make_device_vector(res, nrow_)} { static_assert(NUM_SAMPLES <= 32); + raft::matrix::fill(res, dists_buffer_.view(), std::numeric_limits::max()); auto graph_buffer_view = raft::make_device_matrix_view( reinterpret_cast(graph_buffer_.data_handle()), nrow_, DEGREE_ON_DEVICE); raft::matrix::fill(res, graph_buffer_view, std::numeric_limits::max()); raft::matrix::fill(res, d_locks_.view(), 0); + + if (build_config.metric == cuvs::distance::DistanceType::L2Expanded) { + l2_norms_ = raft::make_device_vector(res, nrow_); + } }; template @@ -1228,7 +1248,8 @@ void GNND::local_join(cudaStream_t stream) dists_buffer_.data_handle(), DEGREE_ON_DEVICE, d_locks_.data_handle(), - l2_norms_.data_handle()); + l2_norms_.data_handle(), + build_config_.metric); } template @@ -1261,7 +1282,8 @@ void GNND::build(Data_t* data, d_data_.data_handle(), build_config_.dataset_dim, l2_norms_.data_handle(), - batch.offset()); + batch.offset(), + build_config_.metric); } graph_.clear(); @@ -1417,6 +1439,11 @@ void build(raft::resources const& res, RAFT_EXPECTS(dataset.extent(0) < std::numeric_limits::max() - 1, "The dataset size for GNND should be less than %d", std::numeric_limits::max() - 1); + auto allowed_metrics = params.metric == cuvs::distance::DistanceType::L2Expanded || + params.metric == cuvs::distance::DistanceType::CosineExpanded || + params.metric == cuvs::distance::DistanceType::InnerProduct; + RAFT_EXPECTS(allowed_metrics && idx.metric() == params.metric, + "The metric for NN Descent should be L2Expanded, CosineExpanded or InnerProduct"); size_t intermediate_degree = params.intermediate_graph_degree; size_t graph_degree = params.graph_degree; @@ -1452,7 +1479,8 @@ void build(raft::resources const& res, .internal_node_degree = extended_intermediate_degree, .max_iterations = params.max_iterations, .termination_threshold = params.termination_threshold, - .output_graph_degree = params.graph_degree}; + .output_graph_degree = params.graph_degree, + .metric = params.metric}; GNND nnd(res, build_config); @@ -1500,8 +1528,11 @@ index build( graph_degree = intermediate_degree; } - index idx{ - res, dataset.extent(0), static_cast(graph_degree), params.return_distances}; + index idx{res, + dataset.extent(0), + static_cast(graph_degree), + params.return_distances, + params.metric}; build(res, params, dataset, idx); diff --git a/cpp/src/neighbors/iface/iface.hpp b/cpp/src/neighbors/iface/iface.hpp index a329db429..9b3da75a4 100644 --- a/cpp/src/neighbors/iface/iface.hpp +++ b/cpp/src/neighbors/iface/iface.hpp @@ -1,4 +1,20 @@ -#include +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once #include #include @@ -6,6 +22,9 @@ #include #include +#include +#include + namespace cuvs::neighbors { using namespace raft; @@ -16,7 +35,7 @@ void build(const raft::device_resources& handle, const cuvs::neighbors::index_params* index_params, raft::mdspan, row_major, Accessor> index_dataset) { - interface.mutex_->lock(); + std::lock_guard(*interface.mutex_); if constexpr (std::is_same>::value) { auto idx = cuvs::neighbors::ivf_flat::build( @@ -32,8 +51,6 @@ void build(const raft::device_resources& handle, interface.index_.emplace(std::move(idx)); } resource::sync_stream(handle); - - interface.mutex_->unlock(); } template @@ -44,7 +61,7 @@ void extend( std::optional, layout_c_contiguous, Accessor2>> new_indices) { - interface.mutex_->lock(); + std::lock_guard(*interface.mutex_); if constexpr (std::is_same>::value) { auto idx = @@ -58,8 +75,6 @@ void extend( RAFT_FAIL("CAGRA does not implement the extend method"); } resource::sync_stream(handle); - - interface.mutex_->unlock(); } template @@ -70,7 +85,7 @@ void search(const raft::device_resources& handle, raft::device_matrix_view neighbors, raft::device_matrix_view distances) { - // interface.mutex_->lock(); + // std::lock_guard(*interface.mutex_); if constexpr (std::is_same>::value) { cuvs::neighbors::ivf_flat::search( handle, @@ -94,9 +109,7 @@ void search(const raft::device_resources& handle, neighbors, distances); } - resource::sync_stream(handle); - - // interface.mutex_->unlock(); + // resource::sync_stream(handle); } // for MG ANN only @@ -108,7 +121,7 @@ void search(const raft::device_resources& handle, raft::device_matrix_view d_neighbors, raft::device_matrix_view d_distances) { - // interface.mutex_->lock(); + // std::lock_guard(*interface.mutex_); int64_t n_rows = h_queries.extent(0); int64_t n_dims = h_queries.extent(1); @@ -120,8 +133,6 @@ void search(const raft::device_resources& handle, auto d_query_view = raft::make_const_mdspan(d_queries.view()); search(handle, interface, search_params, d_query_view, d_neighbors, d_distances); - - // interface.mutex_->unlock(); } template @@ -129,7 +140,7 @@ void serialize(const raft::device_resources& handle, const cuvs::neighbors::iface& interface, std::ostream& os) { - interface.mutex_->lock(); + std::lock_guard(*interface.mutex_); if constexpr (std::is_same>::value) { ivf_flat::serialize(handle, os, interface.index_.value()); @@ -138,8 +149,6 @@ void serialize(const raft::device_resources& handle, } else if constexpr (std::is_same>::value) { cagra::serialize(handle, os, interface.index_.value(), true); } - - interface.mutex_->unlock(); } template @@ -147,7 +156,7 @@ void deserialize(const raft::device_resources& handle, cuvs::neighbors::iface& interface, std::istream& is) { - interface.mutex_->lock(); + std::lock_guard(*interface.mutex_); if constexpr (std::is_same>::value) { ivf_flat::index idx(handle); @@ -162,8 +171,6 @@ void deserialize(const raft::device_resources& handle, cagra::deserialize(handle, is, &idx); interface.index_.emplace(std::move(idx)); } - - interface.mutex_->unlock(); } template @@ -171,7 +178,7 @@ void deserialize(const raft::device_resources& handle, cuvs::neighbors::iface& interface, const std::string& filename) { - interface.mutex_->lock(); + std::lock_guard(*interface.mutex_); std::ifstream is(filename, std::ios::in | std::ios::binary); if (!is) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } @@ -191,8 +198,6 @@ void deserialize(const raft::device_resources& handle, } is.close(); - - interface.mutex_->unlock(); } -}; // namespace cuvs::neighbors \ No newline at end of file +}; // namespace cuvs::neighbors diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh index fb110d810..d6ffc1218 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_build.cuh @@ -132,6 +132,10 @@ RAFT_KERNEL build_index_kernel(const LabelT* labels, { const IdxT i = IdxT(blockDim.x) * IdxT(blockIdx.x) + threadIdx.x; if (i >= n_rows) { return; } + auto source_ix = source_ixs == nullptr ? i + batch_offset : source_ixs[i]; + // In the context of refinement, some indices may be invalid (the generating NN algorithm does + // not return enough valid items). Do not add the item to the index in this case. + if (source_ix == ivf::kInvalidRecord || source_ix == raft::upper_bound()) { return; } auto list_id = labels[i]; auto inlist_id = atomicAdd(list_sizes_ptr + list_id, 1); @@ -139,7 +143,7 @@ RAFT_KERNEL build_index_kernel(const LabelT* labels, auto* list_data = list_data_ptrs[list_id]; // Record the source vector id in the index - list_index[inlist_id] = source_ixs == nullptr ? i + batch_offset : source_ixs[i]; + list_index[inlist_id] = source_ix; // The data is written in interleaved groups of `index::kGroupSize` vectors using interleaved_group = raft::Pow2; @@ -151,7 +155,7 @@ RAFT_KERNEL build_index_kernel(const LabelT* labels, // Point to the source vector if constexpr (gather_src) { - source_vecs += source_ixs[i] * dim; + source_vecs += source_ix * dim; } else { source_vecs += i * dim; } diff --git a/cpp/src/neighbors/ivf_flat_c.cpp b/cpp/src/neighbors/ivf_flat_c.cpp old mode 100755 new mode 100644 index c14c1edc0..2acc6b678 --- a/cpp/src/neighbors/ivf_flat_c.cpp +++ b/cpp/src/neighbors/ivf_flat_c.cpp @@ -29,6 +29,8 @@ #include #include +#include + namespace { template diff --git a/cpp/src/neighbors/mg/mg.cuh b/cpp/src/neighbors/mg/mg.cuh index d3f635bc4..e9cdc30f6 100644 --- a/cpp/src/neighbors/mg/mg.cuh +++ b/cpp/src/neighbors/mg/mg.cuh @@ -25,6 +25,8 @@ #include #include +#include + namespace cuvs::neighbors { using namespace raft; diff --git a/cpp/src/neighbors/nn_descent_index.cpp b/cpp/src/neighbors/nn_descent_index.cpp new file mode 100644 index 000000000..25d5b6af8 --- /dev/null +++ b/cpp/src/neighbors/nn_descent_index.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +namespace cuvs::neighbors::nn_descent { + +index_params::index_params(size_t graph_degree, cuvs::distance::DistanceType metric) +{ + this->graph_degree = graph_degree; + this->intermediate_graph_degree = 1.5 * graph_degree; + this->metric = metric; +} +} // namespace cuvs::neighbors::nn_descent \ No newline at end of file diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 1ed8466b3..7754a5043 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -137,6 +137,7 @@ if(BUILD_TESTS) NAME NEIGHBORS_ANN_CAGRA_TEST PATH + neighbors/ann_cagra/bug_extreme_inputs_oob.cu neighbors/ann_cagra/bug_multi_cta_crash.cu neighbors/ann_cagra/test_float_uint32_t.cu neighbors/ann_cagra/test_half_uint32_t.cu diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index 37d42dd1d..660246c67 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -361,8 +361,8 @@ class AnnCagraTest : public ::testing::TestWithParam { // not used for knn_graph building. switch (ps.build_algo) { case graph_build_algo::IVF_PQ: - index_params.graph_build_params = - graph_build_params::ivf_pq_params(raft::matrix_extent(ps.n_rows, ps.dim)); + index_params.graph_build_params = graph_build_params::ivf_pq_params( + raft::matrix_extent(ps.n_rows, ps.dim), index_params.metric); if (ps.ivf_pq_search_refine_ratio) { std::get( index_params.graph_build_params) @@ -370,8 +370,8 @@ class AnnCagraTest : public ::testing::TestWithParam { } break; case graph_build_algo::NN_DESCENT: { - index_params.graph_build_params = - graph_build_params::nn_descent_params(index_params.intermediate_graph_degree); + index_params.graph_build_params = graph_build_params::nn_descent_params( + index_params.intermediate_graph_degree, index_params.metric); break; } case graph_build_algo::AUTO: @@ -389,7 +389,7 @@ class AnnCagraTest : public ::testing::TestWithParam { (const DataT*)database.data(), ps.n_rows, ps.dim); { - cagra::index index(handle_); + cagra::index index(handle_, index_params.metric); if (ps.host_dataset) { auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); diff --git a/cpp/test/neighbors/ann_cagra/bug_extreme_inputs_oob.cu b/cpp/test/neighbors/ann_cagra/bug_extreme_inputs_oob.cu new file mode 100644 index 000000000..e21a54e9e --- /dev/null +++ b/cpp/test/neighbors/ann_cagra/bug_extreme_inputs_oob.cu @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include +#include + +#include + +namespace cuvs::neighbors::cagra { + +class cagra_extreme_inputs_oob_test : public ::testing::Test { + public: + using data_type = float; + + protected: + void run() + { + cagra::index_params ix_ps; + graph_build_params::ivf_pq_params gb_params{}; + gb_params.refinement_rate = 2; + ix_ps.graph_build_params = gb_params; + ix_ps.graph_degree = 64; + ix_ps.intermediate_graph_degree = 128; + + [[maybe_unused]] auto ix = cagra::build(res, ix_ps, raft::make_const_mdspan(dataset->view())); + raft::resource::sync_stream(res); + } + + void SetUp() override + { + dataset.emplace(raft::make_device_matrix(res, n_samples, n_dim)); + raft::random::RngState r(1234ULL); + raft::random::normal( + res, r, dataset->data_handle(), n_samples * n_dim, data_type(0), data_type(1e20)); + raft::resource::sync_stream(res); + } + + void TearDown() override + { + dataset.reset(); + raft::resource::sync_stream(res); + } + + private: + raft::resources res; + std::optional> dataset = std::nullopt; + + constexpr static int64_t n_samples = 100000; + constexpr static int64_t n_dim = 200; + constexpr static cuvs::distance::DistanceType metric = cuvs::distance::DistanceType::L2Expanded; +}; + +TEST_F(cagra_extreme_inputs_oob_test, cagra_extreme_inputs_oob_test) { this->run(); } + +} // namespace cuvs::neighbors::cagra diff --git a/cpp/test/neighbors/ann_nn_descent.cuh b/cpp/test/neighbors/ann_nn_descent.cuh index 7d2575c2b..09861a219 100644 --- a/cpp/test/neighbors/ann_nn_descent.cuh +++ b/cpp/test/neighbors/ann_nn_descent.cuh @@ -27,6 +27,7 @@ #include #include "naive_knn.cuh" +#include #include @@ -107,7 +108,6 @@ class AnnNNDescentTest : public ::testing::TestWithParam { raft::update_host(distances_naive.data(), distances_naive_dev.data(), queries_size, stream_); raft::resource::sync_stream(handle_); } - { { nn_descent::index_params index_params; @@ -124,6 +124,7 @@ class AnnNNDescentTest : public ::testing::TestWithParam { if (ps.host_dataset) { auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); + raft::resource::sync_stream(handle_); auto database_host_view = raft::make_host_matrix_view( (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); auto index = nn_descent::build(handle_, index_params, database_host_view); @@ -151,6 +152,13 @@ class AnnNNDescentTest : public ::testing::TestWithParam { raft::resource::sync_stream(handle_); } + if (ps.metric == cuvs::distance::DistanceType::InnerProduct) { + std::transform( + distances_naive.begin(), distances_naive.end(), distances_naive.begin(), [](auto x) { + return -x; + }); + } + double min_recall = ps.min_recall; EXPECT_TRUE(eval_neighbours(indices_naive, indices_NNDescent, @@ -169,9 +177,11 @@ class AnnNNDescentTest : public ::testing::TestWithParam { raft::random::RngState r(1234ULL); if constexpr (std::is_same{}) { raft::random::normal(handle_, r, database.data(), ps.n_rows * ps.dim, DataT(0.1), DataT(2.0)); - } else { + } else if constexpr (std::is_same{}) { raft::random::uniformInt( - handle_, r, database.data(), ps.n_rows * ps.dim, DataT(1), DataT(20)); + handle_, r, database.data(), ps.n_rows * ps.dim, DataT(-5), DataT(5)); + } else { + raft::random::uniformInt(handle_, r, database.data(), ps.n_rows * ps.dim, DataT(0), DataT(5)); } raft::resource::sync_stream(handle_); } @@ -308,13 +318,15 @@ class AnnNNDescentBatchTest : public ::testing::TestWithParam database; }; -const std::vector inputs = raft::util::itertools::product( - {1000, 2000}, // n_rows - {3, 5, 7, 8, 17, 64, 128, 137, 192, 256, 512, 619, 1024}, // dim - {32, 64}, // graph_degree - {cuvs::distance::DistanceType::L2Expanded}, - {false, true}, - {0.90}); +const std::vector inputs = + raft::util::itertools::product({2000, 4000}, // n_rows + {4, 16, 64, 256, 1024}, // dim + {32, 64}, // graph_degree + {cuvs::distance::DistanceType::L2Expanded, + cuvs::distance::DistanceType::InnerProduct, + cuvs::distance::DistanceType::CosineExpanded}, + {false, true}, + {0.90}); // TODO : Investigate why this test is failing Reference issue https // : // github.com/rapidsai/raft/issues/2450 diff --git a/examples/cpp/src/common.cuh b/examples/cpp/src/common.cuh index 1c93dec0e..8e109a764 100644 --- a/examples/cpp/src/common.cuh +++ b/examples/cpp/src/common.cuh @@ -14,6 +14,8 @@ * limitations under the License. */ +#pragma once + #include #include #include @@ -28,6 +30,8 @@ #include #include +#include + // Fill dataset and queries with synthetic data. void generate_dataset(raft::device_resources const &dev_resources, raft::device_matrix_view dataset, diff --git a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx index 559302ccc..9d1d24eae 100644 --- a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx +++ b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx @@ -31,9 +31,9 @@ from cuvs.distance_type cimport cuvsDistanceType from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.neighbors.common import _check_input_array from cuvs.distance import DISTANCE_TYPES +from cuvs.neighbors.common import _check_input_array from cuvs.common.c_api cimport cuvsResources_t diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx index 95209dbeb..752aef741 100644 --- a/python/cuvs/cuvs/neighbors/cagra/cagra.pyx +++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx @@ -32,7 +32,8 @@ from cuvs.common cimport cydlpack from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.neighbors.common import _check_input_array + +from cuvs.neighbors.common import _check_input_array from libc.stdint cimport ( int8_t, diff --git a/python/cuvs/cuvs/neighbors/common.py b/python/cuvs/cuvs/neighbors/common.py new file mode 100644 index 000000000..c14b9f8c9 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/common.py @@ -0,0 +1,36 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def _check_input_array(cai, exp_dt, exp_rows=None, exp_cols=None): + if cai.dtype not in exp_dt: + raise TypeError("dtype %s not supported" % cai.dtype) + + if not cai.c_contiguous: + raise ValueError("Row major input is expected") + + if exp_cols is not None and cai.shape[1] != exp_cols: + raise ValueError( + "Incorrect number of columns, expected {} got {}".format( + exp_cols, cai.shape[1] + ) + ) + + if exp_rows is not None and cai.shape[0] != exp_rows: + raise ValueError( + "Incorrect number of rows, expected {} , got {}".format( + exp_rows, cai.shape[0] + ) + ) diff --git a/python/cuvs/cuvs/neighbors/filters/filters.pyx b/python/cuvs/cuvs/neighbors/filters/filters.pyx index 3a81cb786..9bc2a905c 100644 --- a/python/cuvs/cuvs/neighbors/filters/filters.pyx +++ b/python/cuvs/cuvs/neighbors/filters/filters.pyx @@ -20,11 +20,11 @@ import numpy as np from libc.stdint cimport uintptr_t from cuvs.common cimport cydlpack +from cuvs.neighbors.common import _check_input_array from .filters cimport BITMAP, NO_FILTER, cuvsFilter from pylibraft.common.cai_wrapper import wrap_array -from pylibraft.neighbors.common import _check_input_array cdef class Prefilter: diff --git a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx index 018fcfef9..bcfaf167e 100644 --- a/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx +++ b/python/cuvs/cuvs/neighbors/hnsw/hnsw.pyx @@ -21,6 +21,7 @@ from libcpp.string cimport string from cuvs.common.exceptions import check_cuvs from cuvs.common.resources import auto_sync_resources +from cuvs.neighbors.common import _check_input_array from cuvs.common cimport cydlpack @@ -36,7 +37,6 @@ import uuid from pylibraft.common import auto_convert_output from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.neighbors.common import _check_input_array cdef class SearchParams: diff --git a/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx b/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx index 25b9b2aee..7a169e1a0 100644 --- a/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx +++ b/python/cuvs/cuvs/neighbors/ivf_flat/ivf_flat.pyx @@ -31,9 +31,9 @@ from cuvs.distance_type cimport cuvsDistanceType from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.neighbors.common import _check_input_array from cuvs.distance import DISTANCE_TYPES +from cuvs.neighbors.common import _check_input_array from libc.stdint cimport ( int8_t, diff --git a/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx b/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx index 3add1df75..531302ee6 100644 --- a/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx +++ b/python/cuvs/cuvs/neighbors/ivf_pq/ivf_pq.pyx @@ -31,9 +31,9 @@ from cuvs.distance_type cimport cuvsDistanceType from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.neighbors.common import _check_input_array from cuvs.distance import DISTANCE_TYPES +from cuvs.neighbors.common import _check_input_array from libc.stdint cimport ( int8_t, diff --git a/python/cuvs/cuvs/neighbors/refine.pyx b/python/cuvs/cuvs/neighbors/refine.pyx index 0eccc4108..b7aa35dca 100644 --- a/python/cuvs/cuvs/neighbors/refine.pyx +++ b/python/cuvs/cuvs/neighbors/refine.pyx @@ -31,13 +31,13 @@ from cuvs.distance_type cimport cuvsDistanceType from pylibraft.common import auto_convert_output, device_ndarray from pylibraft.common.cai_wrapper import wrap_array from pylibraft.common.interruptible import cuda_interruptible -from pylibraft.neighbors.common import _check_input_array from cuvs.distance import DISTANCE_TYPES from cuvs.common.c_api cimport cuvsResources_t from cuvs.common.exceptions import check_cuvs +from cuvs.neighbors.common import _check_input_array @auto_sync_resources diff --git a/python/cuvs/cuvs/test/test_cagra.py b/python/cuvs/cuvs/test/test_cagra.py index 92b88f013..56e132c23 100644 --- a/python/cuvs/cuvs/test/test_cagra.py +++ b/python/cuvs/cuvs/test/test_cagra.py @@ -122,8 +122,9 @@ def run_cagra_build_search_test( @pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8]) @pytest.mark.parametrize("array_type", ["device", "host"]) @pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"]) +@pytest.mark.parametrize("metric", ["euclidean"]) def test_cagra_dataset_dtype_host_device( - dtype, array_type, inplace, build_algo + dtype, array_type, inplace, build_algo, metric ): # Note that inner_product tests use normalized input which we cannot # represent in int8, therefore we test only sqeuclidean metric here. @@ -132,6 +133,7 @@ def test_cagra_dataset_dtype_host_device( inplace=inplace, array_type=array_type, build_algo=build_algo, + metric=metric, ) diff --git a/python/cuvs/cuvs/test/test_hnsw.py b/python/cuvs/cuvs/test/test_hnsw.py index 8bd2e8b76..20a35401e 100644 --- a/python/cuvs/cuvs/test/test_hnsw.py +++ b/python/cuvs/cuvs/test/test_hnsw.py @@ -41,8 +41,6 @@ def run_hnsw_build_search_test( pytest.skip( "inner_product metric is not supported for int8/uint8 data" ) - if build_algo == "nn_descent": - pytest.skip("inner_product metric is not supported for nn_descent") build_params = cagra.IndexParams( metric=metric, @@ -83,7 +81,7 @@ def run_hnsw_build_search_test( @pytest.mark.parametrize("k", [10, 20]) @pytest.mark.parametrize("ef", [30, 40]) @pytest.mark.parametrize("num_threads", [2, 4]) -@pytest.mark.parametrize("metric", ["sqeuclidean"]) +@pytest.mark.parametrize("metric", ["sqeuclidean", "inner_product"]) @pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"]) def test_hnsw(dtype, k, ef, num_threads, metric, build_algo): # Note that inner_product tests use normalized input which we cannot diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index 30d784c67..92e4993c7 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -133,6 +133,12 @@ build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" +[tool.pydistcheck] +select = [ + # NOTE: size threshold is managed via CLI args in CI scripts + "distro-too-large-compressed", +] + [tool.pytest.ini_options] filterwarnings = [ "error",