From 25178260d0f1cc6408ba9ee131c904cd1ce94613 Mon Sep 17 00:00:00 2001 From: rhdong Date: Mon, 29 Jul 2024 13:37:24 -0700 Subject: [PATCH] [FEA] expose python & C API for prefiltered brute force (#174) Authors: - rhdong (https://github.com/rhdong) - Ben Frederickson (https://github.com/benfred) Approvers: - Ben Frederickson (https://github.com/benfred) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/cuvs/pull/174 --- cpp/include/cuvs/neighbors/brute_force.h | 12 +- cpp/include/cuvs/neighbors/brute_force.hpp | 9 +- cpp/include/cuvs/neighbors/common.h | 61 +++ cpp/src/neighbors/brute_force_c.cpp | 34 +- cpp/test/neighbors/ann_cagra_c.cu | 29 +- cpp/test/neighbors/ann_ivf_flat_c.cu | 39 +- cpp/test/neighbors/ann_ivf_pq_c.cu | 39 +- cpp/test/neighbors/brute_force_c.cu | 373 +++++++++++++++++- cpp/test/neighbors/run_brute_force_c.c | 26 +- python/cuvs/cuvs/neighbors/CMakeLists.txt | 1 + python/cuvs/cuvs/neighbors/__init__.py | 11 +- .../neighbors/brute_force/brute_force.pxd | 4 +- .../neighbors/brute_force/brute_force.pyx | 48 ++- .../cuvs/neighbors/filters/CMakeLists.txt | 25 ++ .../cuvs/cuvs/neighbors/filters/__init__.pxd | 0 .../cuvs/cuvs/neighbors/filters/__init__.py | 18 + .../cuvs/cuvs/neighbors/filters/filters.pxd | 30 ++ .../cuvs/cuvs/neighbors/filters/filters.pyx | 97 +++++ python/cuvs/cuvs/test/test_brute_force.py | 98 ++++- rust/cuvs/src/brute_force.rs | 22 +- 20 files changed, 865 insertions(+), 111 deletions(-) create mode 100644 cpp/include/cuvs/neighbors/common.h create mode 100644 python/cuvs/cuvs/neighbors/filters/CMakeLists.txt create mode 100644 python/cuvs/cuvs/neighbors/filters/__init__.pxd create mode 100644 python/cuvs/cuvs/neighbors/filters/__init__.py create mode 100644 python/cuvs/cuvs/neighbors/filters/filters.pxd create mode 100644 python/cuvs/cuvs/neighbors/filters/filters.pyx diff --git a/cpp/include/cuvs/neighbors/brute_force.h b/cpp/include/cuvs/neighbors/brute_force.h index e285eae37..c9e172f62 100644 --- a/cpp/include/cuvs/neighbors/brute_force.h +++ b/cpp/include/cuvs/neighbors/brute_force.h @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -135,9 +136,13 @@ cuvsError_t cuvsBruteForceBuild(cuvsResources_t res, * DLManagedTensor dataset; * DLManagedTensor queries; * DLManagedTensor neighbors; + * DLManagedTensor bitmap; + * + * cuvsFilter prefilter{(uintptr_t)&bitmap, BITMAP}; * * // Search the `index` built using `cuvsBruteForceBuild` - * cuvsError_t search_status = cuvsBruteForceSearch(res, index, &queries, &neighbors, &distances); + * cuvsError_t search_status = cuvsBruteForceSearch(res, index, &queries, &neighbors, &distances, + * prefilter); * * // de-allocate `res` * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); @@ -148,12 +153,15 @@ cuvsError_t cuvsBruteForceBuild(cuvsResources_t res, * @param[in] queries DLManagedTensor* queries dataset to search * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries * @param[out] distances DLManagedTensor* output `k` distances for queries + * @param[in] prefilter cuvsFilter input prefilter that can be used + to filter queries and neighbors based on the given bitmap. */ cuvsError_t cuvsBruteForceSearch(cuvsResources_t res, cuvsBruteForceIndex_t index, DLManagedTensor* queries, DLManagedTensor* neighbors, - DLManagedTensor* distances); + DLManagedTensor* distances, + cuvsFilter prefilter); /** * @} */ diff --git a/cpp/include/cuvs/neighbors/brute_force.hpp b/cpp/include/cuvs/neighbors/brute_force.hpp index 1ec7e81f7..0a5071b6d 100644 --- a/cpp/include/cuvs/neighbors/brute_force.hpp +++ b/cpp/include/cuvs/neighbors/brute_force.hpp @@ -165,7 +165,7 @@ struct index : cuvs::neighbors::index { * @param[in] metric cuvs::distance::DistanceType * @param[in] metric_arg metric argument * - * @return the constructed bruteforce index + * @return the constructed brute-force index */ auto build(raft::resources const& handle, raft::device_matrix_view dataset, @@ -221,13 +221,14 @@ auto build(raft::resources const& handle, * @endcode * * @param[in] handle - * @param[in] index bruteforce constructed index + * @param[in] index brute-force constructed index * @param[in] queries a device pointer to a row-major matrix [n_queries, index->dim()] * @param[out] neighbors a device pointer to the indices of the neighbors in the source dataset * [n_queries, k] * @param[out] distances a device pointer to the distances to the selected neighbors [n_queries, k] - * @param[in] sample_filter an optional device bitmap filter function that greenlights samples for a - * given query + * @param[in] sample_filter An optional device bitmap filter function with a `row-major` layout and + * the shape of [n_queries, index->size()], which means the filter will use the first + * `index->size()` bits to indicate whether queries[0] should compute the distance with dataset. */ void search(raft::resources const& handle, const cuvs::neighbors::brute_force::index& index, diff --git a/cpp/include/cuvs/neighbors/common.h b/cpp/include/cuvs/neighbors/common.h new file mode 100644 index 000000000..02cbeea96 --- /dev/null +++ b/cpp/include/cuvs/neighbors/common.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @defgroup filters Filters APIs + * @brief APIs related to filter functionality. + * @{ + */ + +/** + * @brief Enum to denote filter type. + */ +enum cuvsFilterType { + /* No filter */ + NO_FILTER, + /* Filter an index with a bitset */ + BITSET, + /* Filter an index with a bitmap */ + BITMAP +}; + +/** + * @brief Struct to hold address of cuvs::neighbor::prefilter and its type + * + */ +typedef struct { + uintptr_t addr; + enum cuvsFilterType type; +} cuvsFilter; + +/** + * @} + */ + +#ifdef __cplusplus +} +#endif diff --git a/cpp/src/neighbors/brute_force_c.cpp b/cpp/src/neighbors/brute_force_c.cpp index 5f04ffa34..f3ca2e730 100644 --- a/cpp/src/neighbors/brute_force_c.cpp +++ b/cpp/src/neighbors/brute_force_c.cpp @@ -27,6 +27,7 @@ #include #include #include +#include namespace { @@ -53,7 +54,8 @@ void _search(cuvsResources_t res, cuvsBruteForceIndex index, DLManagedTensor* queries_tensor, DLManagedTensor* neighbors_tensor, - DLManagedTensor* distances_tensor) + DLManagedTensor* distances_tensor, + cuvsFilter prefilter) { auto res_ptr = reinterpret_cast(res); auto index_ptr = reinterpret_cast*>(index.addr); @@ -61,12 +63,29 @@ void _search(cuvsResources_t res, using queries_mdspan_type = raft::device_matrix_view; using neighbors_mdspan_type = raft::device_matrix_view; using distances_mdspan_type = raft::device_matrix_view; - auto queries_mds = cuvs::core::from_dlpack(queries_tensor); - auto neighbors_mds = cuvs::core::from_dlpack(neighbors_tensor); - auto distances_mds = cuvs::core::from_dlpack(distances_tensor); + using prefilter_mds_type = raft::device_vector_view; + using prefilter_opt_type = cuvs::core::bitmap_view; + + auto queries_mds = cuvs::core::from_dlpack(queries_tensor); + auto neighbors_mds = cuvs::core::from_dlpack(neighbors_tensor); + auto distances_mds = cuvs::core::from_dlpack(distances_tensor); + + std::optional> filter_opt; + + if (prefilter.type == NO_FILTER) { + filter_opt = std::nullopt; + } else { + auto prefilter_ptr = reinterpret_cast(prefilter.addr); + auto prefilter_mds = cuvs::core::from_dlpack(prefilter_ptr); + auto prefilter_view = prefilter_opt_type((const uint32_t*)prefilter_mds.data_handle(), + queries_mds.extent(0), + index_ptr->dataset().extent(0)); + + filter_opt = std::make_optional(prefilter_view); + } cuvs::neighbors::brute_force::search( - *res_ptr, *index_ptr, queries_mds, neighbors_mds, distances_mds, std::nullopt); + *res_ptr, *index_ptr, queries_mds, neighbors_mds, distances_mds, filter_opt); } } // namespace @@ -120,7 +139,8 @@ extern "C" cuvsError_t cuvsBruteForceSearch(cuvsResources_t res, cuvsBruteForceIndex_t index_c_ptr, DLManagedTensor* queries_tensor, DLManagedTensor* neighbors_tensor, - DLManagedTensor* distances_tensor) + DLManagedTensor* distances_tensor, + cuvsFilter prefilter) { return cuvs::core::translate_exceptions([=] { auto queries = queries_tensor->dl_tensor; @@ -143,7 +163,7 @@ extern "C" cuvsError_t cuvsBruteForceSearch(cuvsResources_t res, RAFT_EXPECTS(queries.dtype.code == index.dtype.code, "type mismatch between index and queries"); if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) { - _search(res, index, queries_tensor, neighbors_tensor, distances_tensor); + _search(res, index, queries_tensor, neighbors_tensor, distances_tensor, prefilter); } else { RAFT_FAIL("Unsupported queries DLtensor dtype: %d and bits: %d", queries.dtype.code, diff --git a/cpp/test/neighbors/ann_cagra_c.cu b/cpp/test/neighbors/ann_cagra_c.cu index 6e3a3cbd1..599d2d842 100644 --- a/cpp/test/neighbors/ann_cagra_c.cu +++ b/cpp/test/neighbors/ann_cagra_c.cu @@ -42,6 +42,8 @@ TEST(CagraC, BuildSearch) // create cuvsResources_t cuvsResources_t res; cuvsResourcesCreate(&res); + cudaStream_t stream; + cuvsStreamGet(res, &stream); // create dataset DLTensor DLManagedTensor dataset_tensor; @@ -65,12 +67,11 @@ TEST(CagraC, BuildSearch) cuvsCagraBuild(res, build_params, &dataset_tensor, index); // create queries DLTensor - float* queries_d; - cudaMalloc(&queries_d, sizeof(float) * 4 * 2); - cudaMemcpy(queries_d, queries, sizeof(float) * 4 * 2, cudaMemcpyDefault); + rmm::device_uvector queries_d(4 * 2, stream); + raft::copy(queries_d.data(), (float*)queries, 4 * 2, stream); DLManagedTensor queries_tensor; - queries_tensor.dl_tensor.data = queries_d; + queries_tensor.dl_tensor.data = queries_d.data(); queries_tensor.dl_tensor.device.device_type = kDLCUDA; queries_tensor.dl_tensor.ndim = 2; queries_tensor.dl_tensor.dtype.code = kDLFloat; @@ -81,11 +82,10 @@ TEST(CagraC, BuildSearch) queries_tensor.dl_tensor.strides = nullptr; // create neighbors DLTensor - uint32_t* neighbors_d; - cudaMalloc(&neighbors_d, sizeof(uint32_t) * 4); + rmm::device_uvector neighbors_d(4, stream); DLManagedTensor neighbors_tensor; - neighbors_tensor.dl_tensor.data = neighbors_d; + neighbors_tensor.dl_tensor.data = neighbors_d.data(); neighbors_tensor.dl_tensor.device.device_type = kDLCUDA; neighbors_tensor.dl_tensor.ndim = 2; neighbors_tensor.dl_tensor.dtype.code = kDLUInt; @@ -96,11 +96,10 @@ TEST(CagraC, BuildSearch) neighbors_tensor.dl_tensor.strides = nullptr; // create distances DLTensor - float* distances_d; - cudaMalloc(&distances_d, sizeof(float) * 4); + rmm::device_uvector distances_d(4, stream); DLManagedTensor distances_tensor; - distances_tensor.dl_tensor.data = distances_d; + distances_tensor.dl_tensor.data = distances_d.data(); distances_tensor.dl_tensor.device.device_type = kDLCUDA; distances_tensor.dl_tensor.ndim = 2; distances_tensor.dl_tensor.dtype.code = kDLFloat; @@ -116,14 +115,10 @@ TEST(CagraC, BuildSearch) cuvsCagraSearch(res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor); // verify output - ASSERT_TRUE(cuvs::devArrMatchHost(neighbors_exp, neighbors_d, 4, cuvs::Compare())); ASSERT_TRUE( - cuvs::devArrMatchHost(distances_exp, distances_d, 4, cuvs::CompareApprox(0.001f))); - - // delete device memory - cudaFree(queries_d); - cudaFree(neighbors_d); - cudaFree(distances_d); + cuvs::devArrMatchHost(neighbors_exp, neighbors_d.data(), 4, cuvs::Compare())); + ASSERT_TRUE(cuvs::devArrMatchHost( + distances_exp, distances_d.data(), 4, cuvs::CompareApprox(0.001f))); // de-allocate index and res cuvsCagraSearchParamsDestroy(search_params); diff --git a/cpp/test/neighbors/ann_ivf_flat_c.cu b/cpp/test/neighbors/ann_ivf_flat_c.cu index 784418860..8c23e6cff 100644 --- a/cpp/test/neighbors/ann_ivf_flat_c.cu +++ b/cpp/test/neighbors/ann_ivf_flat_c.cu @@ -101,36 +101,37 @@ TEST(IvfFlatC, BuildSearch) int64_t n_dim = 32; uint32_t n_neighbors = 8; + raft::handle_t handle; + auto stream = raft::resource::get_cuda_stream(handle); + cuvsDistanceType metric = L2Expanded; size_t n_probes = 20; size_t n_lists = 1024; - float *index_data, *query_data, *distances_data; - int64_t* neighbors_data; - cudaMalloc(&index_data, sizeof(float) * n_rows * n_dim); - cudaMalloc(&query_data, sizeof(float) * n_queries * n_dim); - cudaMalloc(&neighbors_data, sizeof(int64_t) * n_queries * n_neighbors); - cudaMalloc(&distances_data, sizeof(float) * n_queries * n_neighbors); + rmm::device_uvector index_data(n_rows * n_dim, stream); + rmm::device_uvector query_data(n_queries * n_dim, stream); + rmm::device_uvector neighbors_data(n_queries * n_neighbors, stream); + rmm::device_uvector distances_data(n_queries * n_neighbors, stream); - generate_random_data(index_data, n_rows * n_dim); - generate_random_data(query_data, n_queries * n_dim); + generate_random_data(index_data.data(), n_rows * n_dim); + generate_random_data(query_data.data(), n_queries * n_dim); run_ivf_flat(n_rows, n_queries, n_dim, n_neighbors, - index_data, - query_data, - distances_data, - neighbors_data, + index_data.data(), + query_data.data(), + distances_data.data(), + neighbors_data.data(), metric, n_probes, n_lists); - recall_eval(query_data, - index_data, - neighbors_data, - distances_data, + recall_eval(query_data.data(), + index_data.data(), + neighbors_data.data(), + distances_data.data(), n_queries, n_rows, n_dim, @@ -138,10 +139,4 @@ TEST(IvfFlatC, BuildSearch) metric, n_probes, n_lists); - - // delete device memory - cudaFree(index_data); - cudaFree(query_data); - cudaFree(neighbors_data); - cudaFree(distances_data); } diff --git a/cpp/test/neighbors/ann_ivf_pq_c.cu b/cpp/test/neighbors/ann_ivf_pq_c.cu index 88cd1bd93..21e4805b9 100644 --- a/cpp/test/neighbors/ann_ivf_pq_c.cu +++ b/cpp/test/neighbors/ann_ivf_pq_c.cu @@ -101,36 +101,37 @@ TEST(IvfPqC, BuildSearch) int64_t n_dim = 32; uint32_t n_neighbors = 8; + raft::handle_t handle; + auto stream = raft::resource::get_cuda_stream(handle); + cuvsDistanceType metric = L2Expanded; size_t n_probes = 20; size_t n_lists = 1024; - float *index_data, *query_data, *distances_data; - int64_t* neighbors_data; - cudaMalloc(&index_data, sizeof(float) * n_rows * n_dim); - cudaMalloc(&query_data, sizeof(float) * n_queries * n_dim); - cudaMalloc(&neighbors_data, sizeof(int64_t) * n_queries * n_neighbors); - cudaMalloc(&distances_data, sizeof(float) * n_queries * n_neighbors); + rmm::device_uvector index_data(n_rows * n_dim, stream); + rmm::device_uvector query_data(n_queries * n_dim, stream); + rmm::device_uvector neighbors_data(n_queries * n_neighbors, stream); + rmm::device_uvector distances_data(n_queries * n_neighbors, stream); - generate_random_data(index_data, n_rows * n_dim); - generate_random_data(query_data, n_queries * n_dim); + generate_random_data(index_data.data(), n_rows * n_dim); + generate_random_data(query_data.data(), n_queries * n_dim); run_ivf_pq(n_rows, n_queries, n_dim, n_neighbors, - index_data, - query_data, - distances_data, - neighbors_data, + index_data.data(), + query_data.data(), + distances_data.data(), + neighbors_data.data(), metric, n_probes, n_lists); - recall_eval(query_data, - index_data, - neighbors_data, - distances_data, + recall_eval(query_data.data(), + index_data.data(), + neighbors_data.data(), + distances_data.data(), n_queries, n_rows, n_dim, @@ -138,10 +139,4 @@ TEST(IvfPqC, BuildSearch) metric, n_probes, n_lists); - - // delete device memory - cudaFree(index_data); - cudaFree(query_data); - cudaFree(neighbors_data); - cudaFree(distances_data); } diff --git a/cpp/test/neighbors/brute_force_c.cu b/cpp/test/neighbors/brute_force_c.cu index 8caf1c9d1..8abc34071 100644 --- a/cpp/test/neighbors/brute_force_c.cu +++ b/cpp/test/neighbors/brute_force_c.cu @@ -18,16 +18,27 @@ #include #include #include +#include #include "ann_utils.cuh" #include +#include +#include +#include +#include +#include +#include +#include +#include + extern "C" void run_brute_force(int64_t n_rows, int64_t n_queries, int64_t n_dim, uint32_t n_neighbors, float* index_data, float* query_data, + uint32_t* prefilter_data, float* distances_data, int64_t* neighbors_data, cuvsDistanceType metric); @@ -40,9 +51,210 @@ void generate_random_data(T* devPtr, size_t size) raft::random::uniform(handle, r, devPtr, size, T(0.1), T(2.0)); }; +template +index_t create_sparse_matrix(index_t m, index_t n, float sparsity, std::vector& bitmap) +{ + index_t total = static_cast(m * n); + index_t num_ones = static_cast((total * 1.0f) * sparsity); + index_t nnz = num_ones; + + for (auto& item : bitmap) { + item = static_cast(0); + } + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, total - 1); + + while (num_ones > 0) { + index_t index = dis(gen); + + bitmap_t& element = bitmap[index / (8 * sizeof(bitmap_t))]; + index_t bit_position = index % (8 * sizeof(bitmap_t)); + + if (((element >> bit_position) & 1) == 0) { + element |= (static_cast(1) << bit_position); + num_ones--; + } + } + return nnz; +} + +template +void cpu_convert_to_csr(std::vector& bitmap, + index_t rows, + index_t cols, + std::vector& indices, + std::vector& indptr) +{ + index_t offset_indptr = 0; + index_t offset_values = 0; + indptr[offset_indptr++] = 0; + + index_t index = 0; + bitmap_t element = 0; + index_t bit_position = 0; + + for (index_t i = 0; i < rows; ++i) { + for (index_t j = 0; j < cols; ++j) { + index = i * cols + j; + element = bitmap[index / (8 * sizeof(bitmap_t))]; + bit_position = index % (8 * sizeof(bitmap_t)); + + if (((element >> bit_position) & 1)) { + indices[offset_values] = static_cast(j); + offset_values++; + } + } + indptr[offset_indptr++] = static_cast(offset_values); + } +} + +template +void cpu_sddmm(value_t* A, + value_t* B, + std::vector& vals, + const std::vector& cols, + const std::vector& row_ptrs, + bool is_row_major_A, + bool is_row_major_B, + index_t n_queries, + index_t n_dataset, + index_t dim, + cuvsDistanceType metric, + value_t alpha = 1.0, + value_t beta = 0.0) +{ + bool trans_a = is_row_major_A; + bool trans_b = is_row_major_B; + + for (index_t i = 0; i < n_queries; ++i) { + for (index_t j = row_ptrs[i]; j < row_ptrs[i + 1]; ++j) { + value_t sum = 0; + value_t norms_A = 0; + value_t norms_B = 0; + for (index_t l = 0; l < dim; ++l) { + index_t a_index = trans_a ? i * dim + l : l * n_queries + i; + index_t b_index = trans_b ? l * n_dataset + cols[j] : cols[j] * dim + l; + sum += A[a_index] * B[b_index]; + + norms_A += A[a_index] * A[a_index]; + norms_B += B[b_index] * B[b_index]; + } + vals[j] = alpha * sum + beta * vals[j]; + if (metric == cuvs::distance::DistanceType::L2Expanded) { + vals[j] = value_t(-2.0) * vals[j] + norms_A + norms_B; + } else if (metric == cuvs::distance::DistanceType::L2SqrtExpanded) { + vals[j] = std::sqrt(value_t(-2.0) * vals[j] + norms_A + norms_B); + } else if (metric == cuvs::distance::DistanceType::CosineExpanded) { + vals[j] = value_t(1.0) - vals[j] / std::sqrt(norms_A * norms_B); + } + } + } +} + +template +void cpu_select_k(const std::vector& indptr_h, + const std::vector& indices_h, + const std::vector& values_h, + std::optional>& in_idx_h, + index_t n_queries, + index_t n_dataset, + index_t n_neighbors, + std::vector& out_values_h, + std::vector& out_indices_h, + bool select_min = true) +{ + auto comp = [select_min](const std::pair& a, + const std::pair& b) { + return select_min ? a.first < b.first : a.first >= b.first; + }; + + for (index_t row = 0; row < n_queries; ++row) { + std::priority_queue, + std::vector>, + decltype(comp)> + pq(comp); + + for (index_t idx = indptr_h[row]; idx < indptr_h[row + 1]; ++idx) { + pq.push({values_h[idx], (in_idx_h.has_value()) ? (*in_idx_h)[idx] : indices_h[idx]}); + if (pq.size() > size_t(n_neighbors)) { pq.pop(); } + } + + std::vector> row_pairs; + while (!pq.empty()) { + row_pairs.push_back(pq.top()); + pq.pop(); + } + + if (select_min) { + std::sort(row_pairs.begin(), row_pairs.end(), [](const auto& a, const auto& b) { + return a.first <= b.first; + }); + } else { + std::sort(row_pairs.begin(), row_pairs.end(), [](const auto& a, const auto& b) { + return a.first >= b.first; + }); + } + for (index_t col = 0; col < n_neighbors; col++) { + if (col < index_t(row_pairs.size())) { + out_values_h[row * n_neighbors + col] = row_pairs[col].first; + out_indices_h[row * n_neighbors + col] = row_pairs[col].second; + } + } + } +} + +template +void cpu_brute_force_with_filter(value_t* query_data, + value_t* index_data, + std::vector& filter, + std::vector& out_indices_h, + std::vector& out_values_h, + size_t n_queries, + size_t n_dataset, + size_t n_dim, + size_t n_neighbors, + size_t nnz, + bool select_min, + cuvsDistanceType metric) +{ + std::vector values_h(nnz); + std::vector indices_h(nnz); + std::vector indptr_h(n_queries + 1); + + cpu_convert_to_csr(filter, (index_t)n_queries, (index_t)n_dataset, indices_h, indptr_h); + + cpu_sddmm(query_data, + index_data, + values_h, + indices_h, + indptr_h, + true, + false, + (index_t)n_queries, + (index_t)n_dataset, + (index_t)n_dim, + metric); + + std::optional> optional_indices_h = std::nullopt; + + cpu_select_k(indptr_h, + indices_h, + values_h, + optional_indices_h, + (index_t)n_queries, + (index_t)n_dataset, + (index_t)n_neighbors, + out_values_h, + out_indices_h, + select_min); +} + template void recall_eval(T* query_data, T* index_data, + uint32_t* filter, IdxT* neighbors, T* distances, size_t n_queries, @@ -90,6 +302,63 @@ void recall_eval(T* query_data, min_recall)); }; +template +void recall_eval_with_filter(T* query_data, + T* index_data, + std::vector& filter_h, + IdxT* neighbors_d, + T* distances_d, + std::vector& distances_ref_h, + std::vector& neighbors_ref_h, + size_t n_queries, + size_t n_rows, + size_t n_dim, + uint32_t n_neighbors, + size_t nnz, + cuvsDistanceType metric) +{ + raft::handle_t handle; + auto stream = raft::resource::get_cuda_stream(handle); + + std::vector queries_h(n_queries * n_dim); + std::vector indices_h(n_rows * n_dim); + + size_t size = n_queries * n_neighbors; + std::vector neighbors_h(size); + std::vector distances_h(size); + + raft::copy(neighbors_h.data(), neighbors_d, size, stream); + raft::copy(distances_h.data(), distances_d, size, stream); + raft::copy(queries_h.data(), query_data, n_queries * n_dim, stream); + raft::copy(indices_h.data(), index_data, n_rows * n_dim, stream); + + bool select_min = cuvs::distance::is_min_close(metric); + + cpu_brute_force_with_filter(queries_h.data(), + indices_h.data(), + filter_h, + neighbors_ref_h, + distances_ref_h, + n_queries, + n_rows, + n_dim, + n_neighbors, + nnz, + select_min, + static_cast((uint16_t)metric)); + + // verify output + double min_recall = 0.95; + ASSERT_TRUE(cuvs::neighbors::eval_neighbours(neighbors_ref_h, + neighbors_h, + distances_ref_h, + distances_h, + n_queries, + n_neighbors, + 0.001, + min_recall)); +}; + TEST(BruteForceC, BuildSearch) { int64_t n_rows = 8096; @@ -97,41 +366,103 @@ TEST(BruteForceC, BuildSearch) int64_t n_dim = 32; uint32_t n_neighbors = 8; + raft::handle_t handle; + auto stream = raft::resource::get_cuda_stream(handle); + cuvsDistanceType metric = L2Expanded; - float *index_data, *query_data, *distances_data; - int64_t* neighbors_data; - cudaMalloc(&index_data, sizeof(float) * n_rows * n_dim); - cudaMalloc(&query_data, sizeof(float) * n_queries * n_dim); - cudaMalloc(&neighbors_data, sizeof(int64_t) * n_queries * n_neighbors); - cudaMalloc(&distances_data, sizeof(float) * n_queries * n_neighbors); + uint32_t* filter_data = NULL; + + rmm::device_uvector index_data(n_rows * n_dim, stream); + rmm::device_uvector query_data(n_queries * n_dim, stream); + rmm::device_uvector neighbors_data(n_queries * n_neighbors, stream); + rmm::device_uvector distances_data(n_queries * n_neighbors, stream); - generate_random_data(index_data, n_rows * n_dim); - generate_random_data(query_data, n_queries * n_dim); + generate_random_data(index_data.data(), n_rows * n_dim); + generate_random_data(query_data.data(), n_queries * n_dim); run_brute_force(n_rows, n_queries, n_dim, n_neighbors, - index_data, - query_data, - distances_data, - neighbors_data, + index_data.data(), + query_data.data(), + filter_data, + distances_data.data(), + neighbors_data.data(), metric); - recall_eval(query_data, - index_data, - neighbors_data, - distances_data, + recall_eval(query_data.data(), + index_data.data(), + filter_data, + neighbors_data.data(), + distances_data.data(), n_queries, n_rows, n_dim, n_neighbors, metric); +} + +TEST(BruteForceC, BuildSearchWithFilter) +{ + int64_t n_rows = 8096; + int64_t n_queries = 128; + int64_t n_dim = 32; + uint32_t n_neighbors = 8; + + raft::resources handle; + auto stream = raft::resource::get_cuda_stream(handle); + + float sparsity = 0.2; + int64_t n_filter = (n_queries * n_rows + 31) / 32; + std::vector filter_h(n_filter); + int64_t nnz = create_sparse_matrix(n_queries, n_rows, sparsity, filter_h); + + cuvsDistanceType metric = L2Expanded; + bool select_min = cuvs::distance::is_min_close(metric); + + std::vector distances_ref_h( + n_queries * n_neighbors, + select_min ? std::numeric_limits::infinity() : std::numeric_limits::lowest()); + std::vector neighbors_ref_h(n_queries * n_neighbors, static_cast(0)); + + rmm::device_uvector index_data(n_rows * n_dim, stream); + rmm::device_uvector query_data(n_queries * n_dim, stream); + rmm::device_uvector neighbors_data(n_queries * n_neighbors, stream); + rmm::device_uvector distances_data(n_queries * n_neighbors, stream); + rmm::device_uvector filter_data(n_filter, stream); + + raft::copy(neighbors_data.data(), neighbors_ref_h.data(), n_queries * n_neighbors, stream); + raft::copy(distances_data.data(), distances_ref_h.data(), n_queries * n_neighbors, stream); + + generate_random_data(index_data.data(), n_rows * n_dim); + generate_random_data(query_data.data(), n_queries * n_dim); + + raft::copy(filter_data.data(), filter_h.data(), n_filter, stream); + + run_brute_force(n_rows, + n_queries, + n_dim, + n_neighbors, + index_data.data(), + query_data.data(), + filter_data.data(), + distances_data.data(), + neighbors_data.data(), + metric); - // delete device memory - cudaFree(index_data); - cudaFree(query_data); - cudaFree(neighbors_data); - cudaFree(distances_data); + recall_eval_with_filter(query_data.data(), + index_data.data(), + filter_h, + neighbors_data.data(), + distances_data.data(), + distances_ref_h, + neighbors_ref_h, + n_queries, + n_rows, + n_dim, + n_neighbors, + nnz, + metric); } diff --git a/cpp/test/neighbors/run_brute_force_c.c b/cpp/test/neighbors/run_brute_force_c.c index ed9e99970..ceb124731 100644 --- a/cpp/test/neighbors/run_brute_force_c.c +++ b/cpp/test/neighbors/run_brute_force_c.c @@ -15,6 +15,7 @@ */ #include +#include void run_brute_force(int64_t n_rows, int64_t n_queries, @@ -22,6 +23,7 @@ void run_brute_force(int64_t n_rows, uint32_t n_neighbors, float* index_data, float* query_data, + uint32_t* prefilter_data, float* distances_data, int64_t* neighbors_data, cuvsDistanceType metric) @@ -85,8 +87,30 @@ void run_brute_force(int64_t n_rows, distances_tensor.dl_tensor.shape = distances_shape; distances_tensor.dl_tensor.strides = NULL; + cuvsFilter prefilter; + + DLManagedTensor prefilter_tensor; + if (prefilter_data == NULL) { + prefilter.type = NO_FILTER; + prefilter.addr = (uintptr_t)NULL; + } else { + prefilter_tensor.dl_tensor.data = (void*)prefilter_data; + prefilter_tensor.dl_tensor.device.device_type = kDLCUDA; + prefilter_tensor.dl_tensor.ndim = 1; + prefilter_tensor.dl_tensor.dtype.code = kDLUInt; + prefilter_tensor.dl_tensor.dtype.bits = 32; + prefilter_tensor.dl_tensor.dtype.lanes = 1; + int64_t prefilter_shape[1] = {(n_queries * n_rows + 31) / 32}; + prefilter_tensor.dl_tensor.shape = prefilter_shape; + prefilter_tensor.dl_tensor.strides = NULL; + + prefilter.type = BITMAP; + prefilter.addr = (uintptr_t)&prefilter_tensor; + } + // search index - cuvsBruteForceSearch(res, index, &queries_tensor, &neighbors_tensor, &distances_tensor); + cuvsBruteForceSearch( + res, index, &queries_tensor, &neighbors_tensor, &distances_tensor, prefilter); // de-allocate index and res cuvsBruteForceIndexDestroy(index); diff --git a/python/cuvs/cuvs/neighbors/CMakeLists.txt b/python/cuvs/cuvs/neighbors/CMakeLists.txt index 3579215fd..21c3db5da 100644 --- a/python/cuvs/cuvs/neighbors/CMakeLists.txt +++ b/python/cuvs/cuvs/neighbors/CMakeLists.txt @@ -16,6 +16,7 @@ add_subdirectory(brute_force) add_subdirectory(cagra) add_subdirectory(ivf_flat) add_subdirectory(ivf_pq) +add_subdirectory(filters) # Set the list of Cython files to build set(cython_sources refine.pyx) diff --git a/python/cuvs/cuvs/neighbors/__init__.py b/python/cuvs/cuvs/neighbors/__init__.py index 0ecd57d75..52bb1eef8 100644 --- a/python/cuvs/cuvs/neighbors/__init__.py +++ b/python/cuvs/cuvs/neighbors/__init__.py @@ -13,8 +13,15 @@ # limitations under the License. -from cuvs.neighbors import brute_force, cagra, ivf_flat, ivf_pq +from cuvs.neighbors import brute_force, cagra, filters, ivf_flat, ivf_pq from .refine import refine -__all__ = ["brute_force", "cagra", "ivf_flat", "ivf_pq", "refine"] +__all__ = [ + "brute_force", + "cagra", + "filters", + "ivf_flat", + "ivf_pq", + "refine", +] diff --git a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pxd b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pxd index c57fa9e8d..183827916 100644 --- a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pxd +++ b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pxd @@ -20,6 +20,7 @@ from libc.stdint cimport uintptr_t from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor from cuvs.distance_type cimport cuvsDistanceType +from cuvs.neighbors.filters.filters cimport cuvsFilter, cuvsFilterType cdef extern from "cuvs/neighbors/brute_force.h" nogil: @@ -44,4 +45,5 @@ cdef extern from "cuvs/neighbors/brute_force.h" nogil: cuvsBruteForceIndex_t index, DLManagedTensor* queries, DLManagedTensor* neighbors, - DLManagedTensor* distances) except + + DLManagedTensor* distances, + cuvsFilter filter) except + diff --git a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx index 490f1d3ac..559302ccc 100644 --- a/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx +++ b/python/cuvs/cuvs/neighbors/brute_force/brute_force.pyx @@ -38,6 +38,7 @@ from cuvs.distance import DISTANCE_TYPES from cuvs.common.c_api cimport cuvsResources_t from cuvs.common.exceptions import check_cuvs +from cuvs.neighbors.filters import no_filter cdef class Index: @@ -129,7 +130,8 @@ def search(Index index, k, neighbors=None, distances=None, - resources=None): + resources=None, + prefilter=None): """ Find the k nearest neighbors for each query. @@ -147,10 +149,18 @@ def search(Index index, distances : Optional CUDA array interface compliant matrix shape (n_queries, k) If supplied, the distances to the neighbors will be written here in-place. (default None) + prefilter : Optional cuvs.neighbors.cuvsFilter can be used to filter + queries and neighbors based on a given bitmap. The filter + function should have a row-major layout and logical shape + [n_queries, n_samples], using the first n_samples bits to + indicate whether queries[0] should compute the distance with + index. + (default None) {resources_docstring} Examples -------- + >>> # Example without pre-filter >>> import cupy as cp >>> from cuvs.neighbors import brute_force >>> n_samples = 50000 @@ -170,6 +180,36 @@ def search(Index index, >>> distances, neighbors = brute_force.search(index, queries, k) >>> neighbors = cp.asarray(neighbors) >>> distances = cp.asarray(distances) + + Examples + -------- + >>> # Example with pre-filter + >>> import numpy as np + >>> import cupy as cp + >>> from cuvs.neighbors import brute_force, filters + >>> n_samples = 50000 + >>> n_features = 50 + >>> n_queries = 1000 + >>> dataset = cp.random.random_sample((n_samples, n_features), + ... dtype=cp.float32) + >>> # Build index + >>> index = brute_force.build(dataset, metric="sqeuclidean") + >>> # Search using the built index + >>> queries = cp.random.random_sample((n_queries, n_features), + ... dtype=cp.float32) + >>> # Build filters + >>> n_bitmap = np.ceil(n_samples * n_queries / 32).astype(int) + >>> # Create your own bitmap as the filter by replacing the random one. + >>> bitmap = cp.random.randint(1, 1000, size=(n_bitmap,), dtype=cp.uint32) + >>> prefilter = filters.from_bitmap(bitmap) + >>> k = 10 + >>> # Using a pooling allocator reduces overhead of temporary array + >>> # creation during search. This is useful if multiple searches + >>> # are performed with same query size. + >>> distances, neighbors = brute_force.search(index, queries, k, + ... prefilter=prefilter) + >>> neighbors = cp.asarray(neighbors) + >>> distances = cp.asarray(distances) """ if not index.trained: raise ValueError("Index needs to be built before calling search.") @@ -202,13 +242,17 @@ def search(Index index, cdef cydlpack.DLManagedTensor* distances_dlpack = \ cydlpack.dlpack_c(distances_cai) + if prefilter is None: + prefilter = no_filter() + with cuda_interruptible(): check_cuvs(cuvsBruteForceSearch( res, index.index, queries_dlpack, neighbors_dlpack, - distances_dlpack + distances_dlpack, + prefilter.prefilter )) return (distances, neighbors) diff --git a/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt b/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt new file mode 100644 index 000000000..8f281d1c8 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/filters/CMakeLists.txt @@ -0,0 +1,25 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Set the list of Cython files to build +set(cython_sources filters.pyx) +set(linked_libraries cuvs::cuvs cuvs::c_api) + +# Build all of the Cython targets +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX + neighbors_prefilter_ +) diff --git a/python/cuvs/cuvs/neighbors/filters/__init__.pxd b/python/cuvs/cuvs/neighbors/filters/__init__.pxd new file mode 100644 index 000000000..e69de29bb diff --git a/python/cuvs/cuvs/neighbors/filters/__init__.py b/python/cuvs/cuvs/neighbors/filters/__init__.py new file mode 100644 index 000000000..2ad118965 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/filters/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from .filters import Prefilter, from_bitmap, no_filter + +__all__ = ["no_filter", "from_bitmap", "Prefilter"] diff --git a/python/cuvs/cuvs/neighbors/filters/filters.pxd b/python/cuvs/cuvs/neighbors/filters/filters.pxd new file mode 100644 index 000000000..6ab4e91f9 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/filters/filters.pxd @@ -0,0 +1,30 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: language_level=3 + +from libc.stdint cimport uintptr_t + + +cdef extern from "cuvs/neighbors/common.h" nogil: + + ctypedef enum cuvsFilterType: + NO_FILTER + BITSET + BITMAP + + ctypedef struct cuvsFilter: + uintptr_t addr + cuvsFilterType type diff --git a/python/cuvs/cuvs/neighbors/filters/filters.pyx b/python/cuvs/cuvs/neighbors/filters/filters.pyx new file mode 100644 index 000000000..3a81cb786 --- /dev/null +++ b/python/cuvs/cuvs/neighbors/filters/filters.pyx @@ -0,0 +1,97 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# cython: language_level=3 + +import numpy as np + +from libc.stdint cimport uintptr_t + +from cuvs.common cimport cydlpack + +from .filters cimport BITMAP, NO_FILTER, cuvsFilter + +from pylibraft.common.cai_wrapper import wrap_array +from pylibraft.neighbors.common import _check_input_array + + +cdef class Prefilter: + cdef public cuvsFilter prefilter + cdef public object parent + + def __init__(self, cuvsFilter prefilter, parent=None): + if parent is not None: + self.parent = parent + self.prefilter = prefilter + + +def no_filter(): + """ + Create a default pre-filter which filters nothing. + """ + cdef cuvsFilter filter + filter.type = NO_FILTER + filter.addr = NULL + return Prefilter(filter) + + +def from_bitmap(bitmap): + """ + Create a pre-filter from an array with type of uint32. + + Parameters + ---------- + bitmap : numpy.ndarray + An array with type of `uint32` where each bit in the array corresponds + to if a sample and query pair is greenlit (not filtered) or filtered. + The array is row-major, meaning the bits are ordered by rows first. + Each bit in a `uint32` element represents a different sample-query + pair. + + - Bit value of 1: The sample-query pair is greenlit (allowed). + - Bit value of 0: The sample-query pair is filtered. + + Returns + ------- + filter : cuvs.neighbors.filters.Prefilter + An instance of `Prefilter` that can be used to filter neighbors + based on the given bitmap. + {resources_docstring} + + Examples + -------- + + >>> import cupy as cp + >>> import numpy as np + >>> from cuvs.neighbors import filters + >>> + >>> n_samples = 50000 + >>> n_queries = 1000 + >>> + >>> n_bitmap = np.ceil(n_samples * n_queries / 32).astype(int) + >>> bitmap = cp.random.randint(1, 100, size=(n_bitmap,), dtype=cp.uint32) + >>> prefilter = filters.from_bitmap(bitmap) + """ + bitmap_cai = wrap_array(bitmap) + _check_input_array(bitmap_cai, [np.dtype('uint32')]) + + cdef cydlpack.DLManagedTensor* bitmap_dlpack = \ + cydlpack.dlpack_c(bitmap_cai) + + cdef cuvsFilter filter + filter.type = BITMAP + filter.addr = bitmap_dlpack + + return Prefilter(filter, parent=bitmap) diff --git a/python/cuvs/cuvs/test/test_brute_force.py b/python/cuvs/cuvs/test/test_brute_force.py index 146ab59f0..acf347ec3 100644 --- a/python/cuvs/cuvs/test/test_brute_force.py +++ b/python/cuvs/cuvs/test/test_brute_force.py @@ -18,7 +18,7 @@ from pylibraft.common import device_ndarray from scipy.spatial.distance import cdist -from cuvs.neighbors import brute_force +from cuvs.neighbors import brute_force, filters @pytest.mark.parametrize("n_index_rows", [32, 100]) @@ -62,6 +62,8 @@ def test_brute_force_knn( indices_device = device_ndarray(indices) distances_device = device_ndarray(distances) + prefilter = filters.no_filter() + brute_force_index = brute_force.build(index_device, metric) ret_distances, ret_indices = brute_force.search( brute_force_index, @@ -69,6 +71,7 @@ def test_brute_force_knn( k, neighbors=indices_device, distances=distances_device, + prefilter=prefilter, ) pw_dists = cdist(queries, index, metric=metric) @@ -88,3 +91,96 @@ def test_brute_force_knn( np.testing.assert_allclose( cpu_ordered[:k], gpu_dists, atol=1e-3, rtol=1e-3 ) + + +def create_sparse_array(shape, sparsity): + bits_per_uint32 = 32 + + num_bits = np.prod(shape) * bits_per_uint32 + num_ones = int(num_bits * sparsity) + + array = np.zeros(shape, dtype=np.uint32) + indices = np.random.choice(num_bits, num_ones, replace=False) + + for index in indices: + i = index // bits_per_uint32 + bit_position = index % bits_per_uint32 + array.flat[i] |= 1 << bit_position + + return array + + +@pytest.mark.parametrize("n_index_rows", [32, 100]) +@pytest.mark.parametrize("n_query_rows", [32, 100]) +@pytest.mark.parametrize("n_cols", [40, 100]) +@pytest.mark.parametrize("k", [1, 5, 32]) +@pytest.mark.parametrize("sparsity", [0.01, 0.2, 0.4]) +@pytest.mark.parametrize( + "metric", + [ + "euclidean", + "cosine", + "sqeuclidean", + ], +) +@pytest.mark.parametrize("inplace", [True]) +@pytest.mark.parametrize("dtype", [np.float32]) +def test_prefiltered_brute_force_knn( + n_index_rows, n_query_rows, n_cols, k, sparsity, inplace, metric, dtype +): + index = np.random.random_sample((n_index_rows, n_cols)).astype(dtype) + queries = np.random.random_sample((n_query_rows, n_cols)).astype(dtype) + bitmap = create_sparse_array( + (np.ceil(n_query_rows * n_index_rows / 32).astype(int)), sparsity + ) + + is_min = metric != "inner_product" + + indices = np.zeros((n_query_rows, k), dtype="int64") + distances = np.zeros((n_query_rows, k), dtype=dtype) + + index_device = device_ndarray(index) + queries_device = device_ndarray(queries) + indices_device = device_ndarray(indices) + distances_device = device_ndarray(distances) + bitmap_device = device_ndarray(bitmap) + prefilter = filters.from_bitmap(bitmap_device) + bitmap_device = None + brute_force_index = brute_force.build(index_device, metric) + ret_distances, ret_indices = brute_force.search( + brute_force_index, + queries_device, + k, + neighbors=indices_device, + distances=distances_device, + prefilter=prefilter, + ) + + pw_dists = cdist(queries, index, metric=metric) + + # convert bitmap to bool array. + bitmap_as_uint8 = bitmap.view(np.uint8) + bool_filter = np.unpackbits(bitmap_as_uint8) + bool_filter = bool_filter.reshape(-1, 4, 8) + bool_filter = np.flip(bool_filter, axis=2) + bool_filter = bool_filter.reshape(-1)[: (n_query_rows * n_index_rows)] + bool_filter = bool_filter.reshape(-1, n_index_rows) + bool_filter = np.logical_not(bool_filter) + + pw_dists[bool_filter] = np.inf if is_min else -np.inf + + distances_device = ret_distances if not inplace else distances_device + + actual_distances = distances_device.copy_to_host() + + actual_distances[actual_distances <= 1e-5] = 0.0 + argsort = np.argsort(pw_dists, axis=1) + + for i in range(pw_dists.shape[0]): + expected_indices = argsort[i] + gpu_dists = actual_distances[i] + + cpu_ordered = pw_dists[i, expected_indices] + np.testing.assert_allclose( + cpu_ordered[:k], gpu_dists, atol=1e-3, rtol=1e-3 + ) diff --git a/rust/cuvs/src/brute_force.rs b/rust/cuvs/src/brute_force.rs index c153359b8..63752ee9c 100644 --- a/rust/cuvs/src/brute_force.rs +++ b/rust/cuvs/src/brute_force.rs @@ -80,12 +80,18 @@ impl Index { distances: &ManagedTensor, ) -> Result<()> { unsafe { + let prefilter = ffi::cuvsFilter { + addr: 0, + type_: ffi::cuvsFilterType::NO_FILTER, + }; + check_cuvs(ffi::cuvsBruteForceSearch( res.0, self.0, queries.as_ptr(), neighbors.as_ptr(), distances.as_ptr(), + prefilter, )) } } @@ -117,9 +123,7 @@ mod tests { let dataset_host = ndarray::Array::::random((n_datapoints, n_features), Uniform::new(0., 1.0)); - let dataset = ManagedTensor::from(&dataset_host) - .to_device(&res) - .unwrap(); + let dataset = ManagedTensor::from(&dataset_host).to_device(&res).unwrap(); println!("dataset {:#?}", dataset_host); @@ -168,12 +172,12 @@ mod tests { assert_eq!(neighbors_host[[3, 0]], 3); } -/* - #[test] - fn test_cosine() { - test_bfknn(DistanceType::CosineExpanded); - } -*/ + /* + #[test] + fn test_cosine() { + test_bfknn(DistanceType::CosineExpanded); + } + */ #[flaky] fn test_l2() {