diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 718ca917e..a650c155d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -57,6 +57,8 @@ option(BUILD_SHARED_LIBS "Build cuvs shared libraries" ON) option(BUILD_TESTS "Build cuvs unit-tests" ON) option(BUILD_MICRO_BENCH "Build cuvs C++ micro benchmarks" OFF) option(BUILD_ANN_BENCH "Build cuvs ann benchmarks" OFF) +option(BUILD_C_LIBRARY "Build cuvs C API library" OFF) +option(BUILD_C_TESTS "Build cuvs C API tests" OFF) option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF) option(CUDA_ENABLE_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF @@ -71,6 +73,7 @@ option(CUVS_NVTX "Enable nvtx markers" OFF) if((BUILD_TESTS OR BUILD_MICRO_BENCH OR BUILD_ANN_BENCH + OR BUILD_C_LIBRARY ) AND NOT BUILD_CPU_ONLY ) @@ -80,6 +83,11 @@ endif() if(BUILD_CPU_ONLY) set(BUILD_SHARED_LIBS OFF) set(BUILD_TESTS OFF) + set(BUILD_C_LIBRARY OFF) +endif() + +if(NOT BUILD_C_LIBRARY) + set(BUILD_C_TESTS OFF) endif() # Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to @@ -175,7 +183,11 @@ if(NOT BUILD_CPU_ONLY) include(cmake/thirdparty/get_raft.cmake) endif() -if(BUILD_TESTS) +if(BUILD_C_LIBRARY) + include(cmake/thirdparty/get_dlpack.cmake) +endif() + +if(BUILD_TESTS OR BUILD_C_TESTS) include(cmake/thirdparty/get_gtest.cmake) endif() @@ -286,6 +298,35 @@ target_compile_options( # ensure CUDA symbols aren't relocated to the middle of the debug build binaries target_link_options(cuvs PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") +# ################################################################################################## +# * cuvs_c ------------------------------------------------------------------------------- +if(BUILD_C_LIBRARY) + add_library(cuvs_c SHARED src/core/c_api.cpp src/neighbors/cagra_c.cpp) + + add_library(cuvs::c_api ALIAS cuvs_c) + + set_target_properties( + cuvs_c + PROPERTIES CXX_STANDARD 17 + 
CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + EXPORT_NAME c_api + ) + + target_compile_options(cuvs_c PRIVATE "$<$:${CUVS_CXX_FLAGS}>") + + target_include_directories( + cuvs_c + PUBLIC "$" + INTERFACE "$" + ) + + target_link_libraries(cuvs_c PUBLIC cuvs::cuvs) + + # ensure CUDA symbols aren't relocated to the middle of the debug build binaries + target_link_options(cuvs_c PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") +endif() + # ################################################################################################## # * install targets----------------------------------------------------------- rapids_cmake_install_lib_dir(lib_dir) @@ -305,17 +346,34 @@ install( DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} ) +if(BUILD_C_LIBRARY) + install( + TARGETS cuvs_c + DESTINATION ${lib_dir} + COMPONENT c_api + EXPORT cuvs-c-exports + ) +endif() + install( FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cuvs/version_config.hpp COMPONENT cuvs DESTINATION include/cuvs ) +if(TARGET cuvs_c) + list(APPEND cuvs_components c_api) + list(APPEND cuvs_export_sets cuvs-c-exports) + set(CUVS_C_TARGET cuvs_c) +endif() + # Use `rapids_export` for 22.04 as it will have COMPONENT support rapids_export( INSTALL cuvs EXPORT_SET cuvs-exports - GLOBAL_TARGETS cuvs + COMPONENTS ${cuvs_components} + COMPONENTS_EXPORT_SET ${cuvs_export_sets} + GLOBAL_TARGETS cuvs ${CUVS_C_TARGET} NAMESPACE cuvs:: ) @@ -324,21 +382,26 @@ rapids_export( rapids_export( BUILD cuvs EXPORT_SET cuvs-exports - GLOBAL_TARGETS cuvs + COMPONENTS ${cuvs_components} + COMPONENTS_EXPORT_SET ${cuvs_export_sets} + GLOBAL_TARGETS cuvs ${CUVS_C_TARGET} NAMESPACE cuvs:: ) # ################################################################################################## # * shared test/bench headers ------------------------------------------------ -if(BUILD_TESTS OR BUILD_MICRO_BENCH) +if(BUILD_TESTS + OR BUILD_MICRO_BENCH + OR BUILD_C_TESTS +) include(internal/CMakeLists.txt) endif() # 
################################################################################################## # * build test executable ---------------------------------------------------- -if(BUILD_TESTS) +if(BUILD_TESTS OR BUILD_C_TESTS) include(test/CMakeLists.txt) endif() diff --git a/cpp/cmake/thirdparty/get_dlpack.cmake b/cpp/cmake/thirdparty/get_dlpack.cmake new file mode 100644 index 000000000..7c90cb3d2 --- /dev/null +++ b/cpp/cmake/thirdparty/get_dlpack.cmake @@ -0,0 +1,41 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# This function finds dlpack and sets any additional necessary environment variables. 
+function(find_and_configure_dlpack VERSION) + + include(${rapids-cmake-dir}/find/generate_module.cmake) + rapids_find_generate_module(DLPACK HEADER_NAMES dlpack.h) + + rapids_cpm_find( + dlpack ${VERSION} + GIT_REPOSITORY https://github.com/dmlc/dlpack.git + GIT_TAG v${VERSION} + GIT_SHALLOW TRUE + DOWNLOAD_ONLY TRUE + OPTIONS "BUILD_MOCK OFF" + ) + + if(DEFINED dlpack_SOURCE_DIR) + # otherwise find_package(DLPACK) will set this variable + set(DLPACK_INCLUDE_DIR + "${dlpack_SOURCE_DIR}/include" + PARENT_SCOPE + ) + endif() +endfunction() + +set(CUVS_MIN_VERSION_dlpack 0.8) + +find_and_configure_dlpack(${CUVS_MIN_VERSION_dlpack}) diff --git a/cpp/include/cuvs/core/c_api.h b/cpp/include/cuvs/core/c_api.h new file mode 100644 index 000000000..b50032916 --- /dev/null +++ b/cpp/include/cuvs/core/c_api.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include + +/** + * @defgroup c_api C API Core Types and Functions + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief An opaque C handle for C++ type `raft::resources` + * + */ +typedef uintptr_t cuvsResources_t; + +/** + * @brief An enum denoting return values for function calls + * + */ +typedef enum { CUVS_ERROR, CUVS_SUCCESS } cuvsError_t; + +/** + * @brief Create an Initialized opaque C handle for C++ type `raft::resources` + * + * @param[in] res cuvsResources_t opaque C handle + * @return cuvsError_t + */ +cuvsError_t cuvsResourcesCreate(cuvsResources_t* res); + +/** + * @brief Destroy and de-allocate opaque C handle for C++ type `raft::resources` + * + * @param[in] res cuvsResources_t opaque C handle + * @return cuvsError_t + */ +cuvsError_t cuvsResourcesDestroy(cuvsResources_t res); + +/** + * @brief Set cudaStream_t on cuvsResources_t to queue CUDA kernels on APIs + * that accept a cuvsResources_t handle + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] stream cudaStream_t stream to queue CUDA kernels + * @return cuvsError_t + */ +cuvsError_t cuvsStreamSet(cuvsResources_t res, cudaStream_t stream); + +#ifdef __cplusplus +} +#endif + +/** @} */ diff --git a/cpp/include/cuvs/core/detail/interop.hpp b/cpp/include/cuvs/core/detail/interop.hpp new file mode 100644 index 000000000..f218dc554 --- /dev/null +++ b/cpp/include/cuvs/core/detail/interop.hpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +namespace cuvs::core::detail { + +template +DLDevice accessor_type_to_DLDevice() +{ + if constexpr (AccessorType::is_host_accessible and AccessorType::is_device_accessible) { + return DLDevice{kDLCUDAManaged}; + } else if constexpr (AccessorType::is_device_accessible) { + return DLDevice{kDLCUDA}; + } else if constexpr (AccessorType::is_host_accessible) { + return DLDevice{kDLCPU}; + } +} + +template +DLDataType data_type_to_DLDataType() +{ + uint8_t const bits{sizeof(T) * 8}; + uint16_t const lanes{1}; + if constexpr (std::is_floating_point_v) { + return DLDataType{kDLFloat, bits, lanes}; + } else if constexpr (std::is_signed_v) { + return DLDataType{kDLInt, bits, lanes}; + } else { + return DLDataType{kDLUInt, bits, lanes}; + } +} + +bool is_dlpack_device_compatible(DLTensor tensor) +{ + return tensor.device.device_type == kDLCUDAManaged || tensor.device.device_type == kDLCUDAHost || + tensor.device.device_type == kDLCUDA; +} + +bool is_dlpack_host_compatible(DLTensor tensor) +{ + return tensor.device.device_type == kDLCUDAManaged || tensor.device.device_type == kDLCUDAHost || + tensor.device.device_type == kDLCPU; +} + +template > +MdspanType from_dlpack(DLManagedTensor* managed_tensor) +{ + auto tensor = managed_tensor->dl_tensor; + + auto to_data_type = data_type_to_DLDataType(); + RAFT_EXPECTS(to_data_type.code == tensor.dtype.code, + "code mismatch between return mdspan and DLTensor"); + RAFT_EXPECTS(to_data_type.bits == tensor.dtype.bits, + "bits mismatch between return mdspan and DLTensor"); + RAFT_EXPECTS(to_data_type.lanes == tensor.dtype.lanes, + "lanes mismatch between return mdspan and DLTensor"); + RAFT_EXPECTS(tensor.dtype.lanes == 1, "More than 1 DLTensor lanes not supported"); + RAFT_EXPECTS(tensor.strides == nullptr, "Strided memory layout for 
DLTensor not supported"); + + auto to_device = accessor_type_to_DLDevice(); + if (to_device.device_type == kDLCUDA) { + RAFT_EXPECTS(is_dlpack_device_compatible(tensor), + "device_type mismatch between return mdspan and DLTensor"); + } else if (to_device.device_type == kDLCPU) { + RAFT_EXPECTS(is_dlpack_host_compatible(tensor), + "device_type mismatch between return mdspan and DLTensor"); + } + + RAFT_EXPECTS(MdspanType::extents_type::rank() == tensor.ndim, + "ndim mismatch between return mdspan and DLTensor"); + + // auto exts = typename MdspanType::extents_type{tensor.shape}; + std::array shape{}; + for (int64_t i = 0; i < tensor.ndim; ++i) { + shape[i] = tensor.shape[i]; + } + auto exts = typename MdspanType::extents_type{shape}; + + return MdspanType{reinterpret_cast(tensor.data), exts}; +} + +} // namespace cuvs::core::detail diff --git a/cpp/include/cuvs/core/interop.hpp b/cpp/include/cuvs/core/interop.hpp new file mode 100644 index 000000000..9510022db --- /dev/null +++ b/cpp/include/cuvs/core/interop.hpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "detail/interop.hpp" + +namespace cuvs::core { + +/** + * @defgroup interop Interoperability between `mdspan` and `DLManagedTensor` + * @{ + */ + +/** + * @brief Check if DLTensor has device accessible memory. 
+ * This function returns true for `DLDeviceType` of values + * `kDLCUDA`, `kDLCUDAHost`, or `kDLCUDAManaged` + * + * @param[in] tensor DLTensor object to check underlying memory type + * @return bool + */ +bool is_dlpack_device_compatible(DLTensor tensor) +{ + return detail::is_dlpack_device_compatible(tensor); +} + +/** + * @brief Check if DLTensor has host accessible memory. + * This function returns true for `DLDeviceType` of values + * `kDLCPU`, `kDLCUDAHost`, or `kDLCUDAManaged` + * + * @param tensor DLTensor object to check underlying memory type + * @return bool + */ +bool is_dlpack_host_compatible(DLTensor tensor) +{ + return detail::is_dlpack_host_compatible(tensor); +} + +/** + * @brief Convert a DLManagedTensor to an mdspan + * NOTE: This function only supports compact row-major layouts. + * + * @code {.cpp} + * #include + * #include + * // We have a `DLManagedTensor` with `DLDeviceType == kDLCUDA`, + * // `DLDataType.code == kDLFloat` and `DLDataType.bits == 8` + * DLManagedTensor tensor; + * // declare the return type + * using mdpsan_type = raft::device_mdspan; + * auto mds = raft::core::from_dlpack(&tensor); + * @endcode + * + * @tparam MdspanType + * @tparam typename + * @param[in] managed_tensor + * @return MdspanType + */ +template > +MdspanType from_dlpack(DLManagedTensor* managed_tensor) +{ + return detail::from_dlpack(managed_tensor); +} + +/** + * @} + */ + +} // namespace cuvs::core diff --git a/cpp/include/cuvs/neighbors/cagra_c.h b/cpp/include/cuvs/neighbors/cagra_c.h new file mode 100644 index 000000000..105f4f1b9 --- /dev/null +++ b/cpp/include/cuvs/neighbors/cagra_c.h @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +/** + * @defgroup cagra_c C API for CUDA ANN Graph-based nearest neighbor search + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Enum to denote which ANN algorithm is used to build CAGRA graph + * + */ +enum cagraGraphBuildAlgo { + /* Use IVF-PQ to build all-neighbors knn graph */ + IVF_PQ, + /* Experimental, use NN-Descent to build all-neighbors knn graph */ + NN_DESCENT +}; + +/** + * @brief Supplemental parameters to build CAGRA Index + * + */ +struct cagraIndexParams { + /** Degree of input graph for pruning. */ + size_t intermediate_graph_degree = 128; + /** Degree of output graph. */ + size_t graph_degree = 64; + /** ANN algorithm to build knn graph. */ + cagraGraphBuildAlgo build_algo = IVF_PQ; + /** Number of Iterations to run if building with NN_DESCENT */ + size_t nn_descent_niter = 20; +}; + +/** + * @brief Enum to denote algorithm used to search CAGRA Index + * + */ +enum cagraSearchAlgo { + /** For large batch sizes. */ + SINGLE_CTA, + /** For small batch sizes. */ + MULTI_CTA, + MULTI_KERNEL, + AUTO +}; + +/** + * @brief Enum to denote Hash Mode used while searching CAGRA index + * + */ +enum cagraHashMode { HASH, SMALL, AUTO_HASH }; + +/** + * @brief Supplemental parameters to search CAGRA index + * + */ +typedef struct { + /** Maximum number of queries to search at the same time (batch size). Auto select when 0.*/ + size_t max_queries = 0; + + /** Number of intermediate search results retained during the search. 
+ * + * This is the main knob to adjust trade off between accuracy and search speed. + * Higher values improve the search accuracy. + */ + size_t itopk_size = 64; + + /** Upper limit of search iterations. Auto select when 0.*/ + size_t max_iterations = 0; + + // In the following we list additional search parameters for fine tuning. + // Reasonable default values are automatically chosen. + + /** Which search implementation to use. */ + cagraSearchAlgo algo = AUTO; + + /** Number of threads used to calculate a single distance. 4, 8, 16, or 32. */ + size_t team_size = 0; + + /** Number of graph nodes to select as the starting point for the search in each iteration. aka + * search width?*/ + size_t search_width = 1; + /** Lower limit of search iterations. */ + size_t min_iterations = 0; + + /** Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0. */ + size_t thread_block_size = 0; + /** Hashmap type. Auto selection when AUTO. */ + cagraHashMode hashmap_mode = AUTO_HASH; + /** Lower limit of hashmap bit length. More than 8. */ + size_t hashmap_min_bitlen = 0; + /** Upper limit of hashmap fill rate. More than 0.1, less than 0.9.*/ + float hashmap_max_fill_rate = 0.5; + + /** Number of iterations of initial random seed node selection. 1 or more. */ + uint32_t num_random_samplings = 1; + /** Bit mask used for initial random seed node selection. 
*/ + uint64_t rand_xor_mask = 0x128394; +} cagraSearchParams; + +/** + * @brief Struct to hold address of cuvs::neighbors::cagra::index and its active trained dtype + * + */ +typedef struct { + uintptr_t addr; + DLDataType dtype; + +} cagraIndex; + +typedef cagraIndex* cagraIndex_t; + +/** + * @brief Allocate CAGRA index + * + * @param[in] index cagraIndex_t to allocate + * @return cuvsError_t + */ +cuvsError_t cagraIndexCreate(cagraIndex_t* index); + +/** + * @brief De-allocate CAGRA index + * + * @param[in] index cagraIndex_t to de-allocate + */ +cuvsError_t cagraIndexDestroy(cagraIndex_t index); + +/** + * @brief Build a CAGRA index with a `DLManagedTensor` which has underlying + * `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`, + * or `kDLCPU`. Also, acceptable underlying types are: + * 1. `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + * 2. `kDLDataType.code == kDLInt` and `kDLDataType.bits = 8` + * 3. `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 8` + * + * @code {.c} + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // Assume a populated `DLManagedTensor` type here + * DLManagedTensor dataset; + * + * // Create CAGRA index + * cagraIndex_t index; + * cuvsError_t index_create_status = cagraIndexCreate(&index); + * + * // Build the CAGRA Index + * cuvsError_t build_status = cagraBuild(res, params, &dataset, index); + * + * // de-allocate `index` and `res` + * cuvsError_t index_destroy_status = cagraIndexDestroy(index); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] params cagraIndexParams used to build CAGRA index + * @param[in] dataset DLManagedTensor* training dataset + * @param[out] index cagraIndex_t Newly built CAGRA index + * @return cuvsError_t + */ +cuvsError_t cagraBuild(cuvsResources_t res, + 
cagraIndexParams params, + DLManagedTensor* dataset, + cagraIndex_t index); + +/** + * @brief Build a CAGRA index with a `DLManagedTensor` which has underlying + * `DLDeviceType` equal to `kDLCUDA`, `kDLCUDAHost`, `kDLCUDAManaged`. + * It is also important to note that the CAGRA Index must have been built + * with the same type of `queries`, such that `index.dtype.code == + * queries.dl_tensor.dtype.code` Types for input are: + * 1. `queries`: kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + * 2. `neighbors`: `kDLDataType.code == kDLUInt` and `kDLDataType.bits = 32` + * 3. `distances`: `kDLDataType.code == kDLFloat` and `kDLDataType.bits = 32` + * + * @code {.c} + * #include + * #include + * + * // Create cuvsResources_t + * cuvsResources_t res; + * cuvsError_t res_create_status = cuvsResourcesCreate(&res); + * + * // Assume a populated `DLManagedTensor` type here + * DLManagedTensor dataset; + * DLManagedTensor queries; + * DLManagedTensor neighbors; + * + * // Search the `index` built using `cagraBuild` + * cagraSearchParams params; + * cuvsError_t search_status = cagraSearch(res, params, index, queries, neighbors, distances); + * + * // de-allocate `index` and `res` + * cuvsError_t index_destroy_status = cagraIndexDestroy(index); + * cuvsError_t res_destroy_status = cuvsResourcesDestroy(res); + * @endcode + * + * @param[in] res cuvsResources_t opaque C handle + * @param[in] params cagraSearchParams used to search CAGRA index + * @param[in] index cagraIndex which has been returned by `cagraBuild` + * @param[in] queries DLManagedTensor* queries dataset to search + * @param[out] neighbors DLManagedTensor* output `k` neighbors for queries + * @param[out] distances DLManagedTensor* output `k` distances for queries + */ +cuvsError_t cagraSearch(cuvsResources_t res, + cagraSearchParams params, + cagraIndex_t index, + DLManagedTensor* queries, + DLManagedTensor* neighbors, + DLManagedTensor* distances); + +#ifdef __cplusplus +} +#endif + +/** + * @} + */ diff 
--git a/cpp/src/core/c_api.cpp b/cpp/src/core/c_api.cpp new file mode 100644 index 000000000..133021d77 --- /dev/null +++ b/cpp/src/core/c_api.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +extern "C" cuvsError_t cuvsResourcesCreate(cuvsResources_t* res) +{ + cuvsError_t status; + try { + auto res_ptr = new raft::resources{}; + *res = reinterpret_cast(res_ptr); + status = CUVS_SUCCESS; + } catch (...) { + status = CUVS_ERROR; + } + return status; +} + +extern "C" cuvsError_t cuvsResourcesDestroy(cuvsResources_t res) +{ + cuvsError_t status; + try { + auto res_ptr = reinterpret_cast(res); + delete res_ptr; + status = CUVS_SUCCESS; + } catch (...) { + status = CUVS_ERROR; + } + return status; +} + +extern "C" cuvsError_t cuvsStreamSet(cuvsResources_t res, cudaStream_t stream) +{ + cuvsError_t status; + try { + auto res_ptr = reinterpret_cast(res); + raft::resource::set_cuda_stream(*res_ptr, static_cast(stream)); + status = CUVS_SUCCESS; + } catch (...) { + status = CUVS_ERROR; + } + return status; +} diff --git a/cpp/src/neighbors/cagra_c.cpp b/cpp/src/neighbors/cagra_c.cpp new file mode 100644 index 000000000..1c9403efe --- /dev/null +++ b/cpp/src/neighbors/cagra_c.cpp @@ -0,0 +1,203 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +namespace { + +template +void* _build(cuvsResources_t res, cagraIndexParams params, DLManagedTensor* dataset_tensor) +{ + auto dataset = dataset_tensor->dl_tensor; + + auto res_ptr = reinterpret_cast(res); + auto index = new cuvs::neighbors::cagra::index(*res_ptr); + + auto build_params = cuvs::neighbors::cagra::index_params(); + build_params.intermediate_graph_degree = params.intermediate_graph_degree; + build_params.graph_degree = params.graph_degree; + build_params.build_algo = + static_cast(params.build_algo); + build_params.nn_descent_niter = params.nn_descent_niter; + + if (cuvs::core::is_dlpack_device_compatible(dataset)) { + using mdspan_type = raft::device_matrix_view; + auto mds = cuvs::core::from_dlpack(dataset_tensor); + cuvs::neighbors::cagra::build_device(*res_ptr, build_params, mds, *index); + } else if (cuvs::core::is_dlpack_host_compatible(dataset)) { + using mdspan_type = raft::host_matrix_view; + auto mds = cuvs::core::from_dlpack(dataset_tensor); + cuvs::neighbors::cagra::build_host(*res_ptr, build_params, mds, *index); + } + + return index; +} + +template +void _search(cuvsResources_t res, + cagraSearchParams params, + cagraIndex index, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor) +{ + auto res_ptr = reinterpret_cast(res); + 
auto index_ptr = reinterpret_cast*>(index.addr); + + auto search_params = cuvs::neighbors::cagra::search_params(); + search_params.max_queries = params.max_queries; + search_params.itopk_size = params.itopk_size; + search_params.max_iterations = params.max_iterations; + search_params.algo = static_cast(params.algo); + search_params.team_size = params.team_size; + search_params.search_width = params.search_width; + search_params.min_iterations = params.min_iterations; + search_params.thread_block_size = params.thread_block_size; + search_params.hashmap_mode = static_cast(params.hashmap_mode); + search_params.hashmap_min_bitlen = params.hashmap_min_bitlen; + search_params.hashmap_max_fill_rate = params.hashmap_max_fill_rate; + search_params.num_random_samplings = params.num_random_samplings; + search_params.rand_xor_mask = params.rand_xor_mask; + + using queries_mdspan_type = raft::device_matrix_view; + using neighbors_mdspan_type = raft::device_matrix_view; + using distances_mdspan_type = raft::device_matrix_view; + auto queries_mds = cuvs::core::from_dlpack(queries_tensor); + auto neighbors_mds = cuvs::core::from_dlpack(neighbors_tensor); + auto distances_mds = cuvs::core::from_dlpack(distances_tensor); + cuvs::neighbors::cagra::search( + *res_ptr, search_params, *index_ptr, queries_mds, neighbors_mds, distances_mds); +} + +} // namespace + +extern "C" cuvsError_t cagraIndexCreate(cagraIndex_t* index) +{ + try { + *index = new cagraIndex{}; + return CUVS_SUCCESS; + } catch (...) 
{ + return CUVS_ERROR; + } +} + +extern "C" cuvsError_t cagraIndexDestroy(cagraIndex_t index_c_ptr) +{ + try { + auto index = *index_c_ptr; + + if (index.dtype.code == kDLFloat) { + auto index_ptr = + reinterpret_cast*>(index.addr); + delete index_ptr; + } else if (index.dtype.code == kDLInt) { + auto index_ptr = + reinterpret_cast*>(index.addr); + delete index_ptr; + } else if (index.dtype.code == kDLUInt) { + auto index_ptr = + reinterpret_cast*>(index.addr); + delete index_ptr; + } + delete index_c_ptr; + return CUVS_SUCCESS; + } catch (...) { + return CUVS_ERROR; + } +} + +extern "C" cuvsError_t cagraBuild(cuvsResources_t res, + cagraIndexParams params, + DLManagedTensor* dataset_tensor, + cagraIndex_t index) +{ + try { + auto dataset = dataset_tensor->dl_tensor; + + if (dataset.dtype.code == kDLFloat && dataset.dtype.bits == 32) { + index->addr = reinterpret_cast(_build(res, params, dataset_tensor)); + index->dtype.code = kDLFloat; + } else if (dataset.dtype.code == kDLInt && dataset.dtype.bits == 8) { + index->addr = reinterpret_cast(_build(res, params, dataset_tensor)); + index->dtype.code = kDLInt; + } else if (dataset.dtype.code == kDLUInt && dataset.dtype.bits == 8) { + index->addr = reinterpret_cast(_build(res, params, dataset_tensor)); + index->dtype.code = kDLUInt; + } else { + RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d", + dataset.dtype.code, + dataset.dtype.bits); + } + return CUVS_SUCCESS; + } catch (...) 
{ + return CUVS_ERROR; + } +} + +extern "C" cuvsError_t cagraSearch(cuvsResources_t res, + cagraSearchParams params, + cagraIndex_t index_c_ptr, + DLManagedTensor* queries_tensor, + DLManagedTensor* neighbors_tensor, + DLManagedTensor* distances_tensor) +{ + try { + auto queries = queries_tensor->dl_tensor; + auto neighbors = neighbors_tensor->dl_tensor; + auto distances = distances_tensor->dl_tensor; + + RAFT_EXPECTS(cuvs::core::is_dlpack_device_compatible(queries), + "queries should have device compatible memory"); + RAFT_EXPECTS(cuvs::core::is_dlpack_device_compatible(neighbors), + "neighbors should have device compatible memory"); + RAFT_EXPECTS(cuvs::core::is_dlpack_device_compatible(distances), + "distances should have device compatible memory"); + + RAFT_EXPECTS(neighbors.dtype.code == kDLUInt && neighbors.dtype.bits == 32, + "neighbors should be of type uint32_t"); + RAFT_EXPECTS(distances.dtype.code == kDLFloat && distances.dtype.bits == 32, + "distances should be of type float32"); + + auto index = *index_c_ptr; + RAFT_EXPECTS(queries.dtype.code == index.dtype.code, "type mismatch between index and queries"); + + if (queries.dtype.code == kDLFloat && queries.dtype.bits == 32) { + _search(res, params, index, queries_tensor, neighbors_tensor, distances_tensor); + } else if (queries.dtype.code == kDLInt && queries.dtype.bits == 8) { + _search(res, params, index, queries_tensor, neighbors_tensor, distances_tensor); + } else if (queries.dtype.code == kDLUInt && queries.dtype.bits == 8) { + _search(res, params, index, queries_tensor, neighbors_tensor, distances_tensor); + } else { + RAFT_FAIL("Unsupported queries DLtensor dtype: %d and bits: %d", + queries.dtype.code, + queries.dtype.bits); + } + return CUVS_SUCCESS; + } catch (...) 
{ + return CUVS_ERROR; + } +} diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 4c3ceafa6..ed194eaae 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -21,7 +21,7 @@ rapids_test_init() function(ConfigureTest) - set(options OPTIONAL NOCUDA) + set(options OPTIONAL NOCUDA C_LIB) set(oneValueArgs NAME GPUS PERCENT) set(multiValueArgs PATH TARGETS CONFIGURATIONS) @@ -46,8 +46,14 @@ function(ConfigureTest) add_executable(${TEST_NAME} ${_CUVS_TEST_PATH}) target_link_libraries( ${TEST_NAME} - PRIVATE cuvs raft::raft GTest::gtest GTest::gtest_main Threads::Threads - $ $ + PRIVATE cuvs + raft::raft + GTest::gtest + GTest::gtest_main + Threads::Threads + $ + $ + $<$:cuvs::c_api> ) set_target_properties( ${TEST_NAME} @@ -96,7 +102,21 @@ if(BUILD_TESTS) ) endif() +if(BUILD_C_TESTS) + ConfigureTest(NAME INTEROP_TEST PATH test/core/interop.cu C_LIB) + + ConfigureTest(NAME CAGRA_C_TEST PATH test/neighbors/ann_cagra_c.cu C_LIB) +endif() + # ################################################################################################## # Install tests #################################################################################### # ################################################################################################## rapids_test_install_relocatable(INSTALL_COMPONENT_SET testing DESTINATION bin/gtests/libcuvs) + +if(BUILD_C_TESTS) + enable_language(C) + + add_executable(cuvs_c_test test/core/c_api.c) + + target_link_libraries(cuvs_c_test PUBLIC cuvs::c_api) +endif() diff --git a/cpp/test/core/c_api.c b/cpp/test/core/c_api.c new file mode 100644 index 000000000..a5b73d8fb --- /dev/null +++ b/cpp/test/core/c_api.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +int main() +{ + // Create resources + cuvsResources_t res; + cuvsError_t create_error = cuvsResourcesCreate(&res); + if (create_error == CUVS_ERROR) { exit(EXIT_FAILURE); } + + // Set CUDA stream + cudaStream_t stream; + cudaStreamCreate(&stream); + cuvsError_t stream_error = cuvsStreamSet(res, stream); + if (stream_error == CUVS_ERROR) { exit(EXIT_FAILURE); } + + // Destroy resources + cuvsError_t destroy_error = cuvsResourcesDestroy(res); + if (destroy_error == CUVS_ERROR) { exit(EXIT_FAILURE); } + + return 0; +} diff --git a/cpp/test/core/interop.cu b/cpp/test/core/interop.cu new file mode 100644 index 000000000..67b853c3c --- /dev/null +++ b/cpp/test/core/interop.cu @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../test_utils.cuh" +#include +#include +#include + +#include +#include + +#include +#include + +namespace cuvs::core { + +TEST(Interop, FromDLPack) +{ + raft::resources res; + auto data = raft::make_host_vector(res, 2); + data(0) = 5; + data(1) = 10; + + auto device = DLDevice{kDLCPU}; + auto data_type = DLDataType{kDLFloat, 4 * 8, 1}; + auto shape = std::vector{2}; + + auto tensor = DLTensor{data.data_handle(), device, 1, data_type, shape.data()}; + auto managed_tensor = DLManagedTensor{tensor}; + + using mdspan_type = raft::host_mdspan>; + auto out = from_dlpack(&managed_tensor); + + ASSERT_EQ(out.rank(), data.rank()); + ASSERT_EQ(out.extent(0), data.extent(0)); + ASSERT_EQ(out(0), data(0)); + ASSERT_EQ(out(1), data(1)); +} + +} // namespace cuvs::core diff --git a/cpp/test/neighbors/ann_cagra_c.cu b/cpp/test/neighbors/ann_cagra_c.cu new file mode 100644 index 000000000..1a8dc920f --- /dev/null +++ b/cpp/test/neighbors/ann_cagra_c.cu @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "../test_utils.cuh" +#include +#include + +#include +#include + +#include +#include +#include + +float dataset[4][2] = {{0.74021935, 0.9209938}, + {0.03902049, 0.9689629}, + {0.92514056, 0.4463501}, + {0.6673192, 0.10993068}}; +float queries[4][2] = {{0.48216683, 0.0428398}, + {0.5084142, 0.6545497}, + {0.51260436, 0.2643005}, + {0.05198065, 0.5789965}}; + +uint32_t neighbors_exp[4] = {3, 0, 3, 1}; +float distances_exp[4] = {0.03878258, 0.12472608, 0.04776672, 0.15224178}; + +TEST(CagraC, BuildSearch) +{ + // create cuvsResources_t + cuvsResources_t res; + cuvsResourcesCreate(&res); + + // create dataset DLTensor + DLManagedTensor dataset_tensor; + dataset_tensor.dl_tensor.data = dataset; + dataset_tensor.dl_tensor.device.device_type = kDLCPU; + dataset_tensor.dl_tensor.ndim = 2; + dataset_tensor.dl_tensor.dtype.code = kDLFloat; + dataset_tensor.dl_tensor.dtype.bits = 32; + dataset_tensor.dl_tensor.dtype.lanes = 1; + int64_t dataset_shape[2] = {4, 2}; + dataset_tensor.dl_tensor.shape = dataset_shape; + dataset_tensor.dl_tensor.strides = nullptr; + + // create index + cagraIndex_t index; + cagraIndexCreate(&index); + + // build index + cagraIndexParams build_params; + cagraBuild(res, build_params, &dataset_tensor, index); + + // create queries DLTensor + float* queries_d; + cudaMalloc(&queries_d, sizeof(float) * 4 * 2); + cudaMemcpy(queries_d, queries, sizeof(float) * 4 * 2, cudaMemcpyDefault); + + DLManagedTensor queries_tensor; + queries_tensor.dl_tensor.data = queries_d; + queries_tensor.dl_tensor.device.device_type = kDLCUDA; + queries_tensor.dl_tensor.ndim = 2; + queries_tensor.dl_tensor.dtype.code = kDLFloat; + queries_tensor.dl_tensor.dtype.bits = 32; + queries_tensor.dl_tensor.dtype.lanes = 1; + int64_t queries_shape[2] = {4, 2}; + queries_tensor.dl_tensor.shape = queries_shape; + queries_tensor.dl_tensor.strides = nullptr; + + // create neighbors DLTensor + uint32_t* neighbors_d; + cudaMalloc(&neighbors_d, sizeof(uint32_t) * 4); + + 
DLManagedTensor neighbors_tensor; + neighbors_tensor.dl_tensor.data = neighbors_d; + neighbors_tensor.dl_tensor.device.device_type = kDLCUDA; + neighbors_tensor.dl_tensor.ndim = 2; + neighbors_tensor.dl_tensor.dtype.code = kDLUInt; + neighbors_tensor.dl_tensor.dtype.bits = 32; + neighbors_tensor.dl_tensor.dtype.lanes = 1; + int64_t neighbors_shape[2] = {4, 1}; + neighbors_tensor.dl_tensor.shape = neighbors_shape; + neighbors_tensor.dl_tensor.strides = nullptr; + + // create distances DLTensor + float* distances_d; + cudaMalloc(&distances_d, sizeof(float) * 4); + + DLManagedTensor distances_tensor; + distances_tensor.dl_tensor.data = distances_d; + distances_tensor.dl_tensor.device.device_type = kDLCUDA; + distances_tensor.dl_tensor.ndim = 2; + distances_tensor.dl_tensor.dtype.code = kDLFloat; + distances_tensor.dl_tensor.dtype.bits = 32; + distances_tensor.dl_tensor.dtype.lanes = 1; + int64_t distances_shape[2] = {4, 1}; + distances_tensor.dl_tensor.shape = distances_shape; + distances_tensor.dl_tensor.strides = nullptr; + + // search index + cagraSearchParams search_params; + cagraSearch(res, search_params, index, &queries_tensor, &neighbors_tensor, &distances_tensor); + + // verify output + ASSERT_TRUE(cuvs::devArrMatchHost(neighbors_exp, neighbors_d, 4, cuvs::Compare())); + ASSERT_TRUE( + cuvs::devArrMatchHost(distances_exp, distances_d, 4, cuvs::CompareApprox(0.001f))); + + // delete device memory + cudaFree(queries_d); + cudaFree(neighbors_d); + cudaFree(distances_d); + + // de-allocate index and res + cagraIndexDestroy(index); + cuvsResourcesDestroy(res); +}