From f6a753866f617af74e7827ae89f9314e77065b1f Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Mon, 22 Jul 2024 14:43:23 -0700 Subject: [PATCH 1/4] k --- cpp/CMakeLists.txt | 6 +- cpp/bench/ann/CMakeLists.txt | 109 ++++++-------- cpp/cmake/patches/faiss_override.json | 9 ++ cpp/cmake/patches/ggnn_override.json | 16 ++ cpp/cmake/patches/hnswlib_override.json | 16 ++ cpp/cmake/thirdparty/get_faiss.cmake | 187 +++++++++++++----------- cpp/cmake/thirdparty/get_ggnn.cmake | 43 +++--- cpp/cmake/thirdparty/get_hnswlib.cmake | 76 +++++----- cpp/test/CMakeLists.txt | 82 +++++------ 9 files changed, 278 insertions(+), 266 deletions(-) create mode 100644 cpp/cmake/patches/faiss_override.json create mode 100644 cpp/cmake/patches/ggnn_override.json create mode 100644 cpp/cmake/patches/hnswlib_override.json diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5060f4591..d204a9a23 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -682,13 +682,13 @@ rapids_export( # * build test executable ---------------------------------------------------- if(BUILD_TESTS OR BUILD_C_TESTS) - include(internal/CMakeLists.txt) - include(test/CMakeLists.txt) + add_subdirectory(internal) + add_subdirectory(test) endif() # ################################################################################################## # * build ann benchmark executable ----------------------------------------------- if(BUILD_ANN_BENCH) - include(bench/ann/CMakeLists.txt) + add_subdirectory(bench/ann/) endif() diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 6e9e66fad..80c1f3530 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -12,6 +12,8 @@ # the License. 
# ============================================================================= +list(APPEND CMAKE_MODULE_PATH "${CUVS_SOURCE_DIR}") + # ################################################################################################## # * benchmark options ------------------------------------------------------------------------------ @@ -39,31 +41,18 @@ option(CUVS_ANN_BENCH_SINGLE_EXE find_package(Threads REQUIRED) +set(CUVS_ANN_BENCH_USE_FAISS ON) +set(CUVS_FAISS_ENABLE_GPU ON) +set(CUVS_USE_FAISS_STATIC ON) + if(BUILD_CPU_ONLY) set(CUVS_FAISS_ENABLE_GPU OFF) - set(CUVS_ANN_BENCH_USE_FAISS_GPU_FLAT OFF) - set(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT OFF) - set(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_PQ OFF) set(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT OFF) set(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ OFF) set(CUVS_ANN_BENCH_USE_CUVS_CAGRA OFF) set(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE OFF) set(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB OFF) set(CUVS_ANN_BENCH_USE_GGNN OFF) -else() - set(CUVS_FAISS_ENABLE_GPU ON) -endif() - -set(CUVS_ANN_BENCH_USE_FAISS OFF) -if(CUVS_ANN_BENCH_USE_FAISS_GPU_FLAT - OR CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_PQ - OR CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT - OR CUVS_ANN_BENCH_USE_FAISS_CPU_FLAT - OR CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_PQ - OR CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT -) - set(CUVS_ANN_BENCH_USE_FAISS ON) - set(CUVS_USE_FAISS_STATIC ON) endif() set(CUVS_ANN_BENCH_USE_CUVS OFF) @@ -80,21 +69,17 @@ endif() # * Fetch requirements ------------------------------------------------------------- if(CUVS_ANN_BENCH_USE_HNSWLIB OR CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) - include(cmake/thirdparty/get_hnswlib.cmake) + include(cmake/thirdparty/get_hnswlib) endif() -include(cmake/thirdparty/get_nlohmann_json.cmake) +include(cmake/thirdparty/get_nlohmann_json) if(CUVS_ANN_BENCH_USE_GGNN) - include(cmake/thirdparty/get_ggnn.cmake) + include(cmake/thirdparty/get_ggnn) endif() if(CUVS_ANN_BENCH_USE_FAISS) - # We need to ensure that faiss has all the conda information. 
So we currently use the very ugly - # hammer of `link_libraries` to ensure that all targets in this directory and the faiss directory - # will have the conda includes/link dirs - link_libraries($) - include(cmake/thirdparty/get_faiss.cmake) + include(cmake/thirdparty/get_faiss) endif() # ################################################################################################## @@ -154,8 +139,6 @@ function(ConfigureAnnBench) $<$:CUDA::cudart_static> $ $ - -static-libgcc - -static-libstdc++ ) set_target_properties( @@ -210,27 +193,34 @@ endif() if(CUVS_ANN_BENCH_USE_HNSWLIB) ConfigureAnnBench( - NAME HNSWLIB PATH bench/ann/src/hnswlib/hnswlib_benchmark.cpp LINKS hnswlib::hnswlib + NAME HNSWLIB PATH src/hnswlib/hnswlib_benchmark.cpp LINKS hnswlib::hnswlib ) endif() if(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ) ConfigureAnnBench( - NAME CUVS_IVF_PQ PATH bench/ann/src/cuvs/cuvs_benchmark.cu - $<$:bench/ann/src/cuvs/cuvs_ivf_pq.cu> LINKS cuvs + NAME CUVS_IVF_PQ + PATH + src/cuvs/cuvs_benchmark.cu + src/cuvs/cuvs_ivf_pq.cu + LINKS cuvs ) endif() if(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT) ConfigureAnnBench( - NAME CUVS_IVF_FLAT PATH bench/ann/src/cuvs/cuvs_benchmark.cu - $<$:bench/ann/src/cuvs/cuvs_ivf_flat.cu> LINKS cuvs + NAME CUVS_IVF_FLAT + PATH + src/cuvs/cuvs_benchmark.cu + src/cuvs/cuvs_ivf_flat.cu + LINKS + cuvs ) endif() if(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE) - ConfigureAnnBench(NAME CUVS_BRUTE_FORCE PATH bench/ann/src/cuvs/cuvs_benchmark.cu LINKS cuvs) + ConfigureAnnBench(NAME CUVS_BRUTE_FORCE PATH src/cuvs/cuvs_benchmark.cu LINKS cuvs) endif() if(CUVS_ANN_BENCH_USE_CUVS_CAGRA) @@ -238,11 +228,11 @@ if(CUVS_ANN_BENCH_USE_CUVS_CAGRA) NAME CUVS_CAGRA PATH - bench/ann/src/cuvs/cuvs_benchmark.cu - $<$:bench/ann/src/cuvs/cuvs_cagra_float.cu> - $<$:bench/ann/src/cuvs/cuvs_cagra_half.cu> - $<$:bench/ann/src/cuvs/cuvs_cagra_int8_t.cu> - $<$:bench/ann/src/cuvs/cuvs_cagra_uint8_t.cu> + src/cuvs/cuvs_benchmark.cu + src/cuvs/cuvs_cagra_float.cu + src/cuvs/cuvs_cagra_half.cu + 
src/cuvs/cuvs_cagra_int8_t.cu + src/cuvs/cuvs_cagra_uint8_t.cu LINKS cuvs ) @@ -250,78 +240,65 @@ endif() if(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) ConfigureAnnBench( - NAME CUVS_CAGRA_HNSWLIB PATH bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu LINKS cuvs + NAME CUVS_CAGRA_HNSWLIB PATH src/cuvs/cuvs_cagra_hnswlib.cu LINKS cuvs hnswlib::hnswlib ) endif() -set(CUVS_FAISS_TARGETS faiss::faiss) -if(TARGET faiss::faiss_avx2) - set(CUVS_FAISS_TARGETS faiss::faiss_avx2) -endif() - message("CUVS_FAISS_TARGETS: ${CUVS_FAISS_TARGETS}") message("CUDAToolkit_LIBRARY_DIR: ${CUDAToolkit_LIBRARY_DIR}") if(CUVS_ANN_BENCH_USE_FAISS_CPU_FLAT) ConfigureAnnBench( - NAME FAISS_CPU_FLAT PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS + NAME FAISS_CPU_FLAT PATH src/faiss/faiss_cpu_benchmark.cpp LINKS ${CUVS_FAISS_TARGETS} ) endif() if(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT) ConfigureAnnBench( - NAME FAISS_CPU_IVF_FLAT PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS + NAME FAISS_CPU_IVF_FLAT PATH src/faiss/faiss_cpu_benchmark.cpp LINKS ${CUVS_FAISS_TARGETS} ) endif() if(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_PQ) ConfigureAnnBench( - NAME FAISS_CPU_IVF_PQ PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS + NAME FAISS_CPU_IVF_PQ PATH src/faiss/faiss_cpu_benchmark.cpp LINKS ${CUVS_FAISS_TARGETS} ) endif() -if(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT) +if(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT AND CUVS_FAISS_ENABLE_GPU) ConfigureAnnBench( - NAME FAISS_GPU_IVF_FLAT PATH bench/ann/src/faiss/faiss_gpu_benchmark.cu LINKS + NAME FAISS_GPU_IVF_FLAT PATH src/faiss/faiss_gpu_benchmark.cu LINKS ${CUVS_FAISS_TARGETS} ) endif() -if(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_PQ) +if(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_PQ AND CUVS_FAISS_ENABLE_GPU) ConfigureAnnBench( - NAME FAISS_GPU_IVF_PQ PATH bench/ann/src/faiss/faiss_gpu_benchmark.cu LINKS + NAME FAISS_GPU_IVF_PQ PATH src/faiss/faiss_gpu_benchmark.cu LINKS ${CUVS_FAISS_TARGETS} ) endif() -if(CUVS_ANN_BENCH_USE_FAISS_GPU_FLAT) 
+if(CUVS_ANN_BENCH_USE_FAISS_GPU_FLAT AND CUVS_FAISS_ENABLE_GPU) ConfigureAnnBench( - NAME FAISS_GPU_FLAT PATH bench/ann/src/faiss/faiss_gpu_benchmark.cu LINKS ${CUVS_FAISS_TARGETS} + NAME FAISS_GPU_FLAT PATH src/faiss/faiss_gpu_benchmark.cu LINKS ${CUVS_FAISS_TARGETS} ) endif() if(CUVS_ANN_BENCH_USE_GGNN) - include(cmake/thirdparty/get_glog.cmake) + include(cmake/thirdparty/get_glog) ConfigureAnnBench( - NAME GGNN PATH bench/ann/src/ggnn/ggnn_benchmark.cu LINKS glog::glog ggnn::ggnn CUDA::curand + NAME GGNN PATH src/ggnn/ggnn_benchmark.cu LINKS glog::glog ggnn::ggnn CUDA::curand ) endif() # ################################################################################################## # * Dynamically-loading ANN_BENCH executable ------------------------------------------------------- if(CUVS_ANN_BENCH_SINGLE_EXE) - add_executable(ANN_BENCH bench/ann/src/common/benchmark.cpp) - - # Build and link static version of the GBench to keep ANN_BENCH self-contained. - get_target_property(TMP_PROP benchmark::benchmark SOURCES) - add_library(benchmark_static STATIC ${TMP_PROP}) - get_target_property(TMP_PROP benchmark::benchmark INCLUDE_DIRECTORIES) - target_include_directories(benchmark_static PUBLIC ${TMP_PROP}) - get_target_property(TMP_PROP benchmark::benchmark LINK_LIBRARIES) - target_link_libraries(benchmark_static PUBLIC ${TMP_PROP}) + add_executable(ANN_BENCH src/common/benchmark.cpp) target_include_directories(ANN_BENCH PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) @@ -329,12 +306,10 @@ if(CUVS_ANN_BENCH_SINGLE_EXE) ANN_BENCH PRIVATE raft::raft nlohmann_json::nlohmann_json - benchmark_static + benchmark::benchmark dl - -static-libgcc fmt::fmt-header-only spdlog::spdlog_header_only - -static-libstdc++ $<$:CUDA::nvtx3> ) set_target_properties( diff --git a/cpp/cmake/patches/faiss_override.json b/cpp/cmake/patches/faiss_override.json new file mode 100644 index 000000000..c39abdc2b --- /dev/null +++ b/cpp/cmake/patches/faiss_override.json @@ -0,0 +1,9 @@ +{ + 
"packages" : { + "faiss" : { + "version": "1.7.4", + "git_url": "https://github.com/facebookresearch/faiss.git", + "git_tag": "main" + } + } + } \ No newline at end of file diff --git a/cpp/cmake/patches/ggnn_override.json b/cpp/cmake/patches/ggnn_override.json new file mode 100644 index 000000000..c9a1b6978 --- /dev/null +++ b/cpp/cmake/patches/ggnn_override.json @@ -0,0 +1,16 @@ +{ + "packages" : { + "ggnn" : { + "version": "0.5", + "git_url": "https://github.com/cgtuebingen/ggnn.git", + "git_tag": "release_${version}", + "patches" : [ + { + "file" : "${current_json_dir}/ggnn.diff", + "issue" : "Correct compilation issues", + "fixed_in" : "" + } + ] + } + } + } \ No newline at end of file diff --git a/cpp/cmake/patches/hnswlib_override.json b/cpp/cmake/patches/hnswlib_override.json new file mode 100644 index 000000000..aef2da772 --- /dev/null +++ b/cpp/cmake/patches/hnswlib_override.json @@ -0,0 +1,16 @@ +{ + "packages" : { + "hnswlib" : { + "version": "0.6.2", + "git_url": "https://github.com/nmslib/hnswlib.git", + "git_tag": "v${version}", + "patches" : [ + { + "file" : "${current_json_dir}/hnswlib.diff", + "issue" : "Correct compilation issues", + "fixed_in" : "" + } + ] + } + } + } \ No newline at end of file diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index 89446332d..d6261d248 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -15,95 +15,104 @@ #============================================================================= function(find_and_configure_faiss) - set(oneValueArgs VERSION REPOSITORY PINNED_TAG BUILD_STATIC_LIBS EXCLUDE_FROM_ALL ENABLE_GPU) - cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN} ) + set(oneValueArgs VERSION REPOSITORY PINNED_TAG BUILD_STATIC_LIBS EXCLUDE_FROM_ALL ENABLE_GPU) + cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN} ) + + rapids_find_generate_module(faiss + 
HEADER_NAMES faiss/IndexFlat.h + LIBRARY_NAMES faiss + ) + + set(patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../patches") + rapids_cpm_package_override("${patch_dir}/faiss_override.json") + + include("${rapids-cmake-dir}/cpm/detail/package_details.cmake") + rapids_cpm_package_details(faiss version repository tag shallow exclude) + + include("${rapids-cmake-dir}/cpm/detail/generate_patch_command.cmake") + rapids_cpm_generate_patch_command(faiss ${version} patch_command) + + set(BUILD_SHARED_LIBS ON) + if (PKG_BUILD_STATIC_LIBS) + set(BUILD_SHARED_LIBS OFF) + set(CPM_DOWNLOAD_faiss ON) + endif() + + include(cmake/modules/FindAVX) + # Link against AVX CPU lib if it exists + set(CUVS_FAISS_OPT_LEVEL "generic") + if(CXX_AVX2_FOUND) + set(CUVS_FAISS_OPT_LEVEL "avx2") + endif() + + rapids_cpm_find(faiss ${version} + GLOBAL_TARGETS faiss faiss_avx2 faiss_gpu faiss::faiss faiss::faiss_avx2 + CPM_ARGS + GIT_REPOSITORY ${repository} + GIT_TAG ${tag} + GIT_SHALLOW ${shallow} ${patch_command} + EXCLUDE_FROM_ALL ${exclude} + OPTIONS + "FAISS_ENABLE_GPU ${PKG_ENABLE_GPU}" + "FAISS_ENABLE_PYTHON OFF" + "FAISS_OPT_LEVEL ${CUVS_FAISS_OPT_LEVEL}" + "FAISS_USE_CUDA_TOOLKIT_STATIC ${CUDA_STATIC_RUNTIME}" + "BUILD_TESTING OFF" + "CMAKE_MESSAGE_LOG_LEVEL VERBOSE" + ) + + + include("${rapids-cmake-dir}/cpm/detail/display_patch_status.cmake") + rapids_cpm_display_patch_status(faiss) + + if(TARGET faiss AND NOT TARGET faiss::faiss) + add_library(faiss::faiss ALIAS faiss) + # We need to ensure that faiss has all the conda information. So we use this approach so that + # faiss will have the conda includes/link dirs + target_link_libraries(faiss PRIVATE $) + endif() + if(TARGET faiss_avx2 AND NOT TARGET faiss::faiss_avx2) + add_library(faiss::faiss_avx2 ALIAS faiss_avx2) + # We need to ensure that faiss has all the conda information. 
So we use this approach so that + # faiss will have the conda includes/link dirs + target_link_libraries(faiss_avx2 PRIVATE $) + endif() + if(TARGET faiss_gpu AND NOT TARGET faiss::faiss_gpu) + add_library(faiss::faiss_gpu ALIAS faiss_gpu) + # We need to ensure that faiss has all the conda information. So we use this approach so that + # faiss will have the conda includes/link dirs + target_link_libraries(faiss_gpu PRIVATE $) + endif() + + if(faiss_ADDED) + rapids_export(BUILD faiss + EXPORT_SET faiss-targets + GLOBAL_TARGETS ${CUVS_FAISS_EXPORT_GLOBAL_TARGETS} + NAMESPACE faiss::) + endif() + + # Need to tell CMake to rescan the link group of faiss::faiss_gpu and faiss + # so that we get proper link order when they are static + # + # We don't look at the existence of `faiss_avx2` as it will always exist + # even when CXX_AVX2_FOUND is false. In addition for arm builds the + # faiss_avx2 is marked as `EXCLUDE_FROM_ALL` so we don't want to add + # a dependency to it. Adding a dependency will cause it to compile, + # and fail due to invalid compiler flags. 
+ if(PKG_ENABLE_GPU AND PKG_BUILD_STATIC_LIBS AND CXX_AVX2_FOUND) + set(CUVS_FAISS_TARGETS "$,faiss::faiss_avx2>" PARENT_SCOPE) + elseif(PKG_ENABLE_GPU AND PKG_BUILD_STATIC_LIBS) + set(CUVS_FAISS_TARGETS "$,faiss::faiss>" PARENT_SCOPE) + elseif(CXX_AVX2_FOUND) + set(CUVS_FAISS_TARGETS faiss::faiss_avx2 PARENT_SCOPE) + else() + set(CUVS_FAISS_TARGETS faiss::faiss PARENT_SCOPE) + endif() - rapids_find_generate_module(faiss - HEADER_NAMES faiss/IndexFlat.h - LIBRARY_NAMES faiss - ) - - set(BUILD_SHARED_LIBS ON) - if (PKG_BUILD_STATIC_LIBS) - set(BUILD_SHARED_LIBS OFF) - set(CPM_DOWNLOAD_faiss ON) - endif() - - include(cmake/modules/FindAVX.cmake) - - # Link against AVX CPU lib if it exists - set(CUVS_FAISS_GLOBAL_TARGETS faiss::faiss) - set(CUVS_FAISS_EXPORT_GLOBAL_TARGETS faiss) - set(CUVS_FAISS_OPT_LEVEL "generic") - if(CXX_AVX_FOUND) - set(CUVS_FAISS_OPT_LEVEL "avx2") - list(APPEND CUVS_FAISS_GLOBAL_TARGETS faiss::faiss_avx2) - list(APPEND CUVS_FAISS_EXPORT_GLOBAL_TARGETS faiss_avx2) - endif() - - rapids_cpm_find(faiss ${PKG_VERSION} - GLOBAL_TARGETS ${CUVS_FAISS_GLOBAL_TARGETS} - CPM_ARGS - GIT_REPOSITORY ${PKG_REPOSITORY} - GIT_TAG ${PKG_PINNED_TAG} - EXCLUDE_FROM_ALL ${PKG_EXCLUDE_FROM_ALL} - OPTIONS - "FAISS_ENABLE_GPU ${PKG_ENABLE_GPU}" - "FAISS_ENABLE_PYTHON OFF" - "FAISS_OPT_LEVEL ${CUVS_FAISS_OPT_LEVEL}" - "FAISS_USE_CUDA_TOOLKIT_STATIC ${CUDA_STATIC_RUNTIME}" - "BUILD_TESTING OFF" - "CMAKE_MESSAGE_LOG_LEVEL VERBOSE" - ) - - if(TARGET faiss AND NOT TARGET faiss::faiss) - add_library(faiss::faiss ALIAS faiss) - endif() - - if(CXX_AVX_FOUND) - - if(TARGET faiss_avx2 AND NOT TARGET faiss::faiss_avx2) - add_library(faiss::faiss_avx2 ALIAS faiss_avx2) - endif() - endif() - - - if(faiss_ADDED) - rapids_export(BUILD faiss - EXPORT_SET faiss-targets - GLOBAL_TARGETS ${CUVS_FAISS_EXPORT_GLOBAL_TARGETS} - NAMESPACE faiss::) - endif() - - # We generate the faiss-config files when we built faiss locally, so always do `find_dependency` - rapids_export_package(BUILD 
OpenMP cuvs-ann-bench-exports) # faiss uses openMP but doesn't export a need for it - rapids_export_package(BUILD faiss cuvs-ann-bench-exports GLOBAL_TARGETS ${CUVS_FAISS_GLOBAL_TARGETS} ${CUVS_FAISS_EXPORT_GLOBAL_TARGETS}) - rapids_export_package(INSTALL faiss cuvs-ann-bench-exports GLOBAL_TARGETS ${CUVS_FAISS_GLOBAL_TARGETS} ${CUVS_FAISS_EXPORT_GLOBAL_TARGETS}) - - # Tell cmake where it can find the generated faiss-config.cmake we wrote. - include("${rapids-cmake-dir}/export/find_package_root.cmake") - rapids_export_find_package_root(BUILD faiss [=[${CMAKE_CURRENT_LIST_DIR}]=] - EXPORT_SET cuvs-ann-bench-exports) endfunction() -if(NOT CUVS_FAISS_GIT_TAG) - # TODO: Remove this once faiss supports FAISS_USE_CUDA_TOOLKIT_STATIC - # (https://github.com/facebookresearch/faiss/pull/2446) - set(CUVS_FAISS_GIT_TAG fea/statically-link-ctk) - # set(CUVS_FAISS_GIT_TAG bde7c0027191f29c9dadafe4f6e68ca0ee31fb30) -endif() - -if(NOT CUVS_FAISS_GIT_REPOSITORY) - # TODO: Remove this once faiss supports FAISS_USE_CUDA_TOOLKIT_STATIC - # (https://github.com/facebookresearch/faiss/pull/2446) - set(CUVS_FAISS_GIT_REPOSITORY https://github.com/cjnolet/faiss.git) - # set(CUVS_FAISS_GIT_REPOSITORY https://github.com/facebookresearch/faiss.git) -endif() - -find_and_configure_faiss(VERSION 1.7.4 - REPOSITORY ${CUVS_FAISS_GIT_REPOSITORY} - PINNED_TAG ${CUVS_FAISS_GIT_TAG} - BUILD_STATIC_LIBS ${CUVS_USE_FAISS_STATIC} - EXCLUDE_FROM_ALL ${CUVS_EXCLUDE_FAISS_FROM_ALL} - ENABLE_GPU ${CUVS_FAISS_ENABLE_GPU}) +find_and_configure_faiss( + BUILD_STATIC_LIBS ${CUVS_USE_FAISS_STATIC} + ENABLE_GPU ${CUVS_FAISS_ENABLE_GPU} +) diff --git a/cpp/cmake/thirdparty/get_ggnn.cmake b/cpp/cmake/thirdparty/get_ggnn.cmake index 8137ef84e..2ccfbc64d 100644 --- a/cpp/cmake/thirdparty/get_ggnn.cmake +++ b/cpp/cmake/thirdparty/get_ggnn.cmake @@ -15,29 +15,30 @@ #============================================================================= function(find_and_configure_ggnn) - set(oneValueArgs VERSION REPOSITORY 
PINNED_TAG) - cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN} ) + include(${rapids-cmake-dir}/cpm/package_override.cmake) + set(patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../patches") + rapids_cpm_package_override("${patch_dir}/ggnn_override.json") - set(patch_files_to_run "${CMAKE_CURRENT_SOURCE_DIR}/cmake/patches/ggnn.diff") - set(patch_issues_to_ref "fix compile issues") - set(patch_script "${CMAKE_BINARY_DIR}/rapids-cmake/patches/ggnn/patch.cmake") - set(log_file "${CMAKE_BINARY_DIR}/rapids-cmake/patches/ggnn/log") - string(TIMESTAMP current_year "%Y" UTC) - configure_file(${rapids-cmake-dir}/cpm/patches/command_template.cmake.in "${patch_script}" - @ONLY) + include("${rapids-cmake-dir}/cpm/detail/package_details.cmake") + rapids_cpm_package_details(ggnn version repository tag shallow exclude) + + include("${rapids-cmake-dir}/cpm/detail/generate_patch_command.cmake") + rapids_cpm_generate_patch_command(ggnn ${version} patch_command) rapids_cpm_find( - ggnn ${PKG_VERSION} + ggnn ${version} GLOBAL_TARGETS ggnn::ggnn CPM_ARGS - GIT_REPOSITORY ${PKG_REPOSITORY} - GIT_TAG ${PKG_PINNED_TAG} - GIT_SHALLOW TRUE + GIT_REPOSITORY ${repository} + GIT_TAG ${tag} + GIT_SHALLOW ${shallow} ${patch_command} DOWNLOAD_ONLY ON - PATCH_COMMAND ${CMAKE_COMMAND} -P ${patch_script} ) + + include("${rapids-cmake-dir}/cpm/detail/display_patch_status.cmake") + rapids_cpm_display_patch_status(ggnn) + if(NOT TARGET ggnn::ggnn) add_library(ggnn INTERFACE) target_include_directories(ggnn INTERFACE "$") @@ -45,14 +46,4 @@ function(find_and_configure_ggnn) endif() endfunction() -if(NOT RAFT_GGNN_GIT_TAG) - set(RAFT_GGNN_GIT_TAG release_0.5) -endif() - -if(NOT RAFT_GGNN_GIT_REPOSITORY) - set(RAFT_GGNN_GIT_REPOSITORY https://github.com/cgtuebingen/ggnn.git) -endif() -find_and_configure_ggnn(VERSION 0.5 - REPOSITORY ${RAFT_GGNN_GIT_REPOSITORY} - PINNED_TAG ${RAFT_GGNN_GIT_TAG} - ) +find_and_configure_ggnn() diff --git 
a/cpp/cmake/thirdparty/get_hnswlib.cmake b/cpp/cmake/thirdparty/get_hnswlib.cmake index 054a12f1e..2e6c895e5 100644 --- a/cpp/cmake/thirdparty/get_hnswlib.cmake +++ b/cpp/cmake/thirdparty/get_hnswlib.cmake @@ -15,78 +15,74 @@ #============================================================================= function(find_and_configure_hnswlib) - set(oneValueArgs VERSION REPOSITORY PINNED_TAG EXCLUDE_FROM_ALL) - cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" - "${multiValueArgs}" ${ARGN} ) + set(oneValueArgs) - set(patch_files_to_run "${CMAKE_CURRENT_SOURCE_DIR}/cmake/patches/hnswlib.diff") - set(patch_issues_to_ref "fix compile issues") - set(patch_script "${CMAKE_BINARY_DIR}/rapids-cmake/patches/hnswlib/patch.cmake") - set(log_file "${CMAKE_BINARY_DIR}/rapids-cmake/patches/hnswlib/log") - string(TIMESTAMP current_year "%Y" UTC) - configure_file(${rapids-cmake-dir}/cpm/patches/command_template.cmake.in "${patch_script}" - @ONLY) + include(${rapids-cmake-dir}/cpm/package_override.cmake) + set(patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../patches") + rapids_cpm_package_override("${patch_dir}/hnswlib_override.json") + + include("${rapids-cmake-dir}/cpm/detail/package_details.cmake") + rapids_cpm_package_details(hnswlib version repository tag shallow exclude) + + include("${rapids-cmake-dir}/cpm/detail/generate_patch_command.cmake") + rapids_cpm_generate_patch_command(hnswlib ${version} patch_command) rapids_cpm_find( - hnswlib ${PKG_VERSION} - GLOBAL_TARGETS hnswlib::hnswlib - BUILD_EXPORT_SET raft-exports - INSTALL_EXPORT_SET raft-exports + hnswlib ${version} + GLOBAL_TARGETS hnswlib hnswlib::hnswlib CPM_ARGS - GIT_REPOSITORY ${PKG_REPOSITORY} - GIT_TAG ${PKG_PINNED_TAG} - GIT_SHALLOW TRUE + GIT_REPOSITORY ${repository} + GIT_TAG ${tag} + GIT_SHALLOW ${shallow} ${patch_command} + EXCLUDE_FROM_ALL ${exclude} DOWNLOAD_ONLY ON - PATCH_COMMAND ${CMAKE_COMMAND} -P ${patch_script} ) + + include("${rapids-cmake-dir}/cpm/detail/display_patch_status.cmake") + 
rapids_cpm_display_patch_status(hnswlib) + if(NOT TARGET hnswlib::hnswlib) add_library(hnswlib INTERFACE ) add_library(hnswlib::hnswlib ALIAS hnswlib) target_include_directories(hnswlib INTERFACE "$" "$") + endif() - if(NOT PKG_EXCLUDE_FROM_ALL) - install(TARGETS hnswlib EXPORT hnswlib-exports) + if(hnswlib_ADDED) + # write build export rules + install(TARGETS hnswlib EXPORT hnswlib-exports) + if(NOT exclude) install(DIRECTORY "${hnswlib_SOURCE_DIR}/hnswlib/" DESTINATION include/hnswlib) # write install export rules rapids_export( INSTALL hnswlib - VERSION ${PKG_VERSION} + VERSION ${version} EXPORT_SET hnswlib-exports GLOBAL_TARGETS hnswlib NAMESPACE hnswlib::) endif() - # write build export rules rapids_export( BUILD hnswlib - VERSION ${PKG_VERSION} + VERSION ${version} EXPORT_SET hnswlib-exports GLOBAL_TARGETS hnswlib NAMESPACE hnswlib::) - include("${rapids-cmake-dir}/export/find_package_root.cmake") + include("${rapids-cmake-dir}/export/package.cmake") + rapids_export_package(INSTALL hnswlib cuvs-exports VERSION ${version} GLOBAL_TARGETS hnswlib hnswlib::hnswlib) + rapids_export_package(BUILD hnswlib cuvs-exports VERSION ${version} GLOBAL_TARGETS hnswlib hnswlib::hnswlib) + - # When using RAFT from the build dir, ensure hnswlib is also found in RAFT's build dir. This - # line adds `set(hnswlib_ROOT "${CMAKE_CURRENT_LIST_DIR}")` to build/raft-dependencies.cmake + # When using cuVS from the build dir, ensure hnswlib is also found in cuVS' build dir. 
This + # line adds `set(hnswlib_ROOT "${CMAKE_CURRENT_LIST_DIR}")` to build/cuvs-dependencies.cmake + include("${rapids-cmake-dir}/export/find_package_root.cmake") rapids_export_find_package_root( - BUILD hnswlib [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET raft-exports + BUILD hnswlib [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cuvs-exports ) endif() endfunction() - -if(NOT CUVS_HNSWLIB_GIT_TAG) - set(CUVS_HNSWLIB_GIT_TAG v0.6.2) -endif() - -if(NOT CUVS_HNSWLIB_GIT_REPOSITORY) - set(CUVS_HNSWLIB_GIT_REPOSITORY https://github.com/nmslib/hnswlib.git) -endif() -find_and_configure_hnswlib(VERSION 0.6.2 - REPOSITORY ${CUVS_HNSWLIB_GIT_REPOSITORY} - PINNED_TAG ${CUVS_HNSWLIB_GIT_TAG} - EXCLUDE_FROM_ALL OFF - ) +find_and_configure_hnswlib() diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 7921fffd3..336daa82a 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -91,22 +91,22 @@ endfunction() if(BUILD_TESTS) ConfigureTest( - NAME NEIGHBORS_TEST PATH test/neighbors/brute_force.cu - test/neighbors/brute_force_prefiltered.cu test/neighbors/refine.cu GPUS 1 PERCENT 100 + NAME NEIGHBORS_TEST PATH neighbors/brute_force.cu + neighbors/brute_force_prefiltered.cu neighbors/refine.cu GPUS 1 PERCENT 100 ) ConfigureTest( - NAME CLUSTER_TEST PATH test/cluster/kmeans.cu test/cluster/kmeans_balanced.cu - test/cluster/kmeans_find_k.cu test/cluster/linkage.cu GPUS 1 PERCENT 100 + NAME CLUSTER_TEST PATH cluster/kmeans.cu cluster/kmeans_balanced.cu + cluster/kmeans_find_k.cu cluster/linkage.cu GPUS 1 PERCENT 100 ) ConfigureTest( NAME NEIGHBORS_ANN_IVF_FLAT_TEST PATH - test/neighbors/ann_ivf_flat/test_float_int64_t.cu - test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu - test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu + neighbors/ann_ivf_flat/test_float_int64_t.cu + neighbors/ann_ivf_flat/test_int8_t_int64_t.cu + neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu GPUS 1 PERCENT @@ -117,9 +117,9 @@ if(BUILD_TESTS) NAME NEIGHBORS_ANN_IVF_PQ_TEST PATH - 
test/neighbors/ann_ivf_pq/test_float_int64_t.cu - test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu - test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu + neighbors/ann_ivf_pq/test_float_int64_t.cu + neighbors/ann_ivf_pq/test_int8_t_int64_t.cu + neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu GPUS 1 PERCENT @@ -130,9 +130,9 @@ if(BUILD_TESTS) NAME NEIGHBORS_ANN_CAGRA_TEST PATH - test/neighbors/ann_cagra/test_float_uint32_t.cu - test/neighbors/ann_cagra/test_int8_t_uint32_t.cu - test/neighbors/ann_cagra/test_uint8_t_uint32_t.cu + neighbors/ann_cagra/test_float_uint32_t.cu + neighbors/ann_cagra/test_int8_t_uint32_t.cu + neighbors/ann_cagra/test_uint8_t_uint32_t.cu GPUS 1 PERCENT @@ -143,9 +143,9 @@ if(BUILD_TESTS) NAME NEIGHBORS_ANN_NN_DESCENT_TEST PATH - test/neighbors/ann_nn_descent/test_float_uint32_t.cu - test/neighbors/ann_nn_descent/test_int8_t_uint32_t.cu - test/neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu + neighbors/ann_nn_descent/test_float_uint32_t.cu + neighbors/ann_nn_descent/test_int8_t_uint32_t.cu + neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu GPUS 1 PERCENT @@ -160,22 +160,22 @@ if(BUILD_TESTS) NAME DISTANCE_TEST PATH - test/distance/dist_canberra.cu - test/distance/dist_correlation.cu - test/distance/dist_cos.cu - test/distance/dist_hamming.cu - test/distance/dist_hellinger.cu - test/distance/dist_inner_product.cu - test/distance/dist_jensen_shannon.cu - test/distance/dist_kl_divergence.cu - test/distance/dist_l1.cu - test/distance/dist_l2_exp.cu - test/distance/dist_l2_sqrt_exp.cu - test/distance/dist_l_inf.cu - test/distance/dist_lp_unexp.cu - test/distance/dist_russell_rao.cu - test/distance/masked_nn.cu - test/sparse/neighbors/cross_component_nn.cu + distance/dist_canberra.cu + distance/dist_correlation.cu + distance/dist_cos.cu + distance/dist_hamming.cu + distance/dist_hellinger.cu + distance/dist_inner_product.cu + distance/dist_jensen_shannon.cu + distance/dist_kl_divergence.cu + distance/dist_l1.cu + distance/dist_l2_exp.cu + 
distance/dist_l2_sqrt_exp.cu + distance/dist_l_inf.cu + distance/dist_lp_unexp.cu + distance/dist_russell_rao.cu + distance/masked_nn.cu + sparse/neighbors/cross_component_nn.cu GPUS 1 PERCENT @@ -184,27 +184,27 @@ if(BUILD_TESTS) endif() if(BUILD_C_TESTS) - ConfigureTest(NAME INTEROP_TEST PATH test/core/interop.cu C_LIB) + ConfigureTest(NAME INTEROP_TEST PATH core/interop.cu C_LIB) ConfigureTest( - NAME DISTANCE_C_TEST PATH test/distance/run_pairwise_distance_c.c - test/distance/pairwise_distance_c.cu C_LIB + NAME DISTANCE_C_TEST PATH distance/run_pairwise_distance_c.c + distance/pairwise_distance_c.cu C_LIB ) ConfigureTest( - NAME BRUTEFORCE_C_TEST PATH test/neighbors/run_brute_force_c.c test/neighbors/brute_force_c.cu + NAME BRUTEFORCE_C_TEST PATH neighbors/run_brute_force_c.c neighbors/brute_force_c.cu C_LIB ) ConfigureTest( - NAME IVF_FLAT_C_TEST PATH test/neighbors/run_ivf_flat_c.c test/neighbors/ann_ivf_flat_c.cu + NAME IVF_FLAT_C_TEST PATH neighbors/run_ivf_flat_c.c neighbors/ann_ivf_flat_c.cu C_LIB ) ConfigureTest( - NAME IVF_PQ_C_TEST PATH test/neighbors/run_ivf_pq_c.c test/neighbors/ann_ivf_pq_c.cu C_LIB + NAME IVF_PQ_C_TEST PATH neighbors/run_ivf_pq_c.c neighbors/ann_ivf_pq_c.cu C_LIB ) - ConfigureTest(NAME CAGRA_C_TEST PATH test/neighbors/ann_cagra_c.cu C_LIB) + ConfigureTest(NAME CAGRA_C_TEST PATH neighbors/ann_cagra_c.cu C_LIB) endif() # ################################################################################################## @@ -215,9 +215,9 @@ rapids_test_install_relocatable(INSTALL_COMPONENT_SET testing DESTINATION bin/gt if(BUILD_C_TESTS) enable_language(C) - add_executable(cuvs_c_test test/core/c_api.c) + add_executable(cuvs_c_test core/c_api.c) target_link_libraries(cuvs_c_test PUBLIC cuvs::c_api) - add_executable(cuvs_c_neighbors_test test/neighbors/c_api.c) + add_executable(cuvs_c_neighbors_test neighbors/c_api.c) target_link_libraries(cuvs_c_neighbors_test PUBLIC cuvs::c_api) endif() From e2b8f54cd2960ed82f3f4797c5d74b518900c7f5 Mon 
Sep 17 00:00:00 2001 From: Tarang Jain Date: Mon, 22 Jul 2024 15:33:53 -0700 Subject: [PATCH 2/4] faiss deps in devcontainer --- .devcontainer/Dockerfile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 9d35e3f97..594ba8c3c 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -5,6 +5,13 @@ ARG PYTHON_PACKAGE_MANAGER=conda FROM ${BASE} as pip-base +RUN apt update -y \ + && DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \ + # faiss dependencies + libblas-dev \ + liblapack-dev \ + && rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*; + ENV DEFAULT_VIRTUAL_ENV=rapids FROM ${BASE} as conda-base From f66bbe3f4872b0dcf9cec208d4862b09bf7a4c56 Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Mon, 22 Jul 2024 15:43:10 -0700 Subject: [PATCH 3/4] small change to test CMake --- cpp/test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 336daa82a..3495b2344 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -153,7 +153,7 @@ if(BUILD_TESTS) ) if(BUILD_CAGRA_HNSWLIB) - ConfigureTest(NAME NEIGHBORS_HNSW_TEST PATH test/neighbors/hnsw.cu GPUS 1 PERCENT 100) + ConfigureTest(NAME NEIGHBORS_HNSW_TEST PATH neighbors/hnsw.cu GPUS 1 PERCENT 100) endif() ConfigureTest( From 9e6d311138e1cada8627687b22040d595ba69963 Mon Sep 17 00:00:00 2001 From: "Artem M. Chirkin" <9253178+achirkin@users.noreply.github.com> Date: Wed, 24 Jul 2024 00:22:01 +0200 Subject: [PATCH 4/4] CAGRA: reduce argument count in select_and_run() kernel wrappers (#227) A small change that reduces the number of arguments in one of the wrapper layers in the detail namespace of CAGRA. 
The goal is twofold: 1) Simplify the overly long signature of `select_and_run` (which has many instances) 2) Give access to all search parameters for future upgrades of the search kernel This is to simplify the integration (and review) of the persistent kernel (https://github.com/rapidsai/cuvs/pull/215). No performance or functional changes expected. Authors: - Artem M. Chirkin (https://github.com/achirkin) Approvers: - Tamas Bela Feher (https://github.com/tfeher) URL: https://github.com/rapidsai/cuvs/pull/227 --- .../detail/cagra/search_multi_cta.cuh | 7 +--- .../detail/cagra/search_multi_cta_inst.cuh | 7 +--- .../cagra/search_multi_cta_kernel-ext.cuh | 40 ++++++------------- .../cagra/search_multi_cta_kernel-inl.cuh | 19 ++++----- .../detail/cagra/search_single_cta.cuh | 7 +--- .../detail/cagra/search_single_cta_inst.cuh | 7 +--- .../cagra/search_single_cta_kernel-ext.cuh | 27 +++---------- .../cagra/search_single_cta_kernel-inl.cuh | 21 ++++------ 8 files changed, 38 insertions(+), 97 deletions(-) diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh index a6a20ca49..efbf9b56d 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta.cuh @@ -230,6 +230,7 @@ struct search : public search_plan_impl { num_queries, dev_seed_ptr, num_executed_iterations, + *this, topk, thread_block_size, result_buffer_size, @@ -237,13 +238,7 @@ struct search : public search_plan_impl { hash_bitlen, hashmap.data(), num_cta_per_query, - num_random_samplings, - rand_xor_mask, num_seeds, - itopk_size, - search_width, - min_iterations, - max_iterations, sample_filter, this->metric, stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_inst.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_inst.cuh index e28389f38..b1cfaf870 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_inst.cuh +++ 
b/cpp/src/neighbors/detail/cagra/search_multi_cta_inst.cuh @@ -32,6 +32,7 @@ namespace cuvs::neighbors::cagra::detail::multi_cta_search { const uint32_t num_queries, \ const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ uint32_t* const num_executed_iterations, \ + const search_params& ps, \ uint32_t topk, \ uint32_t block_size, \ uint32_t result_buffer_size, \ @@ -39,13 +40,7 @@ namespace cuvs::neighbors::cagra::detail::multi_cta_search { int64_t hash_bitlen, \ typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ SAMPLE_FILTER_T sample_filter, \ cuvs::distance::DistanceType metric, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh index 495ec6a4d..b00d6617c 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh @@ -27,33 +27,29 @@ namespace multi_cta_search { #ifdef CUVS_EXPLICIT_INSTANTIATE_ONLY template + unsigned DATASET_BLOCK_DIM, + typename DATASET_DESCRIPTOR_T, + typename SAMPLE_FILTER_T> void select_and_run( DATASET_DESCRIPTOR_T dataset_desc, raft::device_matrix_view graph, - typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, - typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, - const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, + typename DATASET_DESCRIPTOR_T::INDEX_T* const topk_indices_ptr, // [num_queries, topk] + typename DATASET_DESCRIPTOR_T::DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] + const typename DATASET_DESCRIPTOR_T::DATA_T* const queries_ptr, // [num_queries, dataset_dim] const uint32_t num_queries, - const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, - 
uint32_t* const num_executed_iterations, + const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] + uint32_t* const num_executed_iterations, // [num_queries,] + const search_params& ps, uint32_t topk, - uint32_t block_size, + // multi_cta_search (params struct) + uint32_t block_size, // uint32_t result_buffer_size, uint32_t smem_size, int64_t hash_bitlen, typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, uint32_t num_cta_per_query, - uint32_t num_random_samplings, - uint64_t rand_xor_mask, uint32_t num_seeds, - size_t itopk_size, - size_t search_width, - size_t min_iterations, - size_t max_iterations, SAMPLE_FILTER_T sample_filter, cuvs::distance::DistanceType metric, cudaStream_t stream) RAFT_EXPLICIT; @@ -75,6 +71,7 @@ void select_and_run( const uint32_t num_queries, \ const INDEX_T* dev_seed_ptr, \ uint32_t* const num_executed_iterations, \ + const search_params& ps, \ uint32_t topk, \ uint32_t block_size, \ uint32_t result_buffer_size, \ @@ -82,13 +79,7 @@ void select_and_run( int64_t hash_bitlen, \ INDEX_T* hashmap_ptr, \ uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ SAMPLE_FILTER_T sample_filter, \ cuvs::distance::DistanceType metric, \ cudaStream_t stream); @@ -160,6 +151,7 @@ instantiate_kernel_selection( const uint32_t num_queries, \ const INDEX_T* dev_seed_ptr, \ uint32_t* const num_executed_iterations, \ + const search_params& ps, \ uint32_t topk, \ uint32_t block_size, \ uint32_t result_buffer_size, \ @@ -167,13 +159,7 @@ instantiate_kernel_selection( int64_t hash_bitlen, \ INDEX_T* hashmap_ptr, \ uint32_t num_cta_per_query, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ SAMPLE_FILTER_T sample_filter, \ 
cuvs::distance::DistanceType metric, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index 90e699f48..4d2030c6c 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -458,6 +458,7 @@ void select_and_run( const uint32_t num_queries, const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] + const search_params& ps, uint32_t topk, // multi_cta_search (params struct) uint32_t block_size, // @@ -466,13 +467,7 @@ void select_and_run( int64_t hash_bitlen, typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, uint32_t num_cta_per_query, - uint32_t num_random_samplings, - uint64_t rand_xor_mask, uint32_t num_seeds, - size_t itopk_size, - size_t search_width, - size_t min_iterations, - size_t max_iterations, SAMPLE_FILTER_T sample_filter, cuvs::distance::DistanceType metric, cudaStream_t stream) @@ -507,16 +502,16 @@ void select_and_run( queries_ptr, graph.data_handle(), graph.extent(1), - num_random_samplings, - rand_xor_mask, + ps.num_random_samplings, + ps.rand_xor_mask, dev_seed_ptr, num_seeds, hashmap_ptr, hash_bitlen, - itopk_size, - search_width, - min_iterations, - max_iterations, + ps.itopk_size, + ps.search_width, + ps.min_iterations, + ps.max_iterations, num_executed_iterations, sample_filter, metric); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta.cuh index b8e1726e7..0a101cbfe 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta.cuh @@ -233,6 +233,7 @@ struct search : search_plan_impl { num_queries, dev_seed_ptr, num_executed_iterations, + *this, topk, num_itopk_candidates, static_cast(thread_block_size), @@ -241,13 +242,7 @@ 
struct search : search_plan_impl { hashmap.data(), small_hash_bitlen, small_hash_reset_interval, - num_random_samplings, - rand_xor_mask, num_seeds, - itopk_size, - search_width, - min_iterations, - max_iterations, sample_filter, this->metric, stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_inst.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_inst.cuh index b3d75e923..a4581d15e 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_inst.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_inst.cuh @@ -32,6 +32,7 @@ namespace cuvs::neighbors::cagra::detail::single_cta_search { const uint32_t num_queries, \ const typename DATASET_DESC_T::INDEX_T* dev_seed_ptr, \ uint32_t* const num_executed_iterations, \ + const search_params& ps, \ uint32_t topk, \ uint32_t num_itopk_candidates, \ uint32_t block_size, \ @@ -40,13 +41,7 @@ namespace cuvs::neighbors::cagra::detail::single_cta_search { typename DATASET_DESC_T::INDEX_T* hashmap_ptr, \ size_t small_hash_bitlen, \ size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ SAMPLE_FILTER_T sample_filter, \ cuvs::distance::DistanceType metric, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh index dfcdec28f..79f6e153c 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh @@ -26,10 +26,10 @@ namespace single_cta_search { #ifdef CUVS_EXPLICIT_INSTANTIATE_ONLY template -void select_and_run( // raft::resources const& res, +void select_and_run( DATASET_DESCRIPTOR_T dataset_desc, raft::device_matrix_view graph, @@ -39,21 +39,16 @@ void select_and_run( // raft::resources const& res, const uint32_t num_queries, const 
typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] + const search_params& ps, uint32_t topk, uint32_t num_itopk_candidates, - uint32_t block_size, + uint32_t block_size, // uint32_t smem_size, int64_t hash_bitlen, typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, size_t small_hash_bitlen, size_t small_hash_reset_interval, - uint32_t num_random_samplings, - uint64_t rand_xor_mask, uint32_t num_seeds, - size_t itopk_size, - size_t search_width, - size_t min_iterations, - size_t max_iterations, SAMPLE_FILTER_T sample_filter, cuvs::distance::DistanceType metric, cudaStream_t stream) RAFT_EXPLICIT; @@ -76,6 +71,7 @@ void select_and_run( // raft::resources const& res, const uint32_t num_queries, \ const INDEX_T* dev_seed_ptr, \ uint32_t* const num_executed_iterations, \ + const search_params& ps, \ uint32_t topk, \ uint32_t num_itopk_candidates, \ uint32_t block_size, \ @@ -84,13 +80,7 @@ void select_and_run( // raft::resources const& res, INDEX_T* hashmap_ptr, \ size_t small_hash_bitlen, \ size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t max_iterations, \ SAMPLE_FILTER_T sample_filter, \ cuvs::distance::DistanceType metric, \ cudaStream_t stream); @@ -162,6 +152,7 @@ instantiate_single_cta_select_and_run( const uint32_t num_queries, \ const INDEX_T* dev_seed_ptr, \ uint32_t* const num_executed_iterations, \ + const search_params& ps, \ uint32_t topk, \ uint32_t num_itopk_candidates, \ uint32_t block_size, \ @@ -170,13 +161,7 @@ instantiate_single_cta_select_and_run( INDEX_T* hashmap_ptr, \ size_t small_hash_bitlen, \ size_t small_hash_reset_interval, \ - uint32_t num_random_samplings, \ - uint64_t rand_xor_mask, \ uint32_t num_seeds, \ - size_t itopk_size, \ - size_t search_width, \ - size_t min_iterations, \ - size_t 
max_iterations, \ SAMPLE_FILTER_T sample_filter, \ cuvs::distance::DistanceType metric, \ cudaStream_t stream); diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index e58167432..a101cdc1f 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -919,6 +919,7 @@ void select_and_run( const uint32_t num_queries, const typename DATASET_DESCRIPTOR_T::INDEX_T* dev_seed_ptr, // [num_queries, num_seeds] uint32_t* const num_executed_iterations, // [num_queries,] + const search_params& ps, uint32_t topk, uint32_t num_itopk_candidates, uint32_t block_size, // @@ -927,20 +928,14 @@ void select_and_run( typename DATASET_DESCRIPTOR_T::INDEX_T* hashmap_ptr, size_t small_hash_bitlen, size_t small_hash_reset_interval, - uint32_t num_random_samplings, - uint64_t rand_xor_mask, uint32_t num_seeds, - size_t itopk_size, - size_t search_width, - size_t min_iterations, - size_t max_iterations, SAMPLE_FILTER_T sample_filter, cuvs::distance::DistanceType metric, cudaStream_t stream) { auto kernel = search_kernel_config:: - choose_itopk_and_mx_candidates(itopk_size, num_itopk_candidates, block_size); + choose_itopk_and_mx_candidates(ps.itopk_size, num_itopk_candidates, block_size); RAFT_CUDA_TRY(cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size + DATASET_DESCRIPTOR_T::smem_buffer_size_in_byte)); @@ -955,15 +950,15 @@ void select_and_run( queries_ptr, graph.data_handle(), graph.extent(1), - num_random_samplings, - rand_xor_mask, + ps.num_random_samplings, + ps.rand_xor_mask, dev_seed_ptr, num_seeds, hashmap_ptr, - itopk_size, - search_width, - min_iterations, - max_iterations, + ps.itopk_size, + ps.search_width, + ps.min_iterations, + ps.max_iterations, num_executed_iterations, hash_bitlen, small_hash_bitlen,