Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-25.02' into cccl-2.7.0-rc2
Browse files Browse the repository at this point in the history
  • Loading branch information
bdice committed Dec 3, 2024
2 parents 75fd152 + 76e057d commit 8e3c5c4
Show file tree
Hide file tree
Showing 48 changed files with 1,783 additions and 441 deletions.
1 change: 1 addition & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ RUN apt update -y \
&& rm -rf /tmp/* /var/tmp/* /var/cache/apt/* /var/lib/apt/lists/*;

ENV DEFAULT_VIRTUAL_ENV=rapids
ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true

FROM ${BASE} as conda-base

Expand Down
1 change: 1 addition & 0 deletions .github/copy-pr-bot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/

enabled: true
auto_sync_draft: false
26 changes: 26 additions & 0 deletions .github/workflows/trigger-breaking-change-alert.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Notifies about pull requests carrying the 'breaking' label by delegating to a
# reusable workflow hosted in rapidsai/shared-workflows.
name: Trigger Breaking Change Notifications

on:
  # Re-evaluated whenever a PR is closed/reopened or its label set changes.
  pull_request_target:
    types:
      - closed
      - reopened
      - labeled
      - unlabeled

jobs:
  trigger-notifier:
    # Only run for PRs that currently have the 'breaking' label.
    if: contains(github.event.pull_request.labels.*.name, 'breaking')
    # Forward this repository's secrets to the called workflow.
    secrets: inherit
    # NOTE(review): the ref after '@' was mangled in extraction; reconstructed as
    # branch-25.02 to match the upstream branch of this merge -- confirm.
    uses: rapidsai/shared-workflows/.github/workflows/breaking-change-alert.yaml@branch-25.02
    with:
      sender_login: ${{ github.event.sender.login }}
      sender_avatar: ${{ github.event.sender.avatar_url }}
      repo: ${{ github.repository }}
      pr_number: ${{ github.event.pull_request.number }}
      pr_title: "${{ github.event.pull_request.title }}"
      # Falls back to a placeholder when the PR description is empty.
      pr_body: "${{ github.event.pull_request.body || '_Empty PR description_' }}"
      pr_base_ref: ${{ github.event.pull_request.base.ref }}
      pr_author: ${{ github.event.pull_request.user.login }}
      event_action: ${{ github.event.action }}
      pr_merged: ${{ github.event.pull_request.merged }}
1 change: 0 additions & 1 deletion conda/environments/bench_ann_cuda-118_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ dependencies:
- gcc_linux-aarch64=11.*
- glog>=0.6.0
- h5py>=3.8.0
- hnswlib=0.6.2
- libcublas-dev=11.11.3.6
- libcublas=11.11.3.6
- libcurand-dev=10.3.0.86
Expand Down
1 change: 0 additions & 1 deletion conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ dependencies:
- gcc_linux-64=11.*
- glog>=0.6.0
- h5py>=3.8.0
- hnswlib=0.6.2
- libcublas-dev=11.11.3.6
- libcublas=11.11.3.6
- libcurand-dev=10.3.0.86
Expand Down
1 change: 0 additions & 1 deletion conda/environments/bench_ann_cuda-125_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ dependencies:
- gcc_linux-aarch64=11.*
- glog>=0.6.0
- h5py>=3.8.0
- hnswlib=0.6.2
- libcublas-dev
- libcurand-dev
- libcusolver-dev
Expand Down
1 change: 0 additions & 1 deletion conda/environments/bench_ann_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ dependencies:
- gcc_linux-64=11.*
- glog>=0.6.0
- h5py>=3.8.0
- hnswlib=0.6.2
- libcublas-dev
- libcurand-dev
- libcusolver-dev
Expand Down
10 changes: 6 additions & 4 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -480,12 +480,13 @@ if(BUILD_SHARED_LIBS)
"$<$<COMPILE_LANGUAGE:CUDA>:${CUVS_CUDA_FLAGS}>"
)
target_link_libraries(
cuvs_objs PUBLIC raft::raft rmm::rmm ${CUVS_CTK_MATH_DEPENDENCIES}
cuvs_objs PUBLIC raft::raft rmm::rmm rmm::rmm_logger ${CUVS_CTK_MATH_DEPENDENCIES}
$<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
PRIVATE rmm::rmm_logger_impl
)

add_library(cuvs SHARED $<TARGET_OBJECTS:cuvs_objs>)
add_library(cuvs_static STATIC $<TARGET_OBJECTS:cuvs_objs>)
add_library(cuvs SHARED $<FILTER:$<TARGET_OBJECTS:cuvs_objs>,EXCLUDE,rmm.*logger>)
add_library(cuvs_static STATIC $<FILTER:$<TARGET_OBJECTS:cuvs_objs>,EXCLUDE,rmm.*logger>)

target_compile_options(
cuvs INTERFACE $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:--expt-extended-lambda
Expand Down Expand Up @@ -577,6 +578,7 @@ if(BUILD_SHARED_LIBS)

# Optional hnswlib support: hnswlib is linked privately into the object library,
# while the CUVS_BUILD_CAGRA_HNSWLIB definition is published (PUBLIC) on both the
# shared library target and the object library target.
if(BUILD_CAGRA_HNSWLIB)
  target_link_libraries(
    cuvs_objs
    PRIVATE hnswlib::hnswlib
  )
  target_compile_definitions(
    cuvs
    PUBLIC CUVS_BUILD_CAGRA_HNSWLIB
  )
  target_compile_definitions(
    cuvs_objs
    PUBLIC CUVS_BUILD_CAGRA_HNSWLIB
  )
endif()

Expand Down Expand Up @@ -696,7 +698,7 @@ target_compile_definitions(cuvs::cuvs INTERFACE $<$<BOOL:${CUVS_NVTX}>:NVTX_ENAB
target_link_libraries(
cuvs_c
PUBLIC cuvs::cuvs ${CUVS_CTK_MATH_DEPENDENCIES}
PRIVATE raft::raft
PRIVATE raft::raft rmm::rmm_logger_impl
)

# ensure CUDA symbols aren't relocated to the middle of the debug build binaries
Expand Down
12 changes: 8 additions & 4 deletions cpp/bench/ann/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ function(ConfigureAnnBench)
$<$<BOOL:${GPU_BUILD}>:CUDA::cudart_static>
$<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
$<TARGET_NAME_IF_EXISTS:conda_env>
$<TARGET_NAME_IF_EXISTS:cuvs_bench_rmm_logger>
)

set_target_properties(
Expand Down Expand Up @@ -174,6 +175,11 @@ function(ConfigureAnnBench)
add_dependencies(CUVS_ANN_BENCH_ALL ${BENCH_NAME})
endfunction()

# When the GPU FAISS benchmarks are enabled, define an OBJECT library whose only
# job is to carry rmm::rmm_logger_impl. ConfigureAnnBench links it through
# $<TARGET_NAME_IF_EXISTS:cuvs_bench_rmm_logger>, so benchmarks pick it up only
# when this target has been created.
if(CUVS_FAISS_ENABLE_GPU)
  add_library(cuvs_bench_rmm_logger OBJECT)
  target_link_libraries(
    cuvs_bench_rmm_logger
    PRIVATE rmm::rmm_logger_impl
  )
endif()

# ##################################################################################################
# * Configure benchmark targets -------------------------------------------------------------

Expand Down Expand Up @@ -225,9 +231,7 @@ if(CUVS_ANN_BENCH_USE_CUVS_CAGRA)
endif()

if(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB)
ConfigureAnnBench(
NAME CUVS_CAGRA_HNSWLIB PATH src/cuvs/cuvs_cagra_hnswlib.cu LINKS cuvs hnswlib::hnswlib
)
ConfigureAnnBench(NAME CUVS_CAGRA_HNSWLIB PATH src/cuvs/cuvs_cagra_hnswlib.cu LINKS cuvs)
endif()

if(CUVS_ANN_BENCH_USE_CUVS_MG)
Expand Down Expand Up @@ -300,7 +304,7 @@ if(CUVS_ANN_BENCH_SINGLE_EXE)
target_link_libraries(
ANN_BENCH
PRIVATE raft::raft nlohmann_json::nlohmann_json benchmark::benchmark dl fmt::fmt-header-only
spdlog::spdlog_header_only $<$<TARGET_EXISTS:CUDA::nvtx3>:CUDA::nvtx3>
spdlog::spdlog_header_only $<$<TARGET_EXISTS:CUDA::nvtx3>:CUDA::nvtx3> rmm::rmm_logger_impl
)
set_target_properties(
ANN_BENCH
Expand Down
34 changes: 29 additions & 5 deletions cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,35 @@

namespace cuvs::bench {

/**
 * Fill the HNSW index parameters of a cuvs_cagra_hnswlib build_param from JSON.
 *
 * Recognized keys (all optional; a missing key leaves the field untouched):
 *   - "hierarchy":       "none" or "cpu"; any other value raises via THROW
 *   - "ef_construction": assigned to hnsw_index_params.ef_construction
 *   - "num_threads":     assigned to hnsw_index_params.num_threads
 *
 * @param conf   JSON object holding the build configuration
 * @param param  build parameters to update in place
 */
template <typename T, typename IdxT>
void parse_build_param(const nlohmann::json& conf,
                       typename cuvs::bench::cuvs_cagra_hnswlib<T, IdxT>::build_param& param)
{
  auto& hnsw_params = param.hnsw_index_params;

  if (conf.contains("hierarchy")) {
    const auto& hierarchy = conf.at("hierarchy");
    if (hierarchy == "none") {
      hnsw_params.hierarchy = cuvs::neighbors::hnsw::HnswHierarchy::NONE;
    } else if (hierarchy == "cpu") {
      hnsw_params.hierarchy = cuvs::neighbors::hnsw::HnswHierarchy::CPU;
    } else {
      THROW("Invalid value for hierarchy: %s", hierarchy.get<std::string>().c_str());
    }
  }

  if (auto ef_it = conf.find("ef_construction"); ef_it != conf.end()) {
    hnsw_params.ef_construction = *ef_it;
  }

  if (auto threads_it = conf.find("num_threads"); threads_it != conf.end()) {
    hnsw_params.num_threads = *threads_it;
  }
}

template <typename T, typename IdxT>
void parse_search_param(const nlohmann::json& conf,
typename cuvs::bench::cuvs_cagra_hnswlib<T, IdxT>::search_param& param)
{
param.ef = conf.at("ef");
if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); }
param.hnsw_search_param.ef = conf.at("ef");
if (conf.contains("num_threads")) {
param.hnsw_search_param.num_threads = conf.at("num_threads");
}
}

template <typename T>
Expand All @@ -43,9 +66,10 @@ auto create_algo(const std::string& algo_name,

if constexpr (std::is_same_v<T, float> or std::is_same_v<T, std::uint8_t>) {
if (algo_name == "raft_cagra_hnswlib" || algo_name == "cuvs_cagra_hnswlib") {
typename cuvs::bench::cuvs_cagra_hnswlib<T, uint32_t>::build_param param;
parse_build_param<T, uint32_t>(conf, param);
a = std::make_unique<cuvs::bench::cuvs_cagra_hnswlib<T, uint32_t>>(metric, dim, param);
typename cuvs::bench::cuvs_cagra_hnswlib<T, uint32_t>::build_param bparam;
::parse_build_param<T, uint32_t>(conf, bparam.cagra_build_param);
parse_build_param<T, uint32_t>(conf, bparam);
a = std::make_unique<cuvs::bench::cuvs_cagra_hnswlib<T, uint32_t>>(metric, dim, bparam);
}
}

Expand Down
57 changes: 45 additions & 12 deletions cpp/bench/ann/src/cuvs/cuvs_cagra_hnswlib_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
*/
#pragma once

#include "../hnswlib/hnswlib_wrapper.h"
#include "cuvs_cagra_wrapper.h"
#include <cuvs/neighbors/hnsw.hpp>

#include <memory>

Expand All @@ -26,14 +26,20 @@ template <typename T, typename IdxT>
class cuvs_cagra_hnswlib : public algo<T>, public algo_gpu {
public:
using search_param_base = typename algo<T>::search_param;
using build_param = typename cuvs_cagra<T, IdxT>::build_param;
using search_param = typename hnsw_lib<T>::search_param;

struct build_param {
typename cuvs_cagra<T, IdxT>::build_param cagra_build_param;
cuvs::neighbors::hnsw::index_params hnsw_index_params;
};

struct search_param : public search_param_base {
cuvs::neighbors::hnsw::search_params hnsw_search_param;
};

cuvs_cagra_hnswlib(Metric metric, int dim, const build_param& param, int concurrent_searches = 1)
: algo<T>(metric, dim),
cagra_build_{metric, dim, param, concurrent_searches},
// hnsw_lib param values don't matter since we don't build with hnsw_lib
hnswlib_search_{metric, dim, typename hnsw_lib<T>::build_param{50, 100}}
build_param_{param},
cagra_build_{metric, dim, param.cagra_build_param, concurrent_searches}
{
}

Expand Down Expand Up @@ -69,40 +75,67 @@ class cuvs_cagra_hnswlib : public algo<T>, public algo_gpu {
}

private:
raft::resources handle_{};
build_param build_param_;
search_param search_param_;
cuvs_cagra<T, IdxT> cagra_build_;
hnsw_lib<T> hnswlib_search_;
std::shared_ptr<cuvs::neighbors::hnsw::index<T>> hnsw_index_;
};

/**
 * Build the benchmark index in two steps: first build the CAGRA index through
 * the wrapped cuvs_cagra object, then pass that index (together with a view of
 * the dataset) to cuvs::neighbors::hnsw::from_cagra and keep the result.
 *
 * @param dataset  pointer to nrow * dim_ elements, wrapped as a host matrix view
 *                 (presumably host-resident -- confirm against callers)
 * @param nrow     number of rows in the dataset
 */
template <typename T, typename IdxT>
void cuvs_cagra_hnswlib<T, IdxT>::build(const T* dataset, size_t nrow)
{
  using host_view_t = raft::host_matrix_view<const T, int64_t>;

  // Step 1: CAGRA build; the resulting index stays owned by cagra_build_.
  cagra_build_.build(dataset, nrow);
  const auto* built_cagra = cagra_build_.get_index();

  // Step 2: wrap the raw pointer as an nrow x dim_ view; from_cagra takes it as an optional.
  std::optional<host_view_t> dataset_view(
    raft::make_host_matrix_view<const T, int64_t>(dataset, nrow, this->dim_));

  hnsw_index_ = cuvs::neighbors::hnsw::from_cagra(
    handle_, build_param_.hnsw_index_params, *built_cagra, dataset_view);
}

template <typename T, typename IdxT>
void cuvs_cagra_hnswlib<T, IdxT>::set_search_param(const search_param_base& param_)
{
hnswlib_search_.set_search_param(param_);
search_param_ = dynamic_cast<const search_param&>(param_);
}

template <typename T, typename IdxT>
void cuvs_cagra_hnswlib<T, IdxT>::save(const std::string& file) const
{
cagra_build_.save_to_hnswlib(file);
cuvs::neighbors::hnsw::serialize(handle_, file, *(hnsw_index_.get()));
}

template <typename T, typename IdxT>
void cuvs_cagra_hnswlib<T, IdxT>::load(const std::string& file)
{
hnswlib_search_.load(file);
hnswlib_search_.set_base_layer_only();
cuvs::neighbors::hnsw::index<T>* idx = nullptr;
cuvs::neighbors::hnsw::deserialize(handle_,
build_param_.hnsw_index_params,
file,
this->dim_,
parse_metric_type(this->metric_),
&idx);
hnsw_index_ = std::shared_ptr<cuvs::neighbors::hnsw::index<T>>(idx);
}

template <typename T, typename IdxT>
void cuvs_cagra_hnswlib<T, IdxT>::search(
const T* queries, int batch_size, int k, algo_base::index_type* neighbors, float* distances) const
{
hnswlib_search_.search(queries, batch_size, k, neighbors, distances);
// Only Latency mode is supported for now
auto queries_view =
raft::make_host_matrix_view<const T, int64_t>(queries, batch_size, this->dim_);
auto neighbors_view = raft::make_host_matrix_view<uint64_t, int64_t>(
reinterpret_cast<uint64_t*>(neighbors), batch_size, k);
auto distances_view = raft::make_host_matrix_view<float, int64_t>(distances, batch_size, k);

cuvs::neighbors::hnsw::search(handle_,
search_param_.hnsw_search_param,
*(hnsw_index_.get()),
queries_view,
neighbors_view,
distances_view);
}

} // namespace cuvs::bench
2 changes: 2 additions & 0 deletions cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ class cuvs_cagra : public algo<T>, public algo_gpu {
void save_to_hnswlib(const std::string& file) const;
std::unique_ptr<algo<T>> copy() override;

auto get_index() const -> const cuvs::neighbors::cagra::index<T, IdxT>* { return index_.get(); }

private:
// handle_ must go first to make sure it dies last and all memory allocated in pool
configured_raft_resources handle_{};
Expand Down
4 changes: 2 additions & 2 deletions cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@ void parse_build_param(const nlohmann::json& conf,
{
param.ef_construction = conf.at("efConstruction");
param.m = conf.at("M");
if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); }
if (conf.contains("num_threads")) { param.num_threads = conf.at("num_threads"); }
}

template <typename T>
void parse_search_param(const nlohmann::json& conf,
typename cuvs::bench::hnsw_lib<T>::search_param& param)
{
param.ef = conf.at("ef");
if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); }
if (conf.contains("num_threads")) { param.num_threads = conf.at("num_threads"); }
}

template <typename T, template <typename> class Algo>
Expand Down
8 changes: 6 additions & 2 deletions cpp/cmake/modules/ConfigureCUDA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,12 @@ endif()
# Be very strict when compiling with GCC as host compiler (and thus more lenient when compiling with
# clang)
if(CMAKE_COMPILER_IS_GNUCXX)
list(APPEND CUVS_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations)
list(APPEND CUVS_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations)
list(APPEND CUVS_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations
-Wno-reorder
)
list(APPEND CUVS_CUDA_FLAGS
-Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations,-Wno-reorder
)

# set warnings as errors
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0)
Expand Down
Loading

0 comments on commit 8e3c5c4

Please sign in to comment.