Skip to content

Commit

Permalink
Merge branch 'branch-24.04' into fea/remove-supports_streams
Browse files Browse the repository at this point in the history
  • Loading branch information
harrism committed Jan 29, 2024
2 parents 0ed61fd + d4ae271 commit e944280
Show file tree
Hide file tree
Showing 87 changed files with 4,325 additions and 747 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ repos:
types_or: [python, cython]
additional_dependencies: ["flake8-force"]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v0.971'
rev: 'v1.3.0'
hooks:
- id: mypy
additional_dependencies: [types-cachetools]
Expand Down
11 changes: 11 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ option(BUILD_SHARED_LIBS "Build raft shared libraries" ON)
option(BUILD_TESTS "Build raft unit-tests" ON)
option(BUILD_PRIMS_BENCH "Build raft C++ benchmark tests" OFF)
option(BUILD_ANN_BENCH "Build raft ann benchmarks" OFF)
option(BUILD_CAGRA_HNSWLIB "Build CAGRA+hnswlib interface" ON)
option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF)
option(CUDA_ENABLE_LINEINFO
"Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF
Expand Down Expand Up @@ -195,6 +196,10 @@ if(BUILD_PRIMS_BENCH OR BUILD_ANN_BENCH)
rapids_cpm_gbench()
endif()

if(BUILD_CAGRA_HNSWLIB)
include(cmake/thirdparty/get_hnswlib.cmake)
endif()

# ##################################################################################################
# * raft ---------------------------------------------------------------------
add_library(raft INTERFACE)
Expand All @@ -203,6 +208,9 @@ add_library(raft::raft ALIAS raft)
target_include_directories(
raft INTERFACE "$<BUILD_INTERFACE:${RAFT_SOURCE_DIR}/include>" "$<INSTALL_INTERFACE:include>"
)
if(BUILD_CAGRA_HNSWLIB)
target_link_libraries(raft INTERFACE hnswlib::hnswlib)
endif()

if(NOT BUILD_CPU_ONLY)
# Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target.
Expand Down Expand Up @@ -424,6 +432,8 @@ if(RAFT_COMPILE_LIBRARY)
src/raft_runtime/neighbors/cagra_build.cu
src/raft_runtime/neighbors/cagra_search.cu
src/raft_runtime/neighbors/cagra_serialize.cu
src/raft_runtime/neighbors/eps_neighborhood.cu
$<$<BOOL:${BUILD_CAGRA_HNSWLIB}>:src/raft_runtime/neighbors/hnsw.cpp>
src/raft_runtime/neighbors/ivf_flat_build.cu
src/raft_runtime/neighbors/ivf_flat_search.cu
src/raft_runtime/neighbors/ivf_flat_serialize.cu
Expand All @@ -443,6 +453,7 @@ if(RAFT_COMPILE_LIBRARY)
src/raft_runtime/random/rmat_rectangular_generator_int64_float.cu
src/raft_runtime/random/rmat_rectangular_generator_int_double.cu
src/raft_runtime/random/rmat_rectangular_generator_int_float.cu
src/spatial/knn/detail/ball_cover/registers_eps_pass_euclidean.cu
src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu
src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu
src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu
Expand Down
16 changes: 3 additions & 13 deletions cpp/bench/ann/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,7 @@ endfunction()

if(RAFT_ANN_BENCH_USE_HNSWLIB)
ConfigureAnnBench(
NAME HNSWLIB PATH bench/ann/src/hnswlib/hnswlib_benchmark.cpp
LINKS
hnswlib::hnswlib
NAME HNSWLIB PATH bench/ann/src/hnswlib/hnswlib_benchmark.cpp LINKS hnswlib::hnswlib
)

endif()
Expand Down Expand Up @@ -276,12 +274,7 @@ endif()

if(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB)
ConfigureAnnBench(
NAME
RAFT_CAGRA_HNSWLIB
PATH
bench/ann/src/raft/raft_cagra_hnswlib.cu
LINKS
raft::compiled
NAME RAFT_CAGRA_HNSWLIB PATH bench/ann/src/raft/raft_cagra_hnswlib.cu LINKS raft::compiled
hnswlib::hnswlib
)
endif()
Expand Down Expand Up @@ -336,10 +329,7 @@ endif()

if(RAFT_ANN_BENCH_USE_GGNN)
include(cmake/thirdparty/get_glog.cmake)
ConfigureAnnBench(
NAME GGNN PATH bench/ann/src/ggnn/ggnn_benchmark.cu
LINKS glog::glog ggnn::ggnn
)
ConfigureAnnBench(NAME GGNN PATH bench/ann/src/ggnn/ggnn_benchmark.cu LINKS glog::glog ggnn::ggnn)
endif()

# ##################################################################################################
Expand Down
18 changes: 8 additions & 10 deletions cpp/bench/ann/src/common/benchmark.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -287,11 +287,11 @@ void bench_search(::benchmark::State& state,
std::make_shared<buf<std::size_t>>(current_algo_props->query_memory_type, k * query_set_size);

cuda_timer gpu_timer;
auto start = std::chrono::high_resolution_clock::now();
{
nvtx_case nvtx{state.name()};

auto algo = dynamic_cast<ANN<T>*>(current_algo.get())->copy();
auto algo = dynamic_cast<ANN<T>*>(current_algo.get())->copy();
auto start = std::chrono::high_resolution_clock::now();
for (auto _ : state) {
[[maybe_unused]] auto ntx_lap = nvtx.lap();
[[maybe_unused]] auto gpu_lap = gpu_timer.lap();
Expand All @@ -314,17 +314,15 @@ void bench_search(::benchmark::State& state,

queries_processed += n_queries;
}
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count();
if (state.thread_index() == 0) { state.counters.insert({{"end_to_end", duration}}); }
state.counters.insert({"Latency", {duration, benchmark::Counter::kAvgIterations}});
}
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::duration<double>>(end - start).count();
if (state.thread_index() == 0) { state.counters.insert({{"end_to_end", duration}}); }
state.counters.insert(
{"Latency", {duration / double(state.iterations()), benchmark::Counter::kAvgThreads}});

state.SetItemsProcessed(queries_processed);
if (cudart.found()) {
double gpu_time_per_iteration = gpu_timer.total_time() / (double)state.iterations();
state.counters.insert({"GPU", {gpu_time_per_iteration, benchmark::Counter::kAvgThreads}});
state.counters.insert({"GPU", {gpu_timer.total_time(), benchmark::Counter::kAvgIterations}});
}

// This will be the total number of queries across all threads
Expand Down
25 changes: 1 addition & 24 deletions cpp/bench/ann/src/common/cuda_huge_page_resource.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -41,13 +41,6 @@ class cuda_huge_page_resource final : public rmm::mr::device_memory_resource {
cuda_huge_page_resource& operator=(cuda_huge_page_resource const&) = default;
cuda_huge_page_resource& operator=(cuda_huge_page_resource&&) = default;

/**
* @brief Query whether the resource supports the get_mem_info API.
*
* @return true
*/
[[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }

private:
/**
* @brief Allocates memory of size at least `bytes` using cudaMalloc.
Expand Down Expand Up @@ -104,21 +97,5 @@ class cuda_huge_page_resource final : public rmm::mr::device_memory_resource {
{
return dynamic_cast<cuda_huge_page_resource const*>(&other) != nullptr;
}

/**
* @brief Get free and available memory for memory resource
*
* @throws `rmm::cuda_error` if unable to retrieve memory info.
*
* @return std::pair contaiing free_size and total_size of memory
*/
[[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
rmm::cuda_stream_view) const override
{
std::size_t free_size{};
std::size_t total_size{};
RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
return std::make_pair(free_size, total_size);
}
};
} // namespace raft::mr
25 changes: 1 addition & 24 deletions cpp/bench/ann/src/common/cuda_pinned_resource.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -45,13 +45,6 @@ class cuda_pinned_resource final : public rmm::mr::device_memory_resource {
cuda_pinned_resource& operator=(cuda_pinned_resource const&) = default;
cuda_pinned_resource& operator=(cuda_pinned_resource&&) = default;

/**
* @brief Query whether the resource supports the get_mem_info API.
*
* @return true
*/
[[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }

private:
/**
* @brief Allocates memory of size at least `bytes` using cudaMalloc.
Expand Down Expand Up @@ -102,21 +95,5 @@ class cuda_pinned_resource final : public rmm::mr::device_memory_resource {
{
return dynamic_cast<cuda_pinned_resource const*>(&other) != nullptr;
}

/**
* @brief Get free and available memory for memory resource
*
* @throws `rmm::cuda_error` if unable to retrieve memory info.
*
* @return std::pair contaiing free_size and total_size of memory
*/
[[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
rmm::cuda_stream_view) const override
{
std::size_t free_size{};
std::size_t total_size{};
RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
return std::make_pair(free_size, total_size);
}
};
} // namespace raft::mr
9 changes: 7 additions & 2 deletions cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ struct hnsw_dist_t<uint8_t> {
using type = int;
};

template <>
struct hnsw_dist_t<int8_t> {
using type = int;
};

template <typename T>
class HnswLib : public ANN<T> {
public:
Expand Down Expand Up @@ -135,7 +140,7 @@ void HnswLib<T>::build(const T* dataset, size_t nrow, cudaStream_t)
space_ = std::make_shared<hnswlib::L2Space>(dim_);
}
} else if constexpr (std::is_same_v<T, uint8_t>) {
space_ = std::make_shared<hnswlib::L2SpaceI>(dim_);
space_ = std::make_shared<hnswlib::L2SpaceI<T>>(dim_);
}

appr_alg_ = std::make_shared<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>>(
Expand Down Expand Up @@ -205,7 +210,7 @@ void HnswLib<T>::load(const std::string& path_to_index)
space_ = std::make_shared<hnswlib::L2Space>(dim_);
}
} else if constexpr (std::is_same_v<T, uint8_t>) {
space_ = std::make_shared<hnswlib::L2SpaceI>(dim_);
space_ = std::make_shared<hnswlib::L2SpaceI<T>>(dim_);
}

appr_alg_ = std::make_shared<hnswlib::HierarchicalNSW<typename hnsw_dist_t<T>::type>>(
Expand Down
5 changes: 4 additions & 1 deletion cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -87,6 +87,9 @@ void parse_build_param(const nlohmann::json& conf,
"', should be either 'cluster' or 'subspace'");
}
}
if (conf.contains("max_train_points_per_pq_code")) {
param.max_train_points_per_pq_code = conf.at("max_train_points_per_pq_code");
}
}

template <typename T, typename IdxT>
Expand Down
57 changes: 57 additions & 0 deletions cpp/cmake/patches/hnswlib.diff
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,63 @@
}
}
}
diff --git a/hnswlib/space_l2.h b/hnswlib/space_l2.h
index 4413537..c3240f3 100644
--- a/hnswlib/space_l2.h
+++ b/hnswlib/space_l2.h
@@ -252,13 +252,14 @@ namespace hnswlib {
~L2Space() {}
};

+ template <typename T>
static int
L2SqrI4x(const void *__restrict pVect1, const void *__restrict pVect2, const void *__restrict qty_ptr) {

size_t qty = *((size_t *) qty_ptr);
int res = 0;
- unsigned char *a = (unsigned char *) pVect1;
- unsigned char *b = (unsigned char *) pVect2;
+ T *a = (T *) pVect1;
+ T *b = (T *) pVect2;

qty = qty >> 2;
for (size_t i = 0; i < qty; i++) {
@@ -279,11 +280,12 @@ namespace hnswlib {
return (res);
}

+ template <typename T>
static int L2SqrI(const void* __restrict pVect1, const void* __restrict pVect2, const void* __restrict qty_ptr) {
size_t qty = *((size_t*)qty_ptr);
int res = 0;
- unsigned char* a = (unsigned char*)pVect1;
- unsigned char* b = (unsigned char*)pVect2;
+ T* a = (T*)pVect1;
+ T* b = (T*)pVect2;

for(size_t i = 0; i < qty; i++)
{
@@ -294,6 +296,7 @@ namespace hnswlib {
return (res);
}

+ template <typename T>
class L2SpaceI : public SpaceInterface<int> {

DISTFUNC<int> fstdistfunc_;
@@ -302,10 +305,10 @@ namespace hnswlib {
public:
L2SpaceI(size_t dim) {
if(dim % 4 == 0) {
- fstdistfunc_ = L2SqrI4x;
+ fstdistfunc_ = L2SqrI4x<T>;
}
else {
- fstdistfunc_ = L2SqrI;
+ fstdistfunc_ = L2SqrI<T>;
}
dim_ = dim;
data_size_ = dim * sizeof(unsigned char);
diff --git a/hnswlib/visited_list_pool.h b/hnswlib/visited_list_pool.h
index 5e1a4a5..4195ebd 100644
--- a/hnswlib/visited_list_pool.h
Expand Down
6 changes: 5 additions & 1 deletion cpp/cmake/thirdparty/get_hnswlib.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ function(find_and_configure_hnswlib)
rapids_cpm_find(
hnswlib ${PKG_VERSION}
GLOBAL_TARGETS hnswlib::hnswlib
BUILD_EXPORT_SET raft-exports
INSTALL_EXPORT_SET raft-exports
CPM_ARGS
GIT_REPOSITORY ${PKG_REPOSITORY}
GIT_TAG ${PKG_PINNED_TAG}
Expand All @@ -51,11 +53,13 @@ function(find_and_configure_hnswlib)
# write export rules
rapids_export(
BUILD hnswlib
VERSION ${PKG_VERSION}
EXPORT_SET hnswlib-exports
GLOBAL_TARGETS hnswlib
NAMESPACE hnswlib::)
rapids_export(
INSTALL hnswlib
VERSION ${PKG_VERSION}
EXPORT_SET hnswlib-exports
GLOBAL_TARGETS hnswlib
NAMESPACE hnswlib::)
Expand All @@ -74,5 +78,5 @@ endif()
find_and_configure_hnswlib(VERSION 0.6.2
REPOSITORY ${RAFT_HNSWLIB_GIT_REPOSITORY}
PINNED_TAG ${RAFT_HNSWLIB_GIT_TAG}
EXCLUDE_FROM_ALL ON
EXCLUDE_FROM_ALL OFF
)
Loading

0 comments on commit e944280

Please sign in to comment.