Skip to content

Commit

Permalink
Port NN-descent algorithm to use in cagra::build() (#1748)
Browse files Browse the repository at this point in the history
- [x] Build Time comparison of end-to-end RAFT CAGRA+nn-descent against cuANN CAGRA+nn-descent
- [x] Recall comparison of RAFT nn-descent against cuANN nn-descent
- [x] RAFT types/APIs in ported code from cuANN
- [x] End-to-end CAGRA+nn-descent tests
- [x] Docs and code examples
- [x] Add `graph_build_algo` build param to CAGRA ann-bench for benchmarking builds with IVF-PQ or NN-Descent
- [x] All-neighbors knn graph nn-descent tests against brute-force knn

Recall Value comparison of RAFT nn-descent vs cuANN nn-descent
```
Dataset	graph_degree, intermediate_degree	Iterations	cuANN Recall	RAFT Recall
sift-128-euclidean	(64, 98)	            15	        0.9265991875	0.9471194688
sift-128-euclidean	(64, 98)	            50	        0.9831858594	0.9783938594
deep-image-96-inner	(64, 98)	            50	        0.9806211946	0.9801508853
```

Authors:
  - Divye Gala (https://github.com/divyegala)
  - Ray Wang (https://github.com/RayWang96)
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Ray Wang (https://github.com/RayWang96)
  - Ray Douglass (https://github.com/raydouglass)
  - Tamas Bela Feher (https://github.com/tfeher)
  - Corey J. Nolet (https://github.com/cjnolet)

URL: #1748
  • Loading branch information
divyegala authored Sep 26, 2023
1 parent d4002b0 commit a1002f8
Show file tree
Hide file tree
Showing 21 changed files with 2,294 additions and 89 deletions.
2 changes: 1 addition & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ INSTALL_TARGET=install
BUILD_REPORT_METRICS=""
BUILD_REPORT_INCL_CACHE_STATS=OFF

TEST_TARGETS="CLUSTER_TEST;CORE_TEST;DISTANCE_TEST;LABEL_TEST;LINALG_TEST;MATRIX_TEST;NEIGHBORS_TEST;NEIGHBORS_ANN_CAGRA_TEST;RANDOM_TEST;SOLVERS_TEST;SPARSE_TEST;SPARSE_DIST_TEST;SPARSE_NEIGHBORS_TEST;STATS_TEST;UTILS_TEST"
TEST_TARGETS="CLUSTER_TEST;CORE_TEST;DISTANCE_TEST;LABEL_TEST;LINALG_TEST;MATRIX_TEST;NEIGHBORS_TEST;NEIGHBORS_ANN_CAGRA_TEST;NEIGHBORS_ANN_NN_DESCENT_TEST;RANDOM_TEST;SOLVERS_TEST;SPARSE_TEST;SPARSE_DIST_TEST;SPARSE_NEIGHBORS_TEST;STATS_TEST;UTILS_TEST"
BENCH_TARGETS="CLUSTER_BENCH;NEIGHBORS_BENCH;DISTANCE_BENCH;LINALG_BENCH;MATRIX_BENCH;SPARSE_BENCH;RANDOM_BENCH"

CACHE_ARGS=""
Expand Down
2 changes: 1 addition & 1 deletion ci/build_cpp.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2022, NVIDIA CORPORATION.
# Copyright (c) 2022-2023, NVIDIA CORPORATION.

set -euo pipefail

Expand Down
7 changes: 7 additions & 0 deletions cpp/bench/ann/src/raft/raft_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,13 @@ void parse_build_param(const nlohmann::json& conf,
if (conf.contains("intermediate_graph_degree")) {
param.intermediate_graph_degree = conf.at("intermediate_graph_degree");
}
if (conf.contains("graph_build_algo")) {
if (conf.at("graph_build_algo") == "IVF_PQ") {
param.build_algo = raft::neighbors::cagra::graph_build_algo::IVF_PQ;
} else if (conf.at("graph_build_algo") == "NN_DESCENT") {
param.build_algo = raft::neighbors::cagra::graph_build_algo::NN_DESCENT;
}
}
}

template <typename T, typename IdxT>
Expand Down
76 changes: 68 additions & 8 deletions cpp/include/raft/neighbors/cagra.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,11 @@ namespace raft::neighbors::cagra {
*/

/**
* @brief Build a kNN graph.
* @brief Build a kNN graph using IVF-PQ.
*
* The kNN graph is the first building block for CAGRA index.
* This function uses the IVF-PQ method to build a kNN graph.
*
* The output is a dense matrix that stores the neighbor indices for each pont in the dataset.
* The output is a dense matrix that stores the neighbor indices for each point in the dataset.
* Each point has the same number of neighbors.
*
* See [cagra::build](#cagra::build) for an alternative method.
Expand All @@ -52,9 +51,9 @@ namespace raft::neighbors::cagra {
* @code{.cpp}
* using namespace raft::neighbors;
* // use default index parameters
* cagra::index_params build_params;
* cagra::search_params search_params
* auto knn_graph = raft::make_host_matrix<IdxT, IdxT>(dataset.extent(0), 128);
* ivf_pq::index_params build_params;
* ivf_pq::search_params search_params;
* auto knn_graph = raft::make_host_matrix<IdxT, IdxT>(dataset.extent(0), 128);
* // create knn graph
* cagra::build_knn_graph(res, dataset, knn_graph.view(), 2, build_params, search_params);
* auto optimized_graph = raft::make_host_matrix<IdxT, IdxT>(dataset.extent(0), 64);
Expand All @@ -70,7 +69,7 @@ namespace raft::neighbors::cagra {
* @param[in] res raft resources
* @param[in] dataset a matrix view (host or device) to a row-major matrix [n_rows, dim]
* @param[out] knn_graph a host matrix view to store the output knn graph [n_rows, graph_degree]
* @param[in] refine_rate refinement rate for ivf-pq search
* @param[in] refine_rate (optional) refinement rate for ivf-pq search
* @param[in] build_params (optional) ivf_pq index building parameters for knn graph
* @param[in] search_params (optional) ivf_pq search parameters
*/
Expand All @@ -95,6 +94,58 @@ void build_knn_graph(raft::resources const& res,
res, dataset_internal, knn_graph_internal, refine_rate, build_params, search_params);
}

/**
* @brief Build a kNN graph using NN-descent.
*
* The kNN graph is the first building block for CAGRA index.
*
* The output is a dense matrix that stores the neighbor indices for each point in the dataset.
* Each point has the same number of neighbors.
*
* See [cagra::build](#cagra::build) for an alternative method.
*
* The following distance metrics are supported:
* - L2Expanded
*
* Usage example:
* @code{.cpp}
* using namespace raft::neighbors;
* using namespace raft::neighbors::experimental;
* // start from the default index parameters and set the degree of the knn graph
* nn_descent::index_params build_params;
* build_params.graph_degree = 128;
* // the extent type must be int64_t to match the `knn_graph` parameter of this function
* auto knn_graph = raft::make_host_matrix<IdxT, int64_t>(dataset.extent(0), 128);
* // create knn graph
* cagra::build_knn_graph(res, dataset, knn_graph.view(), build_params);
* auto optimized_graph = raft::make_host_matrix<IdxT, int64_t>(dataset.extent(0), 64);
* cagra::optimize(res, dataset, knn_graph.view(), optimized_graph.view());
* // Construct an index from dataset and optimized knn_graph
* auto index = cagra::index<T, IdxT>(res, build_params.metric(), dataset,
* optimized_graph.view());
* @endcode
*
* @tparam DataT data element type
* @tparam IdxT type of the dataset vector indices (defaults to uint32_t)
* @tparam accessor host or device accessor_type for the dataset (defaults to a device accessor)
* @param[in] res raft::resources is an object managing resources
* @param[in] dataset input raft::host/device_matrix_view that can be located in
* host or device memory
* @param[out] knn_graph a host matrix view to store the output knn graph [n_rows, graph_degree]
* @param[in] build_params an instance of experimental::nn_descent::index_params that are parameters
* to run the nn-descent algorithm
*/
template <typename DataT,
typename IdxT = uint32_t,
typename accessor =
host_device_accessor<std::experimental::default_accessor<DataT>, memory_type::device>>
void build_knn_graph(raft::resources const& res,
mdspan<const DataT, matrix_extent<int64_t>, row_major, accessor> dataset,
raft::host_matrix_view<IdxT, int64_t, row_major> knn_graph,
experimental::nn_descent::index_params build_params)
{
// Thin public wrapper: forwards every argument unchanged to the detail implementation,
// spelling out DataT and IdxT explicitly.
detail::build_knn_graph<DataT, IdxT>(res, dataset, knn_graph, build_params);
}

/**
* @brief Sort a KNN graph index.
* Preprocessing step for `cagra::optimize`: If a KNN graph is not built using
Expand Down Expand Up @@ -259,7 +310,16 @@ index<T, IdxT> build(raft::resources const& res,
std::optional<raft::host_matrix<IdxT, int64_t>> knn_graph(
raft::make_host_matrix<IdxT, int64_t>(dataset.extent(0), intermediate_degree));

build_knn_graph(res, dataset, knn_graph->view());
if (params.build_algo == graph_build_algo::IVF_PQ) {
build_knn_graph(res, dataset, knn_graph->view());

} else {
// Use nn-descent to build CAGRA knn graph
auto nn_descent_params = experimental::nn_descent::index_params();
nn_descent_params.graph_degree = intermediate_degree;
nn_descent_params.intermediate_graph_degree = 1.5 * intermediate_degree;
build_knn_graph<T, IdxT>(res, dataset, knn_graph->view(), nn_descent_params);
}

auto cagra_graph = raft::make_host_matrix<IdxT, int64_t>(dataset.extent(0), graph_degree);

Expand Down
14 changes: 14 additions & 0 deletions cpp/include/raft/neighbors/cagra_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,24 @@ namespace raft::neighbors::cagra {
* @{
*/

/**
* @brief ANN algorithm used by CAGRA to build knn graph
*
* Selected through `index_params::build_algo`.
*/
enum class graph_build_algo {
/** Use IVF-PQ to build all-neighbors knn graph */
IVF_PQ,
/** Experimental, use NN-Descent to build all-neighbors knn graph */
NN_DESCENT
};

/**
 * @brief Build-time parameters of a CAGRA index, extending the generic ANN index parameters.
 */
struct index_params : ann::index_params {
  /** Degree of the input (intermediate) kNN graph that is later pruned. */
  size_t intermediate_graph_degree{128};
  /** Degree of the final output graph. */
  size_t graph_degree{64};
  /** ANN algorithm used to build the kNN graph. */
  graph_build_algo build_algo{graph_build_algo::IVF_PQ};
};

enum class search_algo {
Expand Down Expand Up @@ -362,6 +375,7 @@ struct index : ann::index {

// TODO: Remove deprecated experimental namespace in 23.12 release
namespace raft::neighbors::experimental::cagra {
using raft::neighbors::cagra::graph_build_algo;
using raft::neighbors::cagra::hash_mode;
using raft::neighbors::cagra::index;
using raft::neighbors::cagra::index_params;
Expand Down
24 changes: 24 additions & 0 deletions cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include <raft/neighbors/detail/refine.cuh>
#include <raft/neighbors/ivf_pq.cuh>
#include <raft/neighbors/ivf_pq_types.hpp>
#include <raft/neighbors/nn_descent.cuh>
#include <raft/neighbors/refine.cuh>

namespace raft::neighbors::cagra::detail {
Expand Down Expand Up @@ -240,4 +241,27 @@ void build_knn_graph(raft::resources const& res,
if (!first) RAFT_LOG_DEBUG("# Finished building kNN graph");
}

/**
 * @brief Build a kNN graph with NN-descent and sort it.
 *
 * Constructs an nn-descent index from `knn_graph`, runs the nn-descent build on `dataset`,
 * and then hands the index's graph, viewed with the unsigned counterpart of `IdxT`, to
 * `graph::sort_knn_graph`.
 *
 * @tparam DataT data element type
 * @tparam IdxT index type of the graph entries
 * @tparam accessor accessor type of the dataset mdspan
 * @param[in] res raft resources
 * @param[in] dataset row-major dataset matrix view
 * @param[out] knn_graph host matrix view passed to the nn-descent index
 *                       (presumably used as its output storage -- confirm against
 *                       nn_descent::index)
 * @param[in] build_params nn-descent build parameters
 */
template <typename DataT, typename IdxT, typename accessor>
void build_knn_graph(raft::resources const& res,
                     mdspan<const DataT, matrix_extent<int64_t>, row_major, accessor> dataset,
                     raft::host_matrix_view<IdxT, int64_t, row_major> knn_graph,
                     experimental::nn_descent::index_params build_params)
{
  namespace nnd = experimental::nn_descent;

  // Step 1: run nn-descent on the dataset.
  auto nnd_index = nnd::index<IdxT>(res, knn_graph);
  nnd::build<DataT, IdxT>(res, build_params, dataset, nnd_index);

  // Step 2: re-view the resulting graph with an unsigned index type; the data pointer,
  // extents, layout and memory type are carried over unchanged.
  auto built_graph        = nnd_index.graph();
  using unsigned_idx_t    = std::make_unsigned_t<IdxT>;
  using source_accessor   = typename decltype(built_graph)::accessor_type;
  using unsigned_accessor =
    host_device_accessor<std::experimental::default_accessor<unsigned_idx_t>,
                         source_accessor::mem_type>;
  using unsigned_graph_view =
    mdspan<unsigned_idx_t, matrix_extent<int64_t>, row_major, unsigned_accessor>;

  unsigned_graph_view sortable_graph(reinterpret_cast<unsigned_idx_t*>(built_graph.data_handle()),
                                     built_graph.extent(0),
                                     built_graph.extent(1));

  // Step 3: sort the graph through the unsigned view.
  graph::sort_knn_graph(res, dataset, sortable_graph);
}

} // namespace raft::neighbors::cagra::detail
2 changes: 1 addition & 1 deletion cpp/include/raft/neighbors/detail/cagra/graph_core.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ void sort_knn_graph(raft::resources const& res,
const uint32_t input_graph_degree = knn_graph.extent(1);
IdxT* const input_graph_ptr = knn_graph.data_handle();

auto d_input_graph = raft::make_device_matrix<IdxT, IdxT>(res, graph_size, input_graph_degree);
auto d_input_graph = raft::make_device_matrix<IdxT, int64_t>(res, graph_size, input_graph_degree);

//
// Sorting kNN graph
Expand Down
Loading

0 comments on commit a1002f8

Please sign in to comment.