Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate feature diff for NN Descent from RAFT to cuVS #421

Merged
merged 19 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
300 changes: 294 additions & 6 deletions cpp/include/cuvs/neighbors/nn_descent.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ struct index_params : cuvs::neighbors::index_params {
size_t intermediate_graph_degree = 128; // Degree of input graph for pruning.
size_t max_iterations = 20; // Number of nn-descent iterations.
float termination_threshold = 0.0001; // Termination threshold of nn-descent.
bool return_distances = true; // return distances if true
size_t n_clusters = 1; // defaults to not using any batching

/** @brief Construct NN descent parameters for a specific kNN graph degree
*
Expand Down Expand Up @@ -100,14 +102,20 @@ struct index : cuvs::neighbors::index {
* @param res raft::resources is an object managing resources
* @param n_rows number of rows in knn-graph
* @param n_cols number of cols in knn-graph
* @param return_distances whether to return distances
*/
index(raft::resources const& res, int64_t n_rows, int64_t n_cols)
index(raft::resources const& res, int64_t n_rows, int64_t n_cols, bool return_distances = false)
: cuvs::neighbors::index(),
res_{res},
metric_{cuvs::distance::DistanceType::L2Expanded},
graph_{raft::make_host_matrix<IdxT, int64_t, raft::row_major>(n_rows, n_cols)},
graph_view_{graph_.view()}
graph_view_{graph_.view()},
return_distances_{return_distances}
{
if (return_distances) {
distances_ = raft::make_device_matrix<float, int64_t>(res_, n_rows, n_cols);
distances_view_ = distances_.value().view();
}
}

/**
Expand All @@ -119,14 +127,22 @@ struct index : cuvs::neighbors::index {
*
* @param res raft::resources is an object managing resources
* @param graph_view raft::host_matrix_view<IdxT, int64_t, raft::row_major> for storing knn-graph
* @param distances_view optional raft::device_matrix_view<float, int64_t, row_major> for storing
* distances
* @param return_distances whether to return distances
*/
index(raft::resources const& res,
raft::host_matrix_view<IdxT, int64_t, raft::row_major> graph_view)
raft::host_matrix_view<IdxT, int64_t, raft::row_major> graph_view,
std::optional<raft::device_matrix_view<float, int64_t, row_major>> distances_view =
std::nullopt,
bool return_distances = false)
divyegala marked this conversation as resolved.
Show resolved Hide resolved
: cuvs::neighbors::index(),
res_{res},
metric_{cuvs::distance::DistanceType::L2Expanded},
graph_{raft::make_host_matrix<IdxT, int64_t, raft::row_major>(0, 0)},
graph_view_{graph_view}
graph_view_{graph_view},
distances_view_{distances_view},
return_distances_{return_distances}
{
}

Expand Down Expand Up @@ -155,6 +171,13 @@ struct index : cuvs::neighbors::index {
return graph_view_;
}

/** neighborhood graph distances [size, graph-degree] */
[[nodiscard]] inline auto distances() noexcept
-> std::optional<device_matrix_view<float, int64_t, row_major>>
{
return distances_view_;
}

// Don't allow copying the index for performance reasons (try avoiding copying data)
index(const index&) = delete;
index(index&&) = default;
Expand All @@ -166,8 +189,11 @@ struct index : cuvs::neighbors::index {
raft::resources const& res_;
cuvs::distance::DistanceType metric_;
raft::host_matrix<IdxT, int64_t, raft::row_major> graph_; // graph to return for non-int IdxT
std::optional<raft::device_matrix<float, int64_t, row_major>> distances_;
raft::host_matrix_view<IdxT, int64_t, raft::row_major>
graph_view_; // view of graph for user provided matrix
std::optional<raft::device_matrix_view<float, int64_t, row_major>> distances_view_;
bool return_distances_;
};

/** @} */
Expand Down Expand Up @@ -393,8 +419,6 @@ auto build(raft::resources const& res,
raft::device_matrix_view<const uint8_t, int64_t, raft::row_major> dataset)
-> cuvs::neighbors::nn_descent::index<uint32_t>;

/** @} */

/**
* @brief Build nn-descent Index with dataset in host memory
*
Expand Down Expand Up @@ -427,6 +451,270 @@ auto build(raft::resources const& res,
raft::host_matrix_view<const uint8_t, int64_t, raft::row_major> dataset)
-> cuvs::neighbors::nn_descent::index<uint32_t>;

/**
* @brief Build nn-descent Index with dataset in device memory
*
* The following distance metrics are supported:
* - L2
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* // use default index parameters
* nn_descent::index_params index_params;
* // allocate an index that owns an [N, k] knn graph in host memory
* auto index = nn_descent::index<uint32_t>(res, N, k);
* // fill the index from a [N, D] raft::device_matrix_view dataset
* nn_descent::build(res, index_params, dataset, index);
* // index.graph() provides a raft::host_matrix_view of an
* // all-neighbors knn graph of dimensions [N, k] of the input
* // dataset
* @endcode
*
* @param[in] res raft::resources is an object managing resources
* @param[in] params an instance of nn_descent::index_params that are parameters
* to run the nn-descent algorithm
* @param[in] dataset raft::device_matrix_view input dataset expected to be located
* in device memory
* @param[out] index index containing all-neighbors knn graph in host memory
*/
void build(raft::resources const& res,
index_params const& params,
raft::device_matrix_view<const float, int64_t, raft::row_major> dataset,
index<uint32_t>& index);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why doesn't nn-descent return the built index like all the other index types?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does, there's an API for that as well. We need this API as well especially for CAGRA because it needs to own the knn graph that it sends to NN Descent. For that, it needs to construct an index first and that's why we need this API.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But the usage docs for this function don't reflect that. The whole idea behind these build functions is to abstract away the index so that the user (and anyone using the public APIs) doesn't need to think about the underlying index object. Instead of having the user construct the index object on their own, we should have them pass an optional knn graph into the build function that it then uses when it constructs the underlying index instance underneath.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in latest commit


/**
* @brief Build nn-descent Index with dataset in host memory
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make sure all of these are exposed through the docs.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, they are part of the doxygen group that is already in the docs source.

*
* The following distance metrics are supported:
* - L2
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* // use default index parameters
* nn_descent::index_params index_params;
* // allocate an index that owns an [N, k] knn graph in host memory
* auto index = nn_descent::index<uint32_t>(res, N, k);
* // fill the index from a [N, D] raft::host_matrix_view dataset
* nn_descent::build(res, index_params, dataset, index);
* // index.graph() provides a raft::host_matrix_view of an
* // all-neighbors knn graph of dimensions [N, k] of the input
* // dataset
* @endcode
*
* @param[in] res raft::resources is an object managing resources
* @param[in] params an instance of nn_descent::index_params that are parameters
* to run the nn-descent algorithm
* @param[in] dataset raft::host_matrix_view input dataset expected to be located
* in host memory
* @param[out] index index containing all-neighbors knn graph in host memory
*/
void build(raft::resources const& res,
index_params const& params,
raft::host_matrix_view<const float, int64_t, raft::row_major> dataset,
index<uint32_t>& index);
divyegala marked this conversation as resolved.
Show resolved Hide resolved

/**
* @brief Build nn-descent Index with dataset in device memory
*
* The following distance metrics are supported:
* - L2
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* // use default index parameters
* nn_descent::index_params index_params;
* // allocate an index that owns an [N, k] knn graph in host memory
* auto index = nn_descent::index<uint32_t>(res, N, k);
* // fill the index from a [N, D] raft::device_matrix_view dataset
* nn_descent::build(res, index_params, dataset, index);
* // index.graph() provides a raft::host_matrix_view of an
* // all-neighbors knn graph of dimensions [N, k] of the input
* // dataset
* @endcode
*
* @param[in] res raft::resources is an object managing resources
* @param[in] params an instance of nn_descent::index_params that are parameters
* to run the nn-descent algorithm
* @param[in] dataset raft::device_matrix_view input dataset expected to be located
* in device memory
* @param[out] index index containing all-neighbors knn graph in host memory
*/
void build(raft::resources const& res,
index_params const& params,
raft::device_matrix_view<const half, int64_t, raft::row_major> dataset,
index<uint32_t>& index);

/**
* @brief Build nn-descent Index with dataset in host memory
*
* The following distance metrics are supported:
* - L2
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* // use default index parameters
* nn_descent::index_params index_params;
* // allocate an index that owns an [N, k] knn graph in host memory
* auto index = nn_descent::index<uint32_t>(res, N, k);
* // fill the index from a [N, D] raft::host_matrix_view dataset
* nn_descent::build(res, index_params, dataset, index);
* // index.graph() provides a raft::host_matrix_view of an
* // all-neighbors knn graph of dimensions [N, k] of the input
* // dataset
* @endcode
*
* @param[in] res raft::resources is an object managing resources
* @param[in] params an instance of nn_descent::index_params that are parameters
* to run the nn-descent algorithm
* @param[in] dataset raft::host_matrix_view input dataset expected to be located
* in host memory
* @param[out] index index containing all-neighbors knn graph in host memory
*/
void build(raft::resources const& res,
index_params const& params,
raft::host_matrix_view<const half, int64_t, raft::row_major> dataset,
index<uint32_t>& index);

/**
* @brief Build nn-descent Index with dataset in device memory
*
* The following distance metrics are supported:
* - L2
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* // use default index parameters
* nn_descent::index_params index_params;
* // allocate an index that owns an [N, k] knn graph in host memory
* auto index = nn_descent::index<uint32_t>(res, N, k);
* // fill the index from a [N, D] raft::device_matrix_view dataset
* nn_descent::build(res, index_params, dataset, index);
* // index.graph() provides a raft::host_matrix_view of an
* // all-neighbors knn graph of dimensions [N, k] of the input
* // dataset
* @endcode
*
* @param[in] res raft::resources is an object managing resources
* @param[in] params an instance of nn_descent::index_params that are parameters
* to run the nn-descent algorithm
* @param[in] dataset raft::device_matrix_view input dataset expected to be located
* in device memory
* @param[out] index index containing all-neighbors knn graph in host memory
*/
void build(raft::resources const& res,
index_params const& params,
raft::device_matrix_view<const int8_t, int64_t, raft::row_major> dataset,
index<uint32_t>& index);

/**
* @brief Build nn-descent Index with dataset in host memory
*
* The following distance metrics are supported:
* - L2
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* // use default index parameters
* nn_descent::index_params index_params;
* // allocate an index that owns an [N, k] knn graph in host memory
* auto index = nn_descent::index<uint32_t>(res, N, k);
* // fill the index from a [N, D] raft::host_matrix_view dataset
* nn_descent::build(res, index_params, dataset, index);
* // index.graph() provides a raft::host_matrix_view of an
* // all-neighbors knn graph of dimensions [N, k] of the input
* // dataset
* @endcode
*
* @param[in] res raft::resources is an object managing resources
* @param[in] params an instance of nn_descent::index_params that are parameters
* to run the nn-descent algorithm
* @param[in] dataset raft::host_matrix_view input dataset expected to be located
* in host memory
* @param[out] index index containing all-neighbors knn graph in host memory
*/
void build(raft::resources const& res,
index_params const& params,
raft::host_matrix_view<const int8_t, int64_t, raft::row_major> dataset,
index<uint32_t>& index);

/**
* @brief Build nn-descent Index with dataset in device memory
*
* The following distance metrics are supported:
* - L2
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* // use default index parameters
* nn_descent::index_params index_params;
* // allocate an index that owns an [N, k] knn graph in host memory
* auto index = nn_descent::index<uint32_t>(res, N, k);
* // fill the index from a [N, D] raft::device_matrix_view dataset
* nn_descent::build(res, index_params, dataset, index);
* // index.graph() provides a raft::host_matrix_view of an
* // all-neighbors knn graph of dimensions [N, k] of the input
* // dataset
* @endcode
*
* @param[in] res raft::resources is an object managing resources
* @param[in] params an instance of nn_descent::index_params that are parameters
* to run the nn-descent algorithm
* @param[in] dataset raft::device_matrix_view input dataset expected to be located
* in device memory
* @param[out] index index containing all-neighbors knn graph in host memory
*/
void build(raft::resources const& res,
index_params const& params,
raft::device_matrix_view<const uint8_t, int64_t, raft::row_major> dataset,
index<uint32_t>& index);

/**
* @brief Build nn-descent Index with dataset in host memory
*
* The following distance metrics are supported:
* - L2
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* // use default index parameters
* nn_descent::index_params index_params;
* // allocate an index that owns an [N, k] knn graph in host memory
* auto index = nn_descent::index<uint32_t>(res, N, k);
* // fill the index from a [N, D] raft::host_matrix_view dataset
* nn_descent::build(res, index_params, dataset, index);
* // index.graph() provides a raft::host_matrix_view of an
* // all-neighbors knn graph of dimensions [N, k] of the input
* // dataset
* @endcode
*
* @param[in] res raft::resources is an object managing resources
* @param[in] params an instance of nn_descent::index_params that are parameters
* to run the nn-descent algorithm
* @param[in] dataset raft::host_matrix_view input dataset expected to be located
* in host memory
* @param[out] index index containing all-neighbors knn graph in host memory
*/
void build(raft::resources const& res,
index_params const& params,
raft::host_matrix_view<const uint8_t, int64_t, raft::row_major> dataset,
index<uint32_t>& index);

/** @} */

/**
* @brief Test if we have enough GPU memory to run NN descent algorithm.
*
Expand Down
10 changes: 6 additions & 4 deletions cpp/src/neighbors/detail/cagra/cagra_build.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@
#include <cuvs/neighbors/ivf_pq.hpp>
#include <cuvs/neighbors/refine.hpp>

// TODO: Fixme- this needs to be migrated
#include "../../nn_descent.cuh"
#include <cuvs/neighbors/nn_descent.hpp>

// TODO: This shouldn't be calling spatial/knn APIs
#include "../ann_utils.cuh"
Expand Down Expand Up @@ -356,8 +355,10 @@ void build_knn_graph(
raft::host_matrix_view<IdxT, int64_t, raft::row_major> knn_graph,
cuvs::neighbors::nn_descent::index_params build_params)
{
auto nn_descent_idx = cuvs::neighbors::nn_descent::index<IdxT>(res, knn_graph);
cuvs::neighbors::nn_descent::build<DataT, IdxT>(res, build_params, dataset, nn_descent_idx);
std::optional<raft::device_matrix_view<float, int64_t, row_major>> distances_view = std::nullopt;
auto nn_descent_idx =
cuvs::neighbors::nn_descent::index<IdxT>(res, knn_graph, distances_view, false);
cuvs::neighbors::nn_descent::build(res, build_params, dataset, nn_descent_idx);

using internal_IdxT = typename std::make_unsigned<IdxT>::type;
using g_accessor = typename decltype(nn_descent_idx.graph())::accessor_type;
Expand Down Expand Up @@ -471,6 +472,7 @@ index<T, IdxT> build(
}

// Use nn-descent to build CAGRA knn graph
nn_descent_params.return_distances = false;
build_knn_graph<T, IdxT>(res, dataset, knn_graph->view(), nn_descent_params);
}

Expand Down
Loading
Loading