diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d9bea1864..78c67d9c8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -290,9 +290,14 @@ target_compile_options( add_library( cuvs SHARED src/cluster/kmeans_balanced_fit_float.cu + src/cluster/kmeans_fit_mg_float.cu + src/cluster/kmeans_fit_mg_double.cu + src/cluster/kmeans_fit_double.cu src/cluster/kmeans_fit_float.cu src/cluster/kmeans_auto_find_k_float.cu + src/cluster/kmeans_fit_predict_double.cu src/cluster/kmeans_fit_predict_float.cu + src/cluster/kmeans_predict_double.cu src/cluster/kmeans_predict_float.cu src/cluster/kmeans_balanced_fit_float.cu src/cluster/kmeans_balanced_fit_predict_float.cu @@ -300,6 +305,7 @@ add_library( src/cluster/kmeans_balanced_fit_int8.cu src/cluster/kmeans_balanced_fit_predict_int8.cu src/cluster/kmeans_balanced_predict_int8.cu + src/cluster/kmeans_transform_double.cu src/cluster/kmeans_transform_float.cu src/cluster/single_linkage_float.cu src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu @@ -342,6 +348,8 @@ add_library( src/distance/detail/pairwise_matrix/dispatch_russel_rao_half_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu src/distance/detail/pairwise_matrix/dispatch_rbf.cu + src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int64_t.cu + src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int64_t.cu src/distance/detail/fused_distance_nn.cu src/distance/distance.cu src/distance/pairwise_distance.cu diff --git a/cpp/include/cuvs/cluster/kmeans.hpp b/cpp/include/cuvs/cluster/kmeans.hpp index 75205fa4f..89b3acc24 100644 --- a/cpp/include/cuvs/cluster/kmeans.hpp +++ b/cpp/include/cuvs/cluster/kmeans.hpp @@ -153,7 +153,7 @@ struct balanced_params : base_params { * using namespace cuvs::cluster; * ... * raft::resources handle; - * cuvs::cluster::kmeans::params params; + * cuvs::cluster::kmeans::params params; * int n_features = 15, inertia, n_iter; * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); * @@ -203,7 +203,159 @@ void fit(raft::resources const& handle, * using namespace cuvs::cluster; * ... * raft::resources handle; - * cuvs::cluster::kmeans::params params; + * cuvs::cluster::kmeans::params params; + * int64_t n_features = 15, inertia, n_iter; + * auto centroids = raft::make_device_matrix(handle, params.n_clusters, + * n_features); + * + * kmeans::fit(handle, + * params, + * X, + * std::nullopt, + * centroids, + * raft::make_scalar_view(&inertia), + * raft::make_scalar_view(&n_iter)); + * @endcode + * + * @param[in] handle The raft handle. + * @param[in] params Parameters for KMeans model. + * @param[in] X Training instances to cluster. The data must + * be in row-major format. + * [dim = n_samples x n_features] + * @param[in] sample_weight Optional weights for each observation in X. + * [len = n_samples] + * @param[inout] centroids [in] When init is InitMethod::Array, use + * centroids as the initial cluster centers. + * [out] The generated centroids from the + * kmeans algorithm are stored at the address + * pointed by 'centroids'. + * [dim = n_clusters x n_features] + * @param[out] inertia Sum of squared distances of samples to their + * closest cluster center. + * @param[out] n_iter Number of iterations run. + */ +void fit(raft::resources const& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter); + +/** + * @brief Find clusters with k-means algorithm. + * Initial centroids are chosen with k-means++ algorithm. Empty + * clusters are reinitialized by choosing new centroids with + * k-means++ algorithm. + * + * @code{.cpp} + * #include + * #include + * using namespace cuvs::cluster; + * ... + * raft::resources handle; + * cuvs::cluster::kmeans::params params; + * int n_features = 15, inertia, n_iter; + * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); + * + * kmeans::fit(handle, + * params, + * X, + * std::nullopt, + * centroids, + * raft::make_scalar_view(&inertia), + * raft::make_scalar_view(&n_iter)); + * @endcode + * + * @param[in] handle The raft handle. + * @param[in] params Parameters for KMeans model. + * @param[in] X Training instances to cluster. The data must + * be in row-major format. + * [dim = n_samples x n_features] + * @param[in] sample_weight Optional weights for each observation in X. + * [len = n_samples] + * @param[inout] centroids [in] When init is InitMethod::Array, use + * centroids as the initial cluster centers. + * [out] The generated centroids from the + * kmeans algorithm are stored at the address + * pointed by 'centroids'. + * [dim = n_clusters x n_features] + * @param[out] inertia Sum of squared distances of samples to their + * closest cluster center. + * @param[out] n_iter Number of iterations run. + */ +void fit(raft::resources const& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter); + +/** + * @brief Find clusters with k-means algorithm. + * Initial centroids are chosen with k-means++ algorithm. Empty + * clusters are reinitialized by choosing new centroids with + * k-means++ algorithm. + * + * @code{.cpp} + * #include + * #include + * using namespace cuvs::cluster; + * ... + * raft::resources handle; + * cuvs::cluster::kmeans::params params; + * int64_t n_features = 15, inertia, n_iter; + * auto centroids = raft::make_device_matrix(handle, params.n_clusters, + * n_features); + * + * kmeans::fit(handle, + * params, + * X, + * std::nullopt, + * centroids, + * raft::make_scalar_view(&inertia), + * raft::make_scalar_view(&n_iter)); + * @endcode + * + * @param[in] handle The raft handle. + * @param[in] params Parameters for KMeans model. + * @param[in] X Training instances to cluster. The data must + * be in row-major format. + * [dim = n_samples x n_features] + * @param[in] sample_weight Optional weights for each observation in X. + * [len = n_samples] + * @param[inout] centroids [in] When init is InitMethod::Array, use + * centroids as the initial cluster centers. + * [out] The generated centroids from the + * kmeans algorithm are stored at the address + * pointed by 'centroids'. + * [dim = n_clusters x n_features] + * @param[out] inertia Sum of squared distances of samples to their + * closest cluster center. + * @param[out] n_iter Number of iterations run. + */ +void fit(raft::resources const& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter); + +/** + * @brief Find clusters with k-means algorithm. + * Initial centroids are chosen with k-means++ algorithm. Empty + * clusters are reinitialized by choosing new centroids with + * k-means++ algorithm. + * + * @code{.cpp} + * #include + * #include + * using namespace cuvs::cluster; + * ... + * raft::resources handle; + * cuvs::cluster::kmeans::params params; * int n_features = 15, inertia, n_iter; * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); * @@ -250,7 +402,7 @@ void fit(raft::resources const& handle, * using namespace cuvs::cluster; * ... * raft::resources handle; - * cuvs::cluster::kmeans::balanced_params params; + * cuvs::cluster::kmeans::balanced_params params; * int n_features = 15; * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); * @@ -284,7 +436,7 @@ void fit(const raft::resources& handle, * using namespace cuvs::cluster; * ... * raft::resources handle; - * cuvs::cluster::kmeans::balanced_params params; + * cuvs::cluster::kmeans::balanced_params params; * int n_features = 15; * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); * @@ -308,7 +460,6 @@ void fit(const raft::resources& handle, cuvs::cluster::kmeans::balanced_params const& params, raft::device_matrix_view X, raft::device_matrix_view centroids); - /** * @brief Predict the closest cluster each sample in X belongs to. * @@ -318,7 +469,7 @@ void fit(const raft::resources& handle, * using namespace cuvs::cluster; * ... * raft::resources handle; - * cuvs::cluster::kmeans::params params; + * cuvs::cluster::kmeans::params params; * int n_features = 15, inertia, n_iter; * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); * @@ -363,7 +514,7 @@ void predict(raft::resources const& handle, raft::device_matrix_view X, std::optional> sample_weight, raft::device_matrix_view centroids, - raft::device_vector_view labels, + raft::device_vector_view labels, bool normalize_weight, raft::host_scalar_view inertia); @@ -376,7 +527,7 @@ void predict(raft::resources const& handle, * using namespace cuvs::cluster; * ... * raft::resources handle; - * cuvs::cluster::kmeans::params params; + * cuvs::cluster::kmeans::params params; * int n_features = 15, inertia, n_iter; * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); * @@ -388,7 +539,7 @@ void predict(raft::resources const& handle, * raft::make_scalar_view(&inertia), * raft::make_scalar_view(&n_iter)); * ... - * auto labels = raft::make_device_vector(handle, X.extent(0)); + * auto labels = raft::make_device_vector(handle, X.extent(0)); * * kmeans::predict(handle, * params, @@ -404,18 +555,26 @@ void predict(raft::resources const& handle, * @param[in] params Parameters for KMeans model. * @param[in] X New data to predict. * [dim = n_samples x n_features] + * @param[in] sample_weight Optional weights for each observation in X. + * [len = n_samples] * @param[in] centroids Cluster centroids. The data must be in * row-major format. * [dim = n_clusters x n_features] + * @param[in] normalize_weight True if the weights should be normalized * @param[out] labels Index of the cluster each sample in X * belongs to. * [len = n_samples] + * @param[out] inertia Sum of squared distances of samples to + * their closest cluster center. */ -void predict(const raft::resources& handle, - cuvs::cluster::kmeans::balanced_params const& params, - raft::device_matrix_view X, +void predict(raft::resources const& handle, + const kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, raft::device_matrix_view centroids, - raft::device_vector_view labels); + raft::device_vector_view labels, + bool normalize_weight, + raft::host_scalar_view inertia); /** * @brief Predict the closest cluster each sample in X belongs to. @@ -426,22 +585,144 @@ void predict(const raft::resources& handle, * using namespace cuvs::cluster; * ... * raft::resources handle; - * cuvs::cluster::kmeans::balanced_params params; - * int n_features = 15; + * cuvs::cluster::kmeans::params params; + * int n_features = 15, inertia, n_iter; + * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); + * + * kmeans::fit(handle, + * params, + * X, + * std::nullopt, + * centroids.view(), + * raft::make_scalar_view(&inertia), + * raft::make_scalar_view(&n_iter)); + * ... + * auto labels = raft::make_device_vector(handle, X.extent(0)); + * + * kmeans::predict(handle, + * params, + * X, + * std::nullopt, + * centroids.view(), + * false, + * labels.view(), + * raft::make_scalar_view(&ineratia)); + * @endcode + * + * @param[in] handle The raft handle. + * @param[in] params Parameters for KMeans model. + * @param[in] X New data to predict. + * [dim = n_samples x n_features] + * @param[in] sample_weight Optional weights for each observation in X. + * [len = n_samples] + * @param[in] centroids Cluster centroids. The data must be in + * row-major format. + * [dim = n_clusters x n_features] + * @param[in] normalize_weight True if the weights should be normalized + * @param[out] labels Index of the cluster each sample in X + * belongs to. + * [len = n_samples] + * @param[out] inertia Sum of squared distances of samples to + * their closest cluster center. + */ +void predict(raft::resources const& handle, + const kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::device_vector_view labels, + bool normalize_weight, + raft::host_scalar_view inertia); + +/** + * @brief Predict the closest cluster each sample in X belongs to. + * + * @code{.cpp} + * #include + * #include + * using namespace cuvs::cluster; + * ... + * raft::resources handle; + * cuvs::cluster::kmeans::params params; + * int n_features = 15, inertia, n_iter; + * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); + * + * kmeans::fit(handle, + * params, + * X, + * std::nullopt, + * centroids.view(), + * raft::make_scalar_view(&inertia), + * raft::make_scalar_view(&n_iter)); + * ... + * auto labels = raft::make_device_vector(handle, X.extent(0)); + * + * kmeans::predict(handle, + * params, + * X, + * std::nullopt, + * centroids.view(), + * false, + * labels.view(), + * raft::make_scalar_view(&ineratia)); + * @endcode + * + * @param[in] handle The raft handle. + * @param[in] params Parameters for KMeans model. + * @param[in] X New data to predict. + * [dim = n_samples x n_features] + * @param[in] sample_weight Optional weights for each observation in X. + * [len = n_samples] + * @param[in] centroids Cluster centroids. The data must be in + * row-major format. + * [dim = n_clusters x n_features] + * @param[in] normalize_weight True if the weights should be normalized + * @param[out] labels Index of the cluster each sample in X + * belongs to. + * [len = n_samples] + * @param[out] inertia Sum of squared distances of samples to + * their closest cluster center. + */ +void predict(raft::resources const& handle, + const kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::device_vector_view labels, + bool normalize_weight, + raft::host_scalar_view inertia); + +/** + * @brief Predict the closest cluster each sample in X belongs to. + * + * @code{.cpp} + * #include + * #include + * using namespace cuvs::cluster; + * ... + * raft::resources handle; + * cuvs::cluster::kmeans::params params; + * int n_features = 15, inertia, n_iter; * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); * * kmeans::fit(handle, * params, * X, - * centroids.view()); + * std::nullopt, + * centroids.view(), + * raft::make_scalar_view(&inertia), + * raft::make_scalar_view(&n_iter)); * ... * auto labels = raft::make_device_vector(handle, X.extent(0)); * * kmeans::predict(handle, * params, * X, + * std::nullopt, * centroids.view(), - * labels.view()); + * false, + * labels.view(), + * raft::make_scalar_view(&ineratia)); * @endcode * * @param[in] handle The raft handle. @@ -457,7 +738,7 @@ void predict(const raft::resources& handle, */ void predict(const raft::resources& handle, cuvs::cluster::kmeans::balanced_params const& params, - raft::device_matrix_view X, + raft::device_matrix_view X, raft::device_matrix_view centroids, raft::device_vector_view labels); @@ -471,7 +752,7 @@ void predict(const raft::resources& handle, * using namespace cuvs::cluster; * ... * raft::resources handle; - * cuvs::cluster::kmeans::params params; + * cuvs::cluster::kmeans::params params; * int n_features = 15, inertia, n_iter; * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); * auto labels = raft::make_device_vector(handle, X.extent(0)); @@ -516,6 +797,171 @@ void fit_predict(raft::resources const& handle, raft::host_scalar_view inertia, raft::host_scalar_view n_iter); +/** + * @brief Compute k-means clustering and predicts cluster index for each sample + * in the input. + * + * @code{.cpp} + * #include + * #include + * using namespace cuvs::cluster; + * ... + * raft::resources handle; + * cuvs::cluster::kmeans::params params; + * int64_t n_features = 15, inertia, n_iter; + * auto centroids = raft::make_device_matrix(handle, params.n_clusters, + * n_features); auto labels = raft::make_device_vector(handle, X.extent(0)); + * + * kmeans::fit_predict(handle, + * params, + * X, + * std::nullopt, + * centroids.view(), + * labels.view(), + * raft::make_scalar_view(&inertia), + * raft::make_scalar_view(&n_iter)); + * @endcode + * + * @param[in] handle The raft handle. + * @param[in] params Parameters for KMeans model. + * @param[in] X Training instances to cluster. The data must be + * in row-major format. + * [dim = n_samples x n_features] + * @param[in] sample_weight Optional weights for each observation in X. + * [len = n_samples] + * @param[inout] centroids Optional + * [in] When init is InitMethod::Array, use + * centroids as the initial cluster centers + * [out] The generated centroids from the + * kmeans algorithm are stored at the address + * pointed by 'centroids'. + * [dim = n_clusters x n_features] + * @param[out] labels Index of the cluster each sample in X belongs + * to. + * [len = n_samples] + * @param[out] inertia Sum of squared distances of samples to their + * closest cluster center. + * @param[out] n_iter Number of iterations run. + */ +void fit_predict(raft::resources const& handle, + const kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + std::optional> centroids, + raft::device_vector_view labels, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter); + +/** + * @brief Compute k-means clustering and predicts cluster index for each sample + * in the input. + * + * @code{.cpp} + * #include + * #include + * using namespace cuvs::cluster; + * ... + * raft::resources handle; + * cuvs::cluster::kmeans::params params; + * int n_features = 15, inertia, n_iter; + * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); + * auto labels = raft::make_device_vector(handle, X.extent(0)); + * + * kmeans::fit_predict(handle, + * params, + * X, + * std::nullopt, + * centroids.view(), + * labels.view(), + * raft::make_scalar_view(&inertia), + * raft::make_scalar_view(&n_iter)); + * @endcode + * + * @param[in] handle The raft handle. + * @param[in] params Parameters for KMeans model. + * @param[in] X Training instances to cluster. The data must be + * in row-major format. + * [dim = n_samples x n_features] + * @param[in] sample_weight Optional weights for each observation in X. + * [len = n_samples] + * @param[inout] centroids Optional + * [in] When init is InitMethod::Array, use + * centroids as the initial cluster centers + * [out] The generated centroids from the + * kmeans algorithm are stored at the address + * pointed by 'centroids'. + * [dim = n_clusters x n_features] + * @param[out] labels Index of the cluster each sample in X belongs + * to. + * [len = n_samples] + * @param[out] inertia Sum of squared distances of samples to their + * closest cluster center. + * @param[out] n_iter Number of iterations run. + */ +void fit_predict(raft::resources const& handle, + const kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + std::optional> centroids, + raft::device_vector_view labels, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter); + +/** + * @brief Compute k-means clustering and predicts cluster index for each sample + * in the input. + * + * @code{.cpp} + * #include + * #include + * using namespace cuvs::cluster; + * ... + * raft::resources handle; + * cuvs::cluster::kmeans::params params; + * int64_t n_features = 15, inertia, n_iter; + * auto centroids = raft::make_device_matrix(handle, params.n_clusters, + * n_features); auto labels = raft::make_device_vector(handle, X.extent(0)); + * + * kmeans::fit_predict(handle, + * params, + * X, + * std::nullopt, + * centroids.view(), + * labels.view(), + * raft::make_scalar_view(&inertia), + * raft::make_scalar_view(&n_iter)); + * @endcode + * + * @param[in] handle The raft handle. + * @param[in] params Parameters for KMeans model. + * @param[in] X Training instances to cluster. The data must be + * in row-major format. + * [dim = n_samples x n_features] + * @param[in] sample_weight Optional weights for each observation in X. + * [len = n_samples] + * @param[inout] centroids Optional + * [in] When init is InitMethod::Array, use + * centroids as the initial cluster centers + * [out] The generated centroids from the + * kmeans algorithm are stored at the address + * pointed by 'centroids'. + * [dim = n_clusters x n_features] + * @param[out] labels Index of the cluster each sample in X belongs + * to. + * [len = n_samples] + * @param[out] inertia Sum of squared distances of samples to their + * closest cluster center. + * @param[out] n_iter Number of iterations run. + */ +void fit_predict(raft::resources const& handle, + const kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + std::optional> centroids, + raft::device_vector_view labels, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter); + /** * @brief Compute balanced k-means clustering and predicts cluster index for each sample * in the input. @@ -526,7 +972,7 @@ void fit_predict(raft::resources const& handle, * using namespace cuvs::cluster; * ... * raft::resources handle; - * cuvs::cluster::kmeans::balanced_params params; + * cuvs::cluster::kmeans::balanced_params params; * int n_features = 15; * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); * auto labels = raft::make_device_vector(handle, X.extent(0)); @@ -570,7 +1016,7 @@ void fit_predict(const raft::resources& handle, * using namespace cuvs::cluster; * ... * raft::resources handle; - * cuvs::cluster::kmeans::balanced_params params; + * cuvs::cluster::kmeans::balanced_params params; * int n_features = 15; * auto centroids = raft::make_device_matrix(handle, params.n_clusters, n_features); * auto labels = raft::make_device_vector(handle, X.extent(0)); @@ -623,6 +1069,24 @@ void transform(raft::resources const& handle, raft::device_matrix_view centroids, raft::device_matrix_view X_new); +/** + * @brief Transform X to a cluster-distance space. + * + * @param[in] handle The raft handle. + * @param[in] params Parameters for KMeans model. + * @param[in] X Training instances to cluster. The data must + * be in row-major format + * [dim = n_samples x n_features] + * @param[in] centroids Cluster centroids. The data must be in row-major format. + * [dim = n_clusters x n_features] + * @param[out] X_new X transformed in the new space. + * [dim = n_samples x n_features] + */ +void transform(raft::resources const& handle, + const kmeans::params& params, + raft::device_matrix_view X, + raft::device_matrix_view centroids, + raft::device_matrix_view X_new); /** * @} */ diff --git a/cpp/src/cluster/detail/connectivities.cuh b/cpp/src/cluster/detail/connectivities.cuh index ada424192..e61c9166f 100644 --- a/cpp/src/cluster/detail/connectivities.cuh +++ b/cpp/src/cluster/detail/connectivities.cuh @@ -16,7 +16,7 @@ #pragma once -#include "../../distance/distance.cuh" +#include "./kmeans_common.cuh" #include #include #include @@ -153,7 +153,11 @@ void pairwise_distances(const raft::resources& handle, // TODO: It would ultimately be nice if the MST could accept // dense inputs directly so we don't need to double the memory // usage to hand it a sparse array here. - distance::pairwise_distance(handle, X, X, data, m, m, n, metric); + auto X_view = raft::make_device_matrix_view(X, m, n); + + cuvs::cluster::kmeans::detail::pairwise_distance_kmeans( + handle, X_view, X_view, raft::make_device_matrix_view(data, m, m), metric); + // self-loops get max distance auto transform_in = thrust::make_zip_iterator(thrust::make_tuple(thrust::make_counting_iterator(0), data)); diff --git a/cpp/src/cluster/detail/kmeans.cuh b/cpp/src/cluster/detail/kmeans.cuh index e7d4bdf76..9b673bca3 100644 --- a/cpp/src/cluster/detail/kmeans.cuh +++ b/cpp/src/cluster/detail/kmeans.cuh @@ -198,7 +198,7 @@ void kmeansPlusPlus(raft::resources const& handle, // Output - pwd [n_trials x n_samples] auto pwd = distBuffer.view(); cuvs::cluster::kmeans::detail::pairwise_distance_kmeans( - handle, centroidCandidates.view(), X, pwd, workspace, metric); + handle, centroidCandidates.view(), X, pwd, metric); // Update nearest cluster distance for each centroid candidate // Note pwd and minDistBuf points to same buffer which currently holds pairwise distance values. @@ -1247,7 +1247,7 @@ void kmeans_transform(raft::resources const& handle, // calculate pairwise distance between cluster centroids and current batch // of input dataset pairwise_distance_kmeans( - handle, datasetView, centroids, pairwiseDistanceView, workspace, metric); + handle, datasetView, centroids, pairwiseDistanceView, metric); } } diff --git a/cpp/src/cluster/detail/kmeans_common.cuh b/cpp/src/cluster/detail/kmeans_common.cuh index 04c1a6802..eec71b5d2 100644 --- a/cpp/src/cluster/detail/kmeans_common.cuh +++ b/cpp/src/cluster/detail/kmeans_common.cuh @@ -293,7 +293,6 @@ void pairwise_distance_kmeans(raft::resources const& handle, raft::device_matrix_view X, raft::device_matrix_view centroids, raft::device_matrix_view pairwiseDistance, - rmm::device_uvector& workspace, cuvs::distance::DistanceType metric) { auto n_samples = X.extent(0); @@ -303,15 +302,23 @@ void pairwise_distance_kmeans(raft::resources const& handle, ASSERT(X.extent(1) == centroids.extent(1), "# features in dataset and centroids are different (must be same)"); - cuvs::distance::pairwise_distance(handle, - X.data_handle(), - centroids.data_handle(), - pairwiseDistance.data_handle(), - n_samples, - n_clusters, - n_features, - workspace, - metric); + if (metric == cuvs::distance::DistanceType::L2Expanded) { + cuvs::distance::distance(handle, X, centroids, pairwiseDistance); + } else if (metric == cuvs::distance::DistanceType::L2SqrtExpanded) { + cuvs::distance::distance(handle, X, centroids, pairwiseDistance); + } else { + RAFT_FAIL("kmeans requires L2Expanded or L2SqrtExpanded distance, have %i", metric); + } } // shuffle and randomly select 'n_samples_to_gather' from input 'in' and stores @@ -461,7 +468,7 @@ void minClusterAndDistanceCompute( // calculate pairwise distance between current tile of cluster centroids // and input dataset pairwise_distance_kmeans( - handle, datasetView, centroidsView, pairwiseDistanceView, workspace, metric); + handle, datasetView, centroidsView, pairwiseDistanceView, metric); // argmin reduction returning pair // calculates the closest centroid and the distance to the closest @@ -591,7 +598,7 @@ void minClusterDistanceCompute(raft::resources const& handle, // calculate pairwise distance between current tile of cluster centroids // and input dataset pairwise_distance_kmeans( - handle, datasetView, centroidsView, pairwiseDistanceView, workspace, metric); + handle, datasetView, centroidsView, pairwiseDistanceView, metric); raft::linalg::coalescedReduction(minClusterDistanceView.data_handle(), pairwiseDistanceView.data_handle(), diff --git a/cpp/src/cluster/detail/kmeans_mg.cuh b/cpp/src/cluster/detail/kmeans_mg.cuh new file mode 100644 index 000000000..b0f435502 --- /dev/null +++ b/cpp/src/cluster/detail/kmeans_mg.cuh @@ -0,0 +1,781 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "../kmeans.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace cuvs::cluster::kmeans::mg::detail { + +#define CUVS_LOG_KMEANS(handle, fmt, ...) \ + do { \ + bool isRoot = true; \ + if (raft::resource::comms_initialized(handle)) { \ + const auto& comm = raft::resource::get_comms(handle); \ + const int my_rank = comm.get_rank(); \ + isRoot = my_rank == 0; \ + } \ + if (isRoot) { RAFT_LOG_DEBUG(fmt, ##__VA_ARGS__); } \ + } while (0) + +template +struct KeyValueIndexOp { + __host__ __device__ __forceinline__ IndexT + operator()(const raft::KeyValuePair& a) const + { + return a.key; + } +}; + +#define KMEANS_COMM_ROOT 0 + +static cuvs::cluster::kmeans::params default_params; + +// Selects 'n_clusters' samples randomly from X +template +void initRandom(const raft::resources& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + raft::device_matrix_view centroids) +{ + const auto& comm = raft::resource::get_comms(handle); + cudaStream_t stream = raft::resource::get_cuda_stream(handle); + auto n_local_samples = X.extent(0); + auto n_features = X.extent(1); + auto n_clusters = params.n_clusters; + + const int my_rank = comm.get_rank(); + const int n_ranks = comm.get_size(); + + std::vector nCentroidsSampledByRank(n_ranks, 0); + std::vector nCentroidsElementsToReceiveFromRank(n_ranks, 0); + + const int nranks_reqd = std::min(n_ranks, n_clusters); + ASSERT(KMEANS_COMM_ROOT < nranks_reqd, "KMEANS_COMM_ROOT must be in [0, %d)\n", nranks_reqd); + + for (int rank = 0; rank < nranks_reqd; ++rank) { + int nCentroidsSampledInRank = n_clusters / nranks_reqd; + if (rank == KMEANS_COMM_ROOT) { + nCentroidsSampledInRank += n_clusters - nCentroidsSampledInRank * nranks_reqd; + } + nCentroidsSampledByRank[rank] = nCentroidsSampledInRank; + nCentroidsElementsToReceiveFromRank[rank] = nCentroidsSampledInRank * n_features; + } + + auto nCentroidsSampledInRank = nCentroidsSampledByRank[my_rank]; + ASSERT((IndexT)nCentroidsSampledInRank <= (IndexT)n_local_samples, + "# random samples requested from rank-%d is larger than the available " + "samples at the rank (requested is %lu, available is %lu)", + my_rank, + (size_t)nCentroidsSampledInRank, + (size_t)n_local_samples); + + auto centroidsSampledInRank = + raft::make_device_matrix(handle, nCentroidsSampledInRank, n_features); + + cuvs::cluster::kmeans::shuffle_and_gather( + handle, X, centroidsSampledInRank.view(), nCentroidsSampledInRank, params.rng_state.seed); + + std::vector displs(n_ranks); + thrust::exclusive_scan(thrust::host, + nCentroidsElementsToReceiveFromRank.begin(), + nCentroidsElementsToReceiveFromRank.end(), + displs.begin()); + + // gather centroids from all ranks + comm.allgatherv(centroidsSampledInRank.data_handle(), // sendbuff + centroids.data_handle(), // recvbuff + nCentroidsElementsToReceiveFromRank.data(), // recvcount + displs.data(), + stream); +} + +/* + * @brief Selects 'n_clusters' samples from X using scalable kmeans++ algorithm + * Scalable kmeans++ pseudocode + * 1: C = sample a point uniformly at random from X + * 2: psi = phi_X (C) + * 3: for O( log(psi) ) times do + * 4: C' = sample each point x in X independently with probability + * p_x = l * ( d^2(x, C) / phi_X (C) ) + * 5: C = C U C' + * 6: end for + * 7: For x in C, set w_x to be the number of points in X closer to x than any + * other point in C + * 8: Recluster the weighted points in C into k clusters + */ +template +void initKMeansPlusPlus(const raft::resources& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + raft::device_matrix_view centroidsRawData, + rmm::device_uvector& workspace) +{ + const auto& comm = raft::resource::get_comms(handle); + cudaStream_t stream = raft::resource::get_cuda_stream(handle); + const int my_rank = comm.get_rank(); + const int n_rank = comm.get_size(); + + auto n_samples = X.extent(0); + auto n_features = X.extent(1); + auto n_clusters = params.n_clusters; + auto metric = params.metric; + + raft::random::RngState rng(params.rng_state.seed, raft::random::GeneratorType::GenPhilox); + + // <<<< Step-1 >>> : C <- sample a point uniformly at random from X + // 1.1 - Select a rank r' at random from the available n_rank ranks with a + // probability of 1/n_rank [Note - with same seed all rank selects + // the same r' which avoids a call to comm] + // 1.2 - Rank r' samples a point uniformly at random from the local dataset + // X which will be used as the initial centroid for kmeans++ + // 1.3 - Communicate the initial centroid chosen by rank-r' to all other + // ranks + std::mt19937 gen(params.rng_state.seed); + std::uniform_int_distribution<> dis(0, n_rank - 1); + int rp = dis(gen); + + // buffer to flag the sample that is chosen as initial centroids + std::vector h_isSampleCentroid(n_samples); + std::fill(h_isSampleCentroid.begin(), h_isSampleCentroid.end(), 0); + + auto initialCentroid = raft::make_device_matrix(handle, 1, n_features); + CUVS_LOG_KMEANS( + handle, "@Rank-%d : KMeans|| : initial centroid is sampled at rank-%d\n", my_rank, rp); + + // 1.2 - Rank r' samples a point uniformly at random from the local dataset + // X which will be used as the initial centroid for kmeans++ + if (my_rank == rp) { + std::mt19937 gen(params.rng_state.seed); + std::uniform_int_distribution<> dis(0, n_samples - 1); + + int cIdx = dis(gen); + auto centroidsView = raft::make_device_matrix_view( + X.data_handle() + cIdx * n_features, 1, n_features); + + raft::copy( + initialCentroid.data_handle(), centroidsView.data_handle(), centroidsView.size(), stream); + + h_isSampleCentroid[cIdx] = 1; + } + + // 1.3 - Communicate the initial centroid chosen by rank-r' to all other ranks + comm.bcast(initialCentroid.data_handle(), initialCentroid.size(), rp, stream); + + // device buffer to flag the sample that is chosen as initial centroid + auto isSampleCentroid = raft::make_device_vector(handle, n_samples); + + raft::copy( + isSampleCentroid.data_handle(), h_isSampleCentroid.data(), isSampleCentroid.size(), stream); + + rmm::device_uvector centroidsBuf(0, stream); + + // reset buffer to store the chosen centroid + centroidsBuf.resize(initialCentroid.size(), stream); + raft::copy(centroidsBuf.begin(), initialCentroid.data_handle(), initialCentroid.size(), stream); + + auto potentialCentroids = raft::make_device_matrix_view( + centroidsBuf.data(), initialCentroid.extent(0), initialCentroid.extent(1)); + // <<< End of Step-1 >>> + + rmm::device_uvector L2NormBuf_OR_DistBuf(0, stream); + + // L2 norm of X: ||x||^2 + auto L2NormX = raft::make_device_vector(handle, n_samples); + if (metric == cuvs::distance::DistanceType::L2Expanded || + metric == cuvs::distance::DistanceType::L2SqrtExpanded) { + raft::linalg::rowNorm(L2NormX.data_handle(), + X.data_handle(), + X.extent(1), + X.extent(0), + raft::linalg::L2Norm, + true, + stream); + } + + auto minClusterDistance = raft::make_device_vector(handle, n_samples); + auto uniformRands = raft::make_device_vector(handle, n_samples); + + // <<< Step-2 >>>: psi <- phi_X (C) + auto clusterCost = raft::make_device_scalar(handle, 0); + + cuvs::cluster::kmeans::min_cluster_distance(handle, + X, + potentialCentroids, + minClusterDistance.view(), + L2NormX.view(), + L2NormBuf_OR_DistBuf, + params.metric, + params.batch_samples, + params.batch_centroids, + workspace); + + // compute partial cluster cost from the samples in rank + cuvs::cluster::kmeans::cluster_cost( + handle, + minClusterDistance.view(), + workspace, + clusterCost.view(), + cuda::proclaim_return_type( + [] __device__(const DataT& a, const DataT& b) { return a + b; })); + + // compute total cluster cost by accumulating the partial cost from all the + // ranks + comm.allreduce( + clusterCost.data_handle(), clusterCost.data_handle(), 1, raft::comms::op_t::SUM, stream); + + DataT psi = 0; + raft::copy(&psi, clusterCost.data_handle(), 1, stream); + + // <<< End of Step-2 >>> + + ASSERT(comm.sync_stream(stream) == raft::comms::status_t::SUCCESS, + "An error occurred in the distributed operation. This can result from " + "a failed rank"); + + // Scalable kmeans++ paper claims 8 rounds is sufficient + int niter = std::min(8, (int)ceil(log(psi))); + CUVS_LOG_KMEANS(handle, + "@Rank-%d:KMeans|| :phi - %f, max # of iterations for kmeans++ loop - " + "%d\n", + my_rank, + psi, + niter); + + // <<<< Step-3 >>> : for O( log(psi) ) times do + for (int iter = 0; iter < niter; ++iter) { + CUVS_LOG_KMEANS(handle, + "@Rank-%d:KMeans|| - Iteration %d: # potential centroids sampled - " + "%d\n", + my_rank, + iter, + potentialCentroids.extent(0)); + + cuvs::cluster::kmeans::min_cluster_distance(handle, + X, + potentialCentroids, + minClusterDistance.view(), + L2NormX.view(), + L2NormBuf_OR_DistBuf, + params.metric, + params.batch_samples, + params.batch_centroids, + workspace); + + cuvs::cluster::kmeans::cluster_cost( + handle, + minClusterDistance.view(), + workspace, + clusterCost.view(), + cuda::proclaim_return_type( + [] __device__(const DataT& a, const DataT& b) { return a + b; })); + comm.allreduce( + clusterCost.data_handle(), clusterCost.data_handle(), 1, raft::comms::op_t::SUM, stream); + raft::copy(&psi, clusterCost.data_handle(), 1, stream); + ASSERT(comm.sync_stream(stream) == raft::comms::status_t::SUCCESS, + "An error occurred in the distributed operation. This can result " + "from a failed rank"); + + // <<<< Step-4 >>> : Sample each point x in X independently and identify new + // potentialCentroids + raft::random::uniform( + handle, rng, uniformRands.data_handle(), uniformRands.extent(0), (DataT)0, (DataT)1); + cuvs::cluster::kmeans::SamplingOp select_op(psi, + params.oversampling_factor, + n_clusters, + uniformRands.data_handle(), + isSampleCentroid.data_handle()); + + rmm::device_uvector inRankCp(0, stream); + cuvs::cluster::kmeans::sample_centroids(handle, + X, + minClusterDistance.view(), + isSampleCentroid.view(), + select_op, + inRankCp, + workspace); + /// <<<< End of Step-4 >>>> + + int* nPtsSampledByRank; + RAFT_CUDA_TRY(cudaMallocHost(&nPtsSampledByRank, n_rank * sizeof(int))); + + /// <<<< Step-5 >>> : C = C U C' + // append the data in Cp from all ranks to the buffer holding the + // potentialCentroids + // RAFT_CUDA_TRY(cudaMemsetAsync(nPtsSampledByRank, 0, n_rank * sizeof(int), stream)); + std::fill(nPtsSampledByRank, nPtsSampledByRank + n_rank, 0); + nPtsSampledByRank[my_rank] = inRankCp.size() / n_features; + comm.allgather(&(nPtsSampledByRank[my_rank]), nPtsSampledByRank, 1, stream); + ASSERT(comm.sync_stream(stream) == raft::comms::status_t::SUCCESS, + "An error occurred in the distributed operation. This can result " + "from a failed rank"); + + auto nPtsSampled = + thrust::reduce(thrust::host, nPtsSampledByRank, nPtsSampledByRank + n_rank, 0); + + // gather centroids from all ranks + std::vector sizes(n_rank); + thrust::transform( + thrust::host, nPtsSampledByRank, nPtsSampledByRank + n_rank, sizes.begin(), [&](int val) { + return val * n_features; + }); + + RAFT_CUDA_TRY_NO_THROW(cudaFreeHost(nPtsSampledByRank)); + + std::vector displs(n_rank); + thrust::exclusive_scan(thrust::host, sizes.begin(), sizes.end(), displs.begin()); + + centroidsBuf.resize(centroidsBuf.size() + nPtsSampled * n_features, stream); + comm.allgatherv(inRankCp.data(), + centroidsBuf.end() - nPtsSampled * n_features, + sizes.data(), + displs.data(), + stream); + + auto tot_centroids = potentialCentroids.extent(0) + nPtsSampled; + potentialCentroids = + raft::make_device_matrix_view(centroidsBuf.data(), tot_centroids, n_features); + /// <<<< End of Step-5 >>> + } /// <<<< Step-6 >>> + + CUVS_LOG_KMEANS(handle, + "@Rank-%d:KMeans||: # potential centroids sampled - %d\n", + my_rank, + potentialCentroids.extent(0)); + + if ((IndexT)potentialCentroids.extent(0) > (IndexT)n_clusters) { + // <<< Step-7 >>>: For x in C, set w_x to be the number of pts closest to X + // temporary buffer to store the sample count per cluster, destructor + // releases the resource + + auto weight = raft::make_device_vector(handle, potentialCentroids.extent(0)); + + cuvs::cluster::kmeans::count_samples_in_cluster( + handle, params, X, L2NormX.view(), potentialCentroids, workspace, weight.view()); + + // merge the local histogram from all ranks + comm.allreduce(weight.data_handle(), // sendbuff + weight.data_handle(), // recvbuff + weight.size(), // count + raft::comms::op_t::SUM, + stream); + + // <<< end of Step-7 >>> + + // Step-8: Recluster the weighted points in C into k clusters + // Note - reclustering step is duplicated across all ranks and with the same + // seed they should generate the same potentialCentroids + auto const_centroids = raft::make_device_matrix_view( + potentialCentroids.data_handle(), potentialCentroids.extent(0), potentialCentroids.extent(1)); + cuvs::cluster::kmeans::init_plus_plus( + handle, params, const_centroids, centroidsRawData, workspace); + + auto inertia = raft::make_host_scalar(0); + auto n_iter = raft::make_host_scalar(0); + auto weight_view = + raft::make_device_vector_view(weight.data_handle(), weight.extent(0)); + cuvs::cluster::kmeans::params params_copy = params; + params_copy.rng_state = default_params.rng_state; + + cuvs::cluster::kmeans::fit_main(handle, + params_copy, + const_centroids, + weight_view, + centroidsRawData, + inertia.view(), + n_iter.view(), + workspace); + + } else if ((IndexT)potentialCentroids.extent(0) < (IndexT)n_clusters) { + // supplement with random + auto n_random_clusters = n_clusters - potentialCentroids.extent(0); + CUVS_LOG_KMEANS(handle, + "[Warning!] KMeans||: found fewer than %d centroids during " + "initialization (found %d centroids, remaining %d centroids will be " + "chosen randomly from input samples)\n", + n_clusters, + potentialCentroids.extent(0), + n_random_clusters); + + // generate `n_random_clusters` centroids + cuvs::cluster::kmeans::params rand_params = params; + rand_params.rng_state = default_params.rng_state; + rand_params.init = cuvs::cluster::kmeans::params::InitMethod::Random; + rand_params.n_clusters = n_random_clusters; + initRandom(handle, rand_params, X, centroidsRawData); + + // copy centroids generated during kmeans|| iteration to the buffer + raft::copy(centroidsRawData.data_handle() + n_random_clusters * n_features, + potentialCentroids.data_handle(), + potentialCentroids.size(), + stream); + + } else { + // found the required n_clusters + raft::copy(centroidsRawData.data_handle(), + potentialCentroids.data_handle(), + potentialCentroids.size(), + stream); + } +} + +template +void checkWeights(const raft::resources& handle, + rmm::device_uvector& workspace, + raft::device_vector_view weight) +{ + cudaStream_t stream = raft::resource::get_cuda_stream(handle); + rmm::device_scalar wt_aggr(stream); + + const auto& comm = raft::resource::get_comms(handle); + + auto n_samples = weight.extent(0); + size_t temp_storage_bytes = 0; + RAFT_CUDA_TRY(cub::DeviceReduce::Sum( + nullptr, temp_storage_bytes, weight.data_handle(), wt_aggr.data(), n_samples, stream)); + + workspace.resize(temp_storage_bytes, stream); + + RAFT_CUDA_TRY(cub::DeviceReduce::Sum( + workspace.data(), temp_storage_bytes, weight.data_handle(), wt_aggr.data(), n_samples, stream)); + + comm.allreduce(wt_aggr.data(), // sendbuff + wt_aggr.data(), // recvbuff + 1, // count + raft::comms::op_t::SUM, + stream); + DataT wt_sum = wt_aggr.value(stream); + raft::resource::sync_stream(handle, stream); + + if (wt_sum != n_samples) { + CUVS_LOG_KMEANS(handle, + "[Warning!] KMeans: normalizing the user provided sample weights to " + "sum up to %d samples", + n_samples); + + DataT scale = n_samples / wt_sum; + raft::linalg::unaryOp( + weight.data_handle(), + weight.data_handle(), + weight.size(), + cuda::proclaim_return_type([=] __device__(const DataT& wt) { return wt * scale; }), + stream); + } +} + +template +void fit(const raft::resources& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter, + rmm::device_uvector& workspace) +{ + const auto& comm = raft::resource::get_comms(handle); + cudaStream_t stream = raft::resource::get_cuda_stream(handle); + auto n_samples = X.extent(0); + auto n_features = X.extent(1); + auto n_clusters = params.n_clusters; + auto metric = params.metric; + + auto weight = raft::make_device_vector(handle, n_samples); + if (sample_weight) { + raft::copy(weight.data_handle(), sample_weight->data_handle(), n_samples, stream); + } else { + thrust::fill(raft::resource::get_thrust_policy(handle), + weight.data_handle(), + weight.data_handle() + weight.size(), + 1); + } + + // check if weights sum up to n_samples + checkWeights(handle, workspace, weight.view()); + + if (params.init == cuvs::cluster::kmeans::params::InitMethod::Random) { + // initializing with random samples from input dataset + CUVS_LOG_KMEANS(handle, + "KMeans.fit: initialize cluster centers by randomly choosing from the " + "input data.\n"); + initRandom(handle, params, X, centroids); + } else if (params.init == cuvs::cluster::kmeans::params::InitMethod::KMeansPlusPlus) { + // default method to initialize is kmeans++ + CUVS_LOG_KMEANS(handle, "KMeans.fit: initialize cluster centers using k-means++ algorithm.\n"); + initKMeansPlusPlus(handle, params, X, centroids, workspace); + } else if (params.init == cuvs::cluster::kmeans::params::InitMethod::Array) { + CUVS_LOG_KMEANS(handle, + "KMeans.fit: initialize cluster centers from the ndarray array input " + "passed to init argument.\n"); + + } else { + THROW("unknown initialization method to select initial centers"); + } + + // stores (key, value) pair corresponding to each sample where + // - key is the index of nearest cluster + // - value is the distance to the nearest cluster + auto minClusterAndDistance = + raft::make_device_vector, IndexT>(handle, n_samples); + + // temporary buffer to store L2 norm of centroids or distance matrix, + // destructor releases the resource + rmm::device_uvector L2NormBuf_OR_DistBuf(0, stream); + + // temporary buffer to store intermediate centroids, destructor releases the + // resource + auto newCentroids = raft::make_device_matrix(handle, n_clusters, n_features); + + // temporary buffer to store the weights per cluster, destructor releases + // the resource + auto wtInCluster = raft::make_device_vector(handle, n_clusters); + + // L2 norm of X: ||x||^2 + auto L2NormX = raft::make_device_vector(handle, n_samples); + if (metric == cuvs::distance::DistanceType::L2Expanded || + metric == cuvs::distance::DistanceType::L2SqrtExpanded) { + raft::linalg::rowNorm(L2NormX.data_handle(), + X.data_handle(), + X.extent(1), + X.extent(0), + raft::linalg::L2Norm, + true, + stream); + } + + DataT priorClusteringCost = 0; + for (n_iter[0] = 1; n_iter[0] <= params.max_iter; ++n_iter[0]) { + CUVS_LOG_KMEANS(handle, + "KMeans.fit: Iteration-%d: fitting the model using the initialize " + "cluster centers\n", + n_iter[0]); + + auto const_centroids = raft::make_device_matrix_view( + centroids.data_handle(), centroids.extent(0), centroids.extent(1)); + // computes minClusterAndDistance[0:n_samples) where + // minClusterAndDistance[i] is a pair where + // 'key' is index to an sample in 'centroids' (index of the nearest + // centroid) and 'value' is the distance between the sample 'X[i]' and the + // 'centroid[key]' + cuvs::cluster::kmeans::min_cluster_and_distance(handle, + X, + const_centroids, + minClusterAndDistance.view(), + L2NormX.view(), + L2NormBuf_OR_DistBuf, + params.metric, + params.batch_samples, + params.batch_centroids, + workspace); + + // Using TransformInputIteratorT to dereference an array of + // cub::KeyValuePair and converting them to just return the Key to be used + // in reduce_rows_by_key prims + KeyValueIndexOp conversion_op; + cub::TransformInputIterator, + raft::KeyValuePair*> + itr(minClusterAndDistance.data_handle(), conversion_op); + + workspace.resize(n_samples, stream); + + // Calculates weighted sum of all the samples assigned to cluster-i and + // store the result in newCentroids[i] + raft::linalg::reduce_rows_by_key((DataT*)X.data_handle(), + X.extent(1), + itr, + weight.data_handle(), + workspace.data(), + X.extent(0), + X.extent(1), + static_cast(n_clusters), + newCentroids.data_handle(), + stream); + + // Reduce weights by key to compute weight in each cluster + raft::linalg::reduce_cols_by_key(weight.data_handle(), + itr, + wtInCluster.data_handle(), + (IndexT)1, + (IndexT)weight.extent(0), + (IndexT)n_clusters, + stream); + + // merge the local histogram from all ranks + comm.allreduce(wtInCluster.data_handle(), // sendbuff + wtInCluster.data_handle(), // recvbuff + wtInCluster.size(), // count + raft::comms::op_t::SUM, + stream); + + // reduces newCentroids from all ranks + comm.allreduce(newCentroids.data_handle(), // sendbuff + newCentroids.data_handle(), // recvbuff + newCentroids.size(), // count + raft::comms::op_t::SUM, + stream); + + // Computes newCentroids[i] = newCentroids[i]/wtInCluster[i] where + // newCentroids[n_clusters x n_features] - 2D array, newCentroids[i] has + // sum of all the samples assigned to cluster-i + // wtInCluster[n_clusters] - 1D array, wtInCluster[i] contains # of + // samples in cluster-i. + // Note - when wtInCluster[i] is 0, newCentroid[i] is reset to 0 + + raft::linalg::matrixVectorOp( + newCentroids.data_handle(), + newCentroids.data_handle(), + wtInCluster.data_handle(), + newCentroids.extent(1), + newCentroids.extent(0), + true, + false, + cuda::proclaim_return_type([=] __device__(DataT mat, DataT vec) { + if (vec == 0) + return DataT(0); + else + return mat / vec; + }), + stream); + + // copy the centroids[i] to newCentroids[i] when wtInCluster[i] is 0 + cub::ArgIndexInputIterator itr_wt(wtInCluster.data_handle()); + raft::matrix::gather_if( + centroids.data_handle(), + centroids.extent(1), + centroids.extent(0), + itr_wt, + itr_wt, + wtInCluster.extent(0), + newCentroids.data_handle(), + cuda::proclaim_return_type( + [=] __device__(raft::KeyValuePair map) { // predicate + // copy when the # of samples in the cluster is 0 + if (map.value == 0) + return true; + else + return false; + }), + cuda::proclaim_return_type( + [=] __device__(raft::KeyValuePair map) { // map + return map.key; + }), + stream); + + // compute the squared norm between the newCentroids and the original + // centroids, destructor releases the resource + auto sqrdNorm = raft::make_device_scalar(handle, 1); + raft::linalg::mapThenSumReduce( + sqrdNorm.data_handle(), + newCentroids.size(), + cuda::proclaim_return_type([=] __device__(const DataT a, const DataT b) { + DataT diff = a - b; + return diff * diff; + }), + stream, + centroids.data_handle(), + newCentroids.data_handle()); + + DataT sqrdNormError = 0; + raft::copy(&sqrdNormError, sqrdNorm.data_handle(), sqrdNorm.size(), stream); + + raft::copy(centroids.data_handle(), newCentroids.data_handle(), newCentroids.size(), stream); + + bool done = false; + if (params.inertia_check) { + rmm::device_scalar> clusterCostD(stream); + + // calculate cluster cost phi_x(C) + cuvs::cluster::kmeans::cluster_cost( + handle, + minClusterAndDistance.view(), + workspace, + raft::make_device_scalar_view(clusterCostD.data()), + cuda::proclaim_return_type>( + [] __device__(const raft::KeyValuePair& a, + const raft::KeyValuePair& b) { + raft::KeyValuePair res; + res.key = 0; + res.value = a.value + b.value; + return res; + })); + + // Cluster cost phi_x(C) from all ranks + comm.allreduce(&(clusterCostD.data()->value), + &(clusterCostD.data()->value), + 1, + raft::comms::op_t::SUM, + stream); + + DataT curClusteringCost = 0; + raft::copy(&curClusteringCost, &(clusterCostD.data()->value), 1, stream); + + ASSERT(comm.sync_stream(stream) == raft::comms::status_t::SUCCESS, + "An error occurred in the distributed operation. This can result " + "from a failed rank"); + ASSERT(curClusteringCost != (DataT)0.0, + "Too few points and centroids being found is getting 0 cost from " + "centers\n"); + + if (n_iter[0] > 0) { + DataT delta = curClusteringCost / priorClusteringCost; + if (delta > 1 - params.tol) done = true; + } + priorClusteringCost = curClusteringCost; + } + + raft::resource::sync_stream(handle, stream); + if (sqrdNormError < params.tol) done = true; + + if (done) { + CUVS_LOG_KMEANS( + handle, "Threshold triggered after %d iterations. Terminating early.\n", n_iter[0]); + break; + } + } +} + +}; // namespace cuvs::cluster::kmeans::mg::detail diff --git a/cpp/src/cluster/kmeans.cuh b/cpp/src/cluster/kmeans.cuh index 1d12142da..5e6d756cc 100644 --- a/cpp/src/cluster/kmeans.cuh +++ b/cpp/src/cluster/kmeans.cuh @@ -17,10 +17,12 @@ #include "detail/kmeans.cuh" #include "detail/kmeans_auto_find_k.cuh" +#include "kmeans_mg.hpp" #include #include #include #include +#include #include #include @@ -94,8 +96,13 @@ void fit(raft::resources const& handle, raft::host_scalar_view inertia, raft::host_scalar_view n_iter) { - cuvs::cluster::kmeans::detail::kmeans_fit( - handle, params, X, sample_weight, centroids, inertia, n_iter); + // use the mnmg kmeans fit if we have comms initialize, single gpu otherwise + if (raft::resource::comms_initialized(handle)) { + cuvs::cluster::kmeans::mg::fit(handle, params, X, sample_weight, centroids, inertia, n_iter); + } else { + cuvs::cluster::kmeans::detail::kmeans_fit( + handle, params, X, sample_weight, centroids, inertia, n_iter); + } } /** diff --git a/cpp/src/cluster/kmeans_fit_double.cu b/cpp/src/cluster/kmeans_fit_double.cu new file mode 100644 index 000000000..4f193da09 --- /dev/null +++ b/cpp/src/cluster/kmeans_fit_double.cu @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kmeans.cuh" +#include + +namespace cuvs::cluster::kmeans { + +void fit(raft::resources const& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter) +{ + cuvs::cluster::kmeans::fit( + handle, params, X, sample_weight, centroids, inertia, n_iter); +} + +void fit(raft::resources const& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter) +{ + cuvs::cluster::kmeans::fit( + handle, params, X, sample_weight, centroids, inertia, n_iter); +} +} // namespace cuvs::cluster::kmeans diff --git a/cpp/src/cluster/kmeans_fit_float.cu b/cpp/src/cluster/kmeans_fit_float.cu index 89862a46c..3888ae492 100644 --- a/cpp/src/cluster/kmeans_fit_float.cu +++ b/cpp/src/cluster/kmeans_fit_float.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,4 +30,16 @@ void fit(raft::resources const& handle, cuvs::cluster::kmeans::fit( handle, params, X, sample_weight, centroids, inertia, n_iter); } + +void fit(raft::resources const& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter) +{ + cuvs::cluster::kmeans::fit( + handle, params, X, sample_weight, centroids, inertia, n_iter); +} } // namespace cuvs::cluster::kmeans diff --git a/cpp/src/cluster/kmeans_fit_mg_double.cu b/cpp/src/cluster/kmeans_fit_mg_double.cu new file mode 100644 index 000000000..15081dfba --- /dev/null +++ b/cpp/src/cluster/kmeans_fit_mg_double.cu @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "./detail/kmeans_mg.cuh" +#include "kmeans_mg.hpp" +#include + +namespace cuvs::cluster::kmeans::mg { + +void fit(raft::resources const& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter) +{ + rmm::device_uvector workspace(0, raft::resource::get_cuda_stream(handle)); + + cuvs::cluster::kmeans::mg::detail::fit( + handle, params, X, sample_weight, centroids, inertia, n_iter, workspace); +} + +void fit(raft::resources const& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter) +{ + rmm::device_uvector workspace(0, raft::resource::get_cuda_stream(handle)); + + cuvs::cluster::kmeans::mg::detail::fit( + handle, params, X, sample_weight, centroids, inertia, n_iter, workspace); +} +} // namespace cuvs::cluster::kmeans::mg diff --git a/cpp/src/cluster/kmeans_fit_mg_float.cu b/cpp/src/cluster/kmeans_fit_mg_float.cu new file mode 100644 index 000000000..54fbd6763 --- /dev/null +++ b/cpp/src/cluster/kmeans_fit_mg_float.cu @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "./detail/kmeans_mg.cuh" +#include "kmeans_mg.hpp" +#include + +namespace cuvs::cluster::kmeans::mg { + +void fit(raft::resources const& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter) +{ + rmm::device_uvector workspace(0, raft::resource::get_cuda_stream(handle)); + + cuvs::cluster::kmeans::mg::detail::fit( + handle, params, X, sample_weight, centroids, inertia, n_iter, workspace); +} + +void fit(raft::resources const& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter) +{ + rmm::device_uvector workspace(0, raft::resource::get_cuda_stream(handle)); + + cuvs::cluster::kmeans::mg::detail::fit( + handle, params, X, sample_weight, centroids, inertia, n_iter, workspace); +} +} // namespace cuvs::cluster::kmeans::mg diff --git a/cpp/src/cluster/kmeans_fit_predict_double.cu b/cpp/src/cluster/kmeans_fit_predict_double.cu new file mode 100644 index 000000000..28a1d70c0 --- /dev/null +++ b/cpp/src/cluster/kmeans_fit_predict_double.cu @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kmeans.cuh" +#include + +namespace cuvs::cluster::kmeans { + +void fit_predict(raft::resources const& handle, + const kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + std::optional> centroids, + raft::device_vector_view labels, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter) + +{ + cuvs::cluster::kmeans::fit_predict( + handle, params, X, sample_weight, centroids, labels, inertia, n_iter); +} + +void fit_predict(raft::resources const& handle, + const kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + std::optional> centroids, + raft::device_vector_view labels, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter) + +{ + cuvs::cluster::kmeans::fit_predict( + handle, params, X, sample_weight, centroids, labels, inertia, n_iter); +} +} // namespace cuvs::cluster::kmeans diff --git a/cpp/src/cluster/kmeans_fit_predict_float.cu b/cpp/src/cluster/kmeans_fit_predict_float.cu index f043f7624..be3652db5 100644 --- a/cpp/src/cluster/kmeans_fit_predict_float.cu +++ b/cpp/src/cluster/kmeans_fit_predict_float.cu @@ -32,4 +32,18 @@ void fit_predict(raft::resources const& handle, cuvs::cluster::kmeans::fit_predict( handle, params, X, sample_weight, centroids, labels, inertia, n_iter); } + +void fit_predict(raft::resources const& handle, + const kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + std::optional> centroids, + raft::device_vector_view labels, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter) + +{ + cuvs::cluster::kmeans::fit_predict( + handle, params, X, sample_weight, centroids, labels, inertia, n_iter); +} } // namespace cuvs::cluster::kmeans diff --git a/cpp/src/cluster/kmeans_mg.hpp b/cpp/src/cluster/kmeans_mg.hpp new file mode 100644 index 000000000..34f38314a --- /dev/null +++ b/cpp/src/cluster/kmeans_mg.hpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include +#include +#include +#include + +namespace cuvs::cluster::kmeans::mg { + +/** + * @brief MNMG kmeans fit + * + * @param[in] handle The raft handle. + * @param[in] params Parameters for KMeans model. + * @param[in] X Training instances to cluster. The data must + * be in row-major format. + * [dim = n_samples x n_features] + * @param[in] sample_weight Optional weights for each observation in X. + * [len = n_samples] + * @param[inout] centroids [in] When init is InitMethod::Array, use + * centroids as the initial cluster centers. + * [out] The generated centroids from the + * kmeans algorithm are stored at the address + * pointed by 'centroids'. + * [dim = n_clusters x n_features] + * @param[out] inertia Sum of squared distances of samples to their + * closest cluster center. + * @param[out] n_iter Number of iterations run. + */ +void fit(raft::resources const& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter); + +void fit(raft::resources const& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter); + +void fit(raft::resources const& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter); + +void fit(raft::resources const& handle, + const cuvs::cluster::kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::host_scalar_view inertia, + raft::host_scalar_view n_iter); +} // namespace cuvs::cluster::kmeans::mg diff --git a/cpp/src/cluster/kmeans_predict_double.cu b/cpp/src/cluster/kmeans_predict_double.cu new file mode 100644 index 000000000..1fcc393ac --- /dev/null +++ b/cpp/src/cluster/kmeans_predict_double.cu @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kmeans.cuh" +#include + +namespace cuvs::cluster::kmeans { + +void predict(raft::resources const& handle, + const kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::device_vector_view labels, + bool normalize_weight, + raft::host_scalar_view inertia) + +{ + cuvs::cluster::kmeans::predict( + handle, params, X, sample_weight, centroids, labels, normalize_weight, inertia); +} + +void predict(raft::resources const& handle, + const kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::device_vector_view labels, + bool normalize_weight, + raft::host_scalar_view inertia) + +{ + cuvs::cluster::kmeans::predict( + handle, params, X, sample_weight, centroids, labels, normalize_weight, inertia); +} +} // namespace cuvs::cluster::kmeans diff --git a/cpp/src/cluster/kmeans_predict_float.cu b/cpp/src/cluster/kmeans_predict_float.cu index d092152f1..b5f9f9e51 100644 --- a/cpp/src/cluster/kmeans_predict_float.cu +++ b/cpp/src/cluster/kmeans_predict_float.cu @@ -32,4 +32,17 @@ void predict(raft::resources const& handle, cuvs::cluster::kmeans::predict( handle, params, X, sample_weight, centroids, labels, normalize_weight, inertia); } +void predict(raft::resources const& handle, + const kmeans::params& params, + raft::device_matrix_view X, + std::optional> sample_weight, + raft::device_matrix_view centroids, + raft::device_vector_view labels, + bool normalize_weight, + raft::host_scalar_view inertia) + +{ + cuvs::cluster::kmeans::predict( + handle, params, X, sample_weight, centroids, labels, normalize_weight, inertia); +} } // namespace cuvs::cluster::kmeans diff --git a/cpp/src/cluster/kmeans_transform_double.cu b/cpp/src/cluster/kmeans_transform_double.cu new file mode 100644 index 000000000..4a026812e --- /dev/null +++ b/cpp/src/cluster/kmeans_transform_double.cu @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kmeans.cuh" +#include + +namespace cuvs::cluster::kmeans { + +void transform(raft::resources const& handle, + const kmeans::params& params, + raft::device_matrix_view X, + raft::device_matrix_view centroids, + raft::device_matrix_view X_new) + +{ + cuvs::cluster::kmeans::transform(handle, params, X, centroids, X_new); +} +} // namespace cuvs::cluster::kmeans diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh b/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh index 3107f0fa4..4fd194f6c 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh @@ -111,6 +111,8 @@ instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( cuvs::distance::detail::ops::l1_distance_op, int); instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( cuvs::distance::detail::ops::l2_exp_distance_op, int); +instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( + cuvs::distance::detail::ops::l2_exp_distance_op, int64_t); instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( cuvs::distance::detail::ops::l2_unexp_distance_op, int); instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( @@ -124,5 +126,8 @@ instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo( int64_t, cuvs::distance::kernels::detail::rbf_fin_op); +instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( + cuvs::distance::detail::ops::l2_exp_distance_op, int64_t); + #undef instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo #undef instantiate_cuvs_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py b/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py index 1bd51aef9..d0913833f 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py @@ -15,7 +15,7 @@ # NOTE: this template is not perfectly formatted. Use pre-commit to get # everything in shape again. header = """/* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -197,4 +197,30 @@ def arch_headers(archs): f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n") -print("src/distance/detail/pairwise_matrix/dispatch_rbf.cu") + + print("src/distance/detail/pairwise_matrix/dispatch_rbf.cu") + +# L2 with int64_t indices for kmeans code +int64_t_op_instances = [ + dict( + path_prefix="l2_expanded", + OpT="cuvs::distance::detail::ops::l2_exp_distance_op", + archs = [60, 80], + )] + +for op in int64_t_op_instances: + for dt in data_type_instances: + DataT, AccT, OutT, IdxT = (dt[k] for k in ["DataT", "AccT", "OutT", "IdxT"]); + + IdxT = "int64_t" + path = f"dispatch_{op['path_prefix']}_{DataT}_{AccT}_{OutT}_{IdxT}.cu" + with open(path, "w") as f: + f.write(header) + f.write(arch_headers(op["archs"])) + f.write(macro) + + OpT = op['OpT'] + FinOpT = "raft::identity_op" + f.write(f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n") + f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n") + print(f"src/distance/detail/pairwise_matrix/{path}") diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int64_t.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int64_t.cu new file mode 100644 index 000000000..756739158 --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int64_t.cu @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include "dispatch_sm80.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::l2_exp_distance_op, + double, + double, + double, + raft::identity_op, + int64_t); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int64_t.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int64_t.cu new file mode 100644 index 000000000..94910875c --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int64_t.cu @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include "dispatch_sm80.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::l2_exp_distance_op, float, float, float, raft::identity_op, int64_t); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_rbf.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_rbf.cu index 1cb0ed8ae..3c8f25109 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_rbf.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_rbf.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/distance/distance-ext.cuh b/cpp/src/distance/distance-ext.cuh index e7fa30f03..8ce7ef690 100644 --- a/cpp/src/distance/distance-ext.cuh +++ b/cpp/src/distance/distance-ext.cuh @@ -244,6 +244,15 @@ instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::Linf); instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::LpUnexpanded); instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::RusselRaoExpanded); +instantiate_cuvs_distance_distance( + cuvs::distance::DistanceType::L2Expanded, float, float, float, int64_t); +instantiate_cuvs_distance_distance( + cuvs::distance::DistanceType::L2Expanded, double, double, double, int64_t); +instantiate_cuvs_distance_distance( + cuvs::distance::DistanceType::L2SqrtExpanded, float, float, float, int64_t); +instantiate_cuvs_distance_distance( + cuvs::distance::DistanceType::L2SqrtExpanded, double, double, double, int64_t); + #undef instantiate_cuvs_distance_distance_by_algo #undef instantiate_cuvs_distance_distance diff --git a/cpp/src/distance/distance.cu b/cpp/src/distance/distance.cu index 72be93f10..c1d39f360 100644 --- a/cpp/src/distance/distance.cu +++ b/cpp/src/distance/distance.cu @@ -105,6 +105,16 @@ instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::Linf); instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::LpUnexpanded); instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::RusselRaoExpanded); +instantiate_cuvs_distance_distance( + cuvs::distance::DistanceType::L2Expanded, float, float, float, int64_t); +instantiate_cuvs_distance_distance( + cuvs::distance::DistanceType::L2Expanded, double, double, double, int64_t); + +instantiate_cuvs_distance_distance( + cuvs::distance::DistanceType::L2SqrtExpanded, float, float, float, int64_t); +instantiate_cuvs_distance_distance( + cuvs::distance::DistanceType::L2SqrtExpanded, double, double, double, int64_t); + #undef instantiate_cuvs_distance_distance_by_algo #undef instantiate_cuvs_distance_distance diff --git a/cpp/test/cluster/kmeans_mg.cu b/cpp/test/cluster/kmeans_mg.cu new file mode 100644 index 000000000..b9e06b2f1 --- /dev/null +++ b/cpp/test/cluster/kmeans_mg.cu @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include + +#include + +#define NCCLCHECK(cmd) \ + do { \ + ncclResult_t res = cmd; \ + if (res != ncclSuccess) { \ + printf("Failed, NCCL error %s:%d '%s'\n", __FILE__, __LINE__, ncclGetErrorString(res)); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +namespace cuvs { + +template +struct KmeansInputs { + int n_row; + int n_col; + int n_clusters; + T tol; + bool weighted; +}; + +template +class KmeansTest : public ::testing::TestWithParam> { + protected: + KmeansTest() + : stream(handle.get_stream()), + d_labels(0, stream), + d_labels_ref(0, stream), + d_centroids(0, stream), + d_sample_weight(0, stream) + { + } + + void basicTest() + { + testparams = ::testing::TestWithParam>::GetParam(); + ncclComm_t nccl_comm; + NCCLCHECK(ncclCommInitAll(&nccl_comm, 1, {0})); + raft::comms::build_comms_nccl_only(&handle, nccl_comm, 1, 0); + + int n_samples = testparams.n_row; + int n_features = testparams.n_col; + params.n_clusters = testparams.n_clusters; + params.tol = testparams.tol; + params.n_init = 5; + params.rng_state.seed = 1; + params.oversampling_factor = 1; + + auto stream = handle.get_stream(); + rmm::device_uvector X(n_samples * n_features, stream); + rmm::device_uvector labels(n_samples, stream); + + raft::random::make_blobs(handle, + X.data(), + labels.data(), + n_samples, + n_features, + params.n_clusters, + true, + nullptr, + nullptr, + 1.0, + false, + -10.0f, + 10.0f, + 1234ULL); + + d_labels.resize(n_samples, stream); + d_labels_ref.resize(n_samples, stream); + d_centroids.resize(params.n_clusters * n_features, stream); + + std::optional> d_sw = std::nullopt; + if (testparams.weighted) { + d_sample_weight.resize(n_samples, stream); + thrust::fill(thrust::cuda::par.on(stream), + d_sample_weight.data(), + d_sample_weight.data() + n_samples, + 1); + d_sw = raft::make_device_vector_view(d_sample_weight.data(), n_samples); + } + raft::copy(d_labels_ref.data(), labels.data(), n_samples, stream); + + handle.sync_stream(stream); + + T inertia = 0; + int n_iter = 0; + + auto X_view = raft::make_device_matrix_view(X.data(), n_samples, n_features); + auto centroids_view = + raft::make_device_matrix_view(d_centroids.data(), params.n_clusters, n_features); + + cuvs::cluster::kmeans::fit(handle, + params, + X_view, + d_sw, + centroids_view, + raft::make_host_scalar_view(&inertia), + raft::make_host_scalar_view(&n_iter)); + + cuvs::cluster::kmeans::predict( + handle, + params, + X_view, + d_sw, + d_centroids.data(), + raft::make_device_vector_view(d_labels.data(), n_samples), + true, + raft::make_host_scalar_view(&inertia)); + score = raft::stats::adjusted_rand_index( + d_labels_ref.data(), d_labels.data(), n_samples, raft::resource::get_cuda_stream(handle)); + handle.sync_stream(stream); + + if (score < 0.99) { + std::cout << "Expected: " << raft::arr2Str(d_labels_ref.data(), 25, "d_labels_ref", stream) + << std::endl; + std::cout << "Actual: " << raft::arr2Str(d_labels.data(), 25, "d_labels", stream) + << std::endl; + std::cout << "score = " << score << std::endl; + } + ncclCommDestroy(nccl_comm); + } + + void SetUp() override { basicTest(); } + + protected: + raft::handle_t handle; + cudaStream_t stream; + KmeansInputs testparams; + rmm::device_uvector d_labels; + rmm::device_uvector d_labels_ref; + rmm::device_uvector d_centroids; + rmm::device_uvector d_sample_weight; + double score; + cuvs::cluster::kmeans::params params; +}; + +const std::vector> inputsf2 = {{1000, 32, 5, 0.0001, true}, + {1000, 32, 5, 0.0001, false}, + {1000, 100, 20, 0.0001, true}, + {1000, 100, 20, 0.0001, false}, + {10000, 32, 10, 0.0001, true}, + {10000, 32, 10, 0.0001, false}, + {10000, 100, 50, 0.0001, true}, + {10000, 100, 50, 0.0001, false}}; + +const std::vector> inputsd2 = {{1000, 32, 5, 0.0001, true}, + {1000, 32, 5, 0.0001, false}, + {1000, 100, 20, 0.0001, true}, + {1000, 100, 20, 0.0001, false}, + {10000, 32, 10, 0.0001, true}, + {10000, 32, 10, 0.0001, false}, + {10000, 100, 50, 0.0001, true}, + {10000, 100, 50, 0.0001, false}}; + +typedef KmeansTest KmeansTestF; +TEST_P(KmeansTestF, Result) { ASSERT_TRUE(score >= 0.99); } + +typedef KmeansTest KmeansTestD; +TEST_P(KmeansTestD, Result) { ASSERT_TRUE(score >= 0.99); } + +INSTANTIATE_TEST_CASE_P(KmeansTests, KmeansTestF, ::testing::ValuesIn(inputsf2)); + +INSTANTIATE_TEST_CASE_P(KmeansTests, KmeansTestD, ::testing::ValuesIn(inputsd2)); + +} // end namespace cuvs